From d4a5791036b9f394e192e427d4f8dad442575d46 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Mon, 23 Oct 2023 14:32:16 +0200 Subject: [PATCH 001/113] Replaced intrinsic by builtin for popcount --- include/bmat8_impl.hpp | 14 +++++++------- include/epu_impl.hpp | 2 +- include/perm16_impl.hpp | 8 ++++---- list_intrin.txt | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index 124f9cd5..85fed966 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -276,10 +276,10 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { inline uint64_t BMat8::row_space_size_bitset() const { epu8 res0 {}, res1 {}; row_space_bitset(res0, res1); - return (_mm_popcnt_u64(_mm_extract_epi64(res0, 0)) + - _mm_popcnt_u64(_mm_extract_epi64(res1, 0)) + - _mm_popcnt_u64(_mm_extract_epi64(res0, 1)) + - _mm_popcnt_u64(_mm_extract_epi64(res1, 1))); + return (__builtin_popcountll(_mm_extract_epi64(res0, 0)) + + __builtin_popcountll(_mm_extract_epi64(res1, 0)) + + __builtin_popcountll(_mm_extract_epi64(res0, 1)) + + __builtin_popcountll(_mm_extract_epi64(res1, 1))); } inline uint64_t BMat8::row_space_size_incl1() const { @@ -292,7 +292,7 @@ inline uint64_t BMat8::row_space_size_incl1() const { orincl |= ((in | block) == block) & in; in = permuted(in, rotboth); } - res += _mm_popcnt_u64(_mm_movemask_epi8(block == orincl)); + res += __builtin_popcountll(_mm_movemask_epi8(block == orincl)); block += Epu8(16); } return res; @@ -308,7 +308,7 @@ inline uint64_t BMat8::row_space_size_incl() const { in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } - res += _mm_popcnt_u64(_mm_movemask_epi8(block == orincl)); + res += __builtin_popcountll(_mm_movemask_epi8(block == orincl)); block += Epu8(16); } return res; @@ -399,7 +399,7 @@ inline std::vector BMat8::rows() const { inline size_t BMat8::nr_rows() const { epu8 x = _mm_set_epi64x(_data, 0); - return _mm_popcnt_u64(_mm_movemask_epi8(x != epu8 {})); + return __builtin_popcountll(_mm_movemask_epi8(x != epu8 {})); } static HPCOMBI_CONSTEXPR epu8 diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 0a2d7f40..c085abff 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -404,7 +404,7 @@ inline epu8 eval16_cycle(epu8 v) { inline epu8 eval16_popcount(epu8 v) { epu8 res{}; for (size_t i = 0; i < 16; i++) { - res[i] = _mm_popcnt_u32(_mm_movemask_epi8(v == Epu8(uint8_t(i)))); + res[i] = __builtin_popcountl(_mm_movemask_epi8(v == Epu8(uint8_t(i)))); } return res; } diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 365f4f6f..5ea3af2a 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -76,7 +76,7 @@ inline uint32_t PTransf16::rank_ref() const { return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0)); } inline uint32_t PTransf16::rank() const { - return _mm_popcnt_u32(image_bitset()); + return __builtin_popcountl(image_bitset()); } inline epu8 PTransf16::fix_points_mask(bool complement) const { @@ -107,7 +107,7 @@ inline uint8_t PTransf16::largest_moved_point() const { } /** Returns the number of fix points of \c *this */ inline uint8_t PTransf16::nb_fix_points() const { - return _mm_popcnt_u32(fix_points_bitset()); + return __builtin_popcountl(fix_points_bitset()); } inline static HPCOMBI_CONSTEXPR uint8_t hilo_exchng_fun(uint8_t i) { @@ -307,7 +307,7 @@ inline uint8_t Perm16::nb_descents_ref() const { return res; } inline uint8_t Perm16::nb_descents() const { - return _mm_popcnt_u32(_mm_movemask_epi8(v < 
shifted_right(v))); + return __builtin_popcountl(_mm_movemask_epi8(v < shifted_right(v))); } inline uint8_t Perm16::nb_cycles_ref() const { @@ -338,7 +338,7 @@ inline epu8 Perm16::cycles_partition() const { inline uint8_t Perm16::nb_cycles_unroll() const { epu8 res = (epu8id == cycles_partition()); - return _mm_popcnt_u32(_mm_movemask_epi8(res)); + return __builtin_popcountl(_mm_movemask_epi8(res)); } inline bool Perm16::left_weak_leq_ref(Perm16 other) const { diff --git a/list_intrin.txt b/list_intrin.txt index df995b99..263f95d7 100644 --- a/list_intrin.txt +++ b/list_intrin.txt @@ -11,8 +11,8 @@ _mm_max_epu8;__m128i(),__m128i() _mm_min_epi8;__m128i(),__m128i() _mm_min_epu8;__m128i(),__m128i() _mm_movemask_epi8;__m128i() -_mm_popcnt_u32;1 -_mm_popcnt_u64;1 +__builtin_popcountl;1 +__builtin_popcountll;1 _mm_set_epi64x;1,1 _mm_shuffle_epi8;__m128i(),__m128i() _mm_slli_epi32;__m128i(),1 From 282ec03a625f9bc271b4ce2d5f06620aa206fef6 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Mon, 23 Oct 2023 14:32:48 +0200 Subject: [PATCH 002/113] Imported simde --- include/simde/arm/neon.h | 103271 +++++++++++++++++++++++++++ include/simde/arm/sve.h | 18581 +++++ include/simde/mips/msa.h | 10738 +++ include/simde/wasm/relaxed-simd.h | 17553 +++++ include/simde/wasm/simd128.h | 17043 +++++ include/simde/x86/avx.h | 33765 +++++++++ include/simde/x86/avx2.h | 39524 ++++++++++ include/simde/x86/avx512.h | 96026 +++++++++++++++++++++++++ include/simde/x86/clmul.h | 35915 ++++++++++ include/simde/x86/f16c.h | 34146 +++++++++ include/simde/x86/fma.h | 34500 +++++++++ include/simde/x86/gfni.h | 53764 ++++++++++++++ include/simde/x86/mmx.h | 10667 +++ include/simde/x86/sse.h | 15489 ++++ include/simde/x86/sse2.h | 23182 ++++++ include/simde/x86/sse3.h | 23700 ++++++ include/simde/x86/sse4.1.h | 27123 +++++++ include/simde/x86/sse4.2.h | 27507 +++++++ include/simde/x86/ssse3.h | 24760 +++++++ include/simde/x86/svml.h | 60311 ++++++++++++++++ include/simde/x86/xop.h | 43267 +++++++++++ 21 files changed, 750832 insertions(+) create mode 100644 include/simde/arm/neon.h create mode 100644 include/simde/arm/sve.h create mode 100644 include/simde/mips/msa.h create mode 100644 include/simde/wasm/relaxed-simd.h create mode 100644 include/simde/wasm/simd128.h create mode 100644 include/simde/x86/avx.h create mode 100644 include/simde/x86/avx2.h create mode 100644 include/simde/x86/avx512.h create mode 100644 include/simde/x86/clmul.h create mode 100644 include/simde/x86/f16c.h create mode 100644 include/simde/x86/fma.h create mode 100644 include/simde/x86/gfni.h create mode 100644 include/simde/x86/mmx.h create mode 100644 include/simde/x86/sse.h create mode 100644 include/simde/x86/sse2.h create mode 100644 include/simde/x86/sse3.h create mode 100644 include/simde/x86/sse4.1.h create mode 100644 include/simde/x86/sse4.2.h create mode 100644 include/simde/x86/ssse3.h create mode 100644 include/simde/x86/svml.h create mode 100644 include/simde/x86/xop.h diff --git a/include/simde/arm/neon.h b/include/simde/arm/neon.h new file mode 100644 index 00000000..4135fd3c --- /dev/null +++ b/include/simde/arm/neon.h @@ -0,0 +1,103271 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without 
limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_H) +#define SIMDE_ARM_NEON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_TYPES_H) +#define SIMDE_ARM_NEON_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && 
!defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION 
HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define 
HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) +# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef 
HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK 
+#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION +#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && 
\ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) 
HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic 
pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP 
+# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif 
HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define 
HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define 
HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) +#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if 
defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + 
HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/
+#if HEDLEY_INTEL_VERSION_CHECK(18,0,0)
+ #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)")
+#else
+ #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_
+#endif
+
+/* MSVC emits a diagnostic when we call a function (like
+ * simde_mm_set_epi32) while initializing a struct. We currently do
+ * this a *lot* in the tests. */
+#if \
+ defined(HEDLEY_MSVC_VERSION)
+ #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204))
+#else
+ #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_
+#endif
+
+/* This warning needs a lot of work. It is triggered if all you do is
+ * pass the value to memcpy/__builtin_memcpy, or if you initialize a
+ * member of the union, even if that member takes up the entire union.
+ * Last tested with clang-10, hopefully things will improve in the
+ * future; if clang fixes this I'd love to enable it. */
+#if \
+ HEDLEY_HAS_WARNING("-Wconditional-uninitialized")
+ #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"")
+#else
+ #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_
+#endif
+
+/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which
+ * is false. However, SIMDe uses these operations exclusively
+ * for things like _mm_cmpeq_ps, for which we really do want to check
+ * for equality (or inequality).
+ *
+ * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro
+ * which just wraps a check in some code to disable this diagnostic I'd
+ * be happy to accept it. */
+#if \
+ HEDLEY_HAS_WARNING("-Wfloat-equal") || \
+ HEDLEY_GCC_VERSION_CHECK(3,0,0)
+ #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+#else
+ #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
+#endif
+
+/* This is because we use HEDLEY_STATIC_ASSERT for static assertions.
+ * If Hedley can't find an implementation it will preprocess to
+ * nothing, which means there will be a trailing semi-colon. */
+#if HEDLEY_HAS_WARNING("-Wextra-semi")
+ #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"")
+#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus)
+ #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"")
+#else
+ #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_
+#endif
+
+/* We do use a few variadic macros, which technically aren't available
+ * until C99 and C++11, but every compiler I'm aware of has supported
+ * them for much longer. That said, usage is isolated to the test
+ * suite and compilers known to support them. */
+#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0)
+ #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic")
+ #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \
+ _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \
+ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"")
+ #else
+ #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"")
+ #endif
+#else
+ #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_
+#endif
+
+/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro
+ * before we can access certain SIMD intrinsics, but this diagnostic
+ * warns about it being a reserved name. It is a reserved name, but
+ * it's reserved for the compiler and we are using it to convey
+ * information to the compiler.
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
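+ *
+ * As a usage sketch (an editorial addition, not upstream text), a
+ * consumer could dispatch on the macros defined just below, e.g.:
+ *
+ *   #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
+ *     // take a 256-bit (or wider) code path
+ *   #else
+ *     // fall back to a 128-bit or scalar path
+ *   #endif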
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE)
+ #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_FMA_NATIVE)
+ #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512F_NATIVE)
+ #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512VL_NATIVE)
+ #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512VBMI_NATIVE)
+ #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE)
+ #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512BW_NATIVE)
+ #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512VNNI_NATIVE)
+ #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE)
+ #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512BF16_NATIVE)
+ #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512BITALG_NATIVE)
+ #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE)
+ #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512DQ_NATIVE)
+ #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_AVX512CD_NATIVE)
+ #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_GFNI_NATIVE)
+ #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_PCLMUL_NATIVE)
+ #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)
+ #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_X86_F16C_NATIVE)
+ #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES
+ #endif
+
+ #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+ #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES
+ #endif
+ #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+ #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES
+ #endif
+
+ #if !defined(SIMDE_ARM_SVE_NATIVE)
+ #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES
+ #endif
+
+ #if !defined(SIMDE_WASM_SIMD128_NATIVE)
+ #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES
+ #endif
+#endif
+
+/* Are floating point values stored using IEEE 754? Knowing
+ * this during preprocessing is a bit tricky, mostly because what
+ * we're curious about is how values are stored and not whether the
+ * implementation is fully conformant in terms of rounding, NaN
+ * handling, etc.
+ *
+ * For example, if you use -ffast-math or -Ofast on
+ * GCC or clang, IEEE 754 isn't strictly followed, therefore IEEE 754
+ * support is not advertised (by defining __STDC_IEC_559__).
+ *
+ * However, what we care about is whether it is safe to assume that
+ * floating point values are stored in IEEE 754 format, in which case
+ * we can provide faster implementations of some functions.
+ *
+ * Luckily every vaguely modern architecture I'm aware of uses IEEE 754,
+ * so we just assume IEEE 754 for now. There is a test which verifies
+ * this; if that test fails somewhere please let us know and we'll add
+ * an exception for that platform. Meanwhile, you can define
+ * SIMDE_NO_IEEE754_STORAGE.
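+ *
+ * As an illustration (an editorial sketch, not upstream text), this is
+ * the kind of bit-level inspection SIMDE_IEEE754_STORAGE is meant to
+ * make safe, and it is essentially what the fpclassify-style fallbacks
+ * further down rely on:
+ *
+ *   uint32_t bits;
+ *   memcpy(&bits, &some_float, sizeof bits);
+ *   // an all-ones exponent field means infinity or NaN
+ *   int inf_or_nan = (bits & UINT32_C(0x7F800000)) == UINT32_C(0x7F800000);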
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
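    /* Illustrative note (not SIMDe code): whichever of these branches is
     * selected, callers spell the wrapper the same way, e.g.
     *
     *   float r = simde_math_cbrtf(27.0f);   (expected to be roughly 3.0f)
     *
     * and the macro resolves to __builtin_cbrtf, std::cbrt or cbrtf
     * depending on what the detection above found. The same three-tier
     * dispatch is used by every wrapper in this file. */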
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
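  /* Illustrative note (the values are a sketch, not SIMDe test vectors):
   * the exp10/exp10f fallbacks a few lines above emulate the missing
   * builtin with pow, so on such compilers
   *
   *   simde_math_exp10(3.0)     expands to pow(10.0, (3.0))     ~= 1000.0
   *   simde_math_exp10f(-2.0f)  expands to powf(10.0f, (-2.0f)) ~= 0.01f
   *
   * which keeps results in line with the __builtin_exp10 path. */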
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
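  /* Illustrative note (not SIMDe code): one property worth remembering
   * about the hypot/hypotf wrappers above is that hypot avoids the
   * intermediate overflow a naive formulation hits, e.g.
   *
   *   simde_math_hypot(3e200, 4e200)                  ~= 5e200
   *   simde_math_sqrt(3e200 * 3e200 + 4e200 * 4e200)  == inf  (x*x overflows)
   *
   * so the wrapped form is expected to stay safe for large inputs on
   * every backend. */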
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
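     *
     * For reference, writing a for that constant and w = ln(1 - x^2),
     * the expression computed below is the usual logarithm-based
     * approximation (often attributed to Winitzki):
     *
     *   erfinv(x) ~= sign(x) * sqrt( sqrt( (2/(pi*a) + w/2)^2 - w/a )
     *                                - (2/(pi*a) + w/2) )
     *
     * so, for example, erfinv(0.0) comes out as 0.0 and the result for
     * -x is simply the negation of the result for x.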
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
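 *
 * As a rough usage sketch of the two flavours described above (the
 * struct and member names are purely illustrative, not part of SIMDe):
 *
 *   struct numeric_example { SIMDE_ALIGN_TO(16)       unsigned char u8[16]; };
 *   struct type_example    { SIMDE_ALIGN_LIKE(double) unsigned char u8[16]; };
 *
 * where the "_TO" form hardcodes a 16-byte boundary and the "_LIKE"
 * form borrows whatever alignment the compiler reports for double.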
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-f16.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_FLOAT16_H) +#define SIMDE_FLOAT16_H + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* Portable version which should work on pretty much any compiler. + * Obviously you can't rely on compiler support for things like + * conversion to/from 32-bit floats, so make sure you always use the + * functions and macros in this file! + * + * The portable implementations are (heavily) based on CC0 code by + * Fabian Giesen: (see also + * ). + * I have basically just modified it to get rid of some UB (lots of + * aliasing, right shifting a negative value), use fixed-width types, + * and work in C. */ +#define SIMDE_FLOAT16_API_PORTABLE 1 +/* _Float16, per C standard (TS 18661-3; + * ). 
*/ +#define SIMDE_FLOAT16_API_FLOAT16 2 +/* clang >= 6.0 supports __fp16 as an interchange format on all + * targets, but only allows you to use them for arguments and return + * values on targets which have defined an ABI. We get around the + * restriction by wrapping the __fp16 in a struct, but we can't do + * that on Arm since it would break compatibility with the NEON F16 + * functions. */ +#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 +/* This is basically __fp16 as specified by Arm, where arugments and + * return values are raw __fp16 values not structs. */ +#define SIMDE_FLOAT16_API_FP16 4 + +/* Choosing an implementation. This is a bit rough, but I don't have + * any ideas on how to improve it. If you do, patches are definitely + * welcome. */ +#if !defined(SIMDE_FLOAT16_API) + #if 0 && !defined(__cplusplus) + /* I haven't found a way to detect this. It seems like defining + * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then + * checking for defined(FLT16_MAX) should work, but both gcc and + * clang will define the constants even if _Float16 is not + * supported. Ideas welcome. */ + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 + #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(SIMDE_ARM_NEON_FP16) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 + #elif defined(__FLT16_MIN__) && (defined(__clang__) && (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI + #else + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE + #endif +#endif + +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 + typedef _Float16 simde_float16; + #define SIMDE_FLOAT16_C(value) value##f16 +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI + typedef struct { __fp16 value; } simde_float16; + #if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) + #else + #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) + #endif +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 + typedef __fp16 simde_float16; + #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE + typedef struct { uint16_t value; } simde_float16; +#else + #error No 16-bit floating point API. +#endif + +#if \ + defined(SIMDE_VECTOR_OPS) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) + #define SIMDE_FLOAT16_VECTOR +#endif + +/* Reinterpret -- you *generally* shouldn't need these, they're really + * intended for internal use. However, on x86 half-precision floats + * get stuffed into a __m128i/__m256i, so it may be useful. */ + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) + +#define SIMDE_NANHF simde_uint16_as_float16(0x7E00) +#define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) + +/* Conversion -- convert between single-precision and half-precision + * floats. 
*/ + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float16 +simde_float16_from_float32 (simde_float32 value) { + simde_float16 res; + + #if \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) + res = HEDLEY_STATIC_CAST(simde_float16, value); + #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) + res.value = HEDLEY_STATIC_CAST(__fp16, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint32_t f32u = simde_float32_as_uint32(value); + static const uint32_t f32u_infty = UINT32_C(255) << 23; + static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; + static const uint32_t denorm_magic = + ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; + uint16_t f16u; + + uint32_t sign = f32u & (UINT32_C(1) << 31); + f32u ^= sign; + + /* NOTE all the integer compares in this function cast the operands + * to signed values to help compilers vectorize to SSE2, which lacks + * unsigned comparison instructions. This is fine since all + * operands are below 0x80000000 (we clear the sign bit). */ + + if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ + f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ + } else { /* (De)normalized number or zero */ + if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ + /* use a magic value to align our 10 mantissa bits at the bottom of + * the float. as long as FP addition is round-to-nearest-even this + * just works. */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); + + /* and one integer subtract of the bias later, we have our final float! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); + } else { + uint32_t mant_odd = (f32u >> 13) & 1; + + /* update exponent, rounding bias part 1 */ + f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); + /* rounding bias part 2 */ + f32u += mant_odd; + /* take the bits! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); + } + } + + f16u |= sign >> 16; + res = simde_uint16_as_float16(f16u); + #endif + + return res; +} + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float32 +simde_float16_to_float32 (simde_float16 value) { + simde_float32 res; + + #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) + res = HEDLEY_STATIC_CAST(simde_float32, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint16_t half = simde_float16_as_uint16(value); + const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); + const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ + uint32_t f32u; + + f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ + uint32_t exp = shifted_exp & f32u; /* just the exponent */ + f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ + + /* handle exponent special cases */ + if (exp == shifted_exp) /* Inf/NaN? */ + f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ + else if (exp == 0) { /* Zero/Denormal? 
*/ + f32u += (1) << 23; /* extra exp adjust */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ + } + + f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ + res = simde_uint32_as_float32(f32u); + #endif + + return res; +} + +#ifdef SIMDE_FLOAT16_C + #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) +#else + #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_FLOAT16_H) */ +/* :: End simde/simde-f16.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_VECTOR_SUBSCRIPT) + #define SIMDE_ARM_NEON_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name SIMDE_VECTOR(Vector_Size) +#else + #define SIMDE_ARM_NEON_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name[(Vector_Size) / sizeof(Element_Type)] +#endif + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int8_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_int8x8_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int16_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_int16x4_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int32_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_int32x2_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int64_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_int64x1_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint8_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_uint8x8_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint16_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_uint16x4_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint32_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_uint32x2_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint64_t, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_uint64x1_private; + +typedef union { + #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + SIMDE_ARM_NEON_DECLARE_VECTOR(simde_float16, values, 8); + #else + simde_float16 values[4]; + #endif + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_float16x4_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(simde_float32, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_float32x2_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(simde_float64, values, 8); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 m64; + #endif +} simde_float64x1_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int8_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_int8x16_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int16_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_int16x8_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int32_t, 
values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + // SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_int32x4_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(int64_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_int64x2_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint8_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_uint8x16_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint16_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_uint16x8_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint32_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_uint32x4_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(uint64_t, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_uint64x2_private; + +typedef union { + #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + SIMDE_ARM_NEON_DECLARE_VECTOR(simde_float16, values, 16); + #else + simde_float16 values[8]; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128 m128; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_float16x8_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(simde_float32, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128 m128; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_float32x4_private; + +typedef union { + SIMDE_ARM_NEON_DECLARE_VECTOR(simde_float64, values, 16); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128d m128d; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t neon; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_float64x2_private; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32_t simde_float32_t; + + typedef int8x8_t simde_int8x8_t; + typedef int16x4_t simde_int16x4_t; + typedef int32x2_t simde_int32x2_t; + typedef int64x1_t simde_int64x1_t; + typedef uint8x8_t simde_uint8x8_t; + typedef uint16x4_t simde_uint16x4_t; + typedef uint32x2_t simde_uint32x2_t; + typedef uint64x1_t simde_uint64x1_t; + typedef float32x2_t simde_float32x2_t; + + typedef int8x16_t simde_int8x16_t; + typedef int16x8_t simde_int16x8_t; + typedef int32x4_t simde_int32x4_t; + typedef int64x2_t 
simde_int64x2_t; + typedef uint8x16_t simde_uint8x16_t; + typedef uint16x8_t simde_uint16x8_t; + typedef uint32x4_t simde_uint32x4_t; + typedef uint64x2_t simde_uint64x2_t; + typedef float32x4_t simde_float32x4_t; + + typedef int8x8x2_t simde_int8x8x2_t; + typedef int16x4x2_t simde_int16x4x2_t; + typedef int32x2x2_t simde_int32x2x2_t; + typedef int64x1x2_t simde_int64x1x2_t; + typedef uint8x8x2_t simde_uint8x8x2_t; + typedef uint16x4x2_t simde_uint16x4x2_t; + typedef uint32x2x2_t simde_uint32x2x2_t; + typedef uint64x1x2_t simde_uint64x1x2_t; + typedef float32x2x2_t simde_float32x2x2_t; + + typedef int8x16x2_t simde_int8x16x2_t; + typedef int16x8x2_t simde_int16x8x2_t; + typedef int32x4x2_t simde_int32x4x2_t; + typedef int64x2x2_t simde_int64x2x2_t; + typedef uint8x16x2_t simde_uint8x16x2_t; + typedef uint16x8x2_t simde_uint16x8x2_t; + typedef uint32x4x2_t simde_uint32x4x2_t; + typedef uint64x2x2_t simde_uint64x2x2_t; + typedef float32x4x2_t simde_float32x4x2_t; + + typedef int8x8x3_t simde_int8x8x3_t; + typedef int16x4x3_t simde_int16x4x3_t; + typedef int32x2x3_t simde_int32x2x3_t; + typedef int64x1x3_t simde_int64x1x3_t; + typedef uint8x8x3_t simde_uint8x8x3_t; + typedef uint16x4x3_t simde_uint16x4x3_t; + typedef uint32x2x3_t simde_uint32x2x3_t; + typedef uint64x1x3_t simde_uint64x1x3_t; + typedef float32x2x3_t simde_float32x2x3_t; + + typedef int8x16x3_t simde_int8x16x3_t; + typedef int16x8x3_t simde_int16x8x3_t; + typedef int32x4x3_t simde_int32x4x3_t; + typedef int64x2x3_t simde_int64x2x3_t; + typedef uint8x16x3_t simde_uint8x16x3_t; + typedef uint16x8x3_t simde_uint16x8x3_t; + typedef uint32x4x3_t simde_uint32x4x3_t; + typedef uint64x2x3_t simde_uint64x2x3_t; + typedef float32x4x3_t simde_float32x4x3_t; + + typedef int8x8x4_t simde_int8x8x4_t; + typedef int16x4x4_t simde_int16x4x4_t; + typedef int32x2x4_t simde_int32x2x4_t; + typedef int64x1x4_t simde_int64x1x4_t; + typedef uint8x8x4_t simde_uint8x8x4_t; + typedef uint16x4x4_t simde_uint16x4x4_t; + typedef uint32x2x4_t simde_uint32x2x4_t; + typedef uint64x1x4_t simde_uint64x1x4_t; + typedef float32x2x4_t simde_float32x2x4_t; + + typedef int8x16x4_t simde_int8x16x4_t; + typedef int16x8x4_t simde_int16x8x4_t; + typedef int32x4x4_t simde_int32x4x4_t; + typedef int64x2x4_t simde_int64x2x4_t; + typedef uint8x16x4_t simde_uint8x16x4_t; + typedef uint16x8x4_t simde_uint16x8x4_t; + typedef uint32x4x4_t simde_uint32x4x4_t; + typedef uint64x2x4_t simde_uint64x2x4_t; + typedef float32x4x4_t simde_float32x4x4_t; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64_t simde_float64_t; + typedef float64x1_t simde_float64x1_t; + typedef float64x2_t simde_float64x2_t; + typedef float64x1x2_t simde_float64x1x2_t; + typedef float64x2x2_t simde_float64x2x2_t; + typedef float64x1x3_t simde_float64x1x3_t; + typedef float64x2x3_t simde_float64x2x3_t; + typedef float64x1x4_t simde_float64x1x4_t; + typedef float64x2x4_t simde_float64x2x4_t; + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN + #endif + + #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 + typedef float16_t simde_float16_t; + typedef float16x4_t simde_float16x4_t; + typedef float16x8_t simde_float16x8_t; + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 + #endif +#elif (defined(SIMDE_X86_MMX_NATIVE) || defined(SIMDE_X86_SSE_NATIVE)) && defined(SIMDE_ARM_NEON_FORCE_NATIVE_TYPES) + #define 
SIMDE_ARM_NEON_NEED_PORTABLE_F32 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 + + #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN + + #if defined(SIMDE_X86_MMX_NATIVE) + typedef __m64 simde_int8x8_t; + typedef __m64 simde_int16x4_t; + typedef __m64 simde_int32x2_t; + typedef __m64 simde_int64x1_t; + typedef __m64 simde_uint8x8_t; + typedef __m64 simde_uint16x4_t; + typedef __m64 simde_uint32x2_t; + typedef __m64 simde_uint64x1_t; + typedef __m64 simde_float32x2_t; + typedef __m64 simde_float64x1_t; + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_I8X8 + #define SIMDE_ARM_NEON_NEED_PORTABLE_I16X4 + #define SIMDE_ARM_NEON_NEED_PORTABLE_I32X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X1 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U8X8 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U16X4 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U32X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X1 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F32X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1 + #endif + + #if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde_float32x4_t; + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_F32X4 + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde_int8x16_t; + typedef __m128i simde_int16x8_t; + typedef __m128i simde_int32x4_t; + typedef __m128i simde_int64x2_t; + typedef __m128i simde_uint8x16_t; + typedef __m128i simde_uint16x8_t; + typedef __m128i simde_uint32x4_t; + typedef __m128i simde_uint64x2_t; + typedef __m128d simde_float64x2_t; + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_I8X16 + #define SIMDE_ARM_NEON_NEED_PORTABLE_I16X8 + #define SIMDE_ARM_NEON_NEED_PORTABLE_I32X4 + #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U8X16 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U16X8 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U32X4 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 + #endif + + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 +#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_ARM_NEON_FORCE_NATIVE_TYPES) + #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 + + #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT + + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN + + typedef v128_t simde_int8x16_t; + typedef v128_t simde_int16x8_t; + typedef v128_t simde_int32x4_t; + typedef v128_t simde_int64x2_t; + typedef v128_t simde_uint8x16_t; + typedef v128_t simde_uint16x8_t; + typedef v128_t simde_uint32x4_t; + typedef v128_t simde_uint64x2_t; + typedef v128_t simde_float32x4_t; + typedef v128_t simde_float64x2_t; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 + + #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN + + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed char) simde_int8x16_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed short) simde_int16x8_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_int32x4_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) simde_uint8x16_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) simde_uint16x8_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) simde_uint32x4_t; + typedef 
SIMDE_POWER_ALTIVEC_VECTOR(float) simde_float32x4_t; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed long long) simde_int64x2_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) simde_uint64x2_t; + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde_float64x2_t; + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_I64X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_U64X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 + #endif + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 +#elif defined(SIMDE_VECTOR) + typedef simde_float32 simde_float32_t; + typedef simde_float64 simde_float64_t; + typedef int8_t simde_int8x8_t SIMDE_VECTOR(8); + typedef int16_t simde_int16x4_t SIMDE_VECTOR(8); + typedef int32_t simde_int32x2_t SIMDE_VECTOR(8); + typedef int64_t simde_int64x1_t SIMDE_VECTOR(8); + typedef uint8_t simde_uint8x8_t SIMDE_VECTOR(8); + typedef uint16_t simde_uint16x4_t SIMDE_VECTOR(8); + typedef uint32_t simde_uint32x2_t SIMDE_VECTOR(8); + typedef uint64_t simde_uint64x1_t SIMDE_VECTOR(8); + typedef simde_float32_t simde_float32x2_t SIMDE_VECTOR(8); + typedef simde_float64_t simde_float64x1_t SIMDE_VECTOR(8); + typedef int8_t simde_int8x16_t SIMDE_VECTOR(16); + typedef int16_t simde_int16x8_t SIMDE_VECTOR(16); + typedef int32_t simde_int32x4_t SIMDE_VECTOR(16); + typedef int64_t simde_int64x2_t SIMDE_VECTOR(16); + typedef uint8_t simde_uint8x16_t SIMDE_VECTOR(16); + typedef uint16_t simde_uint16x8_t SIMDE_VECTOR(16); + typedef uint32_t simde_uint32x4_t SIMDE_VECTOR(16); + typedef uint64_t simde_uint64x2_t SIMDE_VECTOR(16); + typedef simde_float32_t simde_float32x4_t SIMDE_VECTOR(16); + typedef simde_float64_t simde_float64x2_t SIMDE_VECTOR(16); + + #if defined(SIMDE_ARM_NEON_FP16) + typedef simde_float16 simde_float16_t; + typedef simde_float16_t simde_float16x4_t SIMDE_VECTOR(8); + typedef simde_float16_t simde_float16x8_t SIMDE_VECTOR(16); + #else + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 + #endif + + #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN +#else + #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64 + #define SIMDE_ARM_NEON_NEED_PORTABLE_64BIT + #define SIMDE_ARM_NEON_NEED_PORTABLE_128BIT + + #define SIMDE_ARM_NEON_NEED_PORTABLE_VXN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN + #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN +#endif + +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_int8x8_private simde_int8x8_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I16X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_int16x4_private simde_int16x4_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_int32x2_private simde_int32x2_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_int64x1_private simde_int64x1_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U8X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_uint8x8_private simde_uint8x8_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U16X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_uint16x4_private simde_uint16x4_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U32X2) || 
defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_uint32x2_private simde_uint32x2_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_uint64x1_private simde_uint64x1_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_float32x2_private simde_float32x2_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_64BIT) + typedef simde_float64x1_private simde_float64x1_t; +#endif + +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I8X16) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_int8x16_private simde_int8x16_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I16X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_int16x8_private simde_int16x8_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_int32x4_private simde_int32x4_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_I64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_int64x2_private simde_int64x2_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U8X16) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_uint8x16_private simde_uint8x16_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U16X8) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_uint16x8_private simde_uint16x8_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_uint32x4_private simde_uint32x4_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_U64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_uint64x2_private simde_uint64x2_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32X4) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_float32x4_private simde_float32x4_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X2) || defined(SIMDE_ARM_NEON_NEED_PORTABLE_128BIT) + typedef simde_float64x2_private simde_float64x2_t; +#endif + +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F16) + typedef simde_float16 simde_float16_t; + typedef simde_float16x4_private simde_float16x4_t; + typedef simde_float16x8_private simde_float16x8_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32) + typedef simde_float32 simde_float32_t; +#endif +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64) + typedef simde_float64 simde_float64_t; +#endif + +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_VXN) && !defined(SIMDE_BUG_INTEL_857088) + typedef struct simde_int8x8x2_t { + simde_int8x8_t val[2]; + } simde_int8x8x2_t; + typedef struct simde_int16x4x2_t { + simde_int16x4_t val[2]; + } simde_int16x4x2_t; + typedef struct simde_int32x2x2_t { + simde_int32x2_t val[2]; + } simde_int32x2x2_t; + typedef struct simde_int64x1x2_t { + simde_int64x1_t val[2]; + } simde_int64x1x2_t; + typedef struct simde_uint8x8x2_t { + simde_uint8x8_t val[2]; + } simde_uint8x8x2_t; + typedef struct simde_uint16x4x2_t { + simde_uint16x4_t val[2]; + } simde_uint16x4x2_t; + typedef struct simde_uint32x2x2_t { + simde_uint32x2_t val[2]; + } simde_uint32x2x2_t; + typedef struct simde_uint64x1x2_t { + simde_uint64x1_t val[2]; + } simde_uint64x1x2_t; + typedef struct simde_float32x2x2_t { + simde_float32x2_t val[2]; + } simde_float32x2x2_t; + + typedef struct simde_int8x16x2_t { + simde_int8x16_t val[2]; + } simde_int8x16x2_t; + typedef struct simde_int16x8x2_t { + simde_int16x8_t val[2]; + } 
simde_int16x8x2_t; + typedef struct simde_int32x4x2_t { + simde_int32x4_t val[2]; + } simde_int32x4x2_t; + typedef struct simde_int64x2x2_t { + simde_int64x2_t val[2]; + } simde_int64x2x2_t; + typedef struct simde_uint8x16x2_t { + simde_uint8x16_t val[2]; + } simde_uint8x16x2_t; + typedef struct simde_uint16x8x2_t { + simde_uint16x8_t val[2]; + } simde_uint16x8x2_t; + typedef struct simde_uint32x4x2_t { + simde_uint32x4_t val[2]; + } simde_uint32x4x2_t; + typedef struct simde_uint64x2x2_t { + simde_uint64x2_t val[2]; + } simde_uint64x2x2_t; + typedef struct simde_float32x4x2_t { + simde_float32x4_t val[2]; + } simde_float32x4x2_t; + + typedef struct simde_int8x8x3_t { + simde_int8x8_t val[3]; + } simde_int8x8x3_t; + typedef struct simde_int16x4x3_t { + simde_int16x4_t val[3]; + } simde_int16x4x3_t; + typedef struct simde_int32x2x3_t { + simde_int32x2_t val[3]; + } simde_int32x2x3_t; + typedef struct simde_int64x1x3_t { + simde_int64x1_t val[3]; + } simde_int64x1x3_t; + typedef struct simde_uint8x8x3_t { + simde_uint8x8_t val[3]; + } simde_uint8x8x3_t; + typedef struct simde_uint16x4x3_t { + simde_uint16x4_t val[3]; + } simde_uint16x4x3_t; + typedef struct simde_uint32x2x3_t { + simde_uint32x2_t val[3]; + } simde_uint32x2x3_t; + typedef struct simde_uint64x1x3_t { + simde_uint64x1_t val[3]; + } simde_uint64x1x3_t; + typedef struct simde_float32x2x3_t { + simde_float32x2_t val[3]; + } simde_float32x2x3_t; + + typedef struct simde_int8x16x3_t { + simde_int8x16_t val[3]; + } simde_int8x16x3_t; + typedef struct simde_int16x8x3_t { + simde_int16x8_t val[3]; + } simde_int16x8x3_t; + typedef struct simde_int32x4x3_t { + simde_int32x4_t val[3]; + } simde_int32x4x3_t; + typedef struct simde_int64x2x3_t { + simde_int64x2_t val[3]; + } simde_int64x2x3_t; + typedef struct simde_uint8x16x3_t { + simde_uint8x16_t val[3]; + } simde_uint8x16x3_t; + typedef struct simde_uint16x8x3_t { + simde_uint16x8_t val[3]; + } simde_uint16x8x3_t; + typedef struct simde_uint32x4x3_t { + simde_uint32x4_t val[3]; + } simde_uint32x4x3_t; + typedef struct simde_uint64x2x3_t { + simde_uint64x2_t val[3]; + } simde_uint64x2x3_t; + typedef struct simde_float32x4x3_t { + simde_float32x4_t val[3]; + } simde_float32x4x3_t; + + typedef struct simde_int8x8x4_t { + simde_int8x8_t val[4]; + } simde_int8x8x4_t; + typedef struct simde_int16x4x4_t { + simde_int16x4_t val[4]; + } simde_int16x4x4_t; + typedef struct simde_int32x2x4_t { + simde_int32x2_t val[4]; + } simde_int32x2x4_t; + typedef struct simde_int64x1x4_t { + simde_int64x1_t val[4]; + } simde_int64x1x4_t; + typedef struct simde_uint8x8x4_t { + simde_uint8x8_t val[4]; + } simde_uint8x8x4_t; + typedef struct simde_uint16x4x4_t { + simde_uint16x4_t val[4]; + } simde_uint16x4x4_t; + typedef struct simde_uint32x2x4_t { + simde_uint32x2_t val[4]; + } simde_uint32x2x4_t; + typedef struct simde_uint64x1x4_t { + simde_uint64x1_t val[4]; + } simde_uint64x1x4_t; + typedef struct simde_float32x2x4_t { + simde_float32x2_t val[4]; + } simde_float32x2x4_t; + + typedef struct simde_int8x16x4_t { + simde_int8x16_t val[4]; + } simde_int8x16x4_t; + typedef struct simde_int16x8x4_t { + simde_int16x8_t val[4]; + } simde_int16x8x4_t; + typedef struct simde_int32x4x4_t { + simde_int32x4_t val[4]; + } simde_int32x4x4_t; + typedef struct simde_int64x2x4_t { + simde_int64x2_t val[4]; + } simde_int64x2x4_t; + typedef struct simde_uint8x16x4_t { + simde_uint8x16_t val[4]; + } simde_uint8x16x4_t; + typedef struct simde_uint16x8x4_t { + simde_uint16x8_t val[4]; + } simde_uint16x8x4_t; + typedef struct 
simde_uint32x4x4_t { + simde_uint32x4_t val[4]; + } simde_uint32x4x4_t; + typedef struct simde_uint64x2x4_t { + simde_uint64x2_t val[4]; + } simde_uint64x2x4_t; + typedef struct simde_float32x4x4_t { + simde_float32x4_t val[4]; + } simde_float32x4x4_t; +#endif + +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN) + typedef struct simde_float64x1x2_t { + simde_float64x1_t val[2]; + } simde_float64x1x2_t; + + typedef struct simde_float64x1x3_t { + simde_float64x1_t val[3]; + } simde_float64x1x3_t; + + typedef struct simde_float64x1x4_t { + simde_float64x1_t val[4]; + } simde_float64x1x4_t; +#endif + +#if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN) + typedef struct simde_float64x2x2_t { + simde_float64x2_t val[2]; + } simde_float64x2x2_t; + + typedef struct simde_float64x2x3_t { + simde_float64x2_t val[3]; + } simde_float64x2x3_t; + + typedef struct simde_float64x2x4_t { + simde_float64x2_t val[4]; + } simde_float64x2x4_t; +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) || defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + typedef simde_float16_t float16_t; +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + typedef simde_float32_t float32_t; + + typedef simde_int8x8_t int8x8_t; + typedef simde_int16x4_t int16x4_t; + typedef simde_int32x2_t int32x2_t; + typedef simde_int64x1_t int64x1_t; + typedef simde_uint8x8_t uint8x8_t; + typedef simde_uint16x4_t uint16x4_t; + typedef simde_uint32x2_t uint32x2_t; + typedef simde_uint64x1_t uint64x1_t; + typedef simde_float32x2_t float32x2_t; + + typedef simde_int8x16_t int8x16_t; + typedef simde_int16x8_t int16x8_t; + typedef simde_int32x4_t int32x4_t; + typedef simde_int64x2_t int64x2_t; + typedef simde_uint8x16_t uint8x16_t; + typedef simde_uint16x8_t uint16x8_t; + typedef simde_uint32x4_t uint32x4_t; + typedef simde_uint64x2_t uint64x2_t; + typedef simde_float32x4_t float32x4_t; + + typedef simde_int8x8x2_t int8x8x2_t; + typedef simde_int16x4x2_t int16x4x2_t; + typedef simde_int32x2x2_t int32x2x2_t; + typedef simde_int64x1x2_t int64x1x2_t; + typedef simde_uint8x8x2_t uint8x8x2_t; + typedef simde_uint16x4x2_t uint16x4x2_t; + typedef simde_uint32x2x2_t uint32x2x2_t; + typedef simde_uint64x1x2_t uint64x1x2_t; + typedef simde_float32x2x2_t float32x2x2_t; + + typedef simde_int8x16x2_t int8x16x2_t; + typedef simde_int16x8x2_t int16x8x2_t; + typedef simde_int32x4x2_t int32x4x2_t; + typedef simde_int64x2x2_t int64x2x2_t; + typedef simde_uint8x16x2_t uint8x16x2_t; + typedef simde_uint16x8x2_t uint16x8x2_t; + typedef simde_uint32x4x2_t uint32x4x2_t; + typedef simde_uint64x2x2_t uint64x2x2_t; + typedef simde_float32x4x2_t float32x4x2_t; + + typedef simde_int8x8x3_t int8x8x3_t; + typedef simde_int16x4x3_t int16x4x3_t; + typedef simde_int32x2x3_t int32x2x3_t; + typedef simde_int64x1x3_t int64x1x3_t; + typedef simde_uint8x8x3_t uint8x8x3_t; + typedef simde_uint16x4x3_t uint16x4x3_t; + typedef simde_uint32x2x3_t uint32x2x3_t; + typedef simde_uint64x1x3_t uint64x1x3_t; + typedef simde_float32x2x3_t float32x2x3_t; + + typedef simde_int8x16x3_t int8x16x3_t; + typedef simde_int16x8x3_t int16x8x3_t; + typedef simde_int32x4x3_t int32x4x3_t; + typedef simde_int64x2x3_t int64x2x3_t; + typedef simde_uint8x16x3_t uint8x16x3_t; + typedef simde_uint16x8x3_t uint16x8x3_t; + typedef simde_uint32x4x3_t uint32x4x3_t; + typedef simde_uint64x2x3_t uint64x2x3_t; + typedef simde_float32x4x3_t float32x4x3_t; + + typedef simde_int8x8x4_t int8x8x4_t; + typedef simde_int16x4x4_t int16x4x4_t; + typedef simde_int32x2x4_t int32x2x4_t; + typedef 
simde_int64x1x4_t int64x1x4_t; + typedef simde_uint8x8x4_t uint8x8x4_t; + typedef simde_uint16x4x4_t uint16x4x4_t; + typedef simde_uint32x2x4_t uint32x2x4_t; + typedef simde_uint64x1x4_t uint64x1x4_t; + typedef simde_float32x2x4_t float32x2x4_t; + + typedef simde_int8x16x4_t int8x16x4_t; + typedef simde_int16x8x4_t int16x8x4_t; + typedef simde_int32x4x4_t int32x4x4_t; + typedef simde_int64x2x4_t int64x2x4_t; + typedef simde_uint8x16x4_t uint8x16x4_t; + typedef simde_uint16x8x4_t uint16x8x4_t; + typedef simde_uint32x4x4_t uint32x4x4_t; + typedef simde_uint64x2x4_t uint64x2x4_t; + typedef simde_float32x4x4_t float32x4x4_t; +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + typedef simde_float64_t float64_t; + typedef simde_float16x4_t float16x4_t; + typedef simde_float64x1_t float64x1_t; + typedef simde_float16x8_t float16x8_t; + typedef simde_float64x2_t float64x2_t; + typedef simde_float64x1x2_t float64x1x2_t; + typedef simde_float64x2x2_t float64x2x2_t; + typedef simde_float64x1x3_t float64x1x3_t; + typedef simde_float64x2x3_t float64x2x3_t; + typedef simde_float64x1x4_t float64x1x4_t; + typedef simde_float64x2x4_t float64x2x4_t; +#endif + +#if defined(SIMDE_X86_MMX_NATIVE) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x8_to_m64, __m64, simde_int8x8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x4_to_m64, __m64, simde_int16x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x2_to_m64, __m64, simde_int32x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x1_to_m64, __m64, simde_int64x1_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x8_to_m64, __m64, simde_uint8x8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x4_to_m64, __m64, simde_uint16x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x2_to_m64, __m64, simde_uint32x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x1_to_m64, __m64, simde_uint64x1_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x2_to_m64, __m64, simde_float32x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x1_to_m64, __m64, simde_float64x1_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x8_from_m64, simde_int8x8_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x4_from_m64, simde_int16x4_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x2_from_m64, simde_int32x2_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x1_from_m64, simde_int64x1_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x8_from_m64, simde_uint8x8_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x4_from_m64, simde_uint16x4_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x2_from_m64, simde_uint32x2_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x1_from_m64, simde_uint64x1_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x2_from_m64, simde_float32x2_t, __m64) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x1_from_m64, simde_float64x1_t, __m64) +#endif +#if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_to_m128, __m128, simde_float32x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_from_m128, simde_float32x4_t, __m128) +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_to_m128i, __m128i, simde_int8x16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_to_m128i, __m128i, simde_int16x8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_to_m128i, __m128i, simde_int32x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_to_m128i, __m128i, simde_int64x2_t) + 
SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_to_m128i, __m128i, simde_uint8x16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_to_m128i, __m128i, simde_uint16x8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_to_m128i, __m128i, simde_uint32x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_to_m128i, __m128i, simde_uint64x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_to_m128d, __m128d, simde_float64x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_from_m128i, simde_int8x16_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_from_m128i, simde_int16x8_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_from_m128i, simde_int32x4_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_from_m128i, simde_int64x2_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_from_m128i, simde_uint8x16_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_from_m128i, simde_uint16x8_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_from_m128i, simde_uint32x4_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_from_m128i, simde_uint64x2_t, __m128i) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_from_m128d, simde_float64x2_t, __m128d) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_to_v128, v128_t, simde_int8x16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_to_v128, v128_t, simde_int16x8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_to_v128, v128_t, simde_int32x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_to_v128, v128_t, simde_int64x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_to_v128, v128_t, simde_uint8x16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_to_v128, v128_t, simde_uint16x8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_to_v128, v128_t, simde_uint32x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_to_v128, v128_t, simde_uint64x2_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_to_v128, v128_t, simde_float32x4_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_to_v128, v128_t, simde_float64x2_t) + + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int8x16_from_v128, simde_int8x16_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int16x8_from_v128, simde_int16x8_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int32x4_from_v128, simde_int32x4_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_int64x2_from_v128, simde_int64x2_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint8x16_from_v128, simde_uint8x16_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16x8_from_v128, simde_uint16x8_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32x4_from_v128, simde_uint32x4_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64x2_from_v128, simde_uint64x2_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32x4_from_v128, simde_float32x4_t, v128_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64x2_from_v128, simde_float64x2_t, v128_t) +#endif + +#define SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_##T##_to_private, simde_##T##_private, simde_##T##_t) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_##T##_from_private, simde_##T##_t, simde_##T##_private) \ + +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x8) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x4) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x2) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int64x1) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint8x8) 
+SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint16x4) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint32x2) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint64x1) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float16x4) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x2) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x1) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int8x16) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int16x8) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int32x4) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(int64x2) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint8x16) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint16x8) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint32x4) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(uint64x2) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float16x8) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float32x4) +SIMDE_ARM_NEON_TYPE_DEFINE_CONVERSIONS_(float64x2) + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_TYPES_H */ +/* :: End simde/arm/neon/types.h :: */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/aba.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ABA_H) +#define SIMDE_ARM_NEON_ABA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/abd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ABD_H) +#define SIMDE_ARM_NEON_ABD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/abs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ABS_H) +#define SIMDE_ARM_NEON_ABS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vabsd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,1,0)) + return vabsd_s64(a); + #else + return a < 0 ? -a : a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabsd_s64 + #define vabsd_s64(a) simde_vabsd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vabs_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabs_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabs_f32 + #define vabs_f32(a) simde_vabs_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vabs_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabs_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabs_f64 + #define vabs_f64(a) simde_vabs_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vabs_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabs_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_abs_pi8(a_.m64); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabs_s8 + #define vabs_s8(a) simde_vabs_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vabs_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabs_s16(a); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_abs_pi16(a_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabs_s16 + #define vabs_s16(a) simde_vabs_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vabs_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabs_s32(a); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_abs_pi32(a_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabs_s32 + #define vabs_s32(a) simde_vabs_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vabs_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabs_s64(a); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabs_s64 + #define vabs_s64(a) simde_vabs_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vabsq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabsq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_abs(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_abs(a_.v128); + #elif defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + r_.m128 = _mm_and_ps(_mm_set1_ps(mask_), a_.m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_fabsf(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabsq_f32 + #define vabsq_f32(a) simde_vabsq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vabsq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabsq_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_abs(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + r_.m128d = _mm_and_pd(_mm_set1_pd(mask_), a_.m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_fabs(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabsq_f64 + #define vabsq_f64(a) simde_vabsq_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vabsq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabsq_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_abs(a); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_abs_epi8(a_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_min_epu8(a_.m128i, _mm_sub_epi8(_mm_setzero_si128(), a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_abs(a_.v128); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT8_C(0)); + r_.values = 
(-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabsq_s8 + #define vabsq_s8(a) simde_vabsq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabsq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabsq_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_abs(a); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_abs_epi16(a_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_max_epi16(a_.m128i, _mm_sub_epi16(_mm_setzero_si128(), a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_abs(a_.v128); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT16_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabsq_s16 + #define vabsq_s16(a) simde_vabsq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabsq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabsq_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_abs(a); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_abs_epi32(a_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a_.m128i); + r_.m128i = _mm_sub_epi32(_mm_xor_si128(a_.m128i, m), m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_abs(a_.v128); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT32_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? 
-a_.values[i] : a_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabsq_s32 + #define vabsq_s32(a) simde_vabsq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vabsq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabsq_s64(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_s64(vreinterpretq_u64_s64(vshrq_n_s64(a, 63)), vsubq_s64(vdupq_n_s64(0), a), a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_abs(a); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_abs_epi64(a_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a_.m128i, 0xF5), 31); + r_.m128i = _mm_sub_epi64(_mm_xor_si128(a_.m128i, m), m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_abs(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < INT64_C(0)); + r_.values = (-a_.values & m) | (a_.values & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] < 0 ? -a_.values[i] : a_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabsq_s64 + #define vabsq_s64(a) simde_vabsq_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABS_H) */ +/* :: End simde/arm/neon/abs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/subl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_SUBL_H) +#define SIMDE_ARM_NEON_SUBL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/sub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SUB_H) +#define SIMDE_ARM_NEON_SUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vsubd_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubd_s64(a, b); + #else + return a - b; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubd_s64 + #define vsubd_s64(a, b) simde_vsubd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vsubd_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubd_u64(a, b); + #else + return a - b; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubd_u64 + #define vsubd_u64(a, b) simde_vsubd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vsub_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_f32 + #define vsub_f32(a, b) simde_vsub_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vsub_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsub_f64(a, b); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsub_f64 + #define vsub_f64(a, b) simde_vsub_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vsub_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_sub_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_s8 + #define vsub_s8(a, b) simde_vsub_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vsub_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_sub_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_s16 + #define vsub_s16(a, b) simde_vsub_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vsub_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_sub_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_s32 + #define vsub_s32(a, b) simde_vsub_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vsub_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsubd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_s64 + #define vsub_s64(a, b) simde_vsub_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_u8(a, 
b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_sub_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_u8 + #define vsub_u8(a, b) simde_vsub_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_sub_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_u16 + #define vsub_u16(a, b) simde_vsub_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_sub_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_u32 + #define vsub_u32(a, b) simde_vsub_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vsub_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsub_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsubd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsub_u64 + #define vsub_u64(a, b) simde_vsub_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vsubq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_; + a_ = a; + b_ = b; + r_ = vec_sub(a_, b_); + return r_; + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_sub_ps(a_.m128, b_.m128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = 
wasm_f32x4_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_f32 + #define vsubq_f32(a, b) simde_vsubq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vsubq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_sub(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_sub_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubq_f64 + #define vsubq_f64(a, b) simde_vsubq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_s8 + #define vsubq_s8(a, b) simde_vsubq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_s16 + #define vsubq_s16(a, b) simde_vsubq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s32(a, b); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_s32 + #define vsubq_s32(a, b) simde_vsubq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_sub(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsubd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_s64 + #define vsubq_s64(a, b) simde_vsubq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_u8 + #define vsubq_u8(a, b) simde_vsubq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_u16 + #define vsubq_u16(a, b) simde_vsubq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_u32(a, b); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_u32 + #define vsubq_u32(a, b) simde_vsubq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_sub(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsubd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubq_u64 + #define vsubq_u64(a, b) simde_vsubq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUB_H) */ +/* :: End simde/arm/neon/sub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/movl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MOVL_H) +#define SIMDE_ARM_NEON_MOVL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/combine.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_COMBINE_H) +#define SIMDE_ARM_NEON_COMBINE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcombine_f32(simde_float32x2_t low, simde_float32x2_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_f32(low, high); + #else + simde_float32x4_private r_; + simde_float32x2_private + low_ = simde_float32x2_to_private(low), + high_ = simde_float32x2_to_private(high); + + /* Note: __builtin_shufflevector can have the output contain + * twice the number of elements, __builtin_shuffle cannot. + * Using SIMDE_SHUFFLE_VECTOR_ here would not work. 
*/ + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_f32 + #define vcombine_f32(low, high) simde_vcombine_f32((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcombine_f64(simde_float64x1_t low, simde_float64x1_t high) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcombine_f64(low, high); + #else + simde_float64x2_private r_; + simde_float64x1_private + low_ = simde_float64x1_to_private(low), + high_ = simde_float64x1_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcombine_f64 + #define vcombine_f64(low, high) simde_vcombine_f64((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vcombine_s8(simde_int8x8_t low, simde_int8x8_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_s8(low, high); + #else + simde_int8x16_private r_; + simde_int8x8_private + low_ = simde_int8x8_to_private(low), + high_ = simde_int8x8_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_s8 + #define vcombine_s8(low, high) simde_vcombine_s8((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcombine_s16(simde_int16x4_t low, simde_int16x4_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_s16(low, high); + #else + simde_int16x8_private r_; + simde_int16x4_private + low_ = simde_int16x4_to_private(low), + high_ = simde_int16x4_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_s16 + #define vcombine_s16(low, high) simde_vcombine_s16((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcombine_s32(simde_int32x2_t low, simde_int32x2_t 
high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_s32(low, high); + #else + simde_int32x4_private r_; + simde_int32x2_private + low_ = simde_int32x2_to_private(low), + high_ = simde_int32x2_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_s32 + #define vcombine_s32(low, high) simde_vcombine_s32((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcombine_s64(simde_int64x1_t low, simde_int64x1_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_s64(low, high); + #else + simde_int64x2_private r_; + simde_int64x1_private + low_ = simde_int64x1_to_private(low), + high_ = simde_int64x1_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_s64 + #define vcombine_s64(low, high) simde_vcombine_s64((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcombine_u8(simde_uint8x8_t low, simde_uint8x8_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_u8(low, high); + #else + simde_uint8x16_private r_; + simde_uint8x8_private + low_ = simde_uint8x8_to_private(low), + high_ = simde_uint8x8_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_u8 + #define vcombine_u8(low, high) simde_vcombine_u8((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcombine_u16(simde_uint16x4_t low, simde_uint16x4_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_u16(low, high); + #else + simde_uint16x8_private r_; + simde_uint16x4_private + low_ = simde_uint16x4_to_private(low), + high_ = simde_uint16x4_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3, 4, 5, 6, 7); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_u16 + #define vcombine_u16(low, high) simde_vcombine_u16((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcombine_u32(simde_uint32x2_t low, simde_uint32x2_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_u32(low, high); + #else + simde_uint32x4_private r_; + simde_uint32x2_private + low_ = simde_uint32x2_to_private(low), + high_ = simde_uint32x2_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1, 2, 3); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_u32 + #define vcombine_u32(low, high) simde_vcombine_u32((low), (high)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcombine_u64(simde_uint64x1_t low, simde_uint64x1_t high) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcombine_u64(low, high); + #else + simde_uint64x2_private r_; + simde_uint64x1_private + low_ = simde_uint64x1_to_private(low), + high_ = simde_uint64x1_to_private(high); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(low_.values, high_.values, 0, 1); + #else + size_t halfway = (sizeof(r_.values) / sizeof(r_.values[0])) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + r_.values[i] = low_.values[i]; + r_.values[i + halfway] = high_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcombine_u64 + #define vcombine_u64(low, high) simde_vcombine_u64((low), (high)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_COMBINE_H) */ +/* :: End simde/arm/neon/combine.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmovl_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovl_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int16x8_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(simde_vcombine_s8(a, a)); + + r_.v128 = wasm_i16x8_extend_low_i8x16(a_.v128); + + return simde_int16x8_from_private(r_); + #else + simde_int16x8_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovl_s8 + #define vmovl_s8(a) simde_vmovl_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmovl_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovl_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int32x4_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(simde_vcombine_s16(a, a)); + + r_.v128 = 
wasm_i32x4_extend_low_i16x8(a_.v128); + + return simde_int32x4_from_private(r_); + #else + simde_int32x4_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovl_s16 + #define vmovl_s16(a) simde_vmovl_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmovl_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovl_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int64x2_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(simde_vcombine_s32(a, a)); + + r_.v128 = wasm_i64x2_extend_low_i32x4(a_.v128); + + return simde_int64x2_from_private(r_); + #else + simde_int64x2_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovl_s32 + #define vmovl_s32(a) simde_vmovl_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmovl_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovl_u8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint16x8_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vcombine_u8(a, a)); + + r_.v128 = wasm_u16x8_extend_low_u8x16(a_.v128); + + return simde_uint16x8_from_private(r_); + #else + simde_uint16x8_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovl_u8 + #define vmovl_u8(a) simde_vmovl_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmovl_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovl_u16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint32x4_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vcombine_u16(a, a)); + + r_.v128 = wasm_u32x4_extend_low_u16x8(a_.v128); + + return simde_uint32x4_from_private(r_); + #else + simde_uint32x4_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovl_u16 + #define vmovl_u16(a) simde_vmovl_u16((a)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmovl_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovl_u32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint64x2_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vcombine_u32(a, a)); + + r_.v128 = wasm_u64x2_extend_low_u32x4(a_.v128); + + return simde_uint64x2_from_private(r_); + #else + simde_uint64x2_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovl_u32 + #define vmovl_u32(a) simde_vmovl_u32((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MOVL_H) */ +/* :: End simde/arm/neon/movl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/movl_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) +#define SIMDE_ARM_NEON_MOVL_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/get_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_GET_HIGH_H) +#define SIMDE_ARM_NEON_GET_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vget_high_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_f32(a); + #else + simde_float32x2_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_f32 + #define vget_high_f32(a) simde_vget_high_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vget_high_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vget_high_f64(a); + #else + simde_float64x1_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vget_high_f64 + #define vget_high_f64(a) simde_vget_high_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vget_high_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_s8(a); + #else + simde_int8x8_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_s8 + #define vget_high_s8(a) simde_vget_high_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vget_high_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_s16(a); + #else + simde_int16x4_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7); + 
#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_s16 + #define vget_high_s16(a) simde_vget_high_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vget_high_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_s32(a); + #else + simde_int32x2_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_s32 + #define vget_high_s32(a) simde_vget_high_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vget_high_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_s64(a); + #else + simde_int64x1_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_s64 + #define vget_high_s64(a) simde_vget_high_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vget_high_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_u8(a); + #else + simde_uint8x8_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 8, 9, 10, 11, 12, 13, 14,15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_u8 + #define vget_high_u8(a) simde_vget_high_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vget_high_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_u16(a); + #else + simde_uint16x4_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 4, 5, 6, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_u16 + #define vget_high_u16(a) simde_vget_high_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vget_high_u32(simde_uint32x4_t a) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_u32(a); + #else + simde_uint32x2_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_u32 + #define vget_high_u32(a) simde_vget_high_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vget_high_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_high_u64(a); + #else + simde_uint64x1_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i + (sizeof(r_.values) / sizeof(r_.values[0]))]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_high_u64 + #define vget_high_u64(a) simde_vget_high_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_GET_HIGH_H) */ +/* :: End simde/arm/neon/get_high.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmovl_high_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovl_high_s8(a); + #else + return simde_vmovl_s8(simde_vget_high_s8(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovl_high_s8 + #define vmovl_high_s8(a) simde_vmovl_high_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmovl_high_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovl_high_s16(a); + #else + return simde_vmovl_s16(simde_vget_high_s16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovl_high_s16 + #define vmovl_high_s16(a) simde_vmovl_high_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmovl_high_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovl_high_s32(a); + #else + return simde_vmovl_s32(simde_vget_high_s32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovl_high_s32 + #define vmovl_high_s32(a) simde_vmovl_high_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmovl_high_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovl_high_u8(a); + #else + return simde_vmovl_u8(simde_vget_high_u8(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovl_high_u8 + #define vmovl_high_u8(a) simde_vmovl_high_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmovl_high_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovl_high_u16(a); + #else + return simde_vmovl_u16(simde_vget_high_u16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovl_high_u16 + #define vmovl_high_u16(a) simde_vmovl_high_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t 
+simde_vmovl_high_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovl_high_u32(a); + #else + return simde_vmovl_u32(simde_vget_high_u32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovl_high_u32 + #define vmovl_high_u32(a) simde_vmovl_high_u32((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MOVL_HIGH_H) */ +/* :: End simde/arm/neon/movl_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubl_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubl_s8(a, b); + #else + return simde_vsubq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubl_s8 + #define vsubl_s8(a, b) simde_vsubl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubl_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubl_s16(a, b); + #else + return simde_vsubq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubl_s16 + #define vsubl_s16(a, b) simde_vsubl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubl_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubl_s32(a, b); + #else + return simde_vsubq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubl_s32 + #define vsubl_s32(a, b) simde_vsubl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubl_u8(a, b); + #else + return simde_vsubq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubl_u8 + #define vsubl_u8(a, b) simde_vsubl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubl_u16(a, b); + #else + return simde_vsubq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubl_u16 + #define vsubl_u16(a, b) simde_vsubl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubl_u32(a, b); + #else + return simde_vsubq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubl_u32 + #define vsubl_u32(a, b) simde_vsubl_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBL_H) */ +/* :: End simde/arm/neon/subl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/movn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the 
rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MOVN_H) +#define SIMDE_ARM_NEON_MOVN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmovn_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovn_s16(a); + #else + simde_int8x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i]); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovn_s16 + #define vmovn_s16(a) simde_vmovn_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmovn_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovn_s32(a); + #else + simde_int16x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovn_s32 + #define vmovn_s32(a) simde_vmovn_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmovn_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovn_s64(a); + #else + simde_int32x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovn_s64 + #define vmovn_s64(a) simde_vmovn_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmovn_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovn_u16(a); + #else + simde_uint8x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i]); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovn_u16 + #define vmovn_u16(a) simde_vmovn_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmovn_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovn_u32(a); + #else + simde_uint16x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovn_u32 + #define vmovn_u32(a) simde_vmovn_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmovn_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmovn_u64(a); + #else + simde_uint32x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmovn_u64 + #define vmovn_u64(a) simde_vmovn_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MOVN_H) */ +/* :: End simde/arm/neon/movn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/reinterpret.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + + +#if !defined(SIMDE_ARM_NEON_REINTERPRET_H) +#define SIMDE_ARM_NEON_REINTERPRET_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_s16(a); + #else + simde_int8x8_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_s16 + #define vreinterpret_s8_s16 simde_vreinterpret_s8_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_s32(a); + #else + simde_int8x8_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_s32 + #define vreinterpret_s8_s32 simde_vreinterpret_s8_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_s64(a); + #else + simde_int8x8_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_s64 + #define vreinterpret_s8_s64 simde_vreinterpret_s8_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_u8(a); + #else + simde_int8x8_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_u8 + #define vreinterpret_s8_u8 simde_vreinterpret_s8_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_u16(a); + #else + simde_int8x8_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_u16 + #define vreinterpret_s8_u16 simde_vreinterpret_s8_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_u32(a); + #else + simde_int8x8_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_u32 + #define vreinterpret_s8_u32 simde_vreinterpret_s8_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_u64(a); + 
#else + simde_int8x8_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_u64 + #define vreinterpret_s8_u64 simde_vreinterpret_s8_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s8_f32(a); + #else + simde_int8x8_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_f32 + #define vreinterpret_s8_f32 simde_vreinterpret_s8_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vreinterpret_s8_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_s8_f64(a); + #else + simde_int8x8_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s8_f64 + #define vreinterpret_s8_f64 simde_vreinterpret_s8_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_s16(a); + #else + simde_int8x16_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_s16 + #define vreinterpretq_s8_s16(a) simde_vreinterpretq_s8_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_s32(a); + #else + simde_int8x16_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_s32 + #define vreinterpretq_s8_s32(a) simde_vreinterpretq_s8_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_s64(a); + #else + simde_int8x16_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_s64 + #define vreinterpretq_s8_s64(a) simde_vreinterpretq_s8_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_u8(a); + #else + simde_int8x16_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_u8 + #define vreinterpretq_s8_u8(a) simde_vreinterpretq_s8_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
return vreinterpretq_s8_u16(a); + #else + simde_int8x16_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_u16 + #define vreinterpretq_s8_u16(a) simde_vreinterpretq_s8_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_u32(a); + #else + simde_int8x16_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_u32 + #define vreinterpretq_s8_u32(a) simde_vreinterpretq_s8_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_u64(a); + #else + simde_int8x16_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_u64 + #define vreinterpretq_s8_u64(a) simde_vreinterpretq_s8_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s8_f32(a); + #else + simde_int8x16_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_f32 + #define vreinterpretq_s8_f32(a) simde_vreinterpretq_s8_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vreinterpretq_s8_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s8_f64(a); + #else + simde_int8x16_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s8_f64 + #define vreinterpretq_s8_f64(a) simde_vreinterpretq_s8_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_s8(a); + #else + simde_int16x4_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_s8 + #define vreinterpret_s16_s8 simde_vreinterpret_s16_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_s32(a); + #else + simde_int16x4_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_s32 + #define vreinterpret_s16_s32 simde_vreinterpret_s16_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t 
+simde_vreinterpret_s16_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_s64(a); + #else + simde_int16x4_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_s64 + #define vreinterpret_s16_s64 simde_vreinterpret_s16_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_u8(a); + #else + simde_int16x4_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_u8 + #define vreinterpret_s16_u8 simde_vreinterpret_s16_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_u16(a); + #else + simde_int16x4_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_u16 + #define vreinterpret_s16_u16 simde_vreinterpret_s16_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_u32(a); + #else + simde_int16x4_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_u32 + #define vreinterpret_s16_u32 simde_vreinterpret_s16_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_u64(a); + #else + simde_int16x4_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_u64 + #define vreinterpret_s16_u64 simde_vreinterpret_s16_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s16_f32(a); + #else + simde_int16x4_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_f32 + #define vreinterpret_s16_f32 simde_vreinterpret_s16_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vreinterpret_s16_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_s16_f64(a); + #else + simde_int16x4_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s16_f64 + #define vreinterpret_s16_f64 simde_vreinterpret_s16_f64 +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_s8(a); + #else + simde_int16x8_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_s8 + #define vreinterpretq_s16_s8(a) simde_vreinterpretq_s16_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_s32(a); + #else + simde_int16x8_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_s32 + #define vreinterpretq_s16_s32(a) simde_vreinterpretq_s16_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_s64(a); + #else + simde_int16x8_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_s64 + #define vreinterpretq_s16_s64(a) simde_vreinterpretq_s16_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_u8(a); + #else + simde_int16x8_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_u8 + #define vreinterpretq_s16_u8(a) simde_vreinterpretq_s16_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_u16(a); + #else + simde_int16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_u16 + #define vreinterpretq_s16_u16(a) simde_vreinterpretq_s16_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_u32(a); + #else + simde_int16x8_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_u32 + #define vreinterpretq_s16_u32(a) simde_vreinterpretq_s16_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_u64(a); + #else + simde_int16x8_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef 
vreinterpretq_s16_u64 + #define vreinterpretq_s16_u64(a) simde_vreinterpretq_s16_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s16_f32(a); + #else + simde_int16x8_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_f32 + #define vreinterpretq_s16_f32(a) simde_vreinterpretq_s16_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vreinterpretq_s16_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s16_f64(a); + #else + simde_int16x8_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s16_f64 + #define vreinterpretq_s16_f64(a) simde_vreinterpretq_s16_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_s8(a); + #else + simde_int32x2_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_s8 + #define vreinterpret_s32_s8 simde_vreinterpret_s32_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_s16(a); + #else + simde_int32x2_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_s16 + #define vreinterpret_s32_s16 simde_vreinterpret_s32_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_s64(a); + #else + simde_int32x2_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_s64 + #define vreinterpret_s32_s64 simde_vreinterpret_s32_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_u8(a); + #else + simde_int32x2_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_u8 + #define vreinterpret_s32_u8 simde_vreinterpret_s32_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_u16(a); + #else + simde_int32x2_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_u16 + #define vreinterpret_s32_u16 simde_vreinterpret_s32_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_u32(a); + #else + simde_int32x2_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_u32 + #define vreinterpret_s32_u32 simde_vreinterpret_s32_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_u64(a); + #else + simde_int32x2_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_u64 + #define vreinterpret_s32_u64 simde_vreinterpret_s32_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s32_f32(a); + #else + simde_int32x2_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_f32 + #define vreinterpret_s32_f32 simde_vreinterpret_s32_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vreinterpret_s32_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_s32_f64(a); + #else + simde_int32x2_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s32_f64 + #define vreinterpret_s32_f64 simde_vreinterpret_s32_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_s8(a); + #else + simde_int32x4_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_s8 + #define vreinterpretq_s32_s8(a) simde_vreinterpretq_s32_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_s16(a); + #else + simde_int32x4_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_s16 + #define vreinterpretq_s32_s16(a) simde_vreinterpretq_s32_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_s64(a); + #else + simde_int32x4_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + 
return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_s64 + #define vreinterpretq_s32_s64(a) simde_vreinterpretq_s32_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_u8(a); + #else + simde_int32x4_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_u8 + #define vreinterpretq_s32_u8(a) simde_vreinterpretq_s32_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_u16(a); + #else + simde_int32x4_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_u16 + #define vreinterpretq_s32_u16(a) simde_vreinterpretq_s32_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_u32(a); + #else + simde_int32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_u32 + #define vreinterpretq_s32_u32(a) simde_vreinterpretq_s32_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_u64(a); + #else + simde_int32x4_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_u64 + #define vreinterpretq_s32_u64(a) simde_vreinterpretq_s32_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s32_f32(a); + #else + simde_int32x4_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_f32 + #define vreinterpretq_s32_f32(a) simde_vreinterpretq_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vreinterpretq_s32_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s32_f64(a); + #else + simde_int32x4_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s32_f64 + #define vreinterpretq_s32_f64(a) simde_vreinterpretq_s32_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_s8(a); + #else + 
simde_int64x1_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_s8 + #define vreinterpret_s64_s8 simde_vreinterpret_s64_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_s16(a); + #else + simde_int64x1_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_s16 + #define vreinterpret_s64_s16 simde_vreinterpret_s64_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_s32(a); + #else + simde_int64x1_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_s32 + #define vreinterpret_s64_s32 simde_vreinterpret_s64_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_u8(a); + #else + simde_int64x1_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_u8 + #define vreinterpret_s64_u8 simde_vreinterpret_s64_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_u16(a); + #else + simde_int64x1_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_u16 + #define vreinterpret_s64_u16 simde_vreinterpret_s64_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_u32(a); + #else + simde_int64x1_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_u32 + #define vreinterpret_s64_u32 simde_vreinterpret_s64_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_s64_u64(a); + #else + simde_int64x1_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_u64 + #define vreinterpret_s64_u64 simde_vreinterpret_s64_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vreinterpret_s64_f32(a); + #else + simde_int64x1_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_f32 + #define vreinterpret_s64_f32 simde_vreinterpret_s64_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vreinterpret_s64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_s64_f64(a); + #else + simde_int64x1_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_s64_f64 + #define vreinterpret_s64_f64 simde_vreinterpret_s64_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s8(a); + #else + simde_int64x2_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_s8 + #define vreinterpretq_s64_s8(a) simde_vreinterpretq_s64_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s16(a); + #else + simde_int64x2_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_s16 + #define vreinterpretq_s64_s16(a) simde_vreinterpretq_s64_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(a); + #else + simde_int64x2_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_s32 + #define vreinterpretq_s64_s32(a) simde_vreinterpretq_s64_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_u8(a); + #else + simde_int64x2_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_u8 + #define vreinterpretq_s64_u8(a) simde_vreinterpretq_s64_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_u16(a); + #else + simde_int64x2_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_u16 + #define vreinterpretq_s64_u16(a) simde_vreinterpretq_s64_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t 
+simde_vreinterpretq_s64_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_u32(a); + #else + simde_int64x2_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_u32 + #define vreinterpretq_s64_u32(a) simde_vreinterpretq_s64_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_u64(a); + #else + simde_int64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_u64 + #define vreinterpretq_s64_u64(a) simde_vreinterpretq_s64_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_f32(a); + #else + simde_int64x2_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_f32 + #define vreinterpretq_s64_f32(a) simde_vreinterpretq_s64_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vreinterpretq_s64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde_int64x2_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_s64_f64 + #define vreinterpretq_s64_f64(a) simde_vreinterpretq_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_s8(a); + #else + simde_uint8x8_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_s8 + #define vreinterpret_u8_s8 simde_vreinterpret_u8_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_s16(a); + #else + simde_uint8x8_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_s16 + #define vreinterpret_u8_s16 simde_vreinterpret_u8_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_s32(a); + #else + simde_uint8x8_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_s32 + #define vreinterpret_u8_s32 simde_vreinterpret_u8_s32 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_s64(a); + #else + simde_uint8x8_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_s64 + #define vreinterpret_u8_s64 simde_vreinterpret_u8_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_u16(a); + #else + simde_uint8x8_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_u16 + #define vreinterpret_u8_u16 simde_vreinterpret_u8_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_u32(a); + #else + simde_uint8x8_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_u32 + #define vreinterpret_u8_u32 simde_vreinterpret_u8_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_u64(a); + #else + simde_uint8x8_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_u64 + #define vreinterpret_u8_u64 simde_vreinterpret_u8_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u8_f32(a); + #else + simde_uint8x8_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_f32 + #define vreinterpret_u8_f32 simde_vreinterpret_u8_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vreinterpret_u8_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_u8_f64(a); + #else + simde_uint8x8_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u8_f64 + #define vreinterpret_u8_f64 simde_vreinterpret_u8_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_s8(a); + #else + simde_uint8x16_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_s8 + #define vreinterpretq_u8_s8(a) 
simde_vreinterpretq_u8_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_s16(a); + #else + simde_uint8x16_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_s16 + #define vreinterpretq_u8_s16(a) simde_vreinterpretq_u8_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_s32(a); + #else + simde_uint8x16_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_s32 + #define vreinterpretq_u8_s32(a) simde_vreinterpretq_u8_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_s64(a); + #else + simde_uint8x16_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_s64 + #define vreinterpretq_u8_s64(a) simde_vreinterpretq_u8_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_u16(a); + #else + simde_uint8x16_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_u16 + #define vreinterpretq_u8_u16(a) simde_vreinterpretq_u8_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_u32(a); + #else + simde_uint8x16_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_u32 + #define vreinterpretq_u8_u32(a) simde_vreinterpretq_u8_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_u64(a); + #else + simde_uint8x16_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_u64 + #define vreinterpretq_u8_u64(a) simde_vreinterpretq_u8_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u8_f32(a); + #else + simde_uint8x16_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_f32 + #define vreinterpretq_u8_f32(a) simde_vreinterpretq_u8_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vreinterpretq_u8_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_u8_f64(a); + #else + simde_uint8x16_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u8_f64 + #define vreinterpretq_u8_f64(a) simde_vreinterpretq_u8_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_s8(a); + #else + simde_uint16x4_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_s8 + #define vreinterpret_u16_s8 simde_vreinterpret_u16_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_s16(a); + #else + simde_uint16x4_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_s16 + #define vreinterpret_u16_s16 simde_vreinterpret_u16_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_s32(a); + #else + simde_uint16x4_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_s32 + #define vreinterpret_u16_s32 simde_vreinterpret_u16_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_s64(a); + #else + simde_uint16x4_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_s64 + #define vreinterpret_u16_s64 simde_vreinterpret_u16_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_u8(a); + #else + simde_uint16x4_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_u8 + #define vreinterpret_u16_u8 simde_vreinterpret_u16_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_u32(a); + #else + simde_uint16x4_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return 
simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_u32 + #define vreinterpret_u16_u32 simde_vreinterpret_u16_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_u64(a); + #else + simde_uint16x4_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_u64 + #define vreinterpret_u16_u64 simde_vreinterpret_u16_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vreinterpret_u16_f16(a); + #else + simde_uint16x4_private r_; + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_f16 + #define vreinterpret_u16_f16(a) simde_vreinterpret_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u16_f32(a); + #else + simde_uint16x4_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_f32 + #define vreinterpret_u16_f32 simde_vreinterpret_u16_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vreinterpret_u16_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_u16_f64(a); + #else + simde_uint16x4_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u16_f64 + #define vreinterpret_u16_f64 simde_vreinterpret_u16_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_s8(a); + #else + simde_uint16x8_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_s8 + #define vreinterpretq_u16_s8(a) simde_vreinterpretq_u16_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_s16(a); + #else + simde_uint16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_s16 + #define vreinterpretq_u16_s16(a) simde_vreinterpretq_u16_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_s32(a); + #else + 
simde_uint16x8_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_s32 + #define vreinterpretq_u16_s32(a) simde_vreinterpretq_u16_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_s64(a); + #else + simde_uint16x8_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_s64 + #define vreinterpretq_u16_s64(a) simde_vreinterpretq_u16_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_u8(a); + #else + simde_uint16x8_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_u8 + #define vreinterpretq_u16_u8(a) simde_vreinterpretq_u16_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_u32(a); + #else + simde_uint16x8_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_u32 + #define vreinterpretq_u16_u32(a) simde_vreinterpretq_u16_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_u64(a); + #else + simde_uint16x8_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_u64 + #define vreinterpretq_u16_u64(a) simde_vreinterpretq_u16_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u16_f32(a); + #else + simde_uint16x8_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_f32 + #define vreinterpretq_u16_f32(a) simde_vreinterpretq_u16_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_u16_f64(a); + #else + simde_uint16x8_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_f64 + #define vreinterpretq_u16_f64(a) simde_vreinterpretq_u16_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t 
+simde_vreinterpret_u32_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_s8(a); + #else + simde_uint32x2_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_s8 + #define vreinterpret_u32_s8 simde_vreinterpret_u32_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_s16(a); + #else + simde_uint32x2_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_s16 + #define vreinterpret_u32_s16 simde_vreinterpret_u32_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_s32(a); + #else + simde_uint32x2_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_s32 + #define vreinterpret_u32_s32 simde_vreinterpret_u32_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_s64(a); + #else + simde_uint32x2_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_s64 + #define vreinterpret_u32_s64 simde_vreinterpret_u32_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_u8(a); + #else + simde_uint32x2_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_u8 + #define vreinterpret_u32_u8 simde_vreinterpret_u32_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_u16(a); + #else + simde_uint32x2_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_u16 + #define vreinterpret_u32_u16 simde_vreinterpret_u32_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_u64(a); + #else + simde_uint32x2_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_u64 + #define vreinterpret_u32_u64 simde_vreinterpret_u32_u64 +#endif + 
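Every portable branch in the reinterpret family above follows the same pattern: the result is produced by a raw bit copy, done with simde_memcpy between the two private representations, which optimizing compilers typically lower to a plain register move while avoiding the strict-aliasing and alignment hazards of a pointer cast or union pun. A minimal standalone sketch of that idiom, using hypothetical demo_* names rather than anything from the imported header:

    #include <stdint.h>
    #include <string.h>

    /* Stand-ins for the simde_*_private structs: two views of the same 8 bytes. */
    typedef struct { float    values[2]; } demo_f32x2;
    typedef struct { uint32_t values[2]; } demo_u32x2;

    /* Same shape as the simde_vreinterpret_* fallbacks: copy the bits, change only the type. */
    static demo_u32x2 demo_reinterpret_u32_f32(demo_f32x2 a) {
      demo_u32x2 r;
      memcpy(&r, &a, sizeof(r));  /* well-defined type punning in C */
      return r;
    }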
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u32_f32(a); + #else + simde_uint32x2_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_f32 + #define vreinterpret_u32_f32 simde_vreinterpret_u32_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vreinterpret_u32_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_u32_f64(a); + #else + simde_uint32x2_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u32_f64 + #define vreinterpret_u32_f64 simde_vreinterpret_u32_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_s8(a); + #else + simde_uint32x4_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_s8 + #define vreinterpretq_u32_s8(a) simde_vreinterpretq_u32_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_s16(a); + #else + simde_uint32x4_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_s16 + #define vreinterpretq_u32_s16(a) simde_vreinterpretq_u32_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_s32(a); + #else + simde_uint32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_s32 + #define vreinterpretq_u32_s32(a) simde_vreinterpretq_u32_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_s64(a); + #else + simde_uint32x4_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_s64 + #define vreinterpretq_u32_s64(a) simde_vreinterpretq_u32_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_u8(a); + #else + simde_uint32x4_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + 
#undef vreinterpretq_u32_u8 + #define vreinterpretq_u32_u8(a) simde_vreinterpretq_u32_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_u16(a); + #else + simde_uint32x4_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_u16 + #define vreinterpretq_u32_u16(a) simde_vreinterpretq_u32_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_u64(a); + #else + simde_uint32x4_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_u64 + #define vreinterpretq_u32_u64(a) simde_vreinterpretq_u32_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vreinterpretq_u16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vreinterpretq_u16_f16(a); + #else + simde_uint16x8_private r_; + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u16_f16 + #define vreinterpretq_u16_f16(a) simde_vreinterpretq_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u32_f32(a); + #else + simde_uint32x4_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_f32 + #define vreinterpretq_u32_f32(a) simde_vreinterpretq_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vreinterpretq_u32_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_u32_f64(a); + #else + simde_uint32x4_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u32_f64 + #define vreinterpretq_u32_f64(a) simde_vreinterpretq_u32_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_s8(a); + #else + simde_uint64x1_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_s8 + #define vreinterpret_u64_s8 simde_vreinterpret_u64_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_s16(a); + #else + simde_uint64x1_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + 
simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_s16 + #define vreinterpret_u64_s16 simde_vreinterpret_u64_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_s32(a); + #else + simde_uint64x1_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_s32 + #define vreinterpret_u64_s32 simde_vreinterpret_u64_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_s64(a); + #else + simde_uint64x1_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_s64 + #define vreinterpret_u64_s64 simde_vreinterpret_u64_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_u8(a); + #else + simde_uint64x1_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_u8 + #define vreinterpret_u64_u8 simde_vreinterpret_u64_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_u16(a); + #else + simde_uint64x1_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_u16 + #define vreinterpret_u64_u16 simde_vreinterpret_u64_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_u32(a); + #else + simde_uint64x1_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_u32 + #define vreinterpret_u64_u32 simde_vreinterpret_u64_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_u64_f32(a); + #else + simde_uint64x1_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_f32 + #define vreinterpret_u64_f32 simde_vreinterpret_u64_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vreinterpret_u64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_u64_f64(a); + #else + simde_uint64x1_private r_; + 
simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_u64_f64 + #define vreinterpret_u64_f64 simde_vreinterpret_u64_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s8(a); + #else + simde_uint64x2_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_s8 + #define vreinterpretq_u64_s8(a) simde_vreinterpretq_u64_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s16(a); + #else + simde_uint64x2_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_s16 + #define vreinterpretq_u64_s16(a) simde_vreinterpretq_u64_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s32(a); + #else + simde_uint64x2_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_s32 + #define vreinterpretq_u64_s32(a) simde_vreinterpretq_u64_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s64(a); + #else + simde_uint64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_s64 + #define vreinterpretq_u64_s64(a) simde_vreinterpretq_u64_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_u8(a); + #else + simde_uint64x2_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_u8 + #define vreinterpretq_u64_u8(a) simde_vreinterpretq_u64_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_u16(a); + #else + simde_uint64x2_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_u16 + #define vreinterpretq_u64_u16(a) simde_vreinterpretq_u64_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_u32(simde_uint32x4_t a) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_u32(a); + #else + simde_uint64x2_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_u32 + #define vreinterpretq_u64_u32(a) simde_vreinterpretq_u64_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_f32(a); + #else + simde_uint64x2_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_f32 + #define vreinterpretq_u64_f32(a) simde_vreinterpretq_u64_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vreinterpretq_u64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_u64_f64(a); + #else + simde_uint64x2_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_u64_f64 + #define vreinterpretq_u64_f64(a) simde_vreinterpretq_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_s8(a); + #else + simde_float32x2_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_s8 + #define vreinterpret_f32_s8 simde_vreinterpret_f32_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_s16(a); + #else + simde_float32x2_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_s16 + #define vreinterpret_f32_s16 simde_vreinterpret_f32_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_s32(a); + #else + simde_float32x2_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_s32 + #define vreinterpret_f32_s32 simde_vreinterpret_f32_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_s64(a); + #else + simde_float32x2_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_s64 + #define vreinterpret_f32_s64 simde_vreinterpret_f32_s64 +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_u8(a); + #else + simde_float32x2_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_u8 + #define vreinterpret_f32_u8 simde_vreinterpret_f32_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_u16(a); + #else + simde_float32x2_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_u16 + #define vreinterpret_f32_u16 simde_vreinterpret_f32_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vreinterpret_f16_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vreinterpret_f16_u16(a); + #else + simde_float16x4_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f16_u16 + #define vreinterpret_f16_u16(a) simde_vreinterpret_f16_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_u32(a); + #else + simde_float32x2_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_u32 + #define vreinterpret_f32_u32 simde_vreinterpret_f32_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpret_f32_u64(a); + #else + simde_float32x2_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_u64 + #define vreinterpret_f32_u64 simde_vreinterpret_f32_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vreinterpret_f32_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f32_f64(a); + #else + simde_float32x2_private r_; + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f32_f64 + #define vreinterpret_f32_f64 simde_vreinterpret_f32_f64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_s8(a); + #else + simde_float32x4_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_s8 + #define vreinterpretq_f32_s8(a) simde_vreinterpretq_f32_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_s16(a); + #else + simde_float32x4_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_s16 + #define vreinterpretq_f32_s16(a) simde_vreinterpretq_f32_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_s32(a); + #else + simde_float32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_s32 + #define vreinterpretq_f32_s32(a) simde_vreinterpretq_f32_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_s64(a); + #else + simde_float32x4_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_s64 + #define vreinterpretq_f32_s64(a) simde_vreinterpretq_f32_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_u8(a); + #else + simde_float32x4_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_u8 + #define vreinterpretq_f32_u8(a) simde_vreinterpretq_f32_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_u16(a); + #else + simde_float32x4_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_u16 + #define vreinterpretq_f32_u16(a) simde_vreinterpretq_f32_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vreinterpretq_f16_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vreinterpretq_f16_u16(a); + #else + simde_float16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f16_u16 + #define vreinterpretq_f16_u16(a) simde_vreinterpretq_f16_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_u32(a); + #else + 
simde_float32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_u32 + #define vreinterpretq_f32_u32(a) simde_vreinterpretq_f32_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_f32_u64(a); + #else + simde_float32x4_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_u64 + #define vreinterpretq_f32_u64(a) simde_vreinterpretq_f32_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vreinterpretq_f32_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde_float32x4_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f32_f64 + #define vreinterpretq_f32_f64(a) simde_vreinterpretq_f32_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_s8(a); + #else + simde_float64x1_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_s8 + #define vreinterpret_f64_s8 simde_vreinterpret_f64_s8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_s16(a); + #else + simde_float64x1_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_s16 + #define vreinterpret_f64_s16 simde_vreinterpret_f64_s16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_s32(a); + #else + simde_float64x1_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_s32 + #define vreinterpret_f64_s32 simde_vreinterpret_f64_s32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_s64(a); + #else + simde_float64x1_private r_; + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_s64 + #define vreinterpret_f64_s64 simde_vreinterpret_f64_s64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_u8(simde_uint8x8_t 
a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_u8(a); + #else + simde_float64x1_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_u8 + #define vreinterpret_f64_u8 simde_vreinterpret_f64_u8 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_u16(a); + #else + simde_float64x1_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_u16 + #define vreinterpret_f64_u16 simde_vreinterpret_f64_u16 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_u32(a); + #else + simde_float64x1_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_u32 + #define vreinterpret_f64_u32 simde_vreinterpret_f64_u32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_u64(a); + #else + simde_float64x1_private r_; + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_u64 + #define vreinterpret_f64_u64 simde_vreinterpret_f64_u64 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vreinterpret_f64_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpret_f64_f32(a); + #else + simde_float64x1_private r_; + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpret_f64_f32 + #define vreinterpret_f64_f32 simde_vreinterpret_f64_f32 +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s8(a); + #else + simde_float64x2_private r_; + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_s8 + #define vreinterpretq_f64_s8(a) simde_vreinterpretq_f64_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s16(a); + #else + simde_float64x2_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_s16 + #define vreinterpretq_f64_s16(a) simde_vreinterpretq_f64_s16(a) +#endif + 
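Each definition above is paired with an alias guard: when the matching SIMDE_ARM_NEON_*_ENABLE_NATIVE_ALIASES macro is set, the original NEON name is #undef'd and redefined to the simde_ implementation, so code written directly against the ARM intrinsics also builds on x86, WASM or POWER. The float64 reinterprets guard on SIMDE_ARM_NEON_A64V8_NATIVE rather than A32V7 because float64x1_t/float64x2_t only exist in the AArch64 flavour of NEON. A sketch of how such a translation unit is typically consumed; bits_to_f64 is a hypothetical function name, and SIMDE_ENABLE_NATIVE_ALIASES is the project-wide switch simde documents for turning these aliases on:

    #define SIMDE_ENABLE_NATIVE_ALIASES
    #include "simde/arm/neon.h"

    /* Plain NEON spelling: resolves to the native vreinterpretq_f64_s64 on AArch64
       and to simde_vreinterpretq_f64_s64 (the memcpy fallback) everywhere else. */
    float64x2_t bits_to_f64(int64x2_t x) {
      return vreinterpretq_f64_s64(x);
    }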
+SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s32(a); + #else + simde_float64x2_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_s32 + #define vreinterpretq_f64_s32(a) simde_vreinterpretq_f64_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde_float64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_s64 + #define vreinterpretq_f64_s64(a) simde_vreinterpretq_f64_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_u8(a); + #else + simde_float64x2_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_u8 + #define vreinterpretq_f64_u8(a) simde_vreinterpretq_f64_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_u16(a); + #else + simde_float64x2_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_u16 + #define vreinterpretq_f64_u16(a) simde_vreinterpretq_f64_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_u32(a); + #else + simde_float64x2_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_u32 + #define vreinterpretq_f64_u32(a) simde_vreinterpretq_f64_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_u64(a); + #else + simde_float64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_u64 + #define vreinterpretq_f64_u64(a) simde_vreinterpretq_f64_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vreinterpretq_f64_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde_float64x2_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_memcpy(&r_, &a_, sizeof(r_)); + return simde_float64x2_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vreinterpretq_f64_f32 + #define vreinterpretq_f64_f32(a) simde_vreinterpretq_f64_f32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/arm/neon/reinterpret.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vabds_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabds_f32(a, b); + #else + simde_float32_t r = a - b; + return r < 0 ? -r : r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabds_f32 + #define vabds_f32(a, b) simde_vabds_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vabdd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdd_f64(a, b); + #else + simde_float64_t r = a - b; + return r < 0 ? -r : r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdd_f64 + #define vabdd_f64(a, b) simde_vabdd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_f32(a, b); + #else + return simde_vabs_f32(simde_vsub_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_f32 + #define vabd_f32(a, b) simde_vabd_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vabd_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabd_f64(a, b); + #else + return simde_vabs_f64(simde_vsub_f64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabd_f64 + #define vabd_f64(a, b) simde_vabd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vabd_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_s8(a, b); + #elif defined(SIMDE_X86_MMX_NATIVE) + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + const __m64 m = _mm_cmpgt_pi8(b_.m64, a_.m64); + r_.m64 = + _mm_xor_si64( + _mm_add_pi8( + _mm_sub_pi8(a_.m64, b_.m64), + m + ), + m + ); + + return simde_int8x8_from_private(r_); + #else + return simde_vmovn_s16(simde_vabsq_s16(simde_vsubl_s8(a, b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_s8 + #define vabd_s8(a, b) simde_vabd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vabd_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_s16(a, b); + #elif defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + r_.m64 = _mm_sub_pi16(_mm_max_pi16(a_.m64, b_.m64), _mm_min_pi16(a_.m64, b_.m64)); + + return simde_int16x4_from_private(r_); + #else + return simde_vmovn_s32(simde_vabsq_s32(simde_vsubl_s16(a, b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_s16 + #define vabd_s16(a, b) simde_vabd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vabd_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_s32(a, b); + #else + return simde_vmovn_s64(simde_vabsq_s64(simde_vsubl_s32(a, b))); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_s32 + #define vabd_s32(a, b) simde_vabd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vabd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_u8(a, b); + #else + return simde_vmovn_u16( + simde_vreinterpretq_u16_s16( + simde_vabsq_s16( + simde_vsubq_s16( + simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)), + simde_vreinterpretq_s16_u16(simde_vmovl_u8(b)))))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_u8 + #define vabd_u8(a, b) simde_vabd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vabd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_u16(a, b); + #else + return simde_vmovn_u32( + simde_vreinterpretq_u32_s32( + simde_vabsq_s32( + simde_vsubq_s32( + simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)), + simde_vreinterpretq_s32_u32(simde_vmovl_u16(b)))))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_u16 + #define vabd_u16(a, b) simde_vabd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabd_u32(a, b); + #else + return simde_vmovn_u64( + simde_vreinterpretq_u64_s64( + simde_vabsq_s64( + simde_vsubq_s64( + simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)), + simde_vreinterpretq_s64_u64(simde_vmovl_u32(b)))))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabd_u32 + #define vabd_u32(a, b) simde_vabd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_f32(a, b); + #else + return simde_vabsq_f32(simde_vsubq_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_f32 + #define vabdq_f32(a, b) simde_vabdq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vabdq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdq_f64(a, b); + #else + return simde_vabsq_f64(simde_vsubq_f64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdq_f64 + #define vabdq_f64(a, b) simde_vabdq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vabdq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(vec_max(a, b), vec_min(a, b)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b) - vec_min(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_sub_epi8(_mm_max_epi8(a_.m128i, b_.m128i), _mm_min_epi8(a_.m128i, b_.m128i)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi8(b_.m128i, a_.m128i); + r_.m128i = + _mm_xor_si128( + _mm_add_epi8( + _mm_sub_epi8(a_.m128i, b_.m128i), + m + ), + m + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_sub(wasm_i8x16_max(a_.v128, b_.v128), wasm_i8x16_min(a_.v128, b_.v128)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int16_t tmp = HEDLEY_STATIC_CAST(int16_t, 
a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp < 0 ? -tmp : tmp); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_s8 + #define vabdq_s8(a, b) simde_vabdq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabdq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(vec_max(a, b), vec_min(a, b)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b) - vec_min(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881658604 */ + r_.m128i = _mm_sub_epi16(_mm_max_epi16(a_.m128i, b_.m128i), _mm_min_epi16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_sub(wasm_i16x8_max(a_.v128, b_.v128), wasm_i16x8_min(a_.v128, b_.v128)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + (a_.values[i] < b_.values[i]) ? + (b_.values[i] - a_.values[i]) : + (a_.values[i] - b_.values[i]); + } + + #endif + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_s16 + #define vabdq_s16(a, b) simde_vabdq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabdq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(vec_max(a, b), vec_min(a, b)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b) - vec_min(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_sub_epi32(_mm_max_epi32(a_.m128i, b_.m128i), _mm_min_epi32(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_sub(wasm_i32x4_max(a_.v128, b_.v128), wasm_i32x4_min(a_.v128, b_.v128)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(b_.m128i, a_.m128i); + r_.m128i = + _mm_xor_si128( + _mm_add_epi32( + _mm_sub_epi32(a_.m128i, b_.m128i), + m + ), + m + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp < 0 ? 
-tmp : tmp); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_s32 + #define vabdq_s32(a, b) simde_vabdq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vabdq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) + return vec_absd(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(vec_max(a, b), vec_min(a, b)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b) - vec_min(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi8(_mm_max_epu8(a_.m128i, b_.m128i), _mm_min_epu8(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_sub(wasm_u8x16_max(a_.v128, b_.v128), wasm_u8x16_min(a_.v128, b_.v128)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int16_t tmp = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp < 0 ? -tmp : tmp); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_u8 + #define vabdq_u8(a, b) simde_vabdq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vabdq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) + return vec_absd(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(vec_max(a, b), vec_min(a, b)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b) - vec_min(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = _mm_sub_epi16(_mm_max_epu16(a_.m128i, b_.m128i), _mm_min_epu16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_sub(wasm_u16x8_max(a_.v128, b_.v128), wasm_u16x8_min(a_.v128, b_.v128)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp < 0 ? 
-tmp : tmp); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_u16 + #define vabdq_u16(a, b) simde_vabdq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vabdq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) + return vec_absd(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(vec_max(a, b), vec_min(a, b)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b) - vec_min(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = _mm_sub_epi32(_mm_max_epu32(a_.m128i, b_.m128i), _mm_min_epu32(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_sub(wasm_u32x4_max(a_.v128, b_.v128), wasm_u32x4_min(a_.v128, b_.v128)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp < 0 ? -tmp : tmp); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdq_u32 + #define vabdq_u32(a, b) simde_vabdq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABD_H) */ +/* :: End simde/arm/neon/abd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/add.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ADD_H) +#define SIMDE_ARM_NEON_ADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16 +simde_vaddh_f16(simde_float16 a, simde_float16 b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vaddh_f16(a, b); + #else + simde_float32 af = simde_float16_to_float32(a); + simde_float32 bf = simde_float16_to_float32(b); + return simde_float16_from_float32(af + bf); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vaddh_f16 + #define vaddh_f16(a, b) simde_vaddh_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vaddd_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddd_s64(a, b); + #else + return a + b; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddd_s64 + #define vaddd_s64(a, b) simde_vaddd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vaddd_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddd_u64(a, b); + #else + return a + b; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddd_u64 + #define vaddd_u64(a, b) simde_vaddd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vadd_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vadd_f16(a, b); + #else + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]); + } + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vadd_f16 + #define vadd_f16(a, b) simde_vadd_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vadd_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_f32 + #define vadd_f32(a, b) simde_vadd_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vadd_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vadd_f64(a, b); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vadd_f64 + #define vadd_f64(a, b) 
simde_vadd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vadd_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #elif defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi8(a_.m64, b_.m64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_s8 + #define vadd_s8(a, b) simde_vadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vadd_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #elif defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi16(a_.m64, b_.m64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_s16 + #define vadd_s16(a, b) simde_vadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vadd_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #elif defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi32(a_.m64, b_.m64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_s32 + #define vadd_s32(a, b) simde_vadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vadd_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_s64 + #define vadd_s64(a, b) simde_vadd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_u8 + #define vadd_u8(a, b) simde_vadd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_u16 + #define vadd_u16(a, b) simde_vadd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_u32 + #define vadd_u32(a, b) simde_vadd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vadd_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vadd_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vadd_u64 + #define vadd_u64(a, b) simde_vadd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vaddq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vaddq_f16(a, b); + #else + simde_float16x8_private + r_, + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vaddh_f16(a_.values[i], b_.values[i]); + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vaddq_f16 + #define vaddq_f16(a, b) simde_vaddq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vaddq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(float) a_ , b_, r_; + a_ = a; + b_ = b; + r_ = vec_add(a_, b_); + return r_; + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = 
simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_add_ps(a_.m128, b_.m128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_f32 + #define vaddq_f32(a, b) simde_vaddq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vaddq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_add(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_add_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddq_f64 + #define vaddq_f64(a, b) simde_vaddq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_s8 + #define vaddq_s8(a, b) simde_vaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_s16 + #define vaddq_s16(a, b) simde_vaddq_s16((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_s32 + #define vaddq_s32(a, b) simde_vaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_add(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_s64 + #define vaddq_s64(a, b) simde_vaddq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_u8 + #define vaddq_u8(a, b) simde_vaddq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_u16 + #define vaddq_u16(a, b) simde_vaddq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_uint32x4_t +simde_vaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_u32 + #define vaddq_u32(a, b) simde_vaddq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_add(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddq_u64 + #define vaddq_u64(a, b) simde_vaddq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADD_H) */ +/* :: End simde/arm/neon/add.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vaba_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaba_s8(a, b, c); + #else + return simde_vadd_s8(simde_vabd_s8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaba_s8 + #define vaba_s8(a, b, c) simde_vaba_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vaba_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaba_s16(a, b, c); + #else + return simde_vadd_s16(simde_vabd_s16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaba_s16 + #define vaba_s16(a, b, c) simde_vaba_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vaba_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaba_s32(a, b, c); + #else + return simde_vadd_s32(simde_vabd_s32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaba_s32 + #define vaba_s32(a, b, c) simde_vaba_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vaba_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaba_u8(a, b, c); + #else + return simde_vadd_u8(simde_vabd_u8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaba_u8 + #define vaba_u8(a, b, c) simde_vaba_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vaba_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaba_u16(a, 
b, c); + #else + return simde_vadd_u16(simde_vabd_u16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaba_u16 + #define vaba_u16(a, b, c) simde_vaba_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vaba_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaba_u32(a, b, c); + #else + return simde_vadd_u32(simde_vabd_u32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaba_u32 + #define vaba_u32(a, b, c) simde_vaba_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vabaq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabaq_s8(a, b, c); + #else + return simde_vaddq_s8(simde_vabdq_s8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabaq_s8 + #define vabaq_s8(a, b, c) simde_vabaq_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabaq_s16(a, b, c); + #else + return simde_vaddq_s16(simde_vabdq_s16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabaq_s16 + #define vabaq_s16(a, b, c) simde_vabaq_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabaq_s32(a, b, c); + #else + return simde_vaddq_s32(simde_vabdq_s32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabaq_s32 + #define vabaq_s32(a, b, c) simde_vabaq_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vabaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabaq_u8(a, b, c); + #else + return simde_vaddq_u8(simde_vabdq_u8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabaq_u8 + #define vabaq_u8(a, b, c) simde_vabaq_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vabaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabaq_u16(a, b, c); + #else + return simde_vaddq_u16(simde_vabdq_u16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabaq_u16 + #define vabaq_u16(a, b, c) simde_vabaq_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vabaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabaq_u32(a, b, c); + #else + return simde_vaddq_u32(simde_vabdq_u32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabaq_u32 + #define vabaq_u32(a, b, c) simde_vabaq_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABA_H) */ +/* :: End simde/arm/neon/aba.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/abdl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * 
obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ABDL_H) +#define SIMDE_ARM_NEON_ABDL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabdl_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdl_s8(a, b); + #else + return simde_vabsq_s16(simde_vsubl_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdl_s8 + #define vabdl_s8(a, b) simde_vabdl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabdl_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdl_s16(a, b); + #else + return simde_vabsq_s32(simde_vsubl_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdl_s16 + #define vabdl_s16(a, b) simde_vabdl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vabdl_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdl_s32(a, b); + #else + return simde_vabsq_s64(simde_vsubl_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdl_s32 + #define vabdl_s32(a, b) simde_vabdl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vabdl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdl_u8(a, b); + #else + return simde_vreinterpretq_u16_s16( + simde_vabsq_s16( + simde_vsubq_s16( + simde_vreinterpretq_s16_u16(simde_vmovl_u8(a)), + simde_vreinterpretq_s16_u16(simde_vmovl_u8(b)) + ) + ) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdl_u8 + #define vabdl_u8(a, b) simde_vabdl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vabdl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdl_u16(a, b); + #else + return simde_vreinterpretq_u32_s32( + simde_vabsq_s32( + simde_vsubq_s32( + 
simde_vreinterpretq_s32_u32(simde_vmovl_u16(a)), + simde_vreinterpretq_s32_u32(simde_vmovl_u16(b)) + ) + ) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdl_u16 + #define vabdl_u16(a, b) simde_vabdl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vabdl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vabdl_u32(a, b); + #else + return simde_vreinterpretq_u64_s64( + simde_vabsq_s64( + simde_vsubq_s64( + simde_vreinterpretq_s64_u64(simde_vmovl_u32(a)), + simde_vreinterpretq_s64_u64(simde_vmovl_u32(b)) + ) + ) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vabdl_u32 + #define vabdl_u32(a, b) simde_vabdl_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABDL_H) */ +/* :: End simde/arm/neon/abdl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addhn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ADDHN_H) +#define SIMDE_ARM_NEON_ADDHN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/shr_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_SHR_N_H) +#define SIMDE_ARM_NEON_SHR_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_x_vshrs_n_s32(int32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return a >> ((n == 32) ? 31 : n); +} + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_x_vshrs_n_u32(uint32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return (n == 32) ? 0 : a >> n; +} + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vshrd_n_s64(int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return a >> ((n == 64) ? 63 : n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vshrd_n_s64(a, n) vshrd_n_s64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vshrd_n_s64 + #define vshrd_n_s64(a, n) simde_vshrd_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vshrd_n_u64(uint64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return (n == 64) ? 0 : a >> n; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vshrd_n_u64(a, n) vshrd_n_u64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vshrd_n_u64 + #define vshrd_n_u64(a, n) simde_vshrd_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vshr_n_s8 (const simde_int8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + int32_t n_ = (n == 8) ? 7 : n; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values >> n_; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] >> n_); + } + #endif + + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_s8(a, n) vshr_n_s8((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_s8(a, n) \ + simde_int8x8_from_m64( \ + _mm_or_si64(_mm_andnot_si64(_mm_set1_pi16(0x00FF), _mm_srai_pi16(simde_int8x8_to_m64(a), (n))), \ + _mm_and_si64(_mm_set1_pi16(0x00FF), _mm_srai_pi16(_mm_slli_pi16(simde_int8x8_to_m64(a), 8), 8 + (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_s8 + #define vshr_n_s8(a, n) simde_vshr_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vshr_n_s16 (const simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + int32_t n_ = (n == 16) ? 
15 : n; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values >> n_; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] >> n_); + } + #endif + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_s16(a, n) vshr_n_s16((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_s16(a, n) simde_int16x4_from_m64(_mm_srai_pi16(simde_int16x4_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_s16 + #define vshr_n_s16(a, n) simde_vshr_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vshr_n_s32 (const simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + int32_t n_ = (n == 32) ? 31 : n; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n_; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n_; + } + #endif + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_s32(a, n) vshr_n_s32((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_s32(a, n) simde_int32x2_from_m64(_mm_srai_pi32(simde_int32x2_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_s32 + #define vshr_n_s32(a, n) simde_vshr_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vshr_n_s64 (const simde_int64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + int32_t n_ = (n == 64) ? 
63 : n; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n_; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n_; + } + #endif + + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_s64(a, n) vshr_n_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_s64 + #define vshr_n_s64(a, n) simde_vshr_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vshr_n_u8 (const simde_uint8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + if (n == 8) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + + return simde_uint8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_u8(a, n) vshr_n_u8((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_u8(a, n) \ + simde_uint8x8_from_m64(_mm_and_si64(_mm_srli_si64(simde_uint8x8_to_m64(a), (n)), _mm_set1_pi8((1 << (8 - (n))) - 1))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_u8 + #define vshr_n_u8(a, n) simde_vshr_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vshr_n_u16 (const simde_uint16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + + if (n == 16) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_u16(a, n) vshr_n_u16((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_u16(a, n) simde_uint16x4_from_m64(_mm_srli_pi16(simde_uint16x4_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_u16 + #define vshr_n_u16(a, n) simde_vshr_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vshr_n_u32 (const simde_uint32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + + if (n == 32) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_u32(a, n) vshr_n_u32((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_u32(a, n) simde_uint32x2_from_m64(_mm_srli_pi32(simde_uint32x2_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_u32 + #define vshr_n_u32(a, n) simde_vshr_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vshr_n_u64 (const simde_uint64x1_t a, const int n) + 
SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + + if (n == 64) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshr_n_u64(a, n) vshr_n_u64((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshr_n_u64(a, n) simde_uint64x1_from_m64(_mm_srli_si64(simde_uint64x1_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshr_n_u64 + #define vshr_n_u64(a, n) simde_vshr_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vshrq_n_s8 (const simde_int8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + const int shift = (n <= 7) ? n : 7; + const uint64_t matrix = (UINT64_C(0x8182848890A0C000) << (shift * 8)) ^ UINT64_C(0x8080808080808080); + r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, matrix)), 0); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_blendv_epi8(_mm_srai_epi16(a_.m128i, n), + _mm_srai_epi16(_mm_slli_epi16(a_.m128i, 8), 8 + (n)), + _mm_set1_epi16(0x00FF)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = + _mm_or_si128(_mm_andnot_si128(_mm_set1_epi16(0x00FF), _mm_srai_epi16(a_.m128i, n)), + _mm_and_si128(_mm_set1_epi16(0x00FF), _mm_srai_epi16(_mm_slli_epi16(a_.m128i, 8), 8 + (n)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shr(a_.v128, ((n) == 8) ? 7 : HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> ((n == 8) ? 7 : n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] >> ((n == 8) ? 7 : n)); + } + #endif + + return simde_int8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_s8(a, n) vshrq_n_s8((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshrq_n_s8(a, n) vec_sra((a), vec_splat_u8(((n) == 8) ? 7 : (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_s8 + #define vshrq_n_s8(a, n) simde_vshrq_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vshrq_n_s16 (const simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_srai_epi16(a_.m128i, n); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shr(a_.v128, ((n) == 16) ? 15 : HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> ((n == 16) ? 15 : n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] >> ((n == 16) ? 
15 : n)); + } + #endif + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_s16(a, n) vshrq_n_s16((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshrq_n_s16(a, n) vec_sra((a), vec_splat_u16(((n) == 16) ? 15 : (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_s16 + #define vshrq_n_s16(a, n) simde_vshrq_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vshrq_n_s32 (const simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_srai_epi32(a_.m128i, n); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shr(a_.v128, ((n) == 32) ? 31 : HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> ((n == 32) ? 31 : n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> ((n == 32) ? 31 : n); + } + #endif + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_s32(a, n) vshrq_n_s32((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshrq_n_s32(a, n) \ + vec_sra((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, ((n) == 32) ? 31 : (n)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_s32 + #define vshrq_n_s32(a, n) simde_vshrq_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vshrq_n_s64 (const simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shr(a_.v128, ((n) == 64) ? 63 : HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> ((n == 64) ? 63 : n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> ((n == 64) ? 63 : n); + } + #endif + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_s64(a, n) vshrq_n_s64((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + #define simde_vshrq_n_s64(a, n) \ + vec_sra((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, ((n) == 64) ? 63 : (n)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_s64 + #define vshrq_n_s64(a, n) simde_vshrq_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vshrq_n_u8 (const simde_uint8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + r_.m128i = (n > 7) ? _mm_setzero_si128() : _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(INT64_C(0x0102040810204080) << (n * 8)), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(_mm_srli_epi64(a_.m128i, (n)), _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (8 - (n))) - 1))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = (((n) == 8) ? 
wasm_i8x16_splat(0) : wasm_u8x16_shr(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + #else + if (n == 8) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + #endif + + return simde_uint8x16_from_private(r_);\ +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_u8(a, n) vshrq_n_u8((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshrq_n_u8(a, n) \ + (((n) == 8) ? vec_splat_u8(0) : vec_sr((a), vec_splat_u8(n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_u8 + #define vshrq_n_u8(a, n) simde_vshrq_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vshrq_n_u16 (const simde_uint16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_srli_epi16(a_.m128i, n); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = (((n) == 16) ? wasm_i16x8_splat(0) : wasm_u16x8_shr(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + #else + if (n == 16) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + #endif + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_u16(a, n) vshrq_n_u16((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshrq_n_u16(a, n) \ + (((n) == 16) ? vec_splat_u16(0) : vec_sr((a), vec_splat_u16(n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_u16 + #define vshrq_n_u16(a, n) simde_vshrq_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vshrq_n_u32 (const simde_uint32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_srli_epi32(a_.m128i, n); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = (((n) == 32) ? wasm_i32x4_splat(0) : wasm_u32x4_shr(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + #else + if (n == 32) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + #endif + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_u32(a, n) vshrq_n_u32((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshrq_n_u32(a, n) \ + (((n) == 32) ? 
vec_splat_u32(0) : vec_sr((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_u32 + #define vshrq_n_u32(a, n) simde_vshrq_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vshrq_n_u64 (const simde_uint64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_srli_epi64(a_.m128i, n); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = (((n) == 64) ? wasm_i64x2_splat(0) : wasm_u64x2_shr(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + #else + if (n == 64) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) + r_.values = a_.values >> n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] >> n; + } + #endif + } + #endif + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrq_n_u64(a, n) vshrq_n_u64((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + #define simde_vshrq_n_u64(a, n) \ + (((n) == 64) ? vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0)) : vec_sr((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrq_n_u64 + #define vshrq_n_u64(a, n) simde_vshrq_n_u64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SHR_N_H) */ +/* :: End simde/arm/neon/shr_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vaddhn_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddhn_s16(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_int8x8_private r_; + simde_int8x16_private tmp_ = + simde_int8x16_to_private( + simde_vreinterpretq_s8_s16( + simde_vaddq_s16(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #endif + return simde_int8x8_from_private(r_); + #else + return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddq_s16(a, b), 8)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddhn_s16 + #define vaddhn_s16(a, b) simde_vaddhn_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vaddhn_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddhn_s32(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_int16x4_private r_; + simde_int16x8_private tmp_ = + simde_int16x8_to_private( + simde_vreinterpretq_s16_s32( + simde_vaddq_s32(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6); + #endif + return 
simde_int16x4_from_private(r_); + #else + return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddq_s32(a, b), 16)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddhn_s32 + #define vaddhn_s32(a, b) simde_vaddhn_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vaddhn_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddhn_s64(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_int32x2_private r_; + simde_int32x4_private tmp_ = + simde_int32x4_to_private( + simde_vreinterpretq_s32_s64( + simde_vaddq_s64(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2); + #endif + return simde_int32x2_from_private(r_); + #else + return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddq_s64(a, b), 32)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddhn_s64 + #define vaddhn_s64(a, b) simde_vaddhn_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vaddhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddhn_u16(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_uint8x8_private r_; + simde_uint8x16_private tmp_ = + simde_uint8x16_to_private( + simde_vreinterpretq_u8_u16( + simde_vaddq_u16(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #endif + return simde_uint8x8_from_private(r_); + #else + return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddq_u16(a, b), 8)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddhn_u16 + #define vaddhn_u16(a, b) simde_vaddhn_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vaddhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddhn_u32(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_uint16x4_private r_; + simde_uint16x8_private tmp_ = + simde_uint16x8_to_private( + simde_vreinterpretq_u16_u32( + simde_vaddq_u32(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6); + #endif + return simde_uint16x4_from_private(r_); + #else + return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddq_u32(a, b), 16)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddhn_u32 + #define vaddhn_u32(a, b) simde_vaddhn_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vaddhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddhn_u64(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_uint32x2_private r_; + simde_uint32x4_private tmp_ = + simde_uint32x4_to_private( + simde_vreinterpretq_u32_u64( + simde_vaddq_u64(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, 
tmp_.values, 1, 3); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2); + #endif + return simde_uint32x2_from_private(r_); + #else + return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddq_u64(a, b), 32)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddhn_u64 + #define vaddhn_u64(a, b) simde_vaddhn_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDHN_H) */ +/* :: End simde/arm/neon/addhn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ADDL_H) +#define SIMDE_ARM_NEON_ADDL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddl_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddl_s8(a, b); + #else + return simde_vaddq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddl_s8 + #define vaddl_s8(a, b) simde_vaddl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddl_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddl_s16(a, b); + #else + return simde_vaddq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddl_s16 + #define vaddl_s16(a, b) simde_vaddl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vaddl_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddl_s32(a, b); + #else + return simde_vaddq_s64(simde_vmovl_s32(a), simde_vmovl_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddl_s32 + #define vaddl_s32(a, b) simde_vaddl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vaddl_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddl_u8(a, b); + #else + return simde_vaddq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddl_u8 + #define vaddl_u8(a, b) simde_vaddl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vaddl_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddl_u16(a, b); + #else + return simde_vaddq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddl_u16 + #define vaddl_u16(a, b) simde_vaddl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vaddl_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddl_u32(a, b); + #else + return simde_vaddq_u64(simde_vmovl_u32(a), simde_vmovl_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddl_u32 + #define vaddl_u32(a, b) simde_vaddl_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDL_H) */ +/* :: End simde/arm/neon/addl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addlv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the 
rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ADDLV_H) +#define SIMDE_ARM_NEON_ADDLV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ADDV_H) +#define SIMDE_ARM_NEON_ADDV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vaddv_f32(simde_float32x2_t a) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_f32 + #define vaddv_f32(v) simde_vaddv_f32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vaddv_s8(simde_int8x8_t a) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_s8(a); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_s8 + #define vaddv_s8(v) simde_vaddv_s8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vaddv_s16(simde_int16x4_t a) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_s16(a); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_s16 + #define vaddv_s16(v) simde_vaddv_s16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vaddv_s32(simde_int32x2_t a) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_s32(a); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_s32 + #define vaddv_s32(v) simde_vaddv_s32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vaddv_u8(simde_uint8x8_t a) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_u8(a); + #else + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_u8 + #define vaddv_u8(v) simde_vaddv_u8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vaddv_u16(simde_uint16x4_t a) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_u16(a); + #else + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_u16 + #define vaddv_u16(v) simde_vaddv_u16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vaddv_u32(simde_uint32x2_t a) { + uint32_t r; + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddv_u32(a); + #else + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddv_u32 + #define vaddv_u32(v) simde_vaddv_u32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vaddvq_f32(simde_float32x4_t a) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_f32 + #define vaddvq_f32(v) simde_vaddvq_f32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vaddvq_f64(simde_float64x2_t a) { + simde_float64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_f64 + #define vaddvq_f64(v) simde_vaddvq_f64(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vaddvq_s8(simde_int8x16_t a) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_s8(a); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_s8 + #define vaddvq_s8(v) simde_vaddvq_s8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vaddvq_s16(simde_int16x8_t a) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_s16(a); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_s16 + #define vaddvq_s16(v) simde_vaddvq_s16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vaddvq_s32(simde_int32x4_t a) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_s32(a); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_s32 + #define vaddvq_s32(v) simde_vaddvq_s32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vaddvq_s64(simde_int64x2_t a) { + int64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_s64(a); + #else + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_s64 + 
#define vaddvq_s64(v) simde_vaddvq_s64(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vaddvq_u8(simde_uint8x16_t a) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i a_ = simde_uint8x16_to_m128i(a); + a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); + a_ = _mm_add_epi8(a_, _mm_shuffle_epi32(a_, 0xEE)); + return HEDLEY_STATIC_CAST(uint8_t, _mm_cvtsi128_si32(a_)); + #else + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_u8 + #define vaddvq_u8(v) simde_vaddvq_u8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vaddvq_u16(simde_uint16x8_t a) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(a); + #else + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_u16 + #define vaddvq_u16(v) simde_vaddvq_u16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vaddvq_u32(simde_uint32x4_t a) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u32(a); + #else + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_u32 + #define vaddvq_u32(v) simde_vaddvq_u32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vaddvq_u64(simde_uint64x2_t a) { + uint64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u64(a); + #else + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddvq_u64 + #define vaddvq_u64(v) simde_vaddvq_u64(v) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDV_H) */ +/* :: End simde/arm/neon/addv.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vaddlv_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlv_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddvq_s16(simde_vmovl_s8(a)); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + int16_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlv_s8 + #define vaddlv_s8(a) simde_vaddlv_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vaddlv_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlv_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddvq_s32(simde_vmovl_s16(a)); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + int32_t r = 0; + + 
SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlv_s16 + #define vaddlv_s16(a) simde_vaddlv_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vaddlv_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlv_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddvq_s64(simde_vmovl_s32(a)); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + int64_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlv_s32 + #define vaddlv_s32(a) simde_vaddlv_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vaddlv_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlv_u8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddvq_u16(simde_vmovl_u8(a)); + #else + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + uint16_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlv_u8 + #define vaddlv_u8(a) simde_vaddlv_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vaddlv_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlv_u16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddvq_u32(simde_vmovl_u16(a)); + #else + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + uint32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlv_u16 + #define vaddlv_u16(a) simde_vaddlv_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vaddlv_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlv_u32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddvq_u64(simde_vmovl_u32(a)); + #else + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + uint64_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlv_u32 + #define vaddlv_u32(a) simde_vaddlv_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vaddlvq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlvq_s8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i a_ = simde_int8x16_to_m128i(a); + a_ = _mm_xor_si128(a_, _mm_set1_epi8('\x80')); + a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); + a_ = _mm_add_epi16(a_, _mm_shuffle_epi32(a_, 0xEE)); + return HEDLEY_STATIC_CAST(int16_t, _mm_cvtsi128_si32(a_) - 2048); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + int16_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlvq_s8 + #define vaddlvq_s8(a) simde_vaddlvq_s8(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vaddlvq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlvq_s16(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) && !defined(HEDLEY_MSVC_VERSION) + __m128i a_ = simde_int16x8_to_m128i(a); + a_ = _mm_xor_si128(a_, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, 0x8000))); + a_ = _mm_shuffle_epi8(a_, _mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); + a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); + a_ = _mm_add_epi32(a_, _mm_srli_si128(a_, 7)); + return _mm_cvtsi128_si32(a_) - 262144; + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + int32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlvq_s16 + #define vaddlvq_s16(a) simde_vaddlvq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vaddlvq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlvq_s32(a); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + int64_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlvq_s32 + #define vaddlvq_s32(a) simde_vaddlvq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vaddlvq_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlvq_u8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i a_ = simde_uint8x16_to_m128i(a); + a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); + a_ = _mm_add_epi16(a_, _mm_shuffle_epi32(a_, 0xEE)); + return HEDLEY_STATIC_CAST(uint16_t, _mm_cvtsi128_si32(a_)); + #else + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + uint16_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlvq_u8 + #define vaddlvq_u8(a) simde_vaddlvq_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vaddlvq_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlvq_u16(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i a_ = simde_uint16x8_to_m128i(a); + a_ = _mm_shuffle_epi8(a_, _mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); + a_ = _mm_sad_epu8(a_, _mm_setzero_si128()); + a_ = _mm_add_epi32(a_, _mm_srli_si128(a_, 7)); + return HEDLEY_STATIC_CAST(uint32_t, _mm_cvtsi128_si32(a_)); + #else + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + uint32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlvq_u16 + #define vaddlvq_u16(a) simde_vaddlvq_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vaddlvq_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddlvq_u32(a); + #else + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + uint64_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(+:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r += a_.values[i]; + } + + return r; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddlvq_u32 + #define vaddlvq_u32(a) simde_vaddlvq_u32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDLV_H) */ +/* :: End simde/arm/neon/addlv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addl_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) +#define SIMDE_ARM_NEON_ADDL_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddl_high_s8(a, b); + #else + return simde_vaddq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddl_high_s8 + #define vaddl_high_s8(a, b) simde_vaddl_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddl_high_s16(a, b); + #else + return simde_vaddq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddl_high_s16 + #define vaddl_high_s16(a, b) simde_vaddl_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vaddl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddl_high_s32(a, b); + #else + return simde_vaddq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddl_high_s32 + #define vaddl_high_s32(a, b) simde_vaddl_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t 
+simde_vaddl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddl_high_u8(a, b); + #else + return simde_vaddq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddl_high_u8 + #define vaddl_high_u8(a, b) simde_vaddl_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vaddl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddl_high_u16(a, b); + #else + return simde_vaddq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddl_high_u16 + #define vaddl_high_u16(a, b) simde_vaddl_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vaddl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddl_high_u32(a, b); + #else + return simde_vaddq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddl_high_u32 + #define vaddl_high_u32(a, b) simde_vaddl_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDL_HIGH_H) */ +/* :: End simde/arm/neon/addl_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addw.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ADDW_H) +#define SIMDE_ARM_NEON_ADDW_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddw_s8(simde_int16x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddw_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s16(a, simde_vmovl_s8(b)); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values += a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddw_s8 + #define vaddw_s8(a, b) simde_vaddw_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddw_s16(simde_int32x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddw_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s32(a, simde_vmovl_s16(b)); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values += a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddw_s16 + #define vaddw_s16(a, b) simde_vaddw_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vaddw_s32(simde_int64x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddw_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s64(a, simde_vmovl_s32(b)); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values += a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddw_s32 + #define vaddw_s32(a, b) simde_vaddw_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vaddw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
+ return vaddw_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u16(a, simde_vmovl_u8(b)); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x8_private b_ = simde_uint8x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values += a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddw_u8 + #define vaddw_u8(a, b) simde_vaddw_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vaddw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddw_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u32(a, simde_vmovl_u16(b)); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values += a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddw_u16 + #define vaddw_u16(a, b) simde_vaddw_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vaddw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddw_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u64(a, simde_vmovl_u32(b)); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values += a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vaddw_u32 + #define vaddw_u32(a, b) simde_vaddw_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDW_H) */ +/* :: End simde/arm/neon/addw.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/addw_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) +#define SIMDE_ARM_NEON_ADDW_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s16(a, simde_vmovl_high_s8(b)); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_s8 + #define vaddw_high_s8(a, b) simde_vaddw_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s32(a, simde_vmovl_high_s16(b)); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_s16 + #define vaddw_high_s16(a, b) simde_vaddw_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vaddw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_s64(a, simde_vmovl_high_s32(b)); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_s32 + #define 
vaddw_high_s32(a, b) simde_vaddw_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vaddw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u16(a, simde_vmovl_high_u8(b)); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_u8 + #define vaddw_high_u8(a, b) simde_vaddw_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vaddw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u32(a, simde_vmovl_high_u16(b)); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x8_private b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_u16 + #define vaddw_high_u16(a, b) simde_vaddw_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vaddw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddw_high_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vaddq_u64(a, simde_vmovl_high_u32(b)); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x4_private b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddw_high_u32 + #define vaddw_high_u32(a, b) simde_vaddw_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDW_HIGH_H) */ +/* :: End simde/arm/neon/addw_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/and.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_AND_H) +#define SIMDE_ARM_NEON_AND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vand_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_s8 + #define vand_s8(a, b) simde_vand_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vand_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_s16 + #define vand_s16(a, b) simde_vand_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vand_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_s32 + #define vand_s32(a, b) simde_vand_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vand_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_s64 + #define vand_s64(a, b) simde_vand_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vand_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_u8 + #define vand_u8(a, b) simde_vand_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vand_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_u16 + #define vand_u16(a, b) simde_vand_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vand_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_u32 + #define vand_u32(a, b) simde_vand_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vand_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vand_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_and_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vand_u64 + 
#define vand_u64(a, b) simde_vand_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vandq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_s8 + #define vandq_s8(a, b) simde_vandq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vandq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_s16 + #define vandq_s16(a, b) simde_vandq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vandq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_s32 + #define vandq_s32(a, b) simde_vandq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vandq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_and(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + 
#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_s64 + #define vandq_s64(a, b) simde_vandq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vandq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_u8 + #define vandq_u8(a, b) simde_vandq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vandq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_u16 + #define vandq_u16(a, b) simde_vandq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vandq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_u32 + #define vandq_u32(a, b) simde_vandq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vandq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_and(a, b); + #else + simde_uint64x2_private + r_, + a_ = 
simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vandq_u64 + #define vandq_u64(a, b) simde_vandq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_AND_H) */ +/* :: End simde/arm/neon/and.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/bcax.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Atharva Nimbalkar + */ + +#if !defined(SIMDE_ARM_NEON_BCAX_H) +#define SIMDE_ARM_NEON_BCAX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/eor.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_EOR_H) +#define SIMDE_ARM_NEON_EOR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_veor_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_s8 + #define veor_s8(a, b) simde_veor_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_veor_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_s16 + #define veor_s16(a, b) simde_veor_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_veor_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_s32 + #define veor_s32(a, b) simde_veor_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_veor_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } 
+ #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_s64 + #define veor_s64(a, b) simde_veor_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_veor_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_u8 + #define veor_u8(a, b) simde_veor_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_veor_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_u16 + #define veor_u16(a, b) simde_veor_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_veor_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_u32 + #define veor_u32(a, b) simde_veor_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_veor_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veor_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_xor_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veor_u64 + #define veor_u64(a, b) simde_veor_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_veorq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_s8(a, b); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_xor(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_s8 + #define veorq_s8(a, b) simde_veorq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_veorq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_xor(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_s16 + #define veorq_s16(a, b) simde_veorq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_veorq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_xor(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_s32 + #define veorq_s32(a, b) simde_veorq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_veorq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_xor(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_s64 + #define veorq_s64(a, b) simde_veorq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_veorq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_xor(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_u8 + #define veorq_u8(a, b) simde_veorq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_veorq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_xor(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_u16 + #define veorq_u16(a, b) simde_veorq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_veorq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_xor(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_u32 + #define veorq_u32(a, b) simde_veorq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_veorq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return veorq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_xor(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_xor_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = 
wasm_v128_xor(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values ^ b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] ^ b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef veorq_u64 + #define veorq_u64(a, b) simde_veorq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_EOR_H) */ +/* :: End simde/arm/neon/eor.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/bic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_BIC_H) +#define SIMDE_ARM_NEON_BIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/dup_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_DUP_N_H) +#define SIMDE_ARM_NEON_DUP_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vdup_n_f16(simde_float16 value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vdup_n_f16(value); + #else + simde_float16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + + return simde_float16x4_from_private(r_); + #endif +} +#define simde_vmov_n_f16 simde_vdup_n_f16 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_f16 + #define vdup_n_f16(value) simde_vdup_n_f16((value)) + #undef vmov_n_f16 + #define vmov_n_f16(value) simde_vmov_n_f16((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vdup_n_f32(float value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_f32(value); + #else + simde_float32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + + return simde_float32x2_from_private(r_); + #endif +} +#define simde_vmov_n_f32 simde_vdup_n_f32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_f32 + #define vdup_n_f32(value) simde_vdup_n_f32((value)) + #undef vmov_n_f32 + #define vmov_n_f32(value) simde_vmov_n_f32((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vdup_n_f64(double value) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vdup_n_f64(value); + #else + simde_float64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + + return simde_float64x1_from_private(r_); + #endif +} +#define simde_vmov_n_f64 simde_vdup_n_f64 +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_n_f64 + #define vdup_n_f64(value) simde_vdup_n_f64((value)) + #undef vmov_n_f64 + #define vmov_n_f64(value) simde_vmov_n_f64((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vdup_n_s8(int8_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_s8(value); + #else + simde_int8x8_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_set1_pi8(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#define simde_vmov_n_s8 simde_vdup_n_s8 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_s8 + #define vdup_n_s8(value) simde_vdup_n_s8((value)) + #undef vmov_n_s8 + #define vmov_n_s8(value) simde_vmov_n_s8((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vdup_n_s16(int16_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_s16(value); + #else + simde_int16x4_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_set1_pi16(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#define simde_vmov_n_s16 simde_vdup_n_s16 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_s16 
+ #define vdup_n_s16(value) simde_vdup_n_s16((value)) + #undef vmov_n_s16 + #define vmov_n_s16(value) simde_vmov_n_s16((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vdup_n_s32(int32_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_s32(value); + #else + simde_int32x2_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_set1_pi32(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#define simde_vmov_n_s32 simde_vdup_n_s32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_s32 + #define vdup_n_s32(value) simde_vdup_n_s32((value)) + #undef vmov_n_s32 + #define vmov_n_s32(value) simde_vmov_n_s32((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vdup_n_s64(int64_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_s64(value); + #else + simde_int64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + + return simde_int64x1_from_private(r_); + #endif +} +#define simde_vmov_n_s64 simde_vdup_n_s64 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_s64 + #define vdup_n_s64(value) simde_vdup_n_s64((value)) + #undef vmov_n_s64 + #define vmov_n_s64(value) simde_vmov_n_s64((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vdup_n_u8(uint8_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_u8(value); + #else + simde_uint8x8_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_set1_pi8(HEDLEY_STATIC_CAST(int8_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#define simde_vmov_n_u8 simde_vdup_n_u8 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_u8 + #define vdup_n_u8(value) simde_vdup_n_u8((value)) + #undef vmov_n_u8 + #define vmov_n_u8(value) simde_vmov_n_u8((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vdup_n_u16(uint16_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_u16(value); + #else + simde_uint16x4_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#define simde_vmov_n_u16 simde_vdup_n_u16 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_u16 + #define vdup_n_u16(value) simde_vdup_n_u16((value)) + #undef vmov_n_u16 + #define vmov_n_u16(value) simde_vmov_n_u16((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vdup_n_u32(uint32_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_u32(value); + #else + simde_uint32x2_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#define simde_vmov_n_u32 simde_vdup_n_u32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_u32 + 
#define vdup_n_u32(value) simde_vdup_n_u32((value)) + #undef vmov_n_u32 + #define vmov_n_u32(value) simde_vmov_n_u32((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vdup_n_u64(uint64_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdup_n_u64(value); + #else + simde_uint64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + + return simde_uint64x1_from_private(r_); + #endif +} +#define simde_vmov_n_u64 simde_vdup_n_u64 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_n_u64 + #define vdup_n_u64(value) simde_vdup_n_u64((value)) + #undef vmov_n_u64 + #define vmov_n_u64(value) simde_vmov_n_u64((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vdupq_n_f16(simde_float16 value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vdupq_n_f16(value); + #else + simde_float16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + + return simde_float16x8_from_private(r_); + #endif +} +#define simde_vmovq_n_f32 simde_vdupq_n_f32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_f16 + #define vdupq_n_f16(value) simde_vdupq_n_f16((value)) + #undef vmovq_n_f16 + #define vmovq_n_f16(value) simde_vmovq_n_f16((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vdupq_n_f32(float value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) value; + return vec_splats(value); + #else + simde_float32x4_private r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_set1_ps(value); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_splat(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#define simde_vmovq_n_f32 simde_vdupq_n_f32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_f32 + #define vdupq_n_f32(value) simde_vdupq_n_f32((value)) + #undef vmovq_n_f32 + #define vmovq_n_f32(value) simde_vmovq_n_f32((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vdupq_n_f64(double value) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vdupq_n_f64(value); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + (void) value; + return vec_splats(value); + #else + simde_float64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_set1_pd(value); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_splat(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#define simde_vmovq_n_f64 simde_vdupq_n_f64 +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_f64 + #define vdupq_n_f64(value) simde_vdupq_n_f64((value)) + #undef vmovq_n_f64 + #define vmovq_n_f64(value) simde_vmovq_n_f64((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vdupq_n_s8(int8_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_s8(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(value); + #else + simde_int8x16_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi8(value); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_splat(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#define simde_vmovq_n_s8 simde_vdupq_n_s8 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_s8 + #define vdupq_n_s8(value) simde_vdupq_n_s8((value)) + #undef vmovq_n_s8 + #define vmovq_n_s8(value) simde_vmovq_n_s8((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vdupq_n_s16(int16_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_s16(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(value); + #else + simde_int16x8_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi16(value); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_splat(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#define simde_vmovq_n_s16 simde_vdupq_n_s16 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_s16 + #define vdupq_n_s16(value) simde_vdupq_n_s16((value)) + #undef vmovq_n_s16 + #define vmovq_n_s16(value) simde_vmovq_n_s16((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vdupq_n_s32(int32_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_s32(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(value); + #else + simde_int32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi32(value); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_splat(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#define simde_vmovq_n_s32 simde_vdupq_n_s32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_s32 + #define vdupq_n_s32(value) simde_vdupq_n_s32((value)) + #undef vmovq_n_s32 + #define vmovq_n_s32(value) simde_vmovq_n_s32((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vdupq_n_s64(int64_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_s64(value); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(HEDLEY_STATIC_CAST(signed long long, value)); + #else + simde_int64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + r_.m128i = _mm_set1_epi64x(value); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_splat(value); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#define simde_vmovq_n_s64 simde_vdupq_n_s64 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_s64 + #define vdupq_n_s64(value) simde_vdupq_n_s64((value)) + #undef vmovq_n_s64 + #define vmovq_n_s64(value) 
simde_vmovq_n_s64((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vdupq_n_u8(uint8_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_u8(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(value); + #else + simde_uint8x16_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #elif defined (SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#define simde_vmovq_n_u8 simde_vdupq_n_u8 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_u8 + #define vdupq_n_u8(value) simde_vdupq_n_u8((value)) + #undef vmovq_n_u8 + #define vmovq_n_u8(value) simde_vmovq_n_u8((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vdupq_n_u16(uint16_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_u16(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(value); + #else + simde_uint16x8_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #elif defined (SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#define simde_vmovq_n_u16 simde_vdupq_n_u16 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_u16 + #define vdupq_n_u16(value) simde_vdupq_n_u16((value)) + #undef vmovq_n_u16 + #define vmovq_n_u16(value) simde_vmovq_n_u16((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vdupq_n_u32(uint32_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_u32(value); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(value); + #else + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #elif defined (SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#define simde_vmovq_n_u32 simde_vdupq_n_u32 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_u32 + #define vdupq_n_u32(value) simde_vdupq_n_u32((value)) + #undef vmovq_n_u32 + #define vmovq_n_u32(value) simde_vmovq_n_u32((value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vdupq_n_u64(uint64_t value) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_u64(value); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value)); + #else + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + r_.m128i = _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #elif defined (SIMDE_WASM_SIMD128_NATIVE) + 
r_.v128 = wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, value)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = value; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#define simde_vmovq_n_u64 simde_vdupq_n_u64 +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_n_u64 + #define vdupq_n_u64(value) simde_vdupq_n_u64((value)) + #undef vmovq_n_u64 + #define vmovq_n_u64(value) simde_vmovq_n_u64((value)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_DUP_N_H) */ +/* :: End simde/arm/neon/dup_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vbic_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_s8(a, b); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_s8 + #define vbic_s8(a, b) simde_vbic_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vbic_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_s16(a, b); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_s16 + #define vbic_s16(a, b) simde_vbic_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vbic_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_s32(a, b); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_s32 + #define vbic_s32(a, b) simde_vbic_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vbic_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_s64(a, b); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_s64 + 
#define vbic_s64(a, b) simde_vbic_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vbic_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_u8(a, b); + #else + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_u8 + #define vbic_u8(a, b) simde_vbic_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vbic_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_u16(a, b); + #else + simde_uint16x4_private + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_u16 + #define vbic_u16(a, b) simde_vbic_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vbic_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_u32(a, b); + #else + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_u32 + #define vbic_u32(a, b) simde_vbic_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vbic_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbic_u64(a, b); + #else + simde_uint64x1_private + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b), + r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(b_.m64, a_.m64); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbic_u64 + #define vbic_u64(a, b) simde_vbic_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vbicq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_andc(a, b); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return 
simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_s8 + #define vbicq_s8(a, b) simde_vbicq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vbicq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_andc(a, b); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_s16 + #define vbicq_s16(a, b) simde_vbicq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vbicq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_andc(a, b); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_s32 + #define vbicq_s32(a, b) simde_vbicq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vbicq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_andc(a, b); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_s64 + #define vbicq_s64(a, b) simde_vbicq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vbicq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_andc(a, b); + #else + simde_uint8x16_private + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_u8 + #define vbicq_u8(a, b) simde_vbicq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vbicq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_andc(a, b); + #else + simde_uint16x8_private + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_u16 + #define vbicq_u16(a, b) simde_vbicq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vbicq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_andc(a, b); + #else + simde_uint32x4_private + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_u32 + #define vbicq_u32(a, b) simde_vbicq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vbicq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbicq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_andc(a, b); + #else + simde_uint64x2_private + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(b_.m128i, a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_andnot(a_.v128, b_.v128); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & ~b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbicq_u64 + #define vbicq_u64(a, b) simde_vbicq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_BIC_H) */ +/* :: End simde/arm/neon/bic.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vbcaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_u8(a, b, c); + #else + return simde_veorq_u8(a, simde_vbicq_u8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_u8 + #define vbcaxq_u8(a, b, c) simde_vbcaxq_u8(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t 
+simde_vbcaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_u16(a, b, c); + #else + return simde_veorq_u16(a, simde_vbicq_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_u16 + #define vbcaxq_u16(a, b, c) simde_vbcaxq_u16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vbcaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_u32(a, b, c); + #else + return simde_veorq_u32(a, simde_vbicq_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_u32 + #define vbcaxq_u32(a, b, c) simde_vbcaxq_u32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vbcaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_u64(a, b, c); + #else + return simde_veorq_u64(a, simde_vbicq_u64(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_u64 + #define vbcaxq_u64(a, b, c) simde_vbcaxq_u64(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vbcaxq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_s8(a, b, c); + #else + return simde_veorq_s8(a, simde_vbicq_s8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_s8 + #define vbcaxq_s8(a, b, c) simde_vbcaxq_s8(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vbcaxq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_s16(a, b, c); + #else + return simde_veorq_s16(a,simde_vbicq_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_s16 + #define vbcaxq_s16(a, b, c) simde_vbcaxq_s16(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vbcaxq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_s32(a, b, c); + #else + return simde_veorq_s32(a, simde_vbicq_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_s32 + #define vbcaxq_s32(a, b, c) simde_vbcaxq_s32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vbcaxq_s64(simde_int64x2_t a, simde_int64x2_t b, simde_int64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + return vbcaxq_s64(a, b, c); + #else + return simde_veorq_s64(a, simde_vbicq_s64(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vbcaxq_s64 + #define vbcaxq_s64(a, b, c) simde_vbcaxq_s64(a, b, c) +#endif 
+ +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_BCAX_H) */ +/* :: End simde/arm/neon/bcax.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/bsl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_BSL_H) +#define SIMDE_ARM_NEON_BSL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vbsl_f16(simde_uint16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vbsl_f16(a, b, c); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(b)), + c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_f16(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_f16_u16(simde_uint16x4_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_f16 + #define vbsl_f16(a, b, c) simde_vbsl_f16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vbsl_f32(simde_uint32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_f32(a, b, c); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(b)), + c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_f32(c)); + + #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_f32_u32(simde_uint32x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_f32 + #define vbsl_f32(a, b, c) simde_vbsl_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vbsl_f64(simde_uint64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vbsl_f64(a, b, c); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(b)), + c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_f64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_f64_u64(simde_uint64x1_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vbsl_f64 + #define vbsl_f64(a, b, c) simde_vbsl_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vbsl_s8(simde_uint8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_s8(a, b, c); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(b)), + c_ = simde_uint8x8_to_private(simde_vreinterpret_u8_s8(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_s8_u8(simde_uint8x8_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_s8 + #define vbsl_s8(a, b, c) simde_vbsl_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vbsl_s16(simde_uint16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_s16(a, b, c); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(b)), + c_ = simde_uint16x4_to_private(simde_vreinterpret_u16_s16(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_s16_u16(simde_uint16x4_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_s16 + #define vbsl_s16(a, b, c) simde_vbsl_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vbsl_s32(simde_uint32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_s32(a, b, c); + #else + simde_uint32x2_private + r_, + a_ = 
simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(b)), + c_ = simde_uint32x2_to_private(simde_vreinterpret_u32_s32(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_s32_u32(simde_uint32x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_s32 + #define vbsl_s32(a, b, c) simde_vbsl_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vbsl_s64(simde_uint64x1_t a, simde_int64x1_t b, simde_int64x1_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_s64(a, b, c); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(b)), + c_ = simde_uint64x1_to_private(simde_vreinterpret_u64_s64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpret_s64_u64(simde_uint64x1_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_s64 + #define vbsl_s64(a, b, c) simde_vbsl_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vbsl_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_u8(a, b, c); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b), + c_ = simde_uint8x8_to_private(c); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_u8 + #define vbsl_u8(a, b, c) simde_vbsl_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vbsl_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_u16(a, b, c); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b), + c_ = simde_uint16x4_to_private(c); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_u16 + #define vbsl_u16(a, b, c) simde_vbsl_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vbsl_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_u32(a, b, c); + #else + simde_uint32x2_private + r_, + a_ = 
simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b), + c_ = simde_uint32x2_to_private(c); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_u32 + #define vbsl_u32(a, b, c) simde_vbsl_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vbsl_u64(simde_uint64x1_t a, simde_uint64x1_t b, simde_uint64x1_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbsl_u64(a, b, c); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b), + c_ = simde_uint64x1_to_private(c); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbsl_u64 + #define vbsl_u64(a, b, c) simde_vbsl_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vbslq_f16(simde_uint16x8_t a, simde_float16x8_t b, simde_float16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vbslq_f16(a, b, c); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(b)), + c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_f16(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpretq_f16_u16(simde_uint16x8_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_f16 + #define vbslq_f16(a, b, c) simde_vbslq_f16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vbslq_f32(simde_uint32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_f32(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(b)), + c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_f32(c)); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return 
simde_vreinterpretq_f32_u32(simde_uint32x4_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_f32 + #define vbslq_f32(a, b, c) simde_vbslq_f32((a), (b), (c)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vbslq_f64(simde_uint64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vbslq_f64(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(b)), + c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_f64(c)); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpretq_f64_u64(simde_uint64x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vbslq_f64 + #define vbslq_f64(a, b, c) simde_vbslq_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vbslq_s8(simde_uint8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_s8(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(b)), + c_ = simde_uint8x16_to_private(simde_vreinterpretq_u8_s8(c)); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpretq_s8_u8(simde_uint8x16_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_s8 + #define vbslq_s8(a, b, c) simde_vbslq_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vbslq_s16(simde_uint16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_s16(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(b)), + c_ = simde_uint16x8_to_private(simde_vreinterpretq_u16_s16(c)); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpretq_s16_u16(simde_uint16x8_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_s16 + #define vbslq_s16(a, b, c) simde_vbslq_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vbslq_s32(simde_uint32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_s32(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(b)), + c_ = simde_uint32x4_to_private(simde_vreinterpretq_u32_s32(c)); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpretq_s32_u32(simde_uint32x4_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_s32 + #define vbslq_s32(a, b, c) simde_vbslq_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vbslq_s64(simde_uint64x2_t a, simde_int64x2_t b, simde_int64x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_s64(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return + simde_vreinterpretq_s64_s32( + simde_vbslq_s32( + simde_vreinterpretq_u32_u64(a), + simde_vreinterpretq_s32_s64(b), + simde_vreinterpretq_s32_s64(c) + ) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), c), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), a)); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(b)), + c_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(c)); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_vreinterpretq_s64_u64(simde_uint64x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_s64 + #define vbslq_s64(a, b, c) simde_vbslq_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_uint8x16_t +simde_vbslq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_u8(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b), + c_ = simde_uint8x16_to_private(c); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_u8 + #define vbslq_u8(a, b, c) simde_vbslq_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vbslq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_u16(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b), + c_ = simde_uint16x8_to_private(c); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_u16 + #define vbslq_u16(a, b, c) simde_vbslq_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vbslq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_u32(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_sel(c, b, a); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b), + c_ = simde_uint32x4_to_private(c); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_u32 + #define vbslq_u32(a, b, c) simde_vbslq_u32((a), (b), (c)) +#endif + 
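Each of the vbsl/vbslq fallbacks above leans on the same bitwise-select identity: r = c ^ ((b ^ c) & a) takes each bit from b where the mask a has a 1 and from c where it has a 0, which is exactly the (b & a) | (c & ~a) form spelled out in the scalar loops; the AVX-512 path reaches the same result through _mm_ternarylogic_epi32/_epi64 with the immediate 0xca. A small self-contained check of that equivalence (bsl_u32 is an illustrative name, not part of SIMDE):

#include <assert.h>
#include <stdint.h>

/* Bitwise select: bits of b where the mask a is set, bits of c elsewhere. */
static uint32_t bsl_u32(uint32_t a, uint32_t b, uint32_t c) {
    return c ^ ((b ^ c) & a);
}

int main(void) {
    uint32_t a = 0xF0F0F0F0u, b = 0x12345678u, c = 0x9ABCDEF0u;
    /* Both forms evaluate to 0x1A3C5E70 for these inputs. */
    assert(bsl_u32(a, b, c) == ((b & a) | (c & ~a)));
    return 0;
}

Because the selection happens purely on bits, every simde_vbsl*_ variant takes the mask as an unsigned vector and simply reinterprets b and c to unsigned before applying the same formula.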
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vbslq_u64(simde_uint64x2_t a, simde_uint64x2_t b, simde_uint64x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vbslq_u64(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return + simde_vreinterpretq_u64_u32( + simde_vbslq_u32( + simde_vreinterpretq_u32_u64(a), + simde_vreinterpretq_u32_u64(b), + simde_vreinterpretq_u32_u64(c) + ) + ); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b), + c_ = simde_uint64x2_to_private(c); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_bitselect(b_.v128, c_.v128, a_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, c_.m128i, 0xca); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = c_.values ^ ((b_.values ^ c_.values) & a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] & a_.values[i]) | (c_.values[i] & ~a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vbslq_u64 + #define vbslq_u64(a, b, c) simde_vbslq_u64((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_BSL_H) */ +/* :: End simde/arm/neon/bsl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cage.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + * 2021 Atharva Nimbalkar + */ + +#if !defined(SIMDE_ARM_NEON_CAGE_H) +#define SIMDE_ARM_NEON_CAGE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cge.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CGE_H) +#define SIMDE_ARM_NEON_CGE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcgeh_f16(simde_float16_t a, simde_float16_t b){ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return HEDLEY_STATIC_CAST(uint16_t, vcgeh_f16(a, b)); + #else + return (simde_float16_to_float32(a) >= simde_float16_to_float32(b)) ? 
UINT16_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgeh_f16 + #define vcgeh_f16(a, b) simde_vcgeh_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgeq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcgeq_f16(a, b); + #else + simde_float16x8_private + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcgeq_f16 + #define vcgeq_f16(a, b) simde_vcgeq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgeq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b)); + #else + simde_float32x4_private + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_cmpge_ps(a_.m128, b_.m128)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_f32 + #define vcgeq_f32(a, b) simde_vcgeq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgeq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgeq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b)); + #else + simde_float64x2_private + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castpd_si128(_mm_cmpge_pd(a_.m128d, b_.m128d)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT64_MAX : 0; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgeq_f64 + #define vcgeq_f64(a, b) simde_vcgeq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcgeq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b)); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + simde_uint8x16_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(a_.m128i, b_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_s8 + #define vcgeq_s8(a, b) simde_vcgeq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgeq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b)); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + simde_uint16x8_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(a_.m128i, b_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_s16 + #define vcgeq_s16(a, b) simde_vcgeq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgeq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b)); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, b_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_s32 + #define vcgeq_s32(a, b) simde_vcgeq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgeq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgeq_s64(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(a, b), 63)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b)); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(a_.m128i, b_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgeq_s64 + #define vcgeq_s64(a, b) simde_vcgeq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcgeq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a, b)); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = + _mm_cmpeq_epi8( + _mm_min_epu8(b_.m128i, a_.m128i), + b_.m128i + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_u8 + #define vcgeq_u8(a, b) simde_vcgeq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgeq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a, b)); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_cmpeq_epi16( + _mm_min_epu16(b_.m128i, a_.m128i), + b_.m128i + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i sign_bits = _mm_set1_epi16(INT16_MIN); + r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_u16 + #define vcgeq_u16(a, b) simde_vcgeq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgeq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgeq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a, b)); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_cmpeq_epi32( + _mm_min_epu32(b_.m128i, a_.m128i), + b_.m128i + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i sign_bits = _mm_set1_epi32(INT32_MIN); + r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi32(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_ge(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgeq_u32 + #define vcgeq_u32(a, b) simde_vcgeq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgeq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgeq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a, b)); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = + _mm_cmpeq_epi64( + _mm_min_epu64(b_.m128i, a_.m128i), + b_.m128i + ); + #elif defined(SIMDE_X86_SSE4_2_NATIVE) + __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); + r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)), _mm_cmpeq_epi64(a_.m128i, b_.m128i)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgeq_u64 + #define vcgeq_u64(a, b) simde_vcgeq_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcge_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcge_f16(a, b); + #else + simde_float16x4_private + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgeh_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcge_f16 + #define vcge_f16(a, b) simde_vcge_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcge_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_f32(a, b); + #else + simde_float32x2_private + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_f32 + #define vcge_f32(a, b) simde_vcge_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcge_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcge_f64(a, b); + #else + simde_float64x1_private + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcge_f64 + #define vcge_f64(a, b) simde_vcge_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcge_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_s8(a, b); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + simde_uint8x8_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(a_.m64, b_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_s8 + #define vcge_s8(a, b) simde_vcge_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcge_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_s16(a, b); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + simde_uint16x4_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(a_.m64, b_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_s16 + #define vcge_s16(a, b) simde_vcge_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcge_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_s32(a, b); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(a_.m64, b_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_s32 + #define vcge_s32(a, b) simde_vcge_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcge_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcge_s64(a, b); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcge_s64 + #define vcge_s64(a, b) simde_vcge_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcge_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi8(INT8_MIN); + r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_u8 + #define vcge_u8(a, b) simde_vcge_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcge_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi16(INT16_MIN); + r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_u16 + #define vcge_u16(a, b) simde_vcge_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcge_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcge_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi32(INT32_MIN); + r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bits), _mm_xor_si64(b_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcge_u32 + #define vcge_u32(a, b) simde_vcge_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcge_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcge_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= b_.values[i]) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcge_u64 + #define vcge_u64(a, b) simde_vcge_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcged_f64(simde_float64_t a, simde_float64_t b){ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcged_f64(a, b)); + #else + return (a >= b) ? 
UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcged_f64 + #define vcged_f64(a, b) simde_vcged_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcged_s64(int64_t a, int64_t b){ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcged_s64(a, b)); + #else + return (a >= b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcged_s64 + #define vcged_s64(a, b) simde_vcged_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcged_u64(uint64_t a, uint64_t b){ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcged_u64(a, b)); + #else + return (a >= b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcged_u64 + #define vcged_u64(a, b) simde_vcged_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcges_f32(simde_float32_t a, simde_float32_t b){ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vcges_f32(a, b)); + #else + return (a >= b) ? UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcges_f32 + #define vcges_f32(a, b) simde_vcges_f32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CGE_H) */ +/* :: End simde/arm/neon/cge.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcageh_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcageh_f16(a, b); + #else + simde_float32_t a_ = simde_float16_to_float32(a); + simde_float32_t b_ = simde_float16_to_float32(b); + return (simde_math_fabsf(a_) >= simde_math_fabsf(b_)) ? UINT16_MAX : UINT16_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcageh_f16 + #define vcageh_f16(a, b) simde_vcageh_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcages_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcages_f32(a, b); + #else + return (simde_math_fabsf(a) >= simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcages_f32 + #define vcages_f32(a, b) simde_vcages_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcaged_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcaged_f64(a, b); + #else + return (simde_math_fabs(a) >= simde_math_fabs(b)) ? 
~UINT64_C(0) : UINT64_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcaged_f64 + #define vcaged_f64(a, b) simde_vcaged_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcage_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcage_f16(a, b); + #else + simde_float16x4_private + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcage_f16 + #define vcage_f16(a, b) simde_vcage_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcage_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcage_f32(a, b); + #else + return simde_vcge_f32(simde_vabs_f32(a), simde_vabs_f32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcage_f32 + #define vcage_f32(a, b) simde_vcage_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcage_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcage_f64(a, b); + #else + return simde_vcge_f64(simde_vabs_f64(a), simde_vabs_f64(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcage_f64 + #define vcage_f64(a, b) simde_vcage_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcageq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcageq_f16(a, b); + #else + simde_float16x8_private + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcageh_f16(a_.values[i], b_.values[i]); + } + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcageq_f16 + #define vcageq_f16(a, b) simde_vcageq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcageq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcageq_f32(a, b); + #else + return simde_vcgeq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcageq_f32 + #define vcageq_f32(a, b) simde_vcageq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcageq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcageq_f64(a, b); + #else + return simde_vcgeq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcageq_f64 + #define vcageq_f64(a, b) simde_vcageq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CAGE_H) */ +/* :: End simde/arm/neon/cage.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cagt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * 
obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_CAGT_H) +#define SIMDE_ARM_NEON_CAGT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cgt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CGT_H) +#define SIMDE_ARM_NEON_CGT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/get_low.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_GET_LOW_H) +#define SIMDE_ARM_NEON_GET_LOW_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vget_low_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_f32(a); + #else + simde_float32x2_private r_; + simde_float32x4_private a_ = simde_float32x4_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_f32 + #define vget_low_f32(a) simde_vget_low_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vget_low_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vget_low_f64(a); + #else + simde_float64x1_private r_; + simde_float64x2_private a_ = simde_float64x2_to_private(a); + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vget_low_f64 + #define vget_low_f64(a) simde_vget_low_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vget_low_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_s8(a); + #else + simde_int8x8_private r_; + simde_int8x16_private a_ = 
simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_s8 + #define vget_low_s8(a) simde_vget_low_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vget_low_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_s16(a); + #else + simde_int16x4_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_s16 + #define vget_low_s16(a) simde_vget_low_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vget_low_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_s32(a); + #else + simde_int32x2_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_s32 + #define vget_low_s32(a) simde_vget_low_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vget_low_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_s64(a); + #else + simde_int64x1_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_s64 + #define vget_low_s64(a) simde_vget_low_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vget_low_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_u8(a); + #else + simde_uint8x8_private r_; + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if 
HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3, 4, 5, 6, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_u8 + #define vget_low_u8(a) simde_vget_low_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vget_low_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_u16(a); + #else + simde_uint16x4_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_u16 + #define vget_low_u16(a) simde_vget_low_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vget_low_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_u32(a); + #else + simde_uint32x2_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_u32 + #define vget_low_u32(a) simde_vget_low_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vget_low_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vget_low_u64(a); + #else + simde_uint64x1_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_movepi64_pi64(a_.m128i); + #else + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.values = __builtin_shufflevector(a_.values, a_.values, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i]; + } + #endif + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_low_u64 + #define vget_low_u64(a) simde_vget_low_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_GET_LOW_H) */ +/* :: End simde/arm/neon/get_low.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgtd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgtd_f64(a, b)); + #else + return (a > b) ? 
UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtd_f64 + #define vcgtd_f64(a, b) simde_vcgtd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgtd_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgtd_s64(a, b)); + #else + return (a > b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtd_s64 + #define vcgtd_s64(a, b) simde_vcgtd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgtd_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgtd_u64(a, b)); + #else + return (a > b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtd_u64 + #define vcgtd_u64(a, b) simde_vcgtd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcgts_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vcgts_f32(a, b)); + #else + return (a > b) ? UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgts_f32 + #define vcgts_f32(a, b) simde_vcgts_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgtq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b)); + #else + simde_float32x4_private + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_cmpgt_ps(a_.m128, b_.m128)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgts_f32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_f32 + #define vcgtq_f32(a, b) simde_vcgtq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgtq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b)); + #else + simde_float64x2_private + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castpd_si128(_mm_cmpgt_pd(a_.m128d, b_.m128d)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtd_f64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtq_f64 + #define vcgtq_f64(a, 
b) simde_vcgtq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcgtq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b)); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + simde_uint8x16_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpgt_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_s8 + #define vcgtq_s8(a, b) simde_vcgtq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgtq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b)); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + simde_uint16x8_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpgt_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_s16 + #define vcgtq_s16(a, b) simde_vcgtq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgtq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b)); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpgt_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_s32 + #define vcgtq_s32(a, b) simde_vcgtq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgtq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtq_s64(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(b, a), 63)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b)); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = _mm_cmpgt_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a_.m128i, b_.m128i), _mm_sub_epi64(b_.m128i, a_.m128i)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a_.m128i, b_.m128i)); + r_.m128i = _mm_shuffle_epi32(r, _MM_SHUFFLE(3,3,1,1)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtq_s64 + #define vcgtq_s64(a, b) simde_vcgtq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcgtq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpgt(a, b)); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_subs_epu8(a_.m128i, b_.m128i); + r_.m128i = _mm_adds_epu8(tmp, _mm_sub_epi8(_mm_setzero_si128(), tmp)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_u8 + #define vcgtq_u8(a, b) simde_vcgtq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgtq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpgt(a, b)); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_subs_epu16(a_.m128i, b_.m128i); + r_.m128i = _mm_adds_epu16(tmp, _mm_sub_epi16(_mm_setzero_si128(), tmp)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_u16 + #define vcgtq_u16(a, b) simde_vcgtq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgtq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgtq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpgt(a, b)); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = + _mm_xor_si128( + _mm_cmpgt_epi32(a_.m128i, b_.m128i), + _mm_srai_epi32(_mm_xor_si128(a_.m128i, b_.m128i), 31) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_gt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgtq_u32 + #define vcgtq_u32(a, b) simde_vcgtq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgtq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a, b)); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + __m128i sign_bit = _mm_set1_epi64x(INT64_MIN); + r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(a_.m128i, sign_bit), _mm_xor_si128(b_.m128i, sign_bit)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtq_u64 + #define vcgtq_u64(a, b) simde_vcgtq_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgt_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_f32(a, b); + #else + simde_float32x2_private + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgts_f32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_f32 + #define vcgt_f32(a, b) simde_vcgt_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgt_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgt_f64(a, b); + #else + simde_float64x1_private + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtd_f64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgt_f64 + #define vcgt_f64(a, b) simde_vcgt_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcgt_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_s8(a, b); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + simde_uint8x8_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpgt_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_s8 + #define vcgt_s8(a, b) simde_vcgt_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcgt_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_s16(a, b); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + simde_uint16x4_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpgt_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_s16 + #define vcgt_s16(a, b) simde_vcgt_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgt_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_s32(a, b); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpgt_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_s32 + #define vcgt_s32(a, b) simde_vcgt_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgt_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgt_s64(a, b); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgt_s64 + #define vcgt_s64(a, b) simde_vcgt_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcgt_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bit = _mm_set1_pi8(INT8_MIN); + r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_u8 + #define vcgt_u8(a, b) simde_vcgt_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcgt_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bit = _mm_set1_pi16(INT16_MIN); + r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_u16 + #define vcgt_u16(a, b) simde_vcgt_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgt_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcgt_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bit = _mm_set1_pi32(INT32_MIN); + r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(a_.m64, sign_bit), _mm_xor_si64(b_.m64, sign_bit)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcgt_u32 + #define vcgt_u32(a, b) simde_vcgt_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgt_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgt_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgt_u64 + #define vcgt_u64(a, b) simde_vcgt_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CGT_H) */ +/* :: End simde/arm/neon/cgt.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcagth_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcagth_f16(a, b); + #else + simde_float32_t + af = simde_float16_to_float32(a), + bf = simde_float16_to_float32(b); + return (simde_math_fabsf(af) > simde_math_fabsf(bf)) ? UINT16_MAX : UINT16_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcagth_f16 + #define vcagth_f16(a, b) simde_vcagth_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcagts_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcagts_f32(a, b); + #else + return (simde_math_fabsf(a) > simde_math_fabsf(b)) ? ~UINT32_C(0) : UINT32_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcagts_f32 + #define vcagts_f32(a, b) simde_vcagts_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcagtd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcagtd_f64(a, b); + #else + return (simde_math_fabs(a) > simde_math_fabs(b)) ? 
~UINT64_C(0) : UINT64_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcagtd_f64 + #define vcagtd_f64(a, b) simde_vcagtd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcagt_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcagt_f16(a, b); + #else + simde_uint16x4_private r_; + simde_float16x4_private + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcagth_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcagt_f16 + #define vcagt_f16(a, b) simde_vcagt_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcagt_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcagt_f32(a, b); + #else + return simde_vcgt_f32(simde_vabs_f32(a), simde_vabs_f32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcagt_f32 + #define vcagt_f32(a, b) simde_vcagt_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcagt_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcagt_f64(a, b); + #else + return simde_vcgt_f64(simde_vabs_f64(a), simde_vabs_f64(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcagt_f64 + #define vcagt_f64(a, b) simde_vcagt_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcagtq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcagtq_f16(a, b); + #else + simde_uint16x8_private r_; + simde_float16x8_private + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcagth_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcagtq_f16 + #define vcagtq_f16(a, b) simde_vcagtq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcagtq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcagtq_f32(a, b); + #else + return simde_vcgtq_f32(simde_vabsq_f32(a), simde_vabsq_f32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcagtq_f32 + #define vcagtq_f32(a, b) simde_vcagtq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcagtq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcagtq_f64(a, b); + #else + return simde_vcgtq_f64(simde_vabsq_f64(a), simde_vabsq_f64(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcagtq_f64 + #define vcagtq_f64(a, b) simde_vcagtq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CAGT_H) */ +/* :: End simde/arm/neon/cagt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ceq.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any 
person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_CEQ_H) +#define SIMDE_ARM_NEON_CEQ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vceqh_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vceqh_f16(a, b); + #else + return (simde_float16_to_float32(a) == simde_float16_to_float32(b)) ? UINT16_MAX : UINT16_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqh_f16 + #define vceqh_f16(a, b) simde_vceqh_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vceqs_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqs_f32(a, b); + #else + return (a == b) ? ~UINT32_C(0) : UINT32_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqs_f32 + #define vceqs_f32(a, b) simde_vceqs_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vceqd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqd_f64(a, b); + #else + return (a == b) ? ~UINT64_C(0) : UINT64_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqd_f64 + #define vceqd_f64(a, b) simde_vceqd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vceqd_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vceqd_s64(a, b)); + #else + return (a == b) ? ~UINT64_C(0) : UINT64_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqd_s64 + #define vceqd_s64(a, b) simde_vceqd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vceqd_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqd_u64(a, b); + #else + return (a == b) ? 
~UINT64_C(0) : UINT64_C(0); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqd_u64 + #define vceqd_u64(a, b) simde_vceqd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vceq_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vceq_f16(a, b); + #else + simde_uint16x4_private r_; + simde_float16x4_private + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vceqh_f16(a_.values[i], b_.values[i]); + } + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vceq_f16 + #define vceq_f16(a, b) simde_vceq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vceq_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_f32(a, b); + #else + simde_uint32x2_private r_; + simde_float32x2_private + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_f32 + #define vceq_f32(a, b) simde_vceq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vceq_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceq_f64(a, b); + #else + simde_uint64x1_private r_; + simde_float64x1_private + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceq_f64 + #define vceq_f64(a, b) simde_vceq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vceq_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_s8(a, b); + #else + simde_uint8x8_private r_; + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpeq_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_s8 + #define vceq_s8(a, b) simde_vceq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vceq_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_s16(a, b); + #else + simde_uint16x4_private r_; + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpeq_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_s16 + #define vceq_s16(a, b) simde_vceq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vceq_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_s32(a, b); + #else + simde_uint32x2_private r_; + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpeq_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_s32 + #define vceq_s32(a, b) simde_vceq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vceq_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceq_s64(a, b); + #else + simde_uint64x1_private r_; + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_s64 + #define vceq_s64(a, b) simde_vceq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vceq_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_u8(a, b); + #else + simde_uint8x8_private r_; + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_u8 + #define vceq_u8(a, b) simde_vceq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vceq_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_u16(a, b); + #else + simde_uint16x4_private r_; + simde_uint16x4_private + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_u16 + #define vceq_u16(a, b) simde_vceq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vceq_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceq_u32(a, b); + #else + simde_uint32x2_private r_; + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_u32 + #define vceq_u32(a, b) simde_vceq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vceq_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceq_u64(a, b); + #else + simde_uint64x1_private r_; + simde_uint64x1_private + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceq_u64 + #define vceq_u64(a, b) simde_vceq_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vceqq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vceqq_f16(a, b); + #else + simde_uint16x8_private r_; + simde_float16x8_private + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vceqh_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vceqq_f16 + #define vceqq_f16(a, b) simde_vceqq_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vceqq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b)); + #else + simde_uint32x4_private r_; + simde_float32x4_private + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_cmpeq_ps(a_.m128, b_.m128)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_eq(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_f32 + #define vceqq_f32(a, b) simde_vceqq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vceqq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b)); + #else + simde_uint64x2_private r_; + simde_float64x2_private + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castpd_si128(_mm_cmpeq_pd(a_.m128d, b_.m128d)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_eq(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqq_f64 + #define vceqq_f64(a, b) simde_vceqq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vceqq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b)); + #else + simde_uint8x16_private r_; + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpeq_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_eq(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_s8 + #define vceqq_s8(a, b) simde_vceqq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vceqq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b)); + #else + simde_uint16x8_private r_; + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpeq_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_eq(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_s16 + #define vceqq_s16(a, b) simde_vceqq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vceqq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b)); + #else + simde_uint32x4_private r_; + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpeq_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_eq(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_s32 + #define vceqq_s32(a, b) simde_vceqq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vceqq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b)); + #else + simde_uint64x2_private r_; + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_cmpeq_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_s64 + #define vceqq_s64(a, b) simde_vceqq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vceqq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(a, b)); + #else + simde_uint8x16_private r_; + simde_uint8x16_private + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpeq_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_u8 + #define vceqq_u8(a, b) simde_vceqq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vceqq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpeq(a, b)); + #else + simde_uint16x8_private r_; + simde_uint16x8_private + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpeq_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_u16 + #define vceqq_u16(a, b) simde_vceqq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vceqq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vceqq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpeq(a, b)); + #else + simde_uint32x4_private r_; + simde_uint32x4_private + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmpeq_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_u32 + #define vceqq_u32(a, b) simde_vceqq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vceqq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpeq(a, b)); + #else + simde_uint64x2_private r_; + simde_uint64x2_private + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_cmpeq_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == b_.values[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqq_u64 + #define vceqq_u64(a, b) simde_vceqq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CEQ_H) */ +/* :: End simde/arm/neon/ceq.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ceqz.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_CEQZ_H) +#define SIMDE_ARM_NEON_CEQZ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vceqz_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vceqz_f16(a); + #else + return simde_vceq_f16(a, simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vceqz_f16 + #define vceqz_f16(a) simde_vceqz_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vceqz_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_f32(a); + #else + return simde_vceq_f32(a, simde_vdup_n_f32(0.0f)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_f32 + #define vceqz_f32(a) simde_vceqz_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vceqz_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_f64(a); + #else + return simde_vceq_f64(a, simde_vdup_n_f64(0.0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqz_f64 + #define vceqz_f64(a) simde_vceqz_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vceqz_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_s8(a); + #else + return simde_vceq_s8(a, simde_vdup_n_s8(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_s8 + #define vceqz_s8(a) simde_vceqz_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vceqz_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_s16(a); + #else + return simde_vceq_s16(a, simde_vdup_n_s16(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_s16 + #define vceqz_s16(a) simde_vceqz_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vceqz_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_s32(a); + #else + return simde_vceq_s32(a, simde_vdup_n_s32(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_s32 + #define vceqz_s32(a) simde_vceqz_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vceqz_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_s64(a); + #else + return simde_vceq_s64(a, simde_vdup_n_s64(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_s64 + #define vceqz_s64(a) simde_vceqz_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vceqz_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return 
vceqz_u8(a); + #else + return simde_vceq_u8(a, simde_vdup_n_u8(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_u8 + #define vceqz_u8(a) simde_vceqz_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vceqz_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_u16(a); + #else + return simde_vceq_u16(a, simde_vdup_n_u16(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_u16 + #define vceqz_u16(a) simde_vceqz_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vceqz_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_u32(a); + #else + return simde_vceq_u32(a, simde_vdup_n_u32(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_u32 + #define vceqz_u32(a) simde_vceqz_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vceqz_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqz_u64(a); + #else + return simde_vceq_u64(a, simde_vdup_n_u64(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqz_u64 + #define vceqz_u64(a) simde_vceqz_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vceqzq_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vceqzq_f16(a); + #else + return simde_vceqq_f16(a, simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vceqzq_f16 + #define vceqzq_f16(a) simde_vceqzq_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vceqzq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_f32(a); + #else + return simde_vceqq_f32(a, simde_vdupq_n_f32(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_f32 + #define vceqzq_f32(a) simde_vceqzq_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vceqzq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_f64(a); + #else + return simde_vceqq_f64(a, simde_vdupq_n_f64(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqzq_f64 + #define vceqzq_f64(a) simde_vceqzq_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vceqzq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_s8(a); + #else + return simde_vceqq_s8(a, simde_vdupq_n_s8(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_s8 + #define vceqzq_s8(a) simde_vceqzq_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vceqzq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_s16(a); + #else + return simde_vceqq_s16(a, simde_vdupq_n_s16(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_s16 + #define vceqzq_s16(a) simde_vceqzq_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vceqzq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_s32(a); + #else + return simde_vceqq_s32(a, simde_vdupq_n_s32(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_s32 + #define vceqzq_s32(a) simde_vceqzq_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vceqzq_s64(simde_int64x2_t a) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_s64(a); + #else + return simde_vceqq_s64(a, simde_vdupq_n_s64(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_s64 + #define vceqzq_s64(a) simde_vceqzq_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vceqzq_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_u8(a); + #else + return simde_vceqq_u8(a, simde_vdupq_n_u8(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_u8 + #define vceqzq_u8(a) simde_vceqzq_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vceqzq_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_u16(a); + #else + return simde_vceqq_u16(a, simde_vdupq_n_u16(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_u16 + #define vceqzq_u16(a) simde_vceqzq_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vceqzq_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_u32(a); + #else + return simde_vceqq_u32(a, simde_vdupq_n_u32(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_u32 + #define vceqzq_u32(a) simde_vceqzq_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vceqzq_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzq_u64(a); + #else + return simde_vceqq_u64(a, simde_vdupq_n_u64(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzq_u64 + #define vceqzq_u64(a) simde_vceqzq_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vceqzd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vceqzd_s64(a)); + #else + return simde_vceqd_s64(a, INT64_C(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzd_s64 + #define vceqzd_s64(a) simde_vceqzd_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vceqzd_u64(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzd_u64(a); + #else + return simde_vceqd_u64(a, UINT64_C(0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzd_u64 + #define vceqzd_u64(a) simde_vceqzd_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vceqzh_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vceqzh_f16(a); + #else + return simde_vceqh_f16(a, SIMDE_FLOAT16_VALUE(0.0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vceqzh_f16 + #define vceqzh_f16(a) simde_vceqzh_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vceqzs_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzs_f32(a); + #else + return simde_vceqs_f32(a, SIMDE_FLOAT32_C(0.0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzs_f32 + #define vceqzs_f32(a) simde_vceqzs_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vceqzd_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vceqzd_f64(a); + #else + return simde_vceqd_f64(a, SIMDE_FLOAT64_C(0.0)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vceqzd_f64 + #define vceqzd_f64(a) simde_vceqzd_f64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CEQZ_H) */ +/* :: End 
simde/arm/neon/ceqz.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cgez.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CGEZ_H) +#define SIMDE_ARM_NEON_CGEZ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgezd_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgezd_f64(a)); + #else + return (a >= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezd_f64 + #define vcgezd_f64(a) simde_vcgezd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgezd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgezd_s64(a)); + #else + return (a >= 0) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezd_s64 + #define vcgezd_s64(a) simde_vcgezd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcgezs_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vcgezs_f32(a)); + #else + return (a >= SIMDE_FLOAT32_C(0.0)) ? 
UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezs_f32 + #define vcgezs_f32(a) simde_vcgezs_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgezq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgezq_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgeq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezs_f32(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_f32 + #define vcgezq_f32(a) simde_vcgezq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgezq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgezq_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgeq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezd_f64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_f64 + #define vcgezq_f64(a) simde_vcgezq_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcgezq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgezq_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgeq_s8(a, simde_vdupq_n_s8(0)); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_uint8x16_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_s8 + #define vcgezq_s8(a) simde_vcgezq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgezq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgezq_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgeq_s16(a, simde_vdupq_n_s16(0)); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_uint16x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= 0) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_s16 + #define vcgezq_s16(a) simde_vcgezq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgezq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgezq_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgeq_s32(a, simde_vdupq_n_s32(0)); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= 0) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_s32 + #define vcgezq_s32(a) simde_vcgezq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgezq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgezq_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgeq_s64(a, simde_vdupq_n_s64(0)); + #else + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezd_s64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_s64 + #define vcgezq_s64(a) simde_vcgezq_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgez_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgez_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcge_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezs_f32(a_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_f32 + #define vcgez_f32(a) simde_vcgez_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgez_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgez_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcge_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezd_f64(a_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_f64 + #define vcgez_f64(a) 
simde_vcgez_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcgez_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgez_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcge_s8(a, simde_vdup_n_s8(0)); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_uint8x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_s8 + #define vcgez_s8(a) simde_vcgez_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcgez_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgez_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcge_s16(a, simde_vdup_n_s16(0)); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_uint16x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= 0) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_s16 + #define vcgez_s16(a) simde_vcgez_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgez_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgez_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcge_s32(a, simde_vdup_n_s32(0)); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >= 0) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_s32 + #define vcgez_s32(a) simde_vcgez_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgez_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgez_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcge_s64(a, simde_vdup_n_s64(0)); + #else + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values >= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezd_s64(a_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_s64 + #define vcgez_s64(a) simde_vcgez_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CGEZ_H) */ +/* :: End simde/arm/neon/cgez.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cgtz.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CGTZ_H) +#define SIMDE_ARM_NEON_CGTZ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgtzd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_s64(a)); + #else + return (a > 0) ? 
UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzd_s64 + #define vcgtzd_s64(a) simde_vcgtzd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcgtzd_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcgtzd_f64(a)); + #else + return (a > SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzd_f64 + #define vcgtzd_f64(a) simde_vcgtzd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcgtzs_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vcgtzs_f32(a)); + #else + return (a > SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzs_f32 + #define vcgtzs_f32(a) simde_vcgtzs_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgtzq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtzq_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgtq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzs_f32(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_f32 + #define vcgtzq_f32(a) simde_vcgtzq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgtzq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtzq_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgtq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzd_f64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_f64 + #define vcgtzq_f64(a) simde_vcgtzq_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcgtzq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtzq_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgtq_s8(a, simde_vdupq_n_s8(0)); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_uint8x16_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > 0) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_s8 + #define vcgtzq_s8(a) simde_vcgtzq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgtzq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtzq_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgtq_s16(a, simde_vdupq_n_s16(0)); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_uint16x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_s16 + #define vcgtzq_s16(a) simde_vcgtzq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcgtzq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtzq_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgtq_s32(a, simde_vdupq_n_s32(0)); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > 0) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_s32 + #define vcgtzq_s32(a) simde_vcgtzq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcgtzq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtzq_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgtq_s64(a, simde_vdupq_n_s64(0)); + #else + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzd_s64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_s64 + #define vcgtzq_s64(a) simde_vcgtzq_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgtz_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtz_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzs_f32(a_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_f32 + #define vcgtz_f32(a) simde_vcgtz_f32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgtz_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtz_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzd_f64(a_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_f64 + #define vcgtz_f64(a) simde_vcgtz_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcgtz_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtz_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgt_s8(a, simde_vdup_n_s8(0)); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_uint8x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_s8 + #define vcgtz_s8(a) simde_vcgtz_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcgtz_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtz_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgt_s16(a, simde_vdup_n_s16(0)); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_uint16x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > 0) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_s16 + #define vcgtz_s16(a) simde_vcgtz_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcgtz_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtz_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgt_s32(a, simde_vdup_n_s32(0)); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > 0) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_s32 + #define vcgtz_s32(a) simde_vcgtz_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcgtz_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcgtz_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcgt_s64(a, simde_vdup_n_s64(0)); + #else + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzd_s64(a_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_s64 + #define vcgtz_s64(a) simde_vcgtz_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CGTZ_H) */ +/* :: End simde/arm/neon/cgtz.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cle.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CLE_H) +#define SIMDE_ARM_NEON_CLE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcled_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcled_f64(a, b)); + #else + return (a <= b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcled_f64 + #define vcled_f64(a, b) simde_vcled_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcled_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcled_s64(a, b)); + #else + return (a <= b) ? 
UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcled_s64 + #define vcled_s64(a, b) simde_vcled_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcled_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcled_u64(a, b)); + #else + return (a <= b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcled_u64 + #define vcled_u64(a, b) simde_vcled_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcles_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vcles_f32(a, b)); + #else + return (a <= b) ? UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcles_f32 + #define vcles_f32(a, b) simde_vcles_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcleq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b)); + #else + simde_float32x4_private + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_cmple_ps(a_.m128, b_.m128)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcles_f32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_f32 + #define vcleq_f32(a, b) simde_vcleq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcleq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcleq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b)); + #else + simde_float64x2_private + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castpd_si128(_mm_cmple_pd(a_.m128d, b_.m128d)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcled_f64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcleq_f64 + #define vcleq_f64(a, b) simde_vcleq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcleq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b)); + #else + 
simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + simde_uint8x16_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi8(b_.m128i, a_.m128i), _mm_cmpeq_epi8(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_s8 + #define vcleq_s8(a, b) simde_vcleq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcleq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b)); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + simde_uint16x8_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi16(b_.m128i, a_.m128i), _mm_cmpeq_epi16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_s16 + #define vcleq_s16(a, b) simde_vcleq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcleq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b)); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(b_.m128i, a_.m128i), _mm_cmpeq_epi32(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_s32 + #define vcleq_s32(a, b) simde_vcleq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcleq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcleq_s64(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s32(vmvnq_s32(vreinterpretq_s32_s64(vshrq_n_s64(vqsubq_s64(b, a), 63)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b)); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = _mm_or_si128(_mm_cmpgt_epi64(b_.m128i, a_.m128i), _mm_cmpeq_epi64(a_.m128i, b_.m128i)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcled_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcleq_s64 + #define vcleq_s64(a, b) simde_vcleq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcleq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a, b)); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + /* http://www.alfredklomp.com/programming/sse-intrinsics/ */ + r_.m128i = + _mm_cmpeq_epi8( + _mm_min_epu8(a_.m128i, b_.m128i), + a_.m128i + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_u8 + #define vcleq_u8(a, b) simde_vcleq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcleq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a, b)); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_cmpeq_epi16( + _mm_min_epu16(a_.m128i, b_.m128i), + a_.m128i + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i sign_bits = _mm_set1_epi16(INT16_MIN); + r_.m128i = + _mm_or_si128( + _mm_cmpgt_epi16( + _mm_xor_si128(b_.m128i, sign_bits), + _mm_xor_si128(a_.m128i, sign_bits) + ), + _mm_cmpeq_epi16(a_.m128i, b_.m128i) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_u16 + #define vcleq_u16(a, b) simde_vcleq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcleq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcleq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a, b)); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_cmpeq_epi32( + _mm_min_epu32(a_.m128i, b_.m128i), + a_.m128i + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i sign_bits = _mm_set1_epi32(INT32_MIN); + r_.m128i = + _mm_or_si128( + _mm_cmpgt_epi32( + _mm_xor_si128(b_.m128i, sign_bits), + _mm_xor_si128(a_.m128i, sign_bits) + ), + _mm_cmpeq_epi32(a_.m128i, b_.m128i) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_le(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcleq_u32 + #define vcleq_u32(a, b) simde_vcleq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcleq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcleq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a, b)); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = + _mm_cmpeq_epi64( + _mm_min_epu64(a_.m128i, b_.m128i), + a_.m128i + ); + #elif defined(SIMDE_X86_SSE4_2_NATIVE) + __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); + r_.m128i = + _mm_or_si128( + _mm_cmpgt_epi64( + _mm_xor_si128(b_.m128i, sign_bits), + _mm_xor_si128(a_.m128i, sign_bits) + ), + _mm_cmpeq_epi64(a_.m128i, b_.m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcled_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcleq_u64 + #define vcleq_u64(a, b) simde_vcleq_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcle_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_f32(a, b); + #else + simde_float32x2_private + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcles_f32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_f32 + #define vcle_f32(a, b) simde_vcle_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcle_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcle_f64(a, b); + #else + simde_float64x1_private + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcled_f64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcle_f64 + #define vcle_f64(a, b) simde_vcle_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcle_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_s8(a, b); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + simde_uint8x8_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = 
_mm_or_si64(_mm_cmpgt_pi8(b_.m64, a_.m64), _mm_cmpeq_pi8(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_s8 + #define vcle_s8(a, b) simde_vcle_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcle_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_s16(a, b); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + simde_uint16x4_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(b_.m64, a_.m64), _mm_cmpeq_pi16(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_s16 + #define vcle_s16(a, b) simde_vcle_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcle_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_s32(a, b); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(b_.m64, a_.m64), _mm_cmpeq_pi32(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_s32 + #define vcle_s32(a, b) simde_vcle_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcle_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcle_s64(a, b); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcled_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcle_s64 + #define vcle_s64(a, b) simde_vcle_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcle_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi8(INT8_MIN); + r_.m64 = _mm_or_si64(_mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi8(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_u8 + #define vcle_u8(a, b) simde_vcle_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcle_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi16(INT16_MIN); + r_.m64 = _mm_or_si64(_mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi16(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_u16 + #define vcle_u16(a, b) simde_vcle_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcle_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcle_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi32(INT32_MIN); + r_.m64 = _mm_or_si64(_mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)), _mm_cmpeq_pi32(a_.m64, b_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= b_.values[i]) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcle_u32 + #define vcle_u32(a, b) simde_vcle_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcle_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcle_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcled_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcle_u64 + #define vcle_u64(a, b) simde_vcle_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLE_H) */ +/* :: End simde/arm/neon/cle.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/clez.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CLEZ_H) +#define SIMDE_ARM_NEON_CLEZ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vclezd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vclezd_s64(a)); + #else + return (a <= 0) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezd_s64 + #define vclezd_s64(a) simde_vclezd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vclezd_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vclezd_f64(a)); + #else + return (a <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezd_f64 + #define vclezd_f64(a) simde_vclezd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vclezs_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vclezs_f32(a)); + #else + return (a <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezs_f32 + #define vclezs_f32(a) simde_vclezs_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vclezq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclezq_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcleq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezq_f32 + #define vclezq_f32(a) simde_vclezq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vclezq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclezq_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcleq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? 
UINT64_MAX : 0; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezq_f64 + #define vclezq_f64(a) simde_vclezq_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vclezq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclezq_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcleq_s8(a, simde_vdupq_n_s8(0)); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_uint8x16_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezq_s8 + #define vclezq_s8(a) simde_vclezq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vclezq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclezq_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcleq_s16(a, simde_vdupq_n_s16(0)); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_uint16x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezq_s16 + #define vclezq_s16(a) simde_vclezq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vclezq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclezq_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcleq_s32(a, simde_vdupq_n_s32(0)); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezq_s32 + #define vclezq_s32(a) simde_vclezq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vclezq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclezq_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcleq_s64(a, simde_vdupq_n_s64(0)); + #else + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? 
UINT64_MAX : 0; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclezq_s64 + #define vclezq_s64(a) simde_vclezq_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vclez_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclez_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcle_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= SIMDE_FLOAT32_C(0.0)) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclez_f32 + #define vclez_f32(a) simde_vclez_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vclez_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclez_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcle_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclez_f64 + #define vclez_f64(a) simde_vclez_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vclez_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclez_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcle_s8(a, simde_vdup_n_s8(0)); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_uint8x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclez_s8 + #define vclez_s8(a) simde_vclez_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vclez_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclez_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcle_s16(a, simde_vdup_n_s16(0)); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_uint16x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclez_s16 + #define vclez_s16(a) simde_vclez_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vclez_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclez_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcle_s32(a, simde_vdup_n_s32(0)); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclez_s32 + #define vclez_s32(a) simde_vclez_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vclez_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclez_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcle_s64(a, simde_vdup_n_s64(0)); + #else + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values <= 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] <= 0) ? UINT64_MAX : 0; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclez_s64 + #define vclez_s64(a) simde_vclez_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLEZ_H) */ +/* :: End simde/arm/neon/clez.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cls.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_CLS_H) +#define SIMDE_ARM_NEON_CLS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/clz.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_CLZ_H) +#define SIMDE_ARM_NEON_CLZ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_x_vclzb_u8(uint8_t a) { + #if \ + defined(SIMDE_BUILTIN_SUFFIX_8_) && \ + ( \ + SIMDE_BUILTIN_HAS_8_(clz) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) \ + ) + if (HEDLEY_UNLIKELY(a == 0)) + return 8 * sizeof(r); + + return HEDLEY_STATIC_CAST(uint8_t, SIMDE_BUILTIN_8_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_8_, a))); + #else + uint8_t r; + uint8_t shift; + + if (HEDLEY_UNLIKELY(a == 0)) + return 8 * sizeof(r); + + r = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x0F)) << 2); a >>= r; + shift = HEDLEY_STATIC_CAST(uint8_t, (a > UINT8_C(0x03)) << 1); a >>= shift; r |= shift; + r |= (a >> 1); + + return ((8 * sizeof(r)) - 1) - r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_x_vclzh_u16(uint16_t a) { + #if \ + defined(SIMDE_BUILTIN_SUFFIX_16_) && \ + ( \ + SIMDE_BUILTIN_HAS_16_(clz) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) \ + ) + if (HEDLEY_UNLIKELY(a == 0)) + return 8 * sizeof(r); + + return HEDLEY_STATIC_CAST(uint16_t, SIMDE_BUILTIN_16_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_16_, a))); + #else + uint16_t r; + uint16_t shift; + + if (HEDLEY_UNLIKELY(a == 0)) + return 8 * sizeof(r); + + r = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x00FF)) << 3); a >>= r; + shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x000F)) << 2); a >>= shift; r |= shift; + shift = HEDLEY_STATIC_CAST(uint16_t, (a > UINT16_C(0x0003)) << 1); a >>= shift; r |= shift; + r |= (a >> 1); + + return ((8 * sizeof(r)) - 1) - r; + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_x_vclzs_u32(uint32_t a) { + #if \ + defined(SIMDE_BUILTIN_SUFFIX_32_) && \ + ( \ + SIMDE_BUILTIN_HAS_32_(clz) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) \ + ) + if (HEDLEY_UNLIKELY(a == 0)) + return 8 * sizeof(a); + + return HEDLEY_STATIC_CAST(uint32_t, SIMDE_BUILTIN_32_(clz)(HEDLEY_STATIC_CAST(unsigned SIMDE_BUILTIN_TYPE_32_, a))); + #else + uint32_t r; + uint32_t shift; + + if (HEDLEY_UNLIKELY(a == 0)) + return 8 * sizeof(a); + + r = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0xFFFF)) << 4); a >>= r; + shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x00FF)) << 3); a >>= shift; r |= shift; + shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x000F)) << 2); a >>= shift; r |= shift; + shift = HEDLEY_STATIC_CAST(uint32_t, (a > UINT32_C(0x0003)) << 1); a >>= shift; r |= shift; + r |= (a >> 1); + + return ((8 * sizeof(r)) - 1) - r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_x_vclzb_s8(int8_t a) { + return HEDLEY_STATIC_CAST(int8_t, simde_x_vclzb_u8(HEDLEY_STATIC_CAST(uint8_t, a))); +} + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_vclzh_s16(int16_t a) { + return HEDLEY_STATIC_CAST(int16_t, simde_x_vclzh_u16(HEDLEY_STATIC_CAST(uint16_t, a))); +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_x_vclzs_s32(int32_t a) { + return HEDLEY_STATIC_CAST(int32_t, simde_x_vclzs_u32(HEDLEY_STATIC_CAST(uint32_t, a))); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vclz_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclz_s8(a); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzb_s8(a_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclz_s8 + #define vclz_s8(a) simde_vclz_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vclz_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclz_s16(a); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzh_s16(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclz_s16 + #define vclz_s16(a) simde_vclz_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vclz_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclz_s32(a); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzs_s32(a_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclz_s32 + #define vclz_s32(a) simde_vclz_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vclz_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclz_u8(a); + #else + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzb_u8(a_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclz_u8 + #define vclz_u8(a) 
simde_vclz_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vclz_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclz_u16(a); + #else + simde_uint16x4_private + a_ = simde_uint16x4_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzh_u16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclz_u16 + #define vclz_u16(a) simde_vclz_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vclz_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclz_u32(a); + #else + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzs_u32(a_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclz_u32 + #define vclz_u32(a) simde_vclz_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vclzq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclzq_s8(a); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + r_; + + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://gist.github.com/animetosho/6cb732ccb5ecd86675ca0a442b3c0622 */ + a_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0); + a_.m128i = _mm_andnot_si128(_mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a_.m128i); + r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzb_s8(a_.values[i]); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclzq_s8 + #define vclzq_s8(a) simde_vclzq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vclzq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclzq_s16(a); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzh_s16(a_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclzq_s16 + #define vclzq_s16(a) simde_vclzq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vclzq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclzq_s32(a); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzs_s32(a_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclzq_s32 + #define vclzq_s32(a) simde_vclzq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vclzq_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclzq_u8(a); + #else + simde_uint8x16_private + a_ = 
simde_uint8x16_to_private(a), + r_; + + #if defined(SIMDE_X86_GFNI_NATIVE) + a_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201), HEDLEY_STATIC_CAST(int32_t, 0x80402010), HEDLEY_STATIC_CAST(int32_t, 0x08040201)), 0); + a_.m128i = _mm_andnot_si128(_mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, 0xff))), a_.m128i); + r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0, HEDLEY_STATIC_CAST(int32_t, 0xaaccf0ff), 0), 8); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzb_u8(a_.values[i]); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclzq_u8 + #define vclzq_u8(a) simde_vclzq_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vclzq_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclzq_u16(a); + #else + simde_uint16x8_private + a_ = simde_uint16x8_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzh_u16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclzq_u16 + #define vclzq_u16(a) simde_vclzq_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vclzq_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclzq_u32(a); + #else + simde_uint32x4_private + a_ = simde_uint32x4_to_private(a), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vclzs_u32(a_.values[i]); + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclzq_u32 + #define vclzq_u32(a) simde_vclzq_u32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLZ_H) */ +/* :: End simde/arm/neon/clz.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cltz.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +/* TODO: float fallbacks should use vclt(a, vdup_n(0.0)) */ + +#if !defined(SIMDE_ARM_NEON_CLTZ_H) +#define SIMDE_ARM_NEON_CLTZ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/clt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_CLT_H) +#define SIMDE_ARM_NEON_CLT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcltd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcltd_f64(a, b)); + #else + return (a < b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltd_f64 + #define vcltd_f64(a, b) simde_vcltd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcltd_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcltd_s64(a, b)); + #else + return (a < b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltd_s64 + #define vcltd_s64(a, b) simde_vcltd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcltd_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcltd_u64(a, b)); + #else + return (a < b) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltd_u64 + #define vcltd_u64(a, b) simde_vcltd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vclts_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vclts_f32(a, b)); + #else + return (a < b) ? 
UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclts_f32 + #define vclts_f32(a, b) simde_vclts_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcltq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); + #else + simde_float32x4_private + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_cmplt_ps(a_.m128, b_.m128)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_f32 + #define vcltq_f32(a, b) simde_vcltq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcltq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); + #else + simde_float64x2_private + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castpd_si128(_mm_cmplt_pd(a_.m128d, b_.m128d)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltq_f64 + #define vcltq_f64(a, b) simde_vcltq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcltq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b)); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + simde_uint8x16_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmplt_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_s8 + #define vcltq_s8(a, b) simde_vcltq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcltq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b)); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + simde_uint16x8_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmplt_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_s16 + #define vcltq_s16(a, b) simde_vcltq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcltq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_cmplt_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_s32 + #define vcltq_s32(a, b) simde_vcltq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcltq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltq_s64(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_u64_s64(vshrq_n_s64(vqsubq_s64(a, b), 63)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = _mm_cmpgt_epi64(b_.m128i, a_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltq_s64 + #define vcltq_s64(a, b) simde_vcltq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcltq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a, b)); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128( + _mm_cmpeq_epi8(b_.m128i, a_.m128i), + _mm_cmpeq_epi8(_mm_max_epu8(b_.m128i, a_.m128i), b_.m128i) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_u8 + #define vcltq_u8(a, b) simde_vcltq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcltq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a, b)); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_andnot_si128( + _mm_cmpeq_epi16(b_.m128i, a_.m128i), + _mm_cmpeq_epi16(_mm_max_epu16(b_.m128i, a_.m128i), b_.m128i) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i sign_bits = _mm_set1_epi16(INT16_MIN); + r_.m128i = _mm_cmplt_epi16(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_u16 + #define vcltq_u16(a, b) simde_vcltq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcltq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcltq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a, b)); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_andnot_si128( + _mm_cmpeq_epi32(b_.m128i, a_.m128i), + _mm_cmpeq_epi32(_mm_max_epu32(b_.m128i, a_.m128i), b_.m128i) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i sign_bits = _mm_set1_epi32(INT32_MIN); + r_.m128i = _mm_cmplt_epi32(_mm_xor_si128(a_.m128i, sign_bits), _mm_xor_si128(b_.m128i, sign_bits)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_lt(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltq_u32 + #define vcltq_u32(a, b) simde_vcltq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcltq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmplt(a, b)); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_andnot_si128( + _mm_cmpeq_epi64(b_.m128i, a_.m128i), + _mm_cmpeq_epi64(_mm_max_epu64(b_.m128i, a_.m128i), b_.m128i) + ); + #elif defined(SIMDE_X86_SSE4_2_NATIVE) + __m128i sign_bits = _mm_set1_epi64x(INT64_MIN); + r_.m128i = _mm_cmpgt_epi64(_mm_xor_si128(b_.m128i, sign_bits), _mm_xor_si128(a_.m128i, sign_bits)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltq_u64 + #define vcltq_u64(a, b) simde_vcltq_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vclt_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_f32(a, b); + #else + simde_float32x2_private + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vclts_f32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_f32 + #define vclt_f32(a, b) simde_vclt_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vclt_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclt_f64(a, b); + #else + simde_float64x1_private + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltd_f64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclt_f64 + #define vclt_f64(a, b) simde_vclt_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vclt_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_s8(a, b); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + simde_uint8x8_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpgt_pi8(b_.m64, 
a_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_s8 + #define vclt_s8(a, b) simde_vclt_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vclt_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_s16(a, b); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + simde_uint16x4_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpgt_pi16(b_.m64, a_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_s16 + #define vclt_s16(a, b) simde_vclt_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vclt_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_s32(a, b); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_cmpgt_pi32(b_.m64, a_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_s32 + #define vclt_s32(a, b) simde_vclt_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vclt_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclt_s64(a, b); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclt_s64 + #define vclt_s64(a, b) simde_vclt_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vclt_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi8(INT8_MIN); + r_.m64 = _mm_cmpgt_pi8(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_u8 + #define vclt_u8(a, b) simde_vclt_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vclt_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi16(INT16_MIN); + r_.m64 = _mm_cmpgt_pi16(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_u16 + #define vclt_u16(a, b) simde_vclt_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vclt_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclt_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + __m64 sign_bits = _mm_set1_pi32(INT32_MIN); + r_.m64 = _mm_cmpgt_pi32(_mm_xor_si64(b_.m64, sign_bits), _mm_xor_si64(a_.m64, sign_bits)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclt_u32 + #define vclt_u32(a, b) simde_vclt_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vclt_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vclt_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < b_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vclt_u64 + #define vclt_u64(a, b) simde_vclt_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLT_H) */ +/* :: End simde/arm/neon/clt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcltzd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcltzd_s64(a)); + #else + return (a < 0) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltzd_s64 + #define vcltzd_s64(a) simde_vcltzd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcltzd_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vcltzd_f64(a)); + #else + return (a < SIMDE_FLOAT64_C(0.0)) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltzd_f64 + #define vcltzd_f64(a) simde_vcltzd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcltzs_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vcltzs_f32(a)); + #else + return (a < SIMDE_FLOAT32_C(0.0)) ? 
UINT32_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltzs_f32 + #define vcltzs_f32(a) simde_vcltzs_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcltz_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltz_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vclt_f32(a, simde_vdup_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltz_f32 + #define vcltz_f32(a) simde_vcltz_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcltz_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltz_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vclt_f64(a, simde_vdup_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltz_f64 + #define vcltz_f64(a) simde_vcltz_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcltz_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltz_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vclt_s8(a, simde_vdup_n_s8(0)); + #else + return simde_vreinterpret_u8_s8(simde_vshr_n_s8(a, 7)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltz_s8 + #define vcltz_s8(a) simde_vcltz_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcltz_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltz_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vclt_s16(a, simde_vdup_n_s16(0)); + #else + return simde_vreinterpret_u16_s16(simde_vshr_n_s16(a, 15)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltz_s16 + #define vcltz_s16(a) simde_vcltz_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcltz_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltz_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vclt_s32(a, simde_vdup_n_s32(0)); + #else + return simde_vreinterpret_u32_s32(simde_vshr_n_s32(a, 31)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltz_s32 + #define vcltz_s32(a) simde_vcltz_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcltz_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltz_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vclt_s64(a, simde_vdup_n_s64(0)); + #else + return 
simde_vreinterpret_u64_s64(simde_vshr_n_s64(a, 63)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltz_s64 + #define vcltz_s64(a) simde_vcltz_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcltzq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltzq_f32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcltq_f32(a, simde_vdupq_n_f32(SIMDE_FLOAT32_C(0.0))); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT32_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < SIMDE_FLOAT32_C(0.0)) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltzq_f32 + #define vcltzq_f32(a) simde_vcltzq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcltzq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltzq_f64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcltq_f64(a, simde_vdupq_n_f64(SIMDE_FLOAT64_C(0.0))); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values < SIMDE_FLOAT64_C(0.0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < SIMDE_FLOAT64_C(0.0)) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltzq_f64 + #define vcltzq_f64(a) simde_vcltzq_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcltzq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltzq_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcltq_s8(a, simde_vdupq_n_s8(0)); + #else + return simde_vreinterpretq_u8_s8(simde_vshrq_n_s8(a, 7)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltzq_s8 + #define vcltzq_s8(a) simde_vcltzq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcltzq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltzq_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcltq_s16(a, simde_vdupq_n_s16(0)); + #else + return simde_vreinterpretq_u16_s16(simde_vshrq_n_s16(a, 15)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltzq_s16 + #define vcltzq_s16(a) simde_vcltzq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcltzq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltzq_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcltq_s32(a, simde_vdupq_n_s32(0)); + #else + return simde_vreinterpretq_u32_s32(simde_vshrq_n_s32(a, 31)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltzq_s32 + #define vcltzq_s32(a) simde_vcltzq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcltzq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcltzq_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vcltq_s64(a, 
simde_vdupq_n_s64(0)); + #else + return simde_vreinterpretq_u64_s64(simde_vshrq_n_s64(a, 63)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcltzq_s64 + #define vcltzq_s64(a) simde_vcltzq_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLTZ_H) */ +/* :: End simde/arm/neon/cltz.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mvn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_MVN_H) +#define SIMDE_ARM_NEON_MVN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmvnq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvnq_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_nor(a, a); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_not(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvnq_s8 + #define vmvnq_s8(a) simde_vmvnq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmvnq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvnq_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_nor(a, a); + #else + simde_int16x8_private + r_, + a_ = 
simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_not(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvnq_s16 + #define vmvnq_s16(a) simde_vmvnq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmvnq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvnq_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_nor(a, a); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_not(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvnq_s32 + #define vmvnq_s32(a) simde_vmvnq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmvnq_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvnq_u8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_nor(a, a); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi8(a_.m128i, a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_not(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvnq_u8 + #define vmvnq_u8(a) simde_vmvnq_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmvnq_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvnq_u16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_nor(a, a); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi16(a_.m128i, a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_not(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvnq_u16 + #define vmvnq_u16(a) simde_vmvnq_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmvnq_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvnq_u32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_nor(a, a); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, a_.m128i, a_.m128i, 0x55); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(a_.m128i, _mm_cmpeq_epi32(a_.m128i, a_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_not(a_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvnq_u32 + #define vmvnq_u32(a) simde_vmvnq_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmvn_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvn_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvn_s8 + #define vmvn_s8(a) simde_vmvn_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmvn_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvn_s16(a); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvn_s16 + #define vmvn_s16(a) simde_vmvn_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmvn_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvn_s32(a); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvn_s32 + #define vmvn_s32(a) simde_vmvn_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmvn_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vmvn_u8(a); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi8(a_.m64, a_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvn_u8 + #define vmvn_u8(a) simde_vmvn_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmvn_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvn_u16(a); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi16(a_.m64, a_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvn_u16 + #define vmvn_u16(a) simde_vmvn_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmvn_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmvn_u32(a); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_andnot_si64(a_.m64, _mm_cmpeq_pi32(a_.m64, a_.m64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = ~a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ~(a_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmvn_u32 + #define vmvn_u32(a) simde_vmvn_u32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MVN_H) */ +/* :: End simde/arm/neon/mvn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vcls_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcls_s8(a); + #else + return simde_vsub_s8(simde_vclz_s8(simde_vbsl_s8(simde_vcltz_s8(a), simde_vmvn_s8(a), a)), simde_vdup_n_s8(INT8_C(1))); + #endif +} +#define simde_vcls_u8(a) simde_vcls_s8(simde_vreinterpret_s8_u8(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcls_s8 + #define vcls_s8(a) simde_vcls_s8(a) + #undef vcls_u8 + #define vcls_u8(a) simde_vcls_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcls_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcls_s16(a); + #else + return simde_vsub_s16(simde_vclz_s16(simde_vbsl_s16(simde_vcltz_s16(a), simde_vmvn_s16(a), a)), simde_vdup_n_s16(INT16_C(1))); + #endif +} +#define simde_vcls_u16(a) simde_vcls_s16(simde_vreinterpret_s16_u16(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcls_s16 + #define vcls_s16(a) simde_vcls_s16(a) + #undef vcls_u16 + #define vcls_u16(a) simde_vcls_u16(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcls_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcls_s32(a); + #else + return simde_vsub_s32(simde_vclz_s32(simde_vbsl_s32(simde_vcltz_s32(a), simde_vmvn_s32(a), a)), simde_vdup_n_s32(INT32_C(1))); + #endif +} +#define simde_vcls_u32(a) simde_vcls_s32(simde_vreinterpret_s32_u32(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcls_s32 + #define vcls_s32(a) simde_vcls_s32(a) + #undef vcls_u32 + #define vcls_u32(a) simde_vcls_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vclsq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclsq_s8(a); + #else + return simde_vsubq_s8(simde_vclzq_s8(simde_vbslq_s8(simde_vcltzq_s8(a), simde_vmvnq_s8(a), a)), simde_vdupq_n_s8(INT8_C(1))); + #endif +} +#define simde_vclsq_u8(a) simde_vclsq_s8(simde_vreinterpretq_s8_u8(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclsq_s8 + #define vclsq_s8(a) simde_vclsq_s8(a) + #undef vclsq_u8 + #define vclsq_u8(a) simde_vclsq_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vclsq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclsq_s16(a); + #else + return simde_vsubq_s16(simde_vclzq_s16(simde_vbslq_s16(simde_vcltzq_s16(a), simde_vmvnq_s16(a), a)), simde_vdupq_n_s16(INT16_C(1))); + #endif +} +#define simde_vclsq_u16(a) simde_vclsq_s16(simde_vreinterpretq_s16_u16(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclsq_s16 + #define vclsq_s16(a) simde_vclsq_s16(a) + #undef vclsq_u16 + #define vclsq_u16(a) simde_vclsq_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vclsq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vclsq_s32(a); + #else + return simde_vsubq_s32(simde_vclzq_s32(simde_vbslq_s32(simde_vcltzq_s32(a), simde_vmvnq_s32(a), a)), simde_vdupq_n_s32(INT32_C(1))); + #endif +} +#define simde_vclsq_u32(a) simde_vclsq_s32(simde_vreinterpretq_s32_u32(a)) +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vclsq_s32 + #define vclsq_s32(a) simde_vclsq_s32(a) + #undef vclsq_u32 + #define vclsq_u32(a) simde_vclsq_u32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CLS_H) */ +/* :: End simde/arm/neon/cls.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cmla.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. 
+* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2021 Atharva Nimbalkar +*/ + +#if !defined(SIMDE_ARM_NEON_CMLA_H) +#define SIMDE_ARM_NEON_CMLA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmla_f32(r, a, b); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] += b_.values[i] * a_.values[i & 2]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_f32 + #define vcmla_f32(r, a, b) simde_vcmla_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_f32(r, a, b); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_add(r_.v128, wasm_f32x4_mul(b_.v128, wasm_i32x4_shuffle(a_.v128, a_.v128, 0, 0, 2, 2))); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] += b_.values[i] * a_.values[i & 2]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_f32 + #define vcmlaq_f32(r, a, b) simde_vcmlaq_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcmlaq_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_f64(r, a, b); + #else + simde_float64x2_private + r_ = simde_float64x2_to_private(r), + a_ = simde_float64x2_to_private(a), + b_ = 
simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_add(r_.v128, wasm_f64x2_mul(b_.v128, wasm_i64x2_shuffle(a_.v128, a_.v128, 0, 0))); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] += b_.values[i] * a_.values[i & 2]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_f64 + #define vcmlaq_f64(r, a, b) simde_vcmlaq_f64(r, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */ +/* :: End simde/arm/neon/cmla.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cmla_rot90.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2021 Atharva Nimbalkar +*/ + +#if !defined(SIMDE_ARM_NEON_CMLA_ROT90_H) +#define SIMDE_ARM_NEON_CMLA_ROT90_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_rot90_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmla_rot90_f32(r, a, b); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 1, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot90_f32 + #define vcmla_rot90_f32(r, a, b) simde_vcmla_rot90_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_rot90_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot90_f32(r, a, b); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_i32x4_shuffle(a_.v128, a_.v128, 1, 1, 3, 3); + b_.v128 = wasm_i32x4_shuffle(wasm_f32x4_neg(b_.v128), b_.v128, 1, 4, 3, 6); + r_.v128 = wasm_f32x4_add(r_.v128, wasm_f32x4_mul(b_.v128, a_.v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 1, 4, 3, 6); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot90_f32 + #define vcmlaq_rot90_f32(r, a, b) simde_vcmlaq_rot90_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcmlaq_rot90_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot90_f64(r, a, b); + #else + simde_float64x2_private + r_ = simde_float64x2_to_private(r), + a_ = 
simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_i64x2_shuffle(a_.v128, a_.v128, 1, 1); + b_.v128 = wasm_i64x2_shuffle(wasm_f64x2_neg(b_.v128), b_.v128, 1, 2); + r_.v128 = wasm_f64x2_add(r_.v128, wasm_f64x2_mul(b_.v128, a_.v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 1, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += -(b_.values[2 * i + 1]) * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += b_.values[2 * i] * a_.values[2 * i + 1]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot90_f64 + #define vcmlaq_rot90_f64(r, a, b) simde_vcmlaq_rot90_f64(r, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT90_H) */ +/* :: End simde/arm/neon/cmla_rot90.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cmla_rot180.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2021 Atharva Nimbalkar +*/ + +#if !defined(SIMDE_ARM_NEON_CMLA_ROT180_H) +#define SIMDE_ARM_NEON_CMLA_ROT180_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_rot180_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmla_rot180_f32(r, a, b); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 0, 0); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, -b_.values, 0, 1); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot180_f32 + #define vcmla_rot180_f32(r, a, b) simde_vcmla_rot180_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_rot180_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot180_f32(r, a, b); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_i32x4_shuffle(a_.v128, a_.v128, 0, 0, 2, 2); + b_.v128 = wasm_i32x4_shuffle(wasm_f32x4_neg(b_.v128), wasm_f32x4_neg(b_.v128), 0, 1, 2, 3); + r_.v128 = wasm_f32x4_add(r_.v128, wasm_f32x4_mul(b_.v128, a_.v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 0, 2, 2); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, -b_.values, 0, 1, 2, 3); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot180_f32 + #define vcmlaq_rot180_f32(r, a, b) simde_vcmlaq_rot180_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcmlaq_rot180_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot180_f64(r, a, b); + #else + simde_float64x2_private + r_ = simde_float64x2_to_private(r), + a_ = 
simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_i64x2_shuffle(a_.v128, a_.v128, 0, 0); + b_.v128 = wasm_i64x2_shuffle(wasm_f64x2_neg(b_.v128), wasm_f64x2_neg(b_.v128), 0, 1); + r_.v128 = wasm_f64x2_add(r_.v128, wasm_f64x2_mul(b_.v128, a_.v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 0); + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, -b_.values, 0, 1); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += -(b_.values[2 * i]) * a_.values[2 * i]; + r_.values[2 * i + 1] += -(b_.values[2 * i + 1]) * a_.values[2 * i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot180_f64 + #define vcmlaq_rot180_f64(r, a, b) simde_vcmlaq_rot180_f64(r, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT180_H) */ +/* :: End simde/arm/neon/cmla_rot180.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cmla_rot270.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2021 Atharva Nimbalkar +*/ + +#if !defined(SIMDE_ARM_NEON_CMLA_ROT270_H) +#define SIMDE_ARM_NEON_CMLA_ROT270_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcmla_rot270_f32(simde_float32x2_t r, simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmla_rot270_f32(r, a, b); + #else + simde_float32x2_private + r_ = simde_float32x2_to_private(r), + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmla_rot270_f32 + #define vcmla_rot270_f32(r, a, b) simde_vcmla_rot270_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcmlaq_rot270_f32(simde_float32x4_t r, simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot270_f32(r, a, b); + #else + simde_float32x4_private + r_ = simde_float32x4_to_private(r), + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_i32x4_shuffle(a_.v128, a_.v128, 1, 1, 3, 3); + b_.v128 = wasm_i32x4_shuffle(wasm_f32x4_neg(b_.v128), b_.v128, 5, 0, 7, 2); + r_.v128 = wasm_f32x4_add(r_.v128, wasm_f32x4_mul(b_.v128, a_.v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 1, 3, 3); + b_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, -b_.values, b_.values, 5, 0, 7, 2); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot270_f32 + #define vcmlaq_rot270_f32(r, a, b) simde_vcmlaq_rot270_f32(r, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcmlaq_rot270_f64(simde_float64x2_t r, simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && SIMDE_ARCH_ARM_CHECK(8,3) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0)) + return vcmlaq_rot270_f64(r, a, b); + #else + simde_float64x2_private + r_ = 
simde_float64x2_to_private(r), + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_i64x2_shuffle(a_.v128, a_.v128, 1, 1); + b_.v128 = wasm_i64x2_shuffle(wasm_f64x2_neg(b_.v128), b_.v128, 3, 0); + r_.v128 = wasm_f64x2_add(r_.v128, wasm_f64x2_mul(b_.v128, a_.v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 1, 1); + b_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, -b_.values, b_.values, 3, 0); + r_.values += b_.values * a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / (2 * sizeof(r_.values[0]))) ; i++) { + r_.values[2 * i] += b_.values[2 * i + 1] * a_.values[2 * i + 1]; + r_.values[2 * i + 1] += -(b_.values[2 * i]) * a_.values[2 * i + 1]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcmlaq_rot270_f64 + #define vcmlaq_rot270_f64(r, a, b) simde_vcmlaq_rot270_f64(r, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_ROT270_H) */ +/* :: End simde/arm/neon/cmla_rot270.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cnt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_CNT_H) +#define SIMDE_ARM_NEON_CNT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_x_arm_neon_cntb(uint8_t v) { + v = v - ((v >> 1) & (85)); + v = (v & (51)) + ((v >> (2)) & (51)); + v = (v + (v >> (4))) & (15); + return HEDLEY_STATIC_CAST(uint8_t, v) >> (sizeof(uint8_t) - 1) * CHAR_BIT; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vcnt_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcnt_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i]))); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcnt_s8 + #define vcnt_s8(a) simde_vcnt_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcnt_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcnt_u8(a); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_arm_neon_cntb(a_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcnt_u8 + #define vcnt_u8(a) simde_vcnt_u8((a)) +#endif + +/* The x86 implementations are stolen from + * https://github.com/WebAssembly/simd/pull/379. They could be cleaned + * up a bit if someone is bored; they're mostly just direct + * translations from the assembly. 
*/ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vcntq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcntq_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a))); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BITALG_NATIVE) + r_.m128i = _mm_popcnt_epi8(a_.m128i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m128i tmp0 = _mm_set1_epi8(0x0f); + __m128i tmp1 = _mm_andnot_si128(tmp0, a_.m128i); + __m128i y = _mm_and_si128(tmp0, a_.m128i); + tmp0 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp1 = _mm_srli_epi16(tmp1, 4); + y = _mm_shuffle_epi8(tmp0, y); + tmp1 = _mm_shuffle_epi8(tmp0, tmp1); + r_.m128i = _mm_add_epi8(y, tmp1); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i tmp0 = _mm_set1_epi8(0x0f); + __m128i tmp1 = a_.m128i; + tmp1 = _mm_and_si128(tmp1, tmp0); + tmp0 = _mm_andnot_si128(tmp0, a_.m128i); + __m128i y = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp0 = _mm_srli_epi16(tmp0, 4); + y = _mm_shuffle_epi8(y, tmp1); + tmp1 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp1 = _mm_shuffle_epi8(tmp1, tmp0); + r_.m128i = _mm_add_epi8(y, tmp1); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_and_si128(_mm_srli_epi16(a_.m128i, 1), _mm_set1_epi8(0x55)); + a_.m128i = _mm_sub_epi8(a_.m128i, tmp); + tmp = a_.m128i; + a_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(0x33)); + tmp = _mm_and_si128(_mm_srli_epi16(tmp, 2), _mm_set1_epi8(0x33)); + a_.m128i = _mm_add_epi8(a_.m128i, tmp); + tmp = _mm_srli_epi16(a_.m128i, 4); + a_.m128i = _mm_add_epi8(a_.m128i, tmp); + r_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(0x0f)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, simde_x_arm_neon_cntb(HEDLEY_STATIC_CAST(uint8_t, a_.values[i]))); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcntq_s8 + #define vcntq_s8(a) simde_vcntq_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcntq_u8(simde_uint8x16_t a) { + return simde_vreinterpretq_u8_s8(simde_vcntq_s8(simde_vreinterpretq_s8_u8(a))); +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcntq_u8 + #define vcntq_u8(a) simde_vcntq_u8((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CNT_H) */ +/* :: End simde/arm/neon/cnt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cvt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Sean Maher + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_CVT_H) +#define SIMDE_ARM_NEON_CVT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcvt_f16_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_f16_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_float16x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_from_float32(a_.values[i]); + } + #endif + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f16_f32 + #define vcvt_f16_f32(a) simde_vcvt_f16_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvt_f32_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_f32_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_float32x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_to_float32(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f32_f16 + #define vcvt_f32_f16(a) simde_vcvt_f32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcvt_f32_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvt_f32_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_float32x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, a_.values[i]); + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f32_f64 + #define vcvt_f32_f64(a) simde_vcvt_f32_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcvt_f64_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvt_f64_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_float64x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f64_f32 + #define vcvt_f64_f32(a) simde_vcvt_f64_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_vcvts_s16_f16(simde_float16 a) { + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_ARM_NEON_FP16) + return HEDLEY_STATIC_CAST(int16_t, a); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { + return INT16_MIN; + } else if (HEDLEY_UNLIKELY(af > HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { + return INT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(af))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int16_t, af); + } + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_x_vcvts_u16_f16(simde_float16 a) { + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint16_t, simde_float16_to_float32(a)); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af < SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af > HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { + return UINT16_MAX; + } else if (simde_math_isnanf(af)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint16_t, af); + } + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvts_s32_f32(simde_float32 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvts_s32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, a); + #else + if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, a); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_s32_f32 + #define vcvts_s32_f32(a) simde_vcvts_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvts_u32_f32(simde_float32 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvts_u32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, a); + #else + if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (simde_math_isnanf(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, a); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_u32_f32 + #define vcvts_u32_f32(a) simde_vcvts_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_vcvts_f32_s32(int32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvts_f32_s32(a); + #else + return HEDLEY_STATIC_CAST(simde_float32, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_f32_s32 + #define vcvts_f32_s32(a) simde_vcvts_f32_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_vcvts_f32_u32 (uint32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvts_f32_u32(a); + #else + return HEDLEY_STATIC_CAST(simde_float32, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_f32_u32 + #define vcvts_f32_u32(a) simde_vcvts_f32_u32(a) +#endif + 
+ +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtd_s64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtd_s64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, a); + #else + if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { + return INT64_MAX; + } else if (simde_math_isnanf(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, a); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_s64_f64 + #define vcvtd_s64_f64(a) simde_vcvtd_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtd_u64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtd_u64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, a); + #else + if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT64_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { + return UINT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, a); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_u64_f64 + #define vcvtd_u64_f64(a) simde_vcvtd_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_vcvtd_f64_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtd_f64_s64(a); + #else + return HEDLEY_STATIC_CAST(simde_float64, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_f64_s64 + #define vcvtd_f64_s64(a) simde_vcvtd_f64_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_vcvtd_f64_u64(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtd_f64_u64(a); + #else + return HEDLEY_STATIC_CAST(simde_float64, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_f64_u64 + #define vcvtd_f64_u64(a) simde_vcvtd_f64_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcvt_s16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_s16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_s16_f16 + #define vcvt_s16_f16(a) simde_vcvt_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcvt_s32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcvt_s32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_int32x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_s32_f32(a_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} 
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_s32_f32 + #define vcvt_s32_f32(a) simde_vcvt_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcvt_u16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_u16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_u16_f16 + #define vcvt_u16_f16(a) simde_vcvt_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcvt_u32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvt_u32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_u32_f32(a_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_u32_f32 + #define vcvt_u32_f32(a) simde_vcvt_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcvt_s64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvt_s64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_int64x1_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_s64_f64(a_.values[i]); + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_s64_f64 + #define vcvt_s64_f64(a) simde_vcvt_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcvt_u64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvt_u64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= SIMDE_FLOAT64_C(0.0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_u64_f64(a_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_u64_f64 + #define vcvt_u64_f64(a) simde_vcvt_u64_f64(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcvtq_s16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtq_s16_f16(a); + #else + 
simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_s16_f16 + #define vcvtq_s16_f16(a) simde_vcvtq_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcvtq_s32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcvtq_s32_f32(a); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && defined(SIMDE_FAST_NANS) + return vec_signed(a); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_BUG_GCC_101614) + return (a == a) & vec_signed(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_int32x4_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_trunc_sat_f32x4(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + const __m128i i32_max_mask = _mm_castps_si128(_mm_cmpgt_ps(a_.m128, _mm_set1_ps(SIMDE_FLOAT32_C(2147483520.0)))); + const __m128 clamped = _mm_max_ps(a_.m128, _mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))); + #else + const __m128 clamped = a_.m128; + #endif + + r_.m128i = _mm_cvttps_epi32(clamped); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(r_.m128i), + _mm_castsi128_ps(_mm_set1_epi32(INT32_MAX)), + _mm_castsi128_ps(i32_max_mask) + ) + ); + #else + r_.m128i = + _mm_or_si128( + _mm_and_si128(i32_max_mask, _mm_set1_epi32(INT32_MAX)), + _mm_andnot_si128(i32_max_mask, r_.m128i) + ); + #endif + #endif + + #if !defined(SIMDE_FAST_NANS) + r_.m128i = _mm_and_si128(r_.m128i, _mm_castps_si128(_mm_cmpord_ps(a_.m128, a_.m128))); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + + static const float SIMDE_VECTOR(16) max_representable = { SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0) }; + int32_t SIMDE_VECTOR(16) max_mask = HEDLEY_REINTERPRET_CAST(__typeof__(max_mask), a_.values > max_representable); + int32_t SIMDE_VECTOR(16) max_i32 = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX }; + r_.values = (max_i32 & max_mask) | (r_.values & ~max_mask); + + static const float SIMDE_VECTOR(16) min_representable = { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) }; + int32_t SIMDE_VECTOR(16) min_mask = HEDLEY_REINTERPRET_CAST(__typeof__(min_mask), a_.values < min_representable); + int32_t SIMDE_VECTOR(16) min_i32 = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.values = (min_i32 & min_mask) | (r_.values & ~min_mask); + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
r_.values[i] = simde_vcvts_s32_f32(a_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_s32_f32 + #define vcvtq_s32_f32(a) simde_vcvtq_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcvtq_u16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtq_u16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_u16_f16 + #define vcvtq_u16_f16(a) simde_vcvtq_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcvtq_u32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtq_u32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_trunc_sat_f32x4(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_cvttps_epu32(a_.m128); + #else + __m128 first_oob_high = _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0)); + __m128 neg_zero_if_too_high = + _mm_castsi128_ps( + _mm_slli_epi32( + _mm_castps_si128(_mm_cmple_ps(first_oob_high, a_.m128)), + 31 + ) + ); + r_.m128i = + _mm_xor_si128( + _mm_cvttps_epi32( + _mm_sub_ps(a_.m128, _mm_and_ps(neg_zero_if_too_high, first_oob_high)) + ), + _mm_castps_si128(neg_zero_if_too_high) + ); + #endif + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.m128i = _mm_and_si128(r_.m128i, _mm_castps_si128(_mm_cmpgt_ps(a_.m128, _mm_set1_ps(SIMDE_FLOAT32_C(0.0))))); + r_.m128i = _mm_or_si128 (r_.m128i, _mm_castps_si128(_mm_cmpge_ps(a_.m128, _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0))))); + #endif + + #if !defined(SIMDE_FAST_NANS) + r_.m128i = _mm_and_si128(r_.m128i, _mm_castps_si128(_mm_cmpord_ps(a_.m128, a_.m128))); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + + const __typeof__(a_.values) max_representable = { SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0) }; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > max_representable); + + const __typeof__(a_.values) min_representable = { SIMDE_FLOAT32_C(0.0), }; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > min_representable); + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_u32_f32(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_u32_f32 + #define vcvtq_u32_f32(a) simde_vcvtq_u32_f32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcvtq_s64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtq_s64_f64(a); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && defined(SIMDE_FAST_NANS) + return vec_signed(a); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return (a == a) & vec_signed(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_int64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && (defined(SIMDE_ARCH_AMD64) || (defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE))) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + const __m128i i64_max_mask = _mm_castpd_si128(_mm_cmpge_pd(a_.m128d, _mm_set1_pd(HEDLEY_STATIC_CAST(simde_float64, INT64_MAX)))); + const __m128d clamped_low = _mm_max_pd(a_.m128d, _mm_set1_pd(HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))); + #else + const __m128d clamped_low = a_.m128d; + #endif + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.m128i = _mm_cvttpd_epi64(clamped_low); + #else + r_.m128i = + _mm_set_epi64x( + _mm_cvttsd_si64(_mm_unpackhi_pd(clamped_low, clamped_low)), + _mm_cvttsd_si64(clamped_low) + ); + #endif + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_castpd_si128( + _mm_blendv_pd( + _mm_castsi128_pd(r_.m128i), + _mm_castsi128_pd(_mm_set1_epi64x(INT64_MAX)), + _mm_castsi128_pd(i64_max_mask) + ) + ); + #else + r_.m128i = + _mm_or_si128( + _mm_and_si128(i64_max_mask, _mm_set1_epi64x(INT64_MAX)), + _mm_andnot_si128(i64_max_mask, r_.m128i) + ); + #endif + #endif + + #if !defined(SIMDE_FAST_NANS) + r_.m128i = _mm_and_si128(r_.m128i, _mm_castpd_si128(_mm_cmpord_pd(a_.m128d, a_.m128d))); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + + const __typeof__((a_.values)) max_representable = { SIMDE_FLOAT64_C(9223372036854774784.0), SIMDE_FLOAT64_C(9223372036854774784.0) }; + __typeof__(r_.values) max_mask = HEDLEY_REINTERPRET_CAST(__typeof__(max_mask), a_.values > max_representable); + __typeof__(r_.values) max_i64 = { INT64_MAX, INT64_MAX }; + r_.values = (max_i64 & max_mask) | (r_.values & ~max_mask); + + const __typeof__((a_.values)) min_representable = { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN), HEDLEY_STATIC_CAST(simde_float64, INT64_MIN) }; + __typeof__(r_.values) min_mask = HEDLEY_REINTERPRET_CAST(__typeof__(min_mask), a_.values < min_representable); + __typeof__(r_.values) min_i64 = { INT64_MIN, INT64_MIN }; + r_.values = (min_i64 & min_mask) | (r_.values & ~min_mask); + + #if !defined(SIMDE_FAST_NANS) + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values == a_.values); + #endif + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_s64_f64(a_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_s64_f64 + #define vcvtq_s64_f64(a) simde_vcvtq_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcvtq_u64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtq_u64_f64(a); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && defined(SIMDE_FAST_NANS) + return vec_unsigned(a); + #elif 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_uint64x2_t, (a == a)) & vec_unsigned(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #elif defined(SIMDE_X86_SSE2_NATIVE) && (defined(SIMDE_ARCH_AMD64) || (defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE))) + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_cvttpd_epu64(a_.m128d); + #else + __m128d first_oob_high = _mm_set1_pd(SIMDE_FLOAT64_C(18446744073709551616.0)); + __m128d neg_zero_if_too_high = + _mm_castsi128_pd( + _mm_slli_epi64( + _mm_castpd_si128(_mm_cmple_pd(first_oob_high, a_.m128d)), + 63 + ) + ); + __m128d tmp = _mm_sub_pd(a_.m128d, _mm_and_pd(neg_zero_if_too_high, first_oob_high)); + r_.m128i = + _mm_xor_si128( + _mm_set_epi64x( + _mm_cvttsd_si64(_mm_unpackhi_pd(tmp, tmp)), + _mm_cvttsd_si64(tmp) + ), + _mm_castpd_si128(neg_zero_if_too_high) + ); + #endif + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.m128i = _mm_and_si128(r_.m128i, _mm_castpd_si128(_mm_cmpgt_pd(a_.m128d, _mm_set1_pd(SIMDE_FLOAT64_C(0.0))))); + r_.m128i = _mm_or_si128 (r_.m128i, _mm_castpd_si128(_mm_cmpge_pd(a_.m128d, _mm_set1_pd(SIMDE_FLOAT64_C(18446744073709551616.0))))); + #endif + + #if !defined(SIMDE_FAST_NANS) + r_.m128i = _mm_and_si128(r_.m128i, _mm_castpd_si128(_mm_cmpord_pd(a_.m128d, a_.m128d))); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + + const __typeof__(a_.values) max_representable = { SIMDE_FLOAT64_C(18446744073709549568.0), SIMDE_FLOAT64_C(18446744073709549568.0) }; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > max_representable); + + const __typeof__(a_.values) min_representable = { SIMDE_FLOAT64_C(0.0), }; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values > min_representable); + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values == a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_u64_f64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_u64_f64 + #define vcvtq_u64_f64(a) simde_vcvtq_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcvt_f16_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_f16_s16(a); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_float16x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, a_.values[i]); + #else + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a_.values[i])); + #endif + } + #endif + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f16_s16 + #define vcvt_f16_s16(a) simde_vcvt_f16_s16(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcvt_f32_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcvt_f32_s32(a); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_float32x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_f32_s32(a_.values[i]); + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_f32_s32 + #define vcvt_f32_s32(a) simde_vcvt_f32_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vcvt_f16_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_f16_u16(a); + #else + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + simde_float16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, a_.values[i]); + #else + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a_.values[i])); + #endif + } + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f16_u16 + #define vcvt_f16_u16(a) simde_vcvt_f16_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcvt_f32_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvt_f32_u32(a); + #else + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + simde_float32x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_f32_u32(a_.values[i]); + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvt_f32_u32 + #define vcvt_f32_u32(a) simde_vcvt_f32_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcvt_f64_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvt_f64_s64(a); + #else + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_float64x1_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_f64_s64(a_.values[i]); + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f64_s64 + #define vcvt_f64_s64(a) simde_vcvt_f64_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcvt_f64_u64(simde_uint64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvt_f64_u64(a); + #else + simde_uint64x1_private a_ = simde_uint64x1_to_private(a); + simde_float64x1_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_f64_u64(a_.values[i]); + } + 
#endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_f64_u64 + #define vcvt_f64_u64(a) simde_vcvt_f64_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcvtq_f16_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtq_f16_s16(a); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_float16x8_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, a_.values[i]); + #else + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a_.values[i])); + #endif + } + #endif + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_f16_s16 + #define vcvtq_f16_s16(a) simde_vcvtq_f16_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvtq_f32_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcvtq_f32_s32(a); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_float32x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_f32_s32(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_f32_s32 + #define vcvtq_f32_s32(a) simde_vcvtq_f32_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcvtq_f16_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) && defined(SIMDE_ARM_NEON_FP16) + return vcvtq_f16_u16(a); + #else + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_float16x8_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FLOAT16_VECTOR) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, a_.values[i]); + #else + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a_.values[i])); + #endif + } + #endif + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_f16_u16 + #define vcvtq_f16_u16(a) simde_vcvtq_f16_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvtq_f32_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtq_f32_u32(a); + #else + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_float32x4_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvts_f32_u32(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + 
#endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtq_f32_u32 + #define vcvtq_f32_u32(a) simde_vcvtq_f32_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcvtq_f64_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtq_f64_s64(a); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_ctd(a, 0); + #else + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_float64x2_private r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.m128d = _mm_cvtepi64_pd(a_.m128i); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_f64_s64(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_f64_s64 + #define vcvtq_f64_s64(a) simde_vcvtq_f64_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcvtq_f64_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtq_f64_u64(a); + #else + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_float64x2_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtd_f64_u64(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtq_f64_u64 + #define vcvtq_f64_u64(a) simde_vcvtq_f64_u64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_CVT_H */ +/* :: End simde/arm/neon/cvt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/cvtn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Michael R. 
Crusoe + */ + +#if !defined(SIMDE_ARM_NEON_CVTN_H) +#define SIMDE_ARM_NEON_CVTN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcvtnq_s32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtnq_s32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_int32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtps_epi32(a_.m128); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtps_epi32(a_.m128); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a_.values[i])); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnq_s32_f32 + #define vcvtnq_s32_f32(a) simde_vcvtnq_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcvtnq_s64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtnq_s64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_int64x2_private r_; + + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtpd_epi64(a_.m128d); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtpd_epi64(a_.m128d); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, simde_math_roundeven(a_.values[i])); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnq_s64_f64 + #define vcvtnq_s64_f64(a) simde_vcvtnq_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvtns_u32_f32(simde_float32 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtns_u32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); + #else + if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (simde_math_isnanf(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtns_u32_f32 + #define vcvtns_u32_f32(a) simde_vcvtns_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcvtnq_u32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtnq_u32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if 0 && defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + // Hmm.. 
this doesn't work, unlike the signed versions + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtps_epu32(a_.m128); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtps_epu32(a_.m128); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtns_u32_f32(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtnq_u32_f32 + #define vcvtnq_u32_f32(a) simde_vcvtnq_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtnd_u64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtnd_u64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundeven(a)); + #else + if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT64_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { + return UINT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundeven(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnd_u64_f64 + #define vcvtnd_u64_f64(a) simde_vcvtnd_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcvtnq_u64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtnq_u64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if 0 && defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + // Hmm.. this doesn't work, unlike the signed versions + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtpd_epu64(a_.m128d); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtpd_epu64(a_.m128d); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtnd_u64_f64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtnq_u64_f64 + #define vcvtnq_u64_f64(a) simde_vcvtnq_u64_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_CVTN_H */ +/* :: End simde/arm/neon/cvtn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/create.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +/* N.B. CM: vcreate_f16 and vcreate_bf16 are omitted as + * SIMDe has no 16-bit floating point support. + * Idem for the poly types. */ + +#if !defined(SIMDE_ARM_NEON_CREATE_H) +#define SIMDE_ARM_NEON_CREATE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vcreate_s8(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_s8(a); + #else + return simde_vreinterpret_s8_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_s8 + #define vcreate_s8(a) simde_vcreate_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcreate_s16(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_s16(a); + #else + return simde_vreinterpret_s16_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_s16 + #define vcreate_s16(a) simde_vcreate_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcreate_s32(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_s32(a); + #else + return simde_vreinterpret_s32_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_s32 + #define vcreate_s32(a) simde_vcreate_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcreate_s64(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_s64(a); + #else + return simde_vreinterpret_s64_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_s64 + #define vcreate_s64(a) simde_vcreate_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcreate_u8(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_u8(a); + #else + return simde_vreinterpret_u8_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_u8 + #define vcreate_u8(a) simde_vcreate_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcreate_u16(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_u16(a); + #else + return simde_vreinterpret_u16_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_u16 + #define vcreate_u16(a) simde_vcreate_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcreate_u32(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_u32(a); + #else + return simde_vreinterpret_u32_u64(simde_vdup_n_u64(a)); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_u32 + #define vcreate_u32(a) simde_vcreate_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcreate_u64(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_u64(a); + #else + return simde_vdup_n_u64(a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_u64 + #define vcreate_u64(a) simde_vcreate_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcreate_f32(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vcreate_f32(a); + #else + return simde_vreinterpret_f32_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_f32 + #define vcreate_f32(a) simde_vcreate_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcreate_f64(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcreate_f64(a); + #else + return simde_vreinterpret_f64_u64(simde_vdup_n_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcreate_f64 + #define vcreate_f64(a) simde_vcreate_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CREATE_H) */ +/* :: End simde/arm/neon/create.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/dot.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_DOT_H) +#define SIMDE_ARM_NEON_DOT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/paddl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_PADDL_H) +#define SIMDE_ARM_NEON_PADDL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/padd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_PADD_H) +#define SIMDE_ARM_NEON_PADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/uzp1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_UZP1_H) +#define SIMDE_ARM_NEON_UZP1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vuzp1_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_f32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2x2_t t = vuzp_f32(a, b); + return t.val[0]; + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_f32 + #define vuzp1_f32(a, b) simde_vuzp1_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vuzp1_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_s8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t t = vuzp_s8(a, b); + return t.val[0]; + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_s8 + #define vuzp1_s8(a, b) simde_vuzp1_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vuzp1_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_s16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a, b); + return t.val[0]; + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 2, 4, 6); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_s16 + #define vuzp1_s16(a, b) simde_vuzp1_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vuzp1_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_s32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a, b); + return t.val[0]; + #else + 
simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_s32 + #define vuzp1_s32(a, b) simde_vuzp1_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vuzp1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_u8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8x2_t t = vuzp_u8(a, b); + return t.val[0]; + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_u8 + #define vuzp1_u8(a, b) simde_vuzp1_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vuzp1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_u16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4x2_t t = vuzp_u16(a, b); + return t.val[0]; + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 2, 4, 6); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_u16 + #define vuzp1_u16(a, b) simde_vuzp1_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vuzp1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1_u32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2x2_t t = vuzp_u32(a, b); + return t.val[0]; + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1_u32 + #define 
vuzp1_u32(a, b) simde_vuzp1_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vuzp1q_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_f32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a, b); + return t.val[0]; + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 2, 4, 6); + #elif defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_shuffle_ps(a_.m128, b_.m128, 0x88); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_f32 + #define vuzp1q_f32(a, b) simde_vuzp1q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vuzp1q_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_f64(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_castps_pd(_mm_movelh_ps(_mm_castpd_ps(a_.m128d), _mm_castpd_ps(b_.m128d))); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_f64 + #define vuzp1q_f64(a, b) simde_vuzp1q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vuzp1q_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_s8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16x2_t t = vuzpq_s8(a, b); + return t.val[0]; + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_s8 + #define vuzp1q_s8(a, b) simde_vuzp1q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_int16x8_t +simde_vuzp1q_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_s16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a, b); + return t.val[0]; + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_s16 + #define vuzp1q_s16(a, b) simde_vuzp1q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vuzp1q_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_s32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a, b); + return t.val[0]; + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 2, 4, 6); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0x88)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_s32 + #define vuzp1q_s32(a, b) simde_vuzp1q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vuzp1q_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i))); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_s64 + #define vuzp1q_s64(a, b) simde_vuzp1q_s64((a), (b)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vuzp1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_u8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16x2_t t = vuzpq_u8(a, b); + return t.val[0]; + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_u8 + #define vuzp1q_u8(a, b) simde_vuzp1q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vuzp1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_u16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8x2_t t = vuzpq_u16(a, b); + return t.val[0]; + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_u16 + #define vuzp1q_u16(a, b) simde_vuzp1q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vuzp1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_u32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4x2_t t = vuzpq_u32(a, b); + return t.val[0]; + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 2, 4, 6); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0x88)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 2, 4, 6); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_u32 + #define vuzp1q_u32(a, b) simde_vuzp1q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vuzp1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp1q_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* _mm_movelh_ps?!?! SSE is weird. */ + r_.m128i = _mm_castps_si128(_mm_movelh_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i))); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_u64 + #define vuzp1q_u64(a, b) simde_vuzp1q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_UZP1_H) */ +/* :: End simde/arm/neon/uzp1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/uzp2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_UZP2_H) +#define SIMDE_ARM_NEON_UZP2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vuzp2_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_f32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2x2_t t = vuzp_f32(a, b); + return t.val[1]; + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_f32 + #define vuzp2_f32(a, b) simde_vuzp2_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vuzp2_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_s8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t t = vuzp_s8(a, b); + return t.val[1]; + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_s8 + #define vuzp2_s8(a, b) simde_vuzp2_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vuzp2_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_s16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a, b); + return t.val[1]; + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 1, 3, 5, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_s16 + #define vuzp2_s16(a, b) simde_vuzp2_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vuzp2_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_s32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a, b); + return t.val[1]; 
+ #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_s32 + #define vuzp2_s32(a, b) simde_vuzp2_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vuzp2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_u8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8x2_t t = vuzp_u8(a, b); + return t.val[1]; + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_u8 + #define vuzp2_u8(a, b) simde_vuzp2_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vuzp2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_u16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4x2_t t = vuzp_u16(a, b); + return t.val[1]; + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 1, 3, 5, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_u16 + #define vuzp2_u16(a, b) simde_vuzp2_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vuzp2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2_u32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2x2_t t = vuzp_u32(a, b); + return t.val[1]; + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2_u32 + #define vuzp2_u32(a, b) simde_vuzp2_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vuzp2q_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_f32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a, b); + return t.val[1]; + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 1, 3, 5, 7); + #elif defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_shuffle_ps(a_.m128, b_.m128, 0xdd); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_f32 + #define vuzp2q_f32(a, b) simde_vuzp2q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vuzp2q_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergel(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_unpackhi_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_f64 + #define vuzp2q_f64(a, b) simde_vuzp2q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vuzp2q_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_s8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16x2_t t = vuzpq_s8(a, b); + return t.val[1]; + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_s8 + #define vuzp2q_s8(a, b) simde_vuzp2q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vuzp2q_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_s16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a, b); + return t.val[1]; + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_s16 + #define vuzp2q_s16(a, b) simde_vuzp2q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vuzp2q_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_s32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a, b); + return t.val[1]; + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 1, 3, 5, 7); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0xdd)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_s32 + #define vuzp2q_s32(a, b) simde_vuzp2q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vuzp2q_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergel(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef 
vuzp2q_s64 + #define vuzp2q_s64(a, b) simde_vuzp2q_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vuzp2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_u8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16x2_t t = vuzpq_u8(a, b); + return t.val[1]; + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_u8 + #define vuzp2q_u8(a, b) simde_vuzp2q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vuzp2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_u16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8x2_t t = vuzpq_u16(a, b); + return t.val[1]; + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_u16 + #define vuzp2q_u16(a, b) simde_vuzp2q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vuzp2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_u32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4x2_t t = vuzpq_u32(a, b); + return t.val[1]; + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 1, 3, 5, 7); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(a_.m128i), _mm_castsi128_ps(b_.m128i), 0xdd)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 1, 3, 5, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef 
vuzp2q_u32 + #define vuzp2q_u32(a, b) simde_vuzp2q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vuzp2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuzp2q_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergel(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_u64 + #define vuzp2q_u64(a, b) simde_vuzp2q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_UZP2_H) */ +/* :: End simde/arm/neon/uzp2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/get_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_GET_LANE_H) +#define SIMDE_ARM_NEON_GET_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vget_lane_f32(simde_float32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vget_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); + #else + simde_float32x2_private v_ = simde_float32x2_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_f32 + #define vget_lane_f32(v, lane) simde_vget_lane_f32((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vget_lane_f64(simde_float64x1_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + return vget_lane_f64(v, 0); + #else + simde_float64x1_private v_ = simde_float64x1_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vget_lane_f64 + #define vget_lane_f64(v, lane) simde_vget_lane_f64((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vget_lane_s8(simde_int8x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vget_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); + #else + simde_int8x8_private v_ = simde_int8x8_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_s8 + #define vget_lane_s8(v, lane) simde_vget_lane_s8((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vget_lane_s16(simde_int16x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vget_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); + #else + simde_int16x4_private v_ = simde_int16x4_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_s16 + #define vget_lane_s16(v, lane) simde_vget_lane_s16((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vget_lane_s32(simde_int32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vget_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); + #else + simde_int32x2_private v_ = simde_int32x2_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_s32 + #define vget_lane_s32(v, lane) simde_vget_lane_s32((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vget_lane_s64(simde_int64x1_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + int64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) lane; + return vget_lane_s64(v, 0); + #else + simde_int64x1_private v_ = simde_int64x1_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef 
vget_lane_s64 + #define vget_lane_s64(v, lane) simde_vget_lane_s64((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vget_lane_u8(simde_uint8x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vget_lane_u8, r, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v); + #else + simde_uint8x8_private v_ = simde_uint8x8_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_u8 + #define vget_lane_u8(v, lane) simde_vget_lane_u8((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vget_lane_u16(simde_uint16x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vget_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); + #else + simde_uint16x4_private v_ = simde_uint16x4_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_u16 + #define vget_lane_u16(v, lane) simde_vget_lane_u16((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vget_lane_u32(simde_uint32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vget_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); + #else + simde_uint32x2_private v_ = simde_uint32x2_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_u32 + #define vget_lane_u32(v, lane) simde_vget_lane_u32((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vget_lane_u64(simde_uint64x1_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + uint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) lane; + return vget_lane_u64(v, 0); + #else + simde_uint64x1_private v_ = simde_uint64x1_to_private(v); + + r = v_.values[lane]; + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vget_lane_u64 + #define vget_lane_u64(v, lane) simde_vget_lane_u64((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vgetq_lane_f32(simde_float32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vgetq_lane_f32, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v); + #else + simde_float32x4_private v_ = simde_float32x4_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_CONSTIFY_4_(wasm_f32x4_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT32_C(0.0)), lane, v_.v128); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_f32 + #define vgetq_lane_f32(v, lane) simde_vgetq_lane_f32((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vgetq_lane_f64(simde_float64x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_(vgetq_lane_f64, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, v); + #else + simde_float64x2_private v_ = simde_float64x2_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_CONSTIFY_2_(wasm_f64x2_extract_lane, r, (HEDLEY_UNREACHABLE(), SIMDE_FLOAT64_C(0.0)), lane, 
v_.v128); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_f64 + #define vgetq_lane_f64(v, lane) simde_vgetq_lane_f64((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vgetq_lane_s8(simde_int8x16_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_16_(vgetq_lane_s8, r, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v); + #else + simde_int8x16_private v_ = simde_int8x16_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int r_; + SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT8_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(int8_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_s8 + #define vgetq_lane_s8(v, lane) simde_vgetq_lane_s8((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vgetq_lane_s16(simde_int16x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vgetq_lane_s16, r, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v); + #else + simde_int16x8_private v_ = simde_int16x8_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int r_; + SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT16_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(int16_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_s16 + #define vgetq_lane_s16(v, lane) simde_vgetq_lane_s16((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vgetq_lane_s32(simde_int32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vgetq_lane_s32, r, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v); + #else + simde_int32x4_private v_ = simde_int32x4_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int r_; + SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT32_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(int32_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_s32 + #define vgetq_lane_s32(v, lane) simde_vgetq_lane_s32((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vgetq_lane_s64(simde_int64x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + int64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vgetq_lane_s64, r, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v); + #else + simde_int64x2_private v_ = simde_int64x2_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int64_t r_; + SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), INT64_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(int64_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_s64 + #define vgetq_lane_s64(v, lane) simde_vgetq_lane_s64((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vgetq_lane_u8(simde_uint8x16_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_16_(vgetq_lane_u8, r, (HEDLEY_UNREACHABLE(), 
UINT8_C(0)), lane, v); + #else + simde_uint8x16_private v_ = simde_uint8x16_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int r_; + SIMDE_CONSTIFY_16_(wasm_i8x16_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT8_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(uint8_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_u8 + #define vgetq_lane_u8(v, lane) simde_vgetq_lane_u8((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vgetq_lane_u16(simde_uint16x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vgetq_lane_u16, r, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v); + #else + simde_uint16x8_private v_ = simde_uint16x8_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int r_; + SIMDE_CONSTIFY_8_(wasm_i16x8_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT16_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(uint16_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_u16 + #define vgetq_lane_u16(v, lane) simde_vgetq_lane_u16((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vgetq_lane_u32(simde_uint32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vgetq_lane_u32, r, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v); + #else + simde_uint32x4_private v_ = simde_uint32x4_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int32_t r_; + SIMDE_CONSTIFY_4_(wasm_i32x4_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT32_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(uint32_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_u32 + #define vgetq_lane_u32(v, lane) simde_vgetq_lane_u32((v), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vgetq_lane_u64(simde_uint64x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + uint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vgetq_lane_u64, r, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v); + #else + simde_uint64x2_private v_ = simde_uint64x2_to_private(v); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + int64_t r_; + SIMDE_CONSTIFY_2_(wasm_i64x2_extract_lane, r_, (HEDLEY_UNREACHABLE(), UINT64_C(0)), lane, v_.v128); + r = HEDLEY_STATIC_CAST(uint64_t, r_); + #else + r = v_.values[lane]; + #endif + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vgetq_lane_u64 + #define vgetq_lane_u64(v, lane) simde_vgetq_lane_u64((v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_GET_LANE_H) */ +/* :: End simde/arm/neon/get_lane.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vpaddd_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddd_s64(a); + #else + return simde_vaddd_s64(simde_vgetq_lane_s64(a, 0), simde_vgetq_lane_s64(a, 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpaddd_s64 + #define vpaddd_s64(a) simde_vpaddd_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vpaddd_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + 
return vpaddd_u64(a); + #else + return simde_vaddd_u64(simde_vgetq_lane_u64(a, 0), simde_vgetq_lane_u64(a, 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpaddd_u64 + #define vpaddd_u64(a) simde_vpaddd_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vpaddd_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddd_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + return a_.values[0] + a_.values[1]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpaddd_f64 + #define vpaddd_f64(a) simde_vpaddd_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vpadds_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpadds_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + return a_.values[0] + a_.values[1]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpadds_f32 + #define vpadds_f32(a) simde_vpadds_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vpadd_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) + return vpadd_f32(a, b); + #else + return simde_vadd_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_f32 + #define vpadd_f32(a, b) simde_vpadd_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vpadd_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadd_s8(a, b); + #else + return simde_vadd_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_s8 + #define vpadd_s8(a, b) simde_vpadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vpadd_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadd_s16(a, b); + #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return simde_int16x4_from_m64(_mm_hadd_pi16(simde_int16x4_to_m64(a), simde_int16x4_to_m64(b))); + #else + return simde_vadd_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_s16 + #define vpadd_s16(a, b) simde_vpadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vpadd_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadd_s32(a, b); + #elif defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return simde_int32x2_from_m64(_mm_hadd_pi32(simde_int32x2_to_m64(a), simde_int32x2_to_m64(b))); + #else + return simde_vadd_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_s32 + #define vpadd_s32(a, b) simde_vpadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vpadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadd_u8(a, b); + #else + return simde_vadd_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_u8 + #define vpadd_u8(a, b) simde_vpadd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vpadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadd_u16(a, b); + #else + return simde_vadd_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_u16 + #define vpadd_u16(a, b) simde_vpadd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vpadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadd_u32(a, b); + #else + return simde_vadd_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadd_u32 + #define vpadd_u32(a, b) simde_vpadd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vpaddq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_f32(a, b); + #elif defined(SIMDE_X86_SSE3_NATIVE) + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE3_NATIVE) + r_.m128 = _mm_hadd_ps(a_.m128, b_.m128); + #endif + + return simde_float32x4_from_private(r_); + #else + return simde_vaddq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_f32 + #define vpaddq_f32(a, b) simde_vpaddq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vpaddq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_f64(a, b); + #elif defined(SIMDE_X86_SSE3_NATIVE) + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE3_NATIVE) + r_.m128d = _mm_hadd_pd(a_.m128d, b_.m128d); + #endif + + return simde_float64x2_from_private(r_); + #else + return simde_vaddq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpaddq_f64 + #define vpaddq_f64(a, b) simde_vpaddq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vpaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_s8(a, b); + #else + return simde_vaddq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_s8 + #define vpaddq_s8(a, b) simde_vpaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vpaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_s16(a, b); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_hadd_epi16(a_.m128i, b_.m128i); + #endif + + return simde_int16x8_from_private(r_); + #else + return simde_vaddq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_s16 + #define vpaddq_s16(a, b) simde_vpaddq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vpaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_s32(a, b); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = 
_mm_hadd_epi32(a_.m128i, b_.m128i); + #endif + + return simde_int32x4_from_private(r_); + #else + return simde_vaddq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_s32 + #define vpaddq_s32(a, b) simde_vpaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vpaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_s64(a, b); + #else + return simde_vaddq_s64(simde_vuzp1q_s64(a, b), simde_vuzp2q_s64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_s64 + #define vpaddq_s64(a, b) simde_vpaddq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vpaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_u8(a, b); + #else + return simde_vaddq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_u8 + #define vpaddq_u8(a, b) simde_vpaddq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vpaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_u16(a, b); + #else + return simde_vaddq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_u16 + #define vpaddq_u16(a, b) simde_vpaddq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vpaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_u32(a, b); + #else + return simde_vaddq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_u32 + #define vpaddq_u32(a, b) simde_vpaddq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vpaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpaddq_u64(a, b); + #else + return simde_vaddq_u64(simde_vuzp1q_u64(a, b), simde_vuzp2q_u64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddq_u64 + #define vpaddq_u64(a, b) simde_vpaddq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_PADD_H) */ +/* :: End simde/arm/neon/padd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/shl_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_SHL_N_H) +#define SIMDE_ARM_NEON_SHL_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vshld_n_s64 (const int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + return HEDLEY_STATIC_CAST(int64_t, a << n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vshld_n_s64(a, n) vshld_n_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vshld_n_s64 + #define vshld_n_s64(a, n) simde_vshld_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vshld_n_u64 (const uint64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + return HEDLEY_STATIC_CAST(uint64_t, a << n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vshld_n_u64(a, n) vshld_n_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vshld_n_u64 + #define vshld_n_u64(a, n) simde_vshld_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vshl_n_s8 (const simde_int8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values << HEDLEY_STATIC_CAST(int8_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] << n); + } + #endif + + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_s8(a, n) vshl_n_s8((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_s8(a, n) \ + simde_int8x8_from_m64(_mm_andnot_si64(_mm_set1_pi8((1 << n) - 1), _mm_slli_si64(simde_int8x8_to_m64(a), (n)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_s8 + #define vshl_n_s8(a, n) simde_vshl_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vshl_n_s16 (const simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << HEDLEY_STATIC_CAST(int16_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] << n); + } + #endif + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_s16(a, n) vshl_n_s16((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_s16(a, n) simde_int16x4_from_m64(_mm_slli_pi16(simde_int16x4_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_s16 + #define 
vshl_n_s16(a, n) simde_vshl_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vshl_n_s32 (const simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i] << n); + } + #endif + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_s32(a, n) vshl_n_s32((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_s32(a, n) simde_int32x2_from_m64(_mm_slli_pi32(simde_int32x2_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_s32 + #define vshl_n_s32(a, n) simde_vshl_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vshl_n_s64 (const simde_int64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i] << n); + } + #endif + + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_s64(a, n) vshl_n_s64((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_s64(a, n) simde_int64x1_from_m64(_mm_slli_si64(simde_int64x1_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_s64 + #define vshl_n_s64(a, n) simde_vshl_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vshl_n_u8 (const simde_uint8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values << HEDLEY_STATIC_CAST(uint8_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); + } + #endif + + return simde_uint8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_u8(a, n) vshl_n_u8((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_u8(a, n) \ + simde_uint8x8_from_m64(_mm_andnot_si64(_mm_set1_pi8((1 << n) - 1), _mm_slli_si64(simde_uint8x8_to_m64(a), (n)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_u8 + #define vshl_n_u8(a, n) simde_vshl_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vshl_n_u16 (const simde_uint16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << HEDLEY_STATIC_CAST(uint16_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); + } + #endif + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_u16(a, n) vshl_n_u16((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define 
simde_vshl_n_u16(a, n) simde_uint16x4_from_m64(_mm_slli_pi16(simde_uint16x4_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_u16 + #define vshl_n_u16(a, n) simde_vshl_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vshl_n_u32 (const simde_uint32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); + } + #endif + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_u32(a, n) vshl_n_u32((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_u32(a, n) simde_uint32x2_from_m64(_mm_slli_pi32(simde_uint32x2_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_u32 + #define vshl_n_u32(a, n) simde_vshl_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vshl_n_u64 (const simde_uint64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); + } + #endif + + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshl_n_u64(a, n) vshl_n_u64((a), (n)) +#elif defined(SIMDE_X86_MMX_NATIVE) + #define simde_vshl_n_u64(a, n) simde_uint64x1_from_m64(_mm_slli_si64(simde_uint64x1_to_m64(a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_n_u64 + #define vshl_n_u64(a, n) simde_vshl_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vshlq_n_s8 (const simde_int8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64(a_.m128i, n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << HEDLEY_STATIC_CAST(int8_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, a_.values[i] << n); + } + #endif + + return simde_int8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_s8(a, n) vshlq_n_s8((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshlq_n_s8(a, n) (vec_sl((a), vec_splats(SIMDE_CHECKED_STATIC_CAST(unsigned char, int, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_s8 + #define vshlq_n_s8(a, n) simde_vshlq_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vshlq_n_s16 
(const simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_slli_epi16(a_.m128i, (n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << HEDLEY_STATIC_CAST(int16_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] << n); + } + #endif + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_s16(a, n) vshlq_n_s16((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshlq_n_s16(a, n) (vec_sl((a), vec_splats(SIMDE_CHECKED_STATIC_CAST(unsigned short, int, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_s16 + #define vshlq_n_s16(a, n) simde_vshlq_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vshlq_n_s32 (const simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_slli_epi32(a_.m128i, (n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i] << n); + } + #endif + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_s32(a, n) vshlq_n_s32((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshlq_n_s32(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_s32 + #define vshlq_n_s32(a, n) simde_vshlq_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vshlq_n_s64 (const simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_slli_epi64(a_.m128i, (n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i] << n); + } + #endif + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_s64(a, n) vshlq_n_s64((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + #define simde_vshlq_n_s64(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_s64 + #define vshlq_n_s64(a, n) simde_vshlq_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vshlq_n_u8 (const simde_uint8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + + #if 
defined(SIMDE_X86_GFNI_NATIVE) + /* https://wunkolo.github.io/post/2020/11/gf2p8affineqb-int8-shifting/ */ + r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(INT64_C(0x0102040810204080) >> (n * 8)), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_andnot_si128(_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << n) - 1)), _mm_slli_epi64(a_.m128i, (n))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << HEDLEY_STATIC_CAST(uint8_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); + } + #endif + + return simde_uint8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_u8(a, n) vshlq_n_u8((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshlq_n_u8(a, n) (vec_sl((a), vec_splat_u8(n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_u8 + #define vshlq_n_u8(a, n) simde_vshlq_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vshlq_n_u16 (const simde_uint16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_slli_epi16(a_.m128i, (n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << HEDLEY_STATIC_CAST(uint16_t, n); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); + } + #endif + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_u16(a, n) vshlq_n_u16((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshlq_n_u16(a, n) (vec_sl((a), vec_splat_u16(n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_u16 + #define vshlq_n_u16(a, n) simde_vshlq_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vshlq_n_u32 (const simde_uint32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_slli_epi32(a_.m128i, (n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); + } + #endif + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_u32(a, n) vshlq_n_u32((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_vshlq_n_u32(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned int, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_u32 + #define vshlq_n_u32(a, n) simde_vshlq_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vshlq_n_u64 (const simde_uint64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + 
simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_slli_epi64(a_.m128i, (n)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values << n; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); + } + #endif + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshlq_n_u64(a, n) vshlq_n_u64((a), (n)) +#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + #define simde_vshlq_n_u64(a, n) (vec_sl((a), vec_splats(HEDLEY_STATIC_CAST(unsigned long long, (n))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_n_u64 + #define vshlq_n_u64(a, n) simde_vshlq_n_u64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SHL_N_H) */ +/* :: End simde/arm/neon/shl_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vpaddl_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddl_s8(a); + #else + simde_int16x8_t tmp = simde_vmovl_s8(a); + return simde_vpadd_s16(simde_vget_low_s16(tmp), simde_vget_high_s16(tmp)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddl_s8 + #define vpaddl_s8(a) simde_vpaddl_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vpaddl_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddl_s16(a); + #else + simde_int32x4_t tmp = simde_vmovl_s16(a); + return simde_vpadd_s32(simde_vget_low_s32(tmp), simde_vget_high_s32(tmp)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddl_s16 + #define vpaddl_s16(a) simde_vpaddl_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vpaddl_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddl_s32(a); + #else + simde_int64x2_t tmp = simde_vmovl_s32(a); + return simde_vadd_s64(simde_vget_low_s64(tmp), simde_vget_high_s64(tmp)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddl_s32 + #define vpaddl_s32(a) simde_vpaddl_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vpaddl_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddl_u8(a); + #else + simde_uint16x8_t tmp = simde_vmovl_u8(a); + return simde_vpadd_u16(simde_vget_low_u16(tmp), simde_vget_high_u16(tmp)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddl_u8 + #define vpaddl_u8(a) simde_vpaddl_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vpaddl_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddl_u16(a); + #else + simde_uint32x4_t tmp = simde_vmovl_u16(a); + return simde_vpadd_u32(simde_vget_low_u32(tmp), simde_vget_high_u32(tmp)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddl_u16 + #define vpaddl_u16(a) simde_vpaddl_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t 
+simde_vpaddl_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddl_u32(a); + #else + simde_uint64x2_t tmp = simde_vmovl_u32(a); + return simde_vadd_u64(simde_vget_low_u64(tmp), simde_vget_high_u64(tmp)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddl_u32 + #define vpaddl_u32(a) simde_vpaddl_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vpaddlq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddlq_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1); + return + vec_add( + vec_mule(a, one), + vec_mulo(a, one) + ); + #elif \ + defined(SIMDE_X86_XOP_NATIVE) || \ + defined(SIMDE_X86_SSSE3_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_int16x8_private r_; + + #if defined(SIMDE_X86_XOP_NATIVE) + r_.m128i = _mm_haddw_epi8(a_.m128i); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_extadd_pairwise_i8x16(a_.v128); + #endif + + return simde_int16x8_from_private(r_); + #else + simde_int16x8_t lo = simde_vshrq_n_s16(simde_vshlq_n_s16(simde_vreinterpretq_s16_s8(a), 8), 8); + simde_int16x8_t hi = simde_vshrq_n_s16(simde_vreinterpretq_s16_s8(a), 8); + return simde_vaddq_s16(lo, hi); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddlq_s8 + #define vpaddlq_s8(a) simde_vpaddlq_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vpaddlq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddlq_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1); + return + vec_add( + vec_mule(a, one), + vec_mulo(a, one) + ); + #elif \ + defined(SIMDE_X86_XOP_NATIVE) || \ + defined(SIMDE_X86_SSE2_NATIVE) + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int32x4_private r_; + + #if defined(SIMDE_X86_XOP_NATIVE) + r_.m128i = _mm_haddd_epi16(a_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_madd_epi16(a_.m128i, _mm_set1_epi16(INT8_C(1))); + #endif + + return simde_int32x4_from_private(r_); + #else + simde_int32x4_t lo = simde_vshrq_n_s32(simde_vshlq_n_s32(simde_vreinterpretq_s32_s16(a), 16), 16); + simde_int32x4_t hi = simde_vshrq_n_s32(simde_vreinterpretq_s32_s16(a), 16); + return simde_vaddq_s32(lo, hi); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddlq_s16 + #define vpaddlq_s16(a) simde_vpaddlq_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vpaddlq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddlq_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(int) one = vec_splat_s32(1); + return + vec_add( + vec_mule(a, one), + vec_mulo(a, one) + ); + #else + simde_int64x2_t lo = simde_vshrq_n_s64(simde_vshlq_n_s64(simde_vreinterpretq_s64_s32(a), 32), 32); + simde_int64x2_t hi = simde_vshrq_n_s64(simde_vreinterpretq_s64_s32(a), 32); + return simde_vaddq_s64(lo, hi); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddlq_s32 + #define vpaddlq_s32(a) simde_vpaddlq_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vpaddlq_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddlq_u8(a); 
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1); + return + vec_add( + vec_mule(a, one), + vec_mulo(a, one) + ); + #elif \ + defined(SIMDE_X86_XOP_NATIVE) || \ + defined(SIMDE_X86_SSSE3_NATIVE) + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_uint16x8_private r_; + + #if defined(SIMDE_X86_XOP_NATIVE) + r_.m128i = _mm_haddw_epu8(a_.m128i); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_maddubs_epi16(a_.m128i, _mm_set1_epi8(INT8_C(1))); + #endif + + return simde_uint16x8_from_private(r_); + #else + simde_uint16x8_t lo = simde_vshrq_n_u16(simde_vshlq_n_u16(simde_vreinterpretq_u16_u8(a), 8), 8); + simde_uint16x8_t hi = simde_vshrq_n_u16(simde_vreinterpretq_u16_u8(a), 8); + return simde_vaddq_u16(lo, hi); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddlq_u8 + #define vpaddlq_u8(a) simde_vpaddlq_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vpaddlq_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddlq_u16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1); + return + vec_add( + vec_mule(a, one), + vec_mulo(a, one) + ); + #elif \ + defined(SIMDE_X86_XOP_NATIVE) || \ + defined(SIMDE_X86_SSSE3_NATIVE) + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddd_epu16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = + _mm_add_epi32( + _mm_srli_epi32(a_.m128i, 16), + _mm_and_si128(a_.m128i, _mm_set1_epi32(INT32_C(0x0000ffff))) + ); + #endif + + return simde_uint32x4_from_private(r_); + #else + simde_uint32x4_t lo = simde_vshrq_n_u32(simde_vshlq_n_u32(simde_vreinterpretq_u32_u16(a), 16), 16); + simde_uint32x4_t hi = simde_vshrq_n_u32(simde_vreinterpretq_u32_u16(a), 16); + return simde_vaddq_u32(lo, hi); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddlq_u16 + #define vpaddlq_u16(a) simde_vpaddlq_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vpaddlq_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpaddlq_u32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) one = vec_splat_u32(1); + return + vec_add( + vec_mule(a, one), + vec_mulo(a, one) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint64x2_private r_; + + r_.m128i = + _mm_add_epi64( + _mm_srli_epi64(a_.m128i, 32), + _mm_and_si128(a_.m128i, _mm_set1_epi64x(INT64_C(0x00000000ffffffff))) + ); + + return simde_uint64x2_from_private(r_); + #else + simde_uint64x2_t lo = simde_vshrq_n_u64(simde_vshlq_n_u64(simde_vreinterpretq_u64_u32(a), 32), 32); + simde_uint64x2_t hi = simde_vshrq_n_u64(simde_vreinterpretq_u64_u32(a), 32); + return simde_vaddq_u64(lo, hi); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpaddlq_u32 + #define vpaddlq_u32(a) simde_vpaddlq_u32((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_PADDL_H */ +/* :: End simde/arm/neon/paddl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mull.h :: */ +/* SPDX-License-Identifier: MIT + * + * 
Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MULL_H) +#define SIMDE_ARM_NEON_MULL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mul.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MUL_H) +#define SIMDE_ARM_NEON_MUL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmul_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_f32 + #define vmul_f32(a, b) simde_vmul_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmul_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmul_f64(a, b); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_f64 + #define vmul_f64(a, b) simde_vmul_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmul_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_s8 + #define vmul_s8(a, b) simde_vmul_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmul_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _m_pmullw(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_s16 + #define vmul_s16(a, b) simde_vmul_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmul_s32(simde_int32x2_t a, 
simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_s32 + #define vmul_s32(a, b) simde_vmul_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_x_vmul_s64(simde_int64x1_t a, simde_int64x1_t b) { + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmul_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_u8 + #define vmul_u8(a, b) simde_vmul_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmul_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_u16 + #define vmul_u16(a, b) simde_vmul_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmul_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_u32 + #define vmul_u32(a, b) simde_vmul_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_x_vmul_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + simde_uint64x1_private + r_, + a_ = 
simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmulq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_f32(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_mul_ps(a_.m128, b_.m128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_mul(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_f32 + #define vmulq_f32(a, b) simde_vmulq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmulq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmulq_f64(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_mul_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_mul(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_f64 + #define vmulq_f64(a, b) simde_vmulq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmulq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mul(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/29155682/501126 */ + const __m128i dst_even = _mm_mullo_epi16(a_.m128i, b_.m128i); + r_.m128i = + _mm_or_si128( + _mm_slli_epi16( + _mm_mullo_epi16( + _mm_srli_epi16(a_.m128i, 8), + _mm_srli_epi16(b_.m128i, 8) + ), + 8 + ), + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_and_si128(dst_even, _mm_set1_epi16(0xFF)) + #else + _mm_srli_epi16( + _mm_slli_epi16(dst_even, 8), + 8 + ) + #endif + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_s8 + #define vmulq_s8(a, b) simde_vmulq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmulq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
return vmulq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_mullo_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_s16 + #define vmulq_s16(a, b) simde_vmulq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmulq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_mul(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_s32 + #define vmulq_s32(a, b) simde_vmulq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_x_vmulq_s64(simde_int64x2_t a, simde_int64x2_t b) { + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_mul(a_.v128, b_.v128); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.m128i = _mm_mullo_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values * b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmulq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_u8(a, b); + #else + return + simde_vreinterpretq_u8_s8( + simde_vmulq_s8( + simde_vreinterpretq_s8_u8(a), + simde_vreinterpretq_s8_u8(b) + ) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_u8 + #define vmulq_u8(a, b) simde_vmulq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmulq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_u16(a, b); + #else + return + simde_vreinterpretq_u16_s16( + simde_vmulq_s16( + simde_vreinterpretq_s16_u16(a), + simde_vreinterpretq_s16_u16(b) + ) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_u16 + #define vmulq_u16(a, b) simde_vmulq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmulq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_u32(a, b); + #else + return + simde_vreinterpretq_u32_s32( + simde_vmulq_s32( + simde_vreinterpretq_s32_u32(a), + simde_vreinterpretq_s32_u32(b) + ) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_u32 + #define vmulq_u32(a, b) simde_vmulq_u32((a), 
(b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_x_vmulq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + return + simde_vreinterpretq_u64_s64( + simde_x_vmulq_s64( + simde_vreinterpretq_s64_u64(a), + simde_vreinterpretq_s64_u64(b) + ) + ); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MUL_H) */ +/* :: End simde/arm/neon/mul.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmull_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vmulq_s16(simde_vmovl_s8(a), simde_vmovl_s8(b)); + #else + simde_int16x8_private r_; + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av, bv; + SIMDE_CONVERT_VECTOR_(av, a_.values); + SIMDE_CONVERT_VECTOR_(bv, b_.values); + r_.values = av * bv; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i]) * HEDLEY_STATIC_CAST(int16_t, b_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_s8 + #define vmull_s8(a, b) simde_vmull_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmull_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vmulq_s32(simde_vmovl_s16(a), simde_vmovl_s16(b)); + #else + simde_int32x4_private r_; + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av, bv; + SIMDE_CONVERT_VECTOR_(av, a_.values); + SIMDE_CONVERT_VECTOR_(bv, b_.values); + r_.values = av * bv; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_s16 + #define vmull_s16(a, b) simde_vmull_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmull_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_s32(a, b); + #else + simde_int64x2_private r_; + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.values) av, bv; + SIMDE_CONVERT_VECTOR_(av, a_.values); + SIMDE_CONVERT_VECTOR_(bv, b_.values); + r_.values = av * bv; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * HEDLEY_STATIC_CAST(int64_t, b_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + 
#undef vmull_s32 + #define vmull_s32(a, b) simde_vmull_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmull_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vmulq_u16(simde_vmovl_u8(a), simde_vmovl_u8(b)); + #else + simde_uint16x8_private r_; + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av, bv; + SIMDE_CONVERT_VECTOR_(av, a_.values); + SIMDE_CONVERT_VECTOR_(bv, b_.values); + r_.values = av * bv; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_u8 + #define vmull_u8(a, b) simde_vmull_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmull_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vmulq_u32(simde_vmovl_u16(a), simde_vmovl_u16(b)); + #else + simde_uint32x4_private r_; + simde_uint16x4_private + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av, bv; + SIMDE_CONVERT_VECTOR_(av, a_.values); + SIMDE_CONVERT_VECTOR_(bv, b_.values); + r_.values = av * bv; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_u16 + #define vmull_u16(a, b) simde_vmull_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmull_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_u32(a, b); + #else + simde_uint64x2_private r_; + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.values) av, bv; + SIMDE_CONVERT_VECTOR_(av, a_.values); + SIMDE_CONVERT_VECTOR_(bv, b_.values); + r_.values = av * bv; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_u32 + #define vmull_u32(a, b) simde_vmull_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MULL_H) */ +/* :: End simde/arm/neon/mull.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vdot_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
defined(__ARM_FEATURE_DOTPROD) + return vdot_s32(r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde_vadd_s32(r, simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(a, b))))); + #else + simde_int32x2_private r_; + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + for (int i = 0 ; i < 2 ; i++) { + int32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx = j + (i << 2); + acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); + } + r_.values[i] = acc; + } + return simde_vadd_s32(r, simde_int32x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdot_s32 + #define vdot_s32(r, a, b) simde_vdot_s32((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vdot_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + return vdot_u32(r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde_vadd_u32(r, simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(a, b))))); + #else + simde_uint32x2_private r_; + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + for (int i = 0 ; i < 2 ; i++) { + uint32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx = j + (i << 2); + acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); + } + r_.values[i] = acc; + } + return simde_vadd_u32(r, simde_uint32x2_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdot_u32 + #define vdot_u32(r, a, b) simde_vdot_u32((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vdotq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + return vdotq_s32(r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde_vaddq_s32(r, + simde_vcombine_s32(simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_low_s8(a), simde_vget_low_s8(b))))), + simde_vmovn_s64(simde_vpaddlq_s32(simde_vpaddlq_s16(simde_vmull_s8(simde_vget_high_s8(a), simde_vget_high_s8(b))))))); + #else + simde_int32x4_private r_; + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + for (int i = 0 ; i < 4 ; i++) { + int32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx = j + (i << 2); + acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx]); + } + r_.values[i] = acc; + } + return simde_vaddq_s32(r, simde_int32x4_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdotq_s32 + #define vdotq_s32(r, a, b) simde_vdotq_s32((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vdotq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + return vdotq_u32(r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
return simde_vaddq_u32(r, + simde_vcombine_u32(simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_low_u8(a), simde_vget_low_u8(b))))), + simde_vmovn_u64(simde_vpaddlq_u32(simde_vpaddlq_u16(simde_vmull_u8(simde_vget_high_u8(a), simde_vget_high_u8(b))))))); + #else + simde_uint32x4_private r_; + simde_uint8x16_private + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + for (int i = 0 ; i < 4 ; i++) { + uint32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx = j + (i << 2); + acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx]); + } + r_.values[i] = acc; + } + return simde_vaddq_u32(r, simde_uint32x4_from_private(r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdotq_u32 + #define vdotq_u32(r, a, b) simde_vdotq_u32((r), (a), (b)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_DOT_H) */ +/* :: End simde/arm/neon/dot.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/dot_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_DOT_LANE_H) +#define SIMDE_ARM_NEON_DOT_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/dup_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_DUP_LANE_H) +#define SIMDE_ARM_NEON_DUP_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vdups_lane_s32(simde_int32x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_int32x2_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdups_lane_s32(vec, lane) vdups_lane_s32(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdups_lane_s32 + #define vdups_lane_s32(vec, lane) simde_vdups_lane_s32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vdups_lane_u32(simde_uint32x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_uint32x2_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdups_lane_u32(vec, lane) vdups_lane_u32(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdups_lane_u32 + #define vdups_lane_u32(vec, lane) simde_vdups_lane_u32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vdups_lane_f32(simde_float32x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_float32x2_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdups_lane_f32(vec, lane) vdups_lane_f32(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdups_lane_f32 + #define vdups_lane_f32(vec, lane) simde_vdups_lane_f32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t 
+simde_vdups_laneq_s32(simde_int32x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_int32x4_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdups_laneq_s32(vec, lane) vdups_laneq_s32(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdups_laneq_s32 + #define vdups_laneq_s32(vec, lane) simde_vdups_laneq_s32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vdups_laneq_u32(simde_uint32x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_uint32x4_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdups_laneq_u32(vec, lane) vdups_laneq_u32(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdups_laneq_u32 + #define vdups_laneq_u32(vec, lane) simde_vdups_laneq_u32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vdups_laneq_f32(simde_float32x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_float32x4_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdups_laneq_f32(vec, lane) vdups_laneq_f32(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdups_laneq_f32 + #define vdups_laneq_f32(vec, lane) simde_vdups_laneq_f32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vdupd_lane_s64(simde_int64x1_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return simde_int64x1_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupd_lane_s64(vec, lane) vdupd_lane_s64(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupd_lane_s64 + #define vdupd_lane_s64(vec, lane) simde_vdupd_lane_s64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vdupd_lane_u64(simde_uint64x1_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return simde_uint64x1_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupd_lane_u64(vec, lane) vdupd_lane_u64(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupd_lane_u64 + #define vdupd_lane_u64(vec, lane) simde_vdupd_lane_u64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vdupd_lane_f64(simde_float64x1_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return simde_float64x1_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupd_lane_f64(vec, lane) vdupd_lane_f64(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupd_lane_f64 + #define vdupd_lane_f64(vec, lane) simde_vdupd_lane_f64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vdupd_laneq_s64(simde_int64x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_int64x2_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupd_laneq_s64(vec, lane) vdupd_laneq_s64(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupd_laneq_s64 + #define vdupd_laneq_s64(vec, lane) simde_vdupd_laneq_s64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vdupd_laneq_u64(simde_uint64x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_uint64x2_to_private(vec).values[lane]; 
+} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupd_laneq_u64(vec, lane) vdupd_laneq_u64(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupd_laneq_u64 + #define vdupd_laneq_u64(vec, lane) simde_vdupd_laneq_u64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vdupd_laneq_f64(simde_float64x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_float64x2_to_private(vec).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupd_laneq_f64(vec, lane) vdupd_laneq_f64(vec, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupd_laneq_f64 + #define vdupd_laneq_f64(vec, lane) simde_vdupd_laneq_f64((vec), (lane)) +#endif + +//simde_vdup_lane_f32 +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_f32(vec, lane) vdup_lane_f32(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_f32(vec, lane) (__extension__ ({ \ + simde_float32x2_private simde_vdup_lane_f32_vec_ = simde_float32x2_to_private(vec); \ + simde_float32x2_private simde_vdup_lane_f32_r_; \ + simde_vdup_lane_f32_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 8, \ + simde_vdup_lane_f32_vec_.values, \ + simde_vdup_lane_f32_vec_.values, \ + lane, lane \ + ); \ + simde_float32x2_from_private(simde_vdup_lane_f32_r_); \ + })) +#else + #define simde_vdup_lane_f32(vec, lane) simde_vdup_n_f32(simde_vdups_lane_f32(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_f32 + #define vdup_lane_f32(vec, lane) simde_vdup_lane_f32((vec), (lane)) +#endif + +//simde_vdup_lane_f64 +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_lane_f64(vec, lane) vdup_lane_f64(vec, lane) +#else + #define simde_vdup_lane_f64(vec, lane) simde_vdup_n_f64(simde_vdupd_lane_f64(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_f64 + #define vdup_lane_f64(vec, lane) simde_vdup_lane_f64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vdup_lane_s8(simde_int8x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdup_n_s8(simde_int8x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_s8(vec, lane) vdup_lane_s8(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_s8(vec, lane) (__extension__ ({ \ + simde_int8x8_private simde_vdup_lane_s8_vec_ = simde_int8x8_to_private(vec); \ + simde_int8x8_private simde_vdup_lane_s8_r_; \ + simde_vdup_lane_s8_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 8, 8, \ + simde_vdup_lane_s8_vec_.values, \ + simde_vdup_lane_s8_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_int8x8_from_private(simde_vdup_lane_s8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_s8 + #define vdup_lane_s8(vec, lane) simde_vdup_lane_s8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vdup_lane_s16(simde_int16x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdup_n_s16(simde_int16x4_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_s16(vec, lane) vdup_lane_s16(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_s16(vec, lane) (__extension__ ({ 
\ + simde_int16x4_private simde_vdup_lane_s16_vec_ = simde_int16x4_to_private(vec); \ + simde_int16x4_private simde_vdup_lane_s16_r_; \ + simde_vdup_lane_s16_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 8, \ + simde_vdup_lane_s16_vec_.values, \ + simde_vdup_lane_s16_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_int16x4_from_private(simde_vdup_lane_s16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_s16 + #define vdup_lane_s16(vec, lane) simde_vdup_lane_s16((vec), (lane)) +#endif + +//simde_vdup_lane_s32 +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_s32(vec, lane) vdup_lane_s32(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_s32(vec, lane) (__extension__ ({ \ + simde_int32x2_private simde_vdup_lane_s32_vec_ = simde_int32x2_to_private(vec); \ + simde_int32x2_private simde_vdup_lane_s32_r_; \ + simde_vdup_lane_s32_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 8, \ + simde_vdup_lane_s32_vec_.values, \ + simde_vdup_lane_s32_vec_.values, \ + lane, lane \ + ); \ + simde_int32x2_from_private(simde_vdup_lane_s32_r_); \ + })) +#else + #define simde_vdup_lane_s32(vec, lane) simde_vdup_n_s32(simde_vdups_lane_s32(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_s32 + #define vdup_lane_s32(vec, lane) simde_vdup_lane_s32((vec), (lane)) +#endif + +//simde_vdup_lane_s64 +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_s64(vec, lane) vdup_lane_s64(vec, lane) +#else + #define simde_vdup_lane_s64(vec, lane) simde_vdup_n_s64(simde_vdupd_lane_s64(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_s64 + #define vdup_lane_s64(vec, lane) simde_vdup_lane_s64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vdup_lane_u8(simde_uint8x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdup_n_u8(simde_uint8x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_u8(vec, lane) vdup_lane_u8(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_u8(vec, lane) (__extension__ ({ \ + simde_uint8x8_private simde_vdup_lane_u8_vec_ = simde_uint8x8_to_private(vec); \ + simde_uint8x8_private simde_vdup_lane_u8_r_; \ + simde_vdup_lane_u8_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 8, 8, \ + simde_vdup_lane_u8_vec_.values, \ + simde_vdup_lane_u8_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_uint8x8_from_private(simde_vdup_lane_u8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_u8 + #define vdup_lane_u8(vec, lane) simde_vdup_lane_u8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vdup_lane_u16(simde_uint16x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdup_n_u16(simde_uint16x4_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_u16(vec, lane) vdup_lane_u16(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_u16(vec, lane) (__extension__ ({ \ + simde_uint16x4_private simde_vdup_lane_u16_vec_ = simde_uint16x4_to_private(vec); \ + simde_uint16x4_private simde_vdup_lane_u16_r_; \ + simde_vdup_lane_u16_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 8, \ + 
simde_vdup_lane_u16_vec_.values, \ + simde_vdup_lane_u16_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_uint16x4_from_private(simde_vdup_lane_u16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_u16 + #define vdup_lane_u16(vec, lane) simde_vdup_lane_u16((vec), (lane)) +#endif + +//simde_vdup_lane_u32 +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_u32(vec, lane) vdup_lane_u32(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vdup_lane_u32(vec, lane) (__extension__ ({ \ + simde_uint32x2_private simde_vdup_lane_u32_vec_ = simde_uint32x2_to_private(vec); \ + simde_uint32x2_private simde_vdup_lane_u32_r_; \ + simde_vdup_lane_u32_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 8, \ + simde_vdup_lane_u32_vec_.values, \ + simde_vdup_lane_u32_vec_.values, \ + lane, lane \ + ); \ + simde_uint32x2_from_private(simde_vdup_lane_u32_r_); \ + })) +#else + #define simde_vdup_lane_u32(vec, lane) simde_vdup_n_u32(simde_vdups_lane_u32(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_u32 + #define vdup_lane_u32(vec, lane) simde_vdup_lane_u32((vec), (lane)) +#endif + +//simde_vdup_lane_u64 +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdup_lane_u64(vec, lane) vdup_lane_u64(vec, lane) +#else + #define simde_vdup_lane_u64(vec, lane) simde_vdup_n_u64(simde_vdupd_lane_u64(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdup_lane_u64 + #define vdup_lane_u64(vec, lane) simde_vdup_lane_u64((vec), (lane)) +#endif + +//simde_vdup_laneq_f32 +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_f32(vec, lane) vdup_laneq_f32(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_f32(vec, lane) (__extension__ ({ \ + simde_float32x4_private simde_vdup_laneq_f32_vec_ = simde_float32x4_to_private(vec); \ + simde_float32x2_private simde_vdup_laneq_f32_r_; \ + simde_vdup_laneq_f32_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_f32_vec_.values, \ + simde_vdup_laneq_f32_vec_.values, \ + lane, lane \ + ); \ + simde_float32x2_from_private(simde_vdup_laneq_f32_r_); \ + })) +#else + #define simde_vdup_laneq_f32(vec, lane) simde_vdup_n_f32(simde_vdups_laneq_f32(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_f32 + #define vdup_laneq_f32(vec, lane) simde_vdup_laneq_f32((vec), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_f64(vec, lane) vdup_laneq_f64(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_f64(vec, lane) (__extension__ ({ \ + simde_float64x2_private simde_vdup_laneq_f64_vec_ = simde_float64x2_to_private(vec); \ + simde_float64x1_private simde_vdup_laneq_f64_r_; \ + simde_vdup_laneq_f64_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_f64_vec_.values, \ + simde_vdup_laneq_f64_vec_.values, \ + lane \ + ); \ + simde_float64x1_from_private(simde_vdup_laneq_f64_r_); \ + })) +#else + #define simde_vdup_laneq_f64(vec, lane) simde_vdup_n_f64(simde_vdupd_laneq_f64(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_f64 + #define vdup_laneq_f64(vec, lane) simde_vdup_laneq_f64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vdup_laneq_s8(simde_int8x16_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + return 
simde_vdup_n_s8(simde_int8x16_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_s8(vec, lane) vdup_laneq_s8(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_s8(vec, lane) (__extension__ ({ \ + simde_int8x16_private simde_vdup_laneq_s8_vec_ = simde_int8x16_to_private(vec); \ + simde_int8x8_private simde_vdup_laneq_s8_r_; \ + simde_vdup_laneq_s8_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_s8_vec_.values, \ + simde_vdup_laneq_s8_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_int8x8_from_private(simde_vdup_laneq_s8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_s8 + #define vdup_laneq_s8(vec, lane) simde_vdup_laneq_s8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vdup_laneq_s16(simde_int16x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdup_n_s16(simde_int16x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_s16(vec, lane) vdup_laneq_s16(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_s16(vec, lane) (__extension__ ({ \ + simde_int16x8_private simde_vdup_laneq_s16_vec_ = simde_int16x8_to_private(vec); \ + simde_int16x4_private simde_vdup_laneq_s16_r_; \ + simde_vdup_laneq_s16_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_s16_vec_.values, \ + simde_vdup_laneq_s16_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_int16x4_from_private(simde_vdup_laneq_s16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_s16 + #define vdup_laneq_s16(vec, lane) simde_vdup_laneq_s16((vec), (lane)) +#endif + +//simde_vdup_laneq_s32 +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_s32(vec, lane) vdup_laneq_s32(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_s32(vec, lane) (__extension__ ({ \ + simde_int32x4_private simde_vdup_laneq_s32_vec_ = simde_int32x4_to_private(vec); \ + simde_int32x2_private simde_vdup_laneq_s32_r_; \ + simde_vdup_laneq_s32_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_s32_vec_.values, \ + simde_vdup_laneq_s32_vec_.values, \ + lane, lane \ + ); \ + simde_int32x2_from_private(simde_vdup_laneq_s32_r_); \ + })) +#else + #define simde_vdup_laneq_s32(vec, lane) simde_vdup_n_s32(simde_vdups_laneq_s32(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_s32 + #define vdup_laneq_s32(vec, lane) simde_vdup_laneq_s32((vec), (lane)) +#endif + +//simde_vdup_laneq_s64 +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_s64(vec, lane) vdup_laneq_s64(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_s64(vec, lane) (__extension__ ({ \ + simde_int64x2_private simde_vdup_laneq_s64_vec_ = simde_int64x2_to_private(vec); \ + simde_int64x1_private simde_vdup_laneq_s64_r_; \ + simde_vdup_laneq_s64_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_s64_vec_.values, \ + simde_vdup_laneq_s64_vec_.values, \ + lane \ + ); \ + simde_int64x1_from_private(simde_vdup_laneq_s64_r_); \ + })) +#else + #define simde_vdup_laneq_s64(vec, lane) simde_vdup_n_s64(simde_vdupd_laneq_s64(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_s64 + #define vdup_laneq_s64(vec, lane) 
simde_vdup_laneq_s64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vdup_laneq_u8(simde_uint8x16_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + return simde_vdup_n_u8(simde_uint8x16_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_u8(vec, lane) vdup_laneq_u8(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_u8(vec, lane) (__extension__ ({ \ + simde_uint8x16_private simde_vdup_laneq_u8_vec_ = simde_uint8x16_to_private(vec); \ + simde_uint8x8_private simde_vdup_laneq_u8_r_; \ + simde_vdup_laneq_u8_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_u8_vec_.values, \ + simde_vdup_laneq_u8_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_uint8x8_from_private(simde_vdup_laneq_u8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_u8 + #define vdup_laneq_u8(vec, lane) simde_vdup_laneq_u8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vdup_laneq_u16(simde_uint16x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdup_n_u16(simde_uint16x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_u16(vec, lane) vdup_laneq_u16(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_u16(vec, lane) (__extension__ ({ \ + simde_uint16x8_private simde_vdup_laneq_u16_vec_ = simde_uint16x8_to_private(vec); \ + simde_uint16x4_private simde_vdup_laneq_u16_r_; \ + simde_vdup_laneq_u16_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_u16_vec_.values, \ + simde_vdup_laneq_u16_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_uint16x4_from_private(simde_vdup_laneq_u16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_u16 + #define vdup_laneq_u16(vec, lane) simde_vdup_laneq_u16((vec), (lane)) +#endif + +//simde_vdup_laneq_u32 +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_u32(vec, lane) vdup_laneq_u32(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_u32(vec, lane) (__extension__ ({ \ + simde_uint32x4_private simde_vdup_laneq_u32_vec_ = simde_uint32x4_to_private(vec); \ + simde_uint32x2_private simde_vdup_laneq_u32_r_; \ + simde_vdup_laneq_u32_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_u32_vec_.values, \ + simde_vdup_laneq_u32_vec_.values, \ + lane, lane \ + ); \ + simde_uint32x2_from_private(simde_vdup_laneq_u32_r_); \ + })) +#else + #define simde_vdup_laneq_u32(vec, lane) simde_vdup_n_u32(simde_vdups_laneq_u32(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_u32 + #define vdup_laneq_u32(vec, lane) simde_vdup_laneq_u32((vec), (lane)) +#endif + +//simde_vdup_laneq_u64 +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdup_laneq_u64(vec, lane) vdup_laneq_u64(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdup_laneq_u64(vec, lane) (__extension__ ({ \ + simde_uint64x2_private simde_vdup_laneq_u64_vec_ = simde_uint64x2_to_private(vec); \ + simde_uint64x1_private simde_vdup_laneq_u64_r_; \ + simde_vdup_laneq_u64_r_.values = \ + __builtin_shufflevector( \ + simde_vdup_laneq_u64_vec_.values, \ + simde_vdup_laneq_u64_vec_.values, \ + lane \ + ); \ + simde_uint64x1_from_private(simde_vdup_laneq_u64_r_); \ + })) +#else + 
#define simde_vdup_laneq_u64(vec, lane) simde_vdup_n_u64(simde_vdupd_laneq_u64(vec, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdup_laneq_u64 + #define vdup_laneq_u64(vec, lane) simde_vdup_laneq_u64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vdupq_lane_f32(simde_float32x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_vdupq_n_f32(simde_float32x2_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_f32(vec, lane) vdupq_lane_f32(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_f32(vec, lane) (__extension__ ({ \ + simde_float32x2_private simde_vdupq_lane_f32_vec_ = simde_float32x2_to_private(vec); \ + simde_float32x4_private simde_vdupq_lane_f32_r_; \ + simde_vdupq_lane_f32_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_f32_vec_.values, \ + simde_vdupq_lane_f32_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_float32x4_from_private(simde_vdupq_lane_f32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_f32 + #define vdupq_lane_f32(vec, lane) simde_vdupq_lane_f32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vdupq_lane_f64(simde_float64x1_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return simde_vdupq_n_f64(simde_float64x1_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_lane_f64(vec, lane) vdupq_lane_f64(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_f64(vec, lane) (__extension__ ({ \ + simde_float64x1_private simde_vdupq_lane_f64_vec_ = simde_float64x1_to_private(vec); \ + simde_float64x2_private simde_vdupq_lane_f64_r_; \ + simde_vdupq_lane_f64_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_f64_vec_.values, \ + simde_vdupq_lane_f64_vec_.values, \ + lane, lane \ + ); \ + simde_float64x2_from_private(simde_vdupq_lane_f64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_f64 + #define vdupq_lane_f64(vec, lane) simde_vdupq_lane_f64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vdupq_lane_s8(simde_int8x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdupq_n_s8(simde_int8x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_s8(vec, lane) vdupq_lane_s8(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_s8(vec, lane) (__extension__ ({ \ + simde_int8x8_private simde_vdupq_lane_s8_vec_ = simde_int8x8_to_private(vec); \ + simde_int8x16_private simde_vdupq_lane_s8_r_; \ + simde_vdupq_lane_s8_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_s8_vec_.values, \ + simde_vdupq_lane_s8_vec_.values, \ + lane, lane, lane, lane, \ + lane, lane, lane, lane, \ + lane, lane, lane, lane, \ + lane, lane, lane, lane \ + ); \ + simde_int8x16_from_private(simde_vdupq_lane_s8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_s8 + #define vdupq_lane_s8(vec, lane) simde_vdupq_lane_s8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vdupq_lane_s16(simde_int16x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdupq_n_s16(simde_int16x4_to_private(vec).values[lane]); +} +#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_s16(vec, lane) vdupq_lane_s16(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_s16(vec, lane) (__extension__ ({ \ + simde_int16x4_private simde_vdupq_lane_s16_vec_ = simde_int16x4_to_private(vec); \ + simde_int16x8_private simde_vdupq_lane_s16_r_; \ + simde_vdupq_lane_s16_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_s16_vec_.values, \ + simde_vdupq_lane_s16_vec_.values, \ + lane, lane, lane, lane, \ + lane, lane, lane, lane \ + ); \ + simde_int16x8_from_private(simde_vdupq_lane_s16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_s16 + #define vdupq_lane_s16(vec, lane) simde_vdupq_lane_s16((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vdupq_lane_s32(simde_int32x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_vdupq_n_s32(simde_int32x2_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_s32(vec, lane) vdupq_lane_s32(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_s32(vec, lane) (__extension__ ({ \ + simde_int32x2_private simde_vdupq_lane_s32_vec_ = simde_int32x2_to_private(vec); \ + simde_int32x4_private simde_vdupq_lane_s32_r_; \ + simde_vdupq_lane_s32_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_s32_vec_.values, \ + simde_vdupq_lane_s32_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_int32x4_from_private(simde_vdupq_lane_s32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_s32 + #define vdupq_lane_s32(vec, lane) simde_vdupq_lane_s32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vdupq_lane_s64(simde_int64x1_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return simde_vdupq_n_s64(simde_int64x1_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_s64(vec, lane) vdupq_lane_s64(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_s64(vec, lane) (__extension__ ({ \ + simde_int64x1_private simde_vdupq_lane_s64_vec_ = simde_int64x1_to_private(vec); \ + simde_int64x2_private simde_vdupq_lane_s64_r_; \ + simde_vdupq_lane_s64_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_s64_vec_.values, \ + simde_vdupq_lane_s64_vec_.values, \ + lane, lane \ + ); \ + simde_int64x2_from_private(simde_vdupq_lane_s64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_s64 + #define vdupq_lane_s64(vec, lane) simde_vdupq_lane_s64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vdupq_lane_u8(simde_uint8x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdupq_n_u8(simde_uint8x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_u8(vec, lane) vdupq_lane_u8(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_u8(vec, lane) (__extension__ ({ \ + simde_uint8x8_private simde_vdupq_lane_u8_vec_ = simde_uint8x8_to_private(vec); \ + simde_uint8x16_private simde_vdupq_lane_u8_r_; \ + simde_vdupq_lane_u8_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_u8_vec_.values, \ + simde_vdupq_lane_u8_vec_.values, \ + lane, lane, lane, lane, \ + lane, lane, lane, lane, \ + lane, lane, 
lane, lane, \ + lane, lane, lane, lane \ + ); \ + simde_uint8x16_from_private(simde_vdupq_lane_u8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_u8 + #define vdupq_lane_u8(vec, lane) simde_vdupq_lane_u8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vdupq_lane_u16(simde_uint16x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdupq_n_u16(simde_uint16x4_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_u16(vec, lane) vdupq_lane_u16(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_u16(vec, lane) (__extension__ ({ \ + simde_uint16x4_private simde_vdupq_lane_u16_vec_ = simde_uint16x4_to_private(vec); \ + simde_uint16x8_private simde_vdupq_lane_u16_r_; \ + simde_vdupq_lane_u16_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_u16_vec_.values, \ + simde_vdupq_lane_u16_vec_.values, \ + lane, lane, lane, lane, \ + lane, lane, lane, lane \ + ); \ + simde_uint16x8_from_private(simde_vdupq_lane_u16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_u16 + #define vdupq_lane_u16(vec, lane) simde_vdupq_lane_u16((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vdupq_lane_u32(simde_uint32x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_vdupq_n_u32(simde_uint32x2_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_u32(vec, lane) vdupq_lane_u32(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_u32(vec, lane) (__extension__ ({ \ + simde_uint32x2_private simde_vdupq_lane_u32_vec_ = simde_uint32x2_to_private(vec); \ + simde_uint32x4_private simde_vdupq_lane_u32_r_; \ + simde_vdupq_lane_u32_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_u32_vec_.values, \ + simde_vdupq_lane_u32_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_uint32x4_from_private(simde_vdupq_lane_u32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_u32 + #define vdupq_lane_u32(vec, lane) simde_vdupq_lane_u32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vdupq_lane_u64(simde_uint64x1_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return simde_vdupq_n_u64(simde_uint64x1_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vdupq_lane_u64(vec, lane) vdupq_lane_u64(vec, lane) +#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + #define simde_vdupq_lane_u64(vec, lane) (__extension__ ({ \ + simde_uint64x1_private simde_vdupq_lane_u64_vec_ = simde_uint64x1_to_private(vec); \ + simde_uint64x2_private simde_vdupq_lane_u64_r_; \ + simde_vdupq_lane_u64_r_.values = \ + __builtin_shufflevector( \ + simde_vdupq_lane_u64_vec_.values, \ + simde_vdupq_lane_u64_vec_.values, \ + lane, lane \ + ); \ + simde_uint64x2_from_private(simde_vdupq_lane_u64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vdupq_lane_u64 + #define vdupq_lane_u64(vec, lane) simde_vdupq_lane_u64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vdupq_laneq_f32(simde_float32x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdupq_n_f32(simde_float32x4_to_private(vec).values[lane]); +} +#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_f32(vec, lane) vdupq_laneq_f32(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_f32(vec, lane) (__extension__ ({ \ + simde_float32x4_private simde_vdupq_laneq_f32_vec_ = simde_float32x4_to_private(vec); \ + simde_float32x4_private simde_vdupq_laneq_f32_r_; \ + simde_vdupq_laneq_f32_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_vdupq_laneq_f32_vec_.values, \ + simde_vdupq_laneq_f32_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_float32x4_from_private(simde_vdupq_laneq_f32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_f32 + #define vdupq_laneq_f32(vec, lane) simde_vdupq_laneq_f32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vdupq_laneq_f64(simde_float64x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_vdupq_n_f64(simde_float64x2_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_f64(vec, lane) vdupq_laneq_f64(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_f64(vec, lane) (__extension__ ({ \ + simde_float64x2_private simde_vdupq_laneq_f64_vec_ = simde_float64x2_to_private(vec); \ + simde_float64x2_private simde_vdupq_laneq_f64_r_; \ + simde_vdupq_laneq_f64_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_vdupq_laneq_f64_vec_.values, \ + simde_vdupq_laneq_f64_vec_.values, \ + lane, lane \ + ); \ + simde_float64x2_from_private(simde_vdupq_laneq_f64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_f64 + #define vdupq_laneq_f64(vec, lane) simde_vdupq_laneq_f64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vdupq_laneq_s8(simde_int8x16_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + return simde_vdupq_n_s8(simde_int8x16_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_s8(vec, lane) vdupq_laneq_s8(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_s8(vec, lane) (__extension__ ({ \ + simde_int8x16_private simde_vdupq_laneq_s8_vec_ = simde_int8x16_to_private(vec); \ + simde_int8x16_private simde_vdupq_laneq_s8_r_; \ + simde_vdupq_laneq_s8_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 8, 16, \ + simde_vdupq_laneq_s8_vec_.values, \ + simde_vdupq_laneq_s8_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_int8x16_from_private(simde_vdupq_laneq_s8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_s8 + #define vdupq_laneq_s8(vec, lane) simde_vdupq_laneq_s8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vdupq_laneq_s16(simde_int16x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return simde_vdupq_n_s16(simde_int16x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_s16(vec, lane) vdupq_laneq_s16(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_s16(vec, lane) (__extension__ ({ \ + simde_int16x8_private simde_vdupq_laneq_s16_vec_ = simde_int16x8_to_private(vec); \ + simde_int16x8_private simde_vdupq_laneq_s16_r_; \ + simde_vdupq_laneq_s16_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_vdupq_laneq_s16_vec_.values, \ + 
simde_vdupq_laneq_s16_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_int16x8_from_private(simde_vdupq_laneq_s16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_s16 + #define vdupq_laneq_s16(vec, lane) simde_vdupq_laneq_s16((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vdupq_laneq_s32(simde_int32x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdupq_n_s32(simde_int32x4_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_s32(vec, lane) vdupq_laneq_s32(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_s32(vec, lane) (__extension__ ({ \ + simde_int32x4_private simde_vdupq_laneq_s32_vec_ = simde_int32x4_to_private(vec); \ + simde_int32x4_private simde_vdupq_laneq_s32_r_; \ + simde_vdupq_laneq_s32_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_vdupq_laneq_s32_vec_.values, \ + simde_vdupq_laneq_s32_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_int32x4_from_private(simde_vdupq_laneq_s32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_s32 + #define vdupq_laneq_s32(vec, lane) simde_vdupq_laneq_s32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vdupq_laneq_s64(simde_int64x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_vdupq_n_s64(simde_int64x2_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_s64(vec, lane) vdupq_laneq_s64(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_s64(vec, lane) (__extension__ ({ \ + simde_int64x2_private simde_vdupq_laneq_s64_vec_ = simde_int64x2_to_private(vec); \ + simde_int64x2_private simde_vdupq_laneq_s64_r_; \ + simde_vdupq_laneq_s64_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_vdupq_laneq_s64_vec_.values, \ + simde_vdupq_laneq_s64_vec_.values, \ + lane, lane \ + ); \ + simde_int64x2_from_private(simde_vdupq_laneq_s64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_s64 + #define vdupq_laneq_s64(vec, lane) simde_vdupq_laneq_s64((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vdupq_laneq_u8(simde_uint8x16_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + return simde_vdupq_n_u8(simde_uint8x16_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_u8(vec, lane) vdupq_laneq_u8(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_u8(vec, lane) (__extension__ ({ \ + simde_uint8x16_private simde_vdupq_laneq_u8_vec_ = simde_uint8x16_to_private(vec); \ + simde_uint8x16_private simde_vdupq_laneq_u8_r_; \ + simde_vdupq_laneq_u8_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 8, 16, \ + simde_vdupq_laneq_u8_vec_.values, \ + simde_vdupq_laneq_u8_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_uint8x16_from_private(simde_vdupq_laneq_u8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_u8 + #define vdupq_laneq_u8(vec, lane) simde_vdupq_laneq_u8((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vdupq_laneq_u16(simde_uint16x8_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + return 
simde_vdupq_n_u16(simde_uint16x8_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_u16(vec, lane) vdupq_laneq_u16(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_u16(vec, lane) (__extension__ ({ \ + simde_uint16x8_private simde_vdupq_laneq_u16_vec_ = simde_uint16x8_to_private(vec); \ + simde_uint16x8_private simde_vdupq_laneq_u16_r_; \ + simde_vdupq_laneq_u16_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_vdupq_laneq_u16_vec_.values, \ + simde_vdupq_laneq_u16_vec_.values, \ + lane, lane, lane, lane, lane, lane, lane, lane \ + ); \ + simde_uint16x8_from_private(simde_vdupq_laneq_u16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_u16 + #define vdupq_laneq_u16(vec, lane) simde_vdupq_laneq_u16((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vdupq_laneq_u32(simde_uint32x4_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return simde_vdupq_n_u32(simde_uint32x4_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_u32(vec, lane) vdupq_laneq_u32(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_u32(vec, lane) (__extension__ ({ \ + simde_uint32x4_private simde_vdupq_laneq_u32_vec_ = simde_uint32x4_to_private(vec); \ + simde_uint32x4_private simde_vdupq_laneq_u32_r_; \ + simde_vdupq_laneq_u32_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_vdupq_laneq_u32_vec_.values, \ + simde_vdupq_laneq_u32_vec_.values, \ + lane, lane, lane, lane \ + ); \ + simde_uint32x4_from_private(simde_vdupq_laneq_u32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_u32 + #define vdupq_laneq_u32(vec, lane) simde_vdupq_laneq_u32((vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vdupq_laneq_u64(simde_uint64x2_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return simde_vdupq_n_u64(simde_uint64x2_to_private(vec).values[lane]); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vdupq_laneq_u64(vec, lane) vdupq_laneq_u64(vec, lane) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_vdupq_laneq_u64(vec, lane) (__extension__ ({ \ + simde_uint64x2_private simde_vdupq_laneq_u64_vec_ = simde_uint64x2_to_private(vec); \ + simde_uint64x2_private simde_vdupq_laneq_u64_r_; \ + simde_vdupq_laneq_u64_r_.values = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_vdupq_laneq_u64_vec_.values, \ + simde_vdupq_laneq_u64_vec_.values, \ + lane, lane \ + ); \ + simde_uint64x2_from_private(simde_vdupq_laneq_u64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vdupq_laneq_u64 + #define vdupq_laneq_u64(vec, lane) simde_vdupq_laneq_u64((vec), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_DUP_LANE_H) */ +/* :: End simde/arm/neon/dup_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vdot_lane_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x8_t b, const int lane) + 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_2_(vdot_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int32x2_t + b_lane, + b_32 = vreinterpret_s32_s8(b); + + SIMDE_CONSTIFY_2_(vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + result = + vadd_s32( + r, + vmovn_s64( + vpaddlq_s32( + vpaddlq_s16( + vmull_s8(a, vreinterpret_s8_s32(b_lane)) + ) + ) + ) + ); + #else + simde_int32x2_private r_ = simde_int32x2_to_private(r); + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + for (int i = 0 ; i < 2 ; i++) { + int32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_int32x2_from_private(r_); + #endif + + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdot_lane_s32 + #define vdot_lane_s32(r, a, b, lane) simde_vdot_lane_s32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vdot_lane_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x2_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_2_(vdot_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint32x2_t + b_lane, + b_32 = vreinterpret_u32_u8(b); + + SIMDE_CONSTIFY_2_(vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + result = + vadd_u32( + r, + vmovn_u64( + vpaddlq_u32( + vpaddlq_u16( + vmull_u8(a, vreinterpret_u8_u32(b_lane)) + ) + ) + ) + ); + #else + simde_uint32x2_private r_ = simde_uint32x2_to_private(r); + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + for (int i = 0 ; i < 2 ; i++) { + uint32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_uint32x2_from_private(r_); + #endif + + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdot_lane_u32 + #define vdot_lane_u32(r, a, b, lane) simde_vdot_lane_u32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vdot_laneq_s32(simde_int32x2_t r, simde_int8x8_t a, simde_int8x16_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x2_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_4_(vdot_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int32x2_t b_lane; + simde_int32x4_t b_32 = vreinterpretq_s32_s8(b); + + SIMDE_CONSTIFY_4_(simde_vdup_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + result = + vadd_s32( + r, + vmovn_s64( + 
vpaddlq_s32( + vpaddlq_s16( + vmull_s8(a, vreinterpret_s8_s32(b_lane)) + ) + ) + ) + ); + #else + simde_int32x2_private r_ = simde_int32x2_to_private(r); + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + for (int i = 0 ; i < 2 ; i++) { + int32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_int32x2_from_private(r_); + #endif + + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdot_laneq_s32 + #define vdot_laneq_s32(r, a, b, lane) simde_vdot_laneq_s32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vdot_laneq_u32(simde_uint32x2_t r, simde_uint8x8_t a, simde_uint8x16_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x2_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_4_(vdot_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint32x2_t b_lane; + simde_uint32x4_t b_32 = vreinterpretq_u32_u8(b); + + SIMDE_CONSTIFY_4_(simde_vdup_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + result = + vadd_u32( + r, + vmovn_u64( + vpaddlq_u32( + vpaddlq_u16( + vmull_u8(a, vreinterpret_u8_u32(b_lane)) + ) + ) + ) + ); + #else + simde_uint32x2_private r_ = simde_uint32x2_to_private(r); + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + for (int i = 0 ; i < 2 ; i++) { + uint32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for (int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_uint32x2_from_private(r_); + #endif + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdot_laneq_u32 + #define vdot_laneq_u32(r, a, b, lane) simde_vdot_laneq_u32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vdotq_laneq_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x16_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_4_(vdotq_laneq_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint32x4_t + b_lane, + b_32 = vreinterpretq_u32_u8(b); + SIMDE_CONSTIFY_4_(simde_vdupq_laneq_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + + result = + vcombine_u32( + vadd_u32( + vget_low_u32(r), + vmovn_u64( + vpaddlq_u32( + vpaddlq_u16( + vmull_u8(vget_low_u8(a), vget_low_u8(vreinterpretq_u8_u32(b_lane))) + ) + ) + ) + ), + vadd_u32( + vget_high_u32(r), + vmovn_u64( + vpaddlq_u32( + vpaddlq_u16( + vmull_u8(vget_high_u8(a), vget_high_u8(vreinterpretq_u8_u32(b_lane))) + ) + ) + ) + ) + ); + #else + simde_uint32x4_private r_ = simde_uint32x4_to_private(r); + simde_uint8x16_private + a_ = 
simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + for(int i = 0 ; i < 4 ; i++) { + uint32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for(int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_uint32x4_from_private(r_); + #endif + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdotq_laneq_u32 + #define vdotq_laneq_u32(r, a, b, lane) simde_vdotq_laneq_u32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vdotq_laneq_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x16_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_4_(vdotq_laneq_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int32x4_t + b_lane, + b_32 = vreinterpretq_s32_s8(b); + SIMDE_CONSTIFY_4_(simde_vdupq_laneq_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + + result = + vcombine_s32( + vadd_s32( + vget_low_s32(r), + vmovn_s64( + vpaddlq_s32( + vpaddlq_s16( + vmull_s8(vget_low_s8(a), vget_low_s8(vreinterpretq_s8_s32(b_lane))) + ) + ) + ) + ), + vadd_s32( + vget_high_s32(r), + vmovn_s64( + vpaddlq_s32( + vpaddlq_s16( + vmull_s8(vget_high_s8(a), vget_high_s8(vreinterpretq_s8_s32(b_lane))) + ) + ) + ) + ) + ); + #else + simde_int32x4_private r_ = simde_int32x4_to_private(r); + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + for(int i = 0 ; i < 4 ; i++) { + int32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for(int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_int32x4_from_private(r_); + #endif + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdotq_laneq_s32 + #define vdotq_laneq_s32(r, a, b, lane) simde_vdotq_laneq_s32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vdotq_lane_u32(simde_uint32x4_t r, simde_uint8x16_t a, simde_uint8x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x4_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_2_(vdotq_lane_u32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint32x2_t + b_lane, + b_32 = vreinterpret_u32_u8(b); + SIMDE_CONSTIFY_2_(simde_vdup_lane_u32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + + result = + vcombine_u32( + vadd_u32( + vget_low_u32(r), + vmovn_u64( + vpaddlq_u32( + vpaddlq_u16( + vmull_u8(vget_low_u8(a), vreinterpret_u8_u32(b_lane)) + ) + ) + ) + ), + vadd_u32( + vget_high_u32(r), + vmovn_u64( + vpaddlq_u32( + vpaddlq_u16( + vmull_u8(vget_high_u8(a), vreinterpret_u8_u32(b_lane)) + ) + ) + ) + ) + ); + #else + simde_uint32x4_private r_ = simde_uint32x4_to_private(r); + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + simde_uint8x8_private 
b_ = simde_uint8x8_to_private(b); + + for(int i = 0 ; i < 4 ; i++) { + uint32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for(int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(uint32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(uint32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_uint32x4_from_private(r_); + #endif + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdotq_lane_u32 + #define vdotq_lane_u32(r, a, b, lane) simde_vdotq_lane_u32((r), (a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vdotq_lane_s32(simde_int32x4_t r, simde_int8x16_t a, simde_int8x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x4_t result; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_DOTPROD) + SIMDE_CONSTIFY_2_(vdotq_lane_s32, result, (HEDLEY_UNREACHABLE(), result), lane, r, a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int32x2_t + b_lane, + b_32 = vreinterpret_s32_s8(b); + SIMDE_CONSTIFY_2_(simde_vdup_lane_s32, b_lane, (HEDLEY_UNREACHABLE(), b_lane), lane, b_32); + + result = + vcombine_s32( + vadd_s32( + vget_low_s32(r), + vmovn_s64( + vpaddlq_s32( + vpaddlq_s16( + vmull_s8(vget_low_s8(a), vreinterpret_s8_s32(b_lane)) + ) + ) + ) + ), + vadd_s32( + vget_high_s32(r), + vmovn_s64( + vpaddlq_s32( + vpaddlq_s16( + vmull_s8(vget_high_s8(a), vreinterpret_s8_s32(b_lane)) + ) + ) + ) + ) + ); + #else + simde_int32x4_private r_ = simde_int32x4_to_private(r); + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + for(int i = 0 ; i < 4 ; i++) { + int32_t acc = 0; + SIMDE_VECTORIZE_REDUCTION(+:acc) + for(int j = 0 ; j < 4 ; j++) { + const int idx_b = j + (lane << 2); + const int idx_a = j + (i << 2); + acc += HEDLEY_STATIC_CAST(int32_t, a_.values[idx_a]) * HEDLEY_STATIC_CAST(int32_t, b_.values[idx_b]); + } + r_.values[i] += acc; + } + + result = simde_int32x4_from_private(r_); + #endif + return result; +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_DOTPROD)) + #undef vdotq_lane_s32 + #define vdotq_lane_s32(r, a, b, lane) simde_vdotq_lane_s32((r), (a), (b), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_DOT_LANE_H) */ +/* :: End simde/arm/neon/dot_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ext.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_EXT_H) +#define SIMDE_ARM_NEON_EXT_H +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vext_f32(simde_float32x2_t a, simde_float32x2_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_float32x2_t r; + SIMDE_CONSTIFY_2_(vext_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_float32x2_private + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; + } + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_f32(a, b, n) simde_float32x2_from_m64(_mm_alignr_pi8(simde_float32x2_to_m64(b), simde_float32x2_to_m64(a), n * sizeof(simde_float32))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_f32(a, b, n) (__extension__ ({ \ + simde_float32x2_private simde_vext_f32_r_; \ + simde_vext_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_float32x2_to_private(a).values, simde_float32x2_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_float32x2_from_private(simde_vext_f32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_f32 + #define vext_f32(a, b, n) simde_vext_f32((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vext_f64(simde_float64x1_t a, simde_float64x1_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) n; + return vext_f64(a, b, 0); + #else + simde_float64x1_private + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 0]; + } + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_f64(a, b, n) simde_float64x1_from_m64(_mm_alignr_pi8(simde_float64x1_to_m64(b), simde_float64x1_to_m64(a), n * sizeof(simde_float64))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vext_f64(a, b, n) (__extension__ ({ \ + simde_float64x1_private simde_vext_f64_r_; \ + simde_vext_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_float64x1_to_private(a).values, simde_float64x1_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, (n))); \ + simde_float64x1_from_private(simde_vext_f64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vext_f64 + #define vext_f64(a, b, n) simde_vext_f64((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vext_s8(simde_int8x8_t a, simde_int8x8_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int8x8_t r; + SIMDE_CONSTIFY_8_(vext_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7]; + } + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_s8(a, b, n) simde_int8x8_from_m64(_mm_alignr_pi8(simde_int8x8_to_m64(b), simde_int8x8_to_m64(a), n * sizeof(int8_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_s8(a, b, n) (__extension__ ({ \ + simde_int8x8_private simde_vext_s8_r_; \ + simde_vext_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_int8x8_to_private(a).values, simde_int8x8_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ + simde_int8x8_from_private(simde_vext_s8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_s8 + #define vext_s8(a, b, n) simde_vext_s8((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vext_s16(simde_int16x4_t a, simde_int16x4_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int16x4_t r; + SIMDE_CONSTIFY_4_(vext_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; + } + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_s16(a, b, n) simde_int16x4_from_m64(_mm_alignr_pi8(simde_int16x4_to_m64(b), simde_int16x4_to_m64(a), n * sizeof(int16_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_s16(a, b, n) (__extension__ ({ \ + simde_int16x4_private simde_vext_s16_r_; \ + simde_vext_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_int16x4_to_private(a).values, simde_int16x4_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_int16x4_from_private(simde_vext_s16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_s16 + #define vext_s16(a, b, n) simde_vext_s16((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vext_s32(simde_int32x2_t a, simde_int32x2_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int32x2_t r; + SIMDE_CONSTIFY_2_(vext_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; + } + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_s32(a, b, n) simde_int32x2_from_m64(_mm_alignr_pi8(simde_int32x2_to_m64(b), simde_int32x2_to_m64(a), n * sizeof(int32_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_s32(a, b, n) (__extension__ ({ \ + simde_int32x2_private simde_vext_s32_r_; \ + simde_vext_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_int32x2_to_private(a).values, simde_int32x2_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_int32x2_from_private(simde_vext_s32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_s32 + #define vext_s32(a, b, n) simde_vext_s32((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vext_s64(simde_int64x1_t a, simde_int64x1_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) n; + return vext_s64(a, b, 0); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 0]; + } + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_s64(a, b, n) simde_int64x1_from_m64(_mm_alignr_pi8(simde_int64x1_to_m64(b), simde_int64x1_to_m64(a), n * sizeof(int64_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vext_s64(a, b, n) (__extension__ ({ \ + simde_int64x1_private simde_vext_s64_r_; \ + simde_vext_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_int64x1_to_private(a).values, simde_int64x1_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \ + simde_int64x1_from_private(simde_vext_s64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_s64 + #define vext_s64(a, b, n) simde_vext_s64((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vext_u8(simde_uint8x8_t a, simde_uint8x8_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint8x8_t r; + SIMDE_CONSTIFY_8_(vext_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 7]; + } + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_u8(a, b, n) simde_uint8x8_from_m64(_mm_alignr_pi8(simde_uint8x8_to_m64(b), simde_uint8x8_to_m64(a), n * sizeof(uint8_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_u8(a, b, n) (__extension__ ({ \ + simde_uint8x8_private simde_vext_u8_r_; \ + simde_vext_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, simde_uint8x8_to_private(a).values, simde_uint8x8_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ + simde_uint8x8_from_private(simde_vext_u8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_u8 + #define vext_u8(a, b, n) simde_vext_u8((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vext_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint16x4_t r; + SIMDE_CONSTIFY_4_(vext_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_uint16x4_private + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; + } + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_u16(a, b, n) simde_uint16x4_from_m64(_mm_alignr_pi8(simde_uint16x4_to_m64(b), simde_uint16x4_to_m64(a), n * sizeof(uint16_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_u16(a, b, n) (__extension__ ({ \ + simde_uint16x4_private simde_vext_u16_r_; \ + simde_vext_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, simde_uint16x4_to_private(a).values, simde_uint16x4_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_uint16x4_from_private(simde_vext_u16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_u16 + #define vext_u16(a, b, n) simde_vext_u16((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vext_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint32x2_t r; + SIMDE_CONSTIFY_2_(vext_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; + } + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_u32(a, b, n) simde_uint32x2_from_m64(_mm_alignr_pi8(simde_uint32x2_to_m64(b), simde_uint32x2_to_m64(a), n * sizeof(uint32_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) && !defined(SIMDE_BUG_GCC_100760) + #define simde_vext_u32(a, b, n) (__extension__ ({ \ + simde_uint32x2_private simde_vext_u32_r_; \ + simde_vext_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, simde_uint32x2_to_private(a).values, simde_uint32x2_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_uint32x2_from_private(simde_vext_u32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_u32 + #define vext_u32(a, b, n) simde_vext_u32((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vext_u64(simde_uint64x1_t a, simde_uint64x1_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) n; + return vext_u64(a, b, 0); + #else + simde_uint64x1_private + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 0]; + } + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vext_u64(a, b, n) simde_uint64x1_from_m64(_mm_alignr_pi8(simde_uint64x1_to_m64(b), simde_uint64x1_to_m64(a), n * sizeof(uint64_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vext_u64(a, b, n) (__extension__ ({ \ + simde_uint64x1_private simde_vext_u64_r_; \ + simde_vext_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 8, simde_uint64x1_to_private(a).values, simde_uint64x1_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0))); \ + simde_uint64x1_from_private(simde_vext_u64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vext_u64 + #define vext_u64(a, b, n) simde_vext_u64((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vextq_f32(simde_float32x4_t a, simde_float32x4_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_float32x4_t r; + SIMDE_CONSTIFY_4_(vextq_f32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_float32x4_private + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 3]; + } + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_f32(a, b, n) simde_float32x4_from_m128(_mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(simde_float32x4_to_m128(b)), _mm_castps_si128(simde_float32x4_to_m128(a)), (n) * sizeof(simde_float32)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_vextq_f32(a, b, n) (__extension__ ({ \ + simde_float32x4_private simde_vextq_f32_r_; \ + simde_vextq_f32_r_.v128 = wasm_i32x4_shuffle(simde_float32x4_to_private(a).v128, simde_float32x4_to_private(b).v128, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_float32x4_from_private(simde_vextq_f32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_f32(a, b, n) (__extension__ ({ \ + simde_float32x4_private simde_vextq_f32_r_; \ + simde_vextq_f32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_float32x4_to_private(a).values, simde_float32x4_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_float32x4_from_private(simde_vextq_f32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_f32 + #define vextq_f32(a, b, n) simde_vextq_f32((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vextq_f64(simde_float64x2_t a, simde_float64x2_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + simde_float64x2_t r; + SIMDE_CONSTIFY_2_(vextq_f64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + 
#else + simde_float64x2_private + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; + } + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_f64(a, b, n) simde_float64x2_from_m128d(_mm_castsi128_pd(_mm_alignr_epi8(_mm_castpd_si128(simde_float64x2_to_m128d(b)), _mm_castpd_si128(simde_float64x2_to_m128d(a)), (n) * sizeof(simde_float64)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_vextq_f64(a, b, n) (__extension__ ({ \ + simde_float64x2_private simde_vextq_f64_r_; \ + simde_vextq_f64_r_.v128 = wasm_i64x2_shuffle(simde_float64x2_to_private(a).v128, simde_float64x2_to_private(b).v128, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_float64x2_from_private(simde_vextq_f64_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_f64(a, b, n) (__extension__ ({ \ + simde_float64x2_private simde_vextq_f64_r_; \ + simde_vextq_f64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_float64x2_to_private(a).values, simde_float64x2_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_float64x2_from_private(simde_vextq_f64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vextq_f64 + #define vextq_f64(a, b, n) simde_vextq_f64((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vextq_s8(simde_int8x16_t a, simde_int8x16_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int8x16_t r; + SIMDE_CONSTIFY_16_(vextq_s8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 15]; + } + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_s8(a, b, n) simde_int8x16_from_m128i(_mm_alignr_epi8(simde_int8x16_to_m128i(b), simde_int8x16_to_m128i(a), n * sizeof(int8_t))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_vextq_s8(a, b, n) (__extension__ ({ \ + simde_int8x16_private simde_vextq_s8_r_; \ + simde_vextq_s8_r_.v128 = wasm_i8x16_shuffle(simde_int8x16_to_private(a).v128, simde_int8x16_to_private(b).v128, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \ + simde_int8x16_from_private(simde_vextq_s8_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_s8(a, b, n) (__extension__ ({ \ + simde_int8x16_private simde_vextq_s8_r_; \ + simde_vextq_s8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_int8x16_to_private(a).values, simde_int8x16_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \ + simde_int8x16_from_private(simde_vextq_s8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_s8 + #define vextq_s8(a, b, n) simde_vextq_s8((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vextq_s16(simde_int16x8_t a, simde_int16x8_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int16x8_t r; + SIMDE_CONSTIFY_8_(vextq_s16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7]; + } + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_s16(a, b, n) simde_int16x8_from_m128i(_mm_alignr_epi8(simde_int16x8_to_m128i(b), simde_int16x8_to_m128i(a), n * sizeof(int16_t))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_vextq_s16(a, b, n) (__extension__ ({ \ + simde_int16x8_private simde_vextq_s16_r_; \ + simde_vextq_s16_r_.v128 = wasm_i16x8_shuffle(simde_int16x8_to_private(a).v128, simde_int16x8_to_private(b).v128, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ + simde_int16x8_from_private(simde_vextq_s16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_s16(a, b, n) (__extension__ ({ \ + simde_int16x8_private simde_vextq_s16_r_; \ + simde_vextq_s16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_int16x8_to_private(a).values, simde_int16x8_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \ + simde_int16x8_from_private(simde_vextq_s16_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_s16 + #define vextq_s16(a, b, n) simde_vextq_s16((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vextq_s32(simde_int32x4_t a, simde_int32x4_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int32x4_t r; + SIMDE_CONSTIFY_4_(vextq_s32, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 3]; + } + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_s32(a, b, n) simde_int32x4_from_m128i(_mm_alignr_epi8(simde_int32x4_to_m128i(b), simde_int32x4_to_m128i(a), n * sizeof(int32_t))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_vextq_s32(a, b, n) (__extension__ ({ \ + simde_int32x4_private simde_vextq_s32_r_; \ + simde_vextq_s32_r_.v128 = wasm_i32x4_shuffle(simde_int32x4_to_private(a).v128, simde_int32x4_to_private(b).v128, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_int32x4_from_private(simde_vextq_s32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_s32(a, b, n) (__extension__ ({ \ + simde_int32x4_private simde_vextq_s32_r_; \ + simde_vextq_s32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_int32x4_to_private(a).values, simde_int32x4_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_int32x4_from_private(simde_vextq_s32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_s32 + #define vextq_s32(a, b, n) simde_vextq_s32((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vextq_s64(simde_int64x2_t a, simde_int64x2_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_int64x2_t r; + SIMDE_CONSTIFY_2_(vextq_s64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 1]; + } + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_s64(a, b, n) simde_int64x2_from_m128i(_mm_alignr_epi8(simde_int64x2_to_m128i(b), simde_int64x2_to_m128i(a), n * sizeof(int64_t))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_vextq_s64(a, b, n) (__extension__ ({ \ + simde_int64x2_private simde_vextq_s64_r_; \ + simde_vextq_s64_r_.v128 = wasm_i64x2_shuffle(simde_int64x2_to_private(a).v128, simde_int64x2_to_private(b).v128, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_int64x2_from_private(simde_vextq_s64_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_s64(a, b, n) (__extension__ ({ \ + simde_int64x2_private simde_vextq_s64_r_; \ + simde_vextq_s64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_int64x2_to_private(a).values, simde_int64x2_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_int64x2_from_private(simde_vextq_s64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_s64 + #define vextq_s64(a, b, n) simde_vextq_s64((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vextq_u8(simde_uint8x16_t a, simde_uint8x16_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint8x16_t r; + SIMDE_CONSTIFY_16_(vextq_u8, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_uint8x16_private + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 15]; + } + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_u8(a, b, n) simde_uint8x16_from_m128i(_mm_alignr_epi8(simde_uint8x16_to_m128i(b), simde_uint8x16_to_m128i(a), n * sizeof(uint8_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_u8(a, b, n) (__extension__ ({ \ + simde_uint8x16_private simde_vextq_u8_r_; \ + simde_vextq_u8_r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, simde_uint8x16_to_private(a).values, simde_uint8x16_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 8)), HEDLEY_STATIC_CAST(int8_t, ((n) + 9)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 10)), HEDLEY_STATIC_CAST(int8_t, ((n) + 11)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 12)), HEDLEY_STATIC_CAST(int8_t, ((n) + 13)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 14)), HEDLEY_STATIC_CAST(int8_t, ((n) + 15))); \ + simde_uint8x16_from_private(simde_vextq_u8_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_u8 + #define vextq_u8(a, b, n) simde_vextq_u8((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vextq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint16x8_t r; + SIMDE_CONSTIFY_8_(vextq_u16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_uint16x8_private + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? 
a_.values[src] : b_.values[src & 7];
+    }
+    return simde_uint16x8_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE)
+  #define simde_vextq_u16(a, b, n) simde_uint16x8_from_m128i(_mm_alignr_epi8(simde_uint16x8_to_m128i(b), simde_uint16x8_to_m128i(a), n * sizeof(uint16_t)))
+#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
+  #define simde_vextq_u16(a, b, n) (__extension__ ({ \
+      simde_uint16x8_private simde_vextq_u16_r_; \
+      simde_vextq_u16_r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, simde_uint16x8_to_private(a).values, simde_uint16x8_to_private(b).values, \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
+      simde_uint16x8_from_private(simde_vextq_u16_r_); \
+    }))
+#elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32)
+  /* Fallback calling __builtin_shufflevector directly; a uniquely named
+   * temporary keeps the statement expression from shadowing an r_ at the
+   * expansion site, and the indices are cast like the other shuffle macros. */
+  #define simde_vextq_u16(a, b, n) (__extension__ ({ \
+      simde_uint16x8_private simde_vextq_u16_r_; \
+      simde_vextq_u16_r_.values = __builtin_shufflevector( \
+        simde_uint16x8_to_private(a).values, \
+        simde_uint16x8_to_private(b).values, \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3)), \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 4)), HEDLEY_STATIC_CAST(int8_t, ((n) + 5)), \
+        HEDLEY_STATIC_CAST(int8_t, ((n) + 6)), HEDLEY_STATIC_CAST(int8_t, ((n) + 7))); \
+      simde_uint16x8_from_private(simde_vextq_u16_r_); \
+    }))
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vextq_u16
+  #define vextq_u16(a, b, n) simde_vextq_u16((a), (b), (n))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4_t
+simde_vextq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int n)
+    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    simde_uint32x4_t r;
+    SIMDE_CONSTIFY_4_(vextq_u32, r, (HEDLEY_UNREACHABLE(), a), n, a, b);
+    return r;
+  #else
+    simde_uint32x4_private
+      a_ = simde_uint32x4_to_private(a),
+      b_ = simde_uint32x4_to_private(b),
+      r_ = a_;
+    const size_t n_ = HEDLEY_STATIC_CAST(size_t, n);
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      size_t src = i + n_;
+      r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ?
a_.values[src] : b_.values[src & 3]; + } + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_u32(a, b, n) simde_uint32x4_from_m128i(_mm_alignr_epi8(simde_uint32x4_to_m128i(b), simde_uint32x4_to_m128i(a), n * sizeof(uint32_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_u32(a, b, n) (__extension__ ({ \ + simde_uint32x4_private simde_vextq_u32_r_; \ + simde_vextq_u32_r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, simde_uint32x4_to_private(a).values, simde_uint32x4_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1)), \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 2)), HEDLEY_STATIC_CAST(int8_t, ((n) + 3))); \ + simde_uint32x4_from_private(simde_vextq_u32_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_u32 + #define vextq_u32(a, b, n) simde_vextq_u32((a), (b), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vextq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde_uint64x2_t r; + SIMDE_CONSTIFY_2_(vextq_u64, r, (HEDLEY_UNREACHABLE(), a), n, a, b); + return r; + #else + simde_uint64x2_private + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b), + r_ = a_; + const size_t n_ = HEDLEY_STATIC_CAST(size_t, n); + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + size_t src = i + n_; + r_.values[i] = (src < (sizeof(r_.values) / sizeof(r_.values[0]))) ? a_.values[src] : b_.values[src & 1]; + } + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_BUG_GCC_SIZEOF_IMMEDIATE) + #define simde_vextq_u64(a, b, n) simde_uint64x2_from_m128i(_mm_alignr_epi8(simde_uint64x2_to_m128i(b), simde_uint64x2_to_m128i(a), n * sizeof(uint64_t))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_VEXT_REV32) + #define simde_vextq_u64(a, b, n) (__extension__ ({ \ + simde_uint64x2_private simde_vextq_u64_r_; \ + simde_vextq_u64_r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, simde_uint64x2_to_private(a).values, simde_uint64x2_to_private(b).values, \ + HEDLEY_STATIC_CAST(int8_t, ((n) + 0)), HEDLEY_STATIC_CAST(int8_t, ((n) + 1))); \ + simde_uint64x2_from_private(simde_vextq_u64_r_); \ + })) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vextq_u64 + #define vextq_u64(a, b, n) simde_vextq_u64((a), (b), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_EXT_H) */ +/* :: End simde/arm/neon/ext.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/fma.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or 
substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2021 Atharva Nimbalkar +*/ + +#if !defined(SIMDE_ARM_NEON_FMA_H) +#define SIMDE_ARM_NEON_FMA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + return vfma_f32(a, b, c); + #else + return simde_vadd_f32(a, simde_vmul_f32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfma_f32 + #define vfma_f32(a, b, c) simde_vfma_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + return vfma_f64(a, b, c); + #else + return simde_vadd_f64(a, simde_vmul_f64(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfma_f64 + #define vfma_f64(a, b, c) simde_vfma_f64(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vfmaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + return vfmaq_f32(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_madd(b, c, a); + #elif \ + defined(SIMDE_X86_FMA_NATIVE) + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b), + c_ = simde_float32x4_to_private(c); + + #if defined(SIMDE_X86_FMA_NATIVE) + r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128); + #endif + + return simde_float32x4_from_private(r_); + #else + return simde_vaddq_f32(a, simde_vmulq_f32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfmaq_f32 + #define vfmaq_f32(a, b, c) simde_vfmaq_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vfmaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + return vfmaq_f64(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_madd(b, c, a); + #elif \ + defined(SIMDE_X86_FMA_NATIVE) + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b), + c_ = simde_float64x2_to_private(c); + + #if defined(SIMDE_X86_FMA_NATIVE) + r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d); + #endif + + return simde_float64x2_from_private(r_); + #else + return simde_vaddq_f64(a, simde_vmulq_f64(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + 
#undef vfmaq_f64 + #define vfmaq_f64(a, b, c) simde_vfmaq_f64(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */ +/* :: End simde/arm/neon/fma.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/fma_lane.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +* +* Copyright: +* 2021 Atharva Nimbalkar +*/ + +#if !defined(SIMDE_ARM_NEON_FMA_LANE_H) +#define SIMDE_ARM_NEON_FMA_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mul_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MUL_LANE_H) +#define SIMDE_ARM_NEON_MUL_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vmuld_lane_f64(simde_float64_t a, simde_float64x1_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + return a * simde_float64x1_to_private(b).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vmuld_lane_f64(a, b, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_lane_f64(a, b, lane)) + #else + #define simde_vmuld_lane_f64(a, b, lane) vmuld_lane_f64((a), (b), (lane)) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmuld_lane_f64 + #define vmuld_lane_f64(a, b, lane) simde_vmuld_lane_f64(a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vmuld_laneq_f64(simde_float64_t a, simde_float64x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return a * simde_float64x2_to_private(b).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vmuld_laneq_f64(a, b, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuld_laneq_f64(a, b, lane)) + #else + #define simde_vmuld_laneq_f64(a, b, lane) vmuld_laneq_f64((a), (b), (lane)) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmuld_laneq_f64 + #define vmuld_laneq_f64(a, b, lane) simde_vmuld_laneq_f64(a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vmuls_lane_f32(simde_float32_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + return a * simde_float32x2_to_private(b).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vmuls_lane_f32(a, b, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_lane_f32(a, b, lane)) + #else + #define simde_vmuls_lane_f32(a, b, lane) vmuls_lane_f32((a), (b), (lane)) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmuls_lane_f32 + #define vmuls_lane_f32(a, b, lane) simde_vmuls_lane_f32(a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vmuls_laneq_f32(simde_float32_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + return a * simde_float32x4_to_private(b).values[lane]; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vmuls_laneq_f32(a, b, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmuls_laneq_f32(a, b, lane)) + #else + #define simde_vmuls_laneq_f32(a, b, lane) vmuls_laneq_f32((a), (b), (lane)) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmuls_laneq_f32 + #define vmuls_laneq_f32(a, b, lane) simde_vmuls_laneq_f32(a, b, lane) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmul_lane_f32(simde_float32x2_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_private + r_, + a_ = 
simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_f32(a, b, lane) vmul_lane_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_f32 + #define vmul_lane_f32(a, b, lane) simde_vmul_lane_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmul_lane_f64(simde_float64x1_t a, simde_float64x1_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_lane_f64(a, b, lane) vmul_lane_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_f64 + #define vmul_lane_f64(a, b, lane) simde_vmul_lane_f64((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmul_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_s16(a, b, lane) vmul_lane_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_s16 + #define vmul_lane_s16(a, b, lane) simde_vmul_lane_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmul_lane_s32(simde_int32x2_t a, simde_int32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_s32(a, b, lane) vmul_lane_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_s32 + #define vmul_lane_s32(a, b, lane) simde_vmul_lane_s32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmul_lane_u16(simde_uint16x4_t a, simde_uint16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_u16(a, b, lane) vmul_lane_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_u16 + #define vmul_lane_u16(a, b, lane) 
simde_vmul_lane_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmul_lane_u32(simde_uint32x2_t a, simde_uint32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmul_lane_u32(a, b, lane) vmul_lane_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_lane_u32 + #define vmul_lane_u32(a, b, lane) simde_vmul_lane_u32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmul_laneq_s16(simde_int16x4_t a, simde_int16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + simde_int16x8_private + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_laneq_s16(a, b, lane) vmul_laneq_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_s16 + #define vmul_laneq_s16(a, b, lane) simde_vmul_laneq_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmul_laneq_s32(simde_int32x2_t a, simde_int32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + simde_int32x4_private + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_laneq_s32(a, b, lane) vmul_laneq_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_s32 + #define vmul_laneq_s32(a, b, lane) simde_vmul_laneq_s32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmul_laneq_u16(simde_uint16x4_t a, simde_uint16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_uint16x8_private + b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_laneq_u16(a, b, lane) vmul_laneq_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_u16 + #define vmul_laneq_u16(a, b, lane) simde_vmul_laneq_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmul_laneq_u32(simde_uint32x2_t a, simde_uint32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_uint32x4_private + b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_laneq_u32(a, b, lane) vmul_laneq_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_u32 + #define vmul_laneq_u32(a, b, lane) simde_vmul_laneq_u32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmulq_lane_f32(simde_float32x4_t a, simde_float32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + simde_float32x2_private b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_f32(a, b, lane) vmulq_lane_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_f32 + #define vmulq_lane_f32(a, b, lane) simde_vmulq_lane_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmulq_lane_f64(simde_float64x2_t a, simde_float64x1_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + simde_float64x1_private b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_lane_f64(a, b, lane) vmulq_lane_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_f64 + #define vmulq_lane_f64(a, b, lane) simde_vmulq_lane_f64((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmulq_lane_s16(simde_int16x8_t a, simde_int16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_s16(a, b, lane) vmulq_lane_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_s16 + #define vmulq_lane_s16(a, b, lane) simde_vmulq_lane_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmulq_lane_s32(simde_int32x4_t a, simde_int32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_s32(a, b, lane) vmulq_lane_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_s32 + #define vmulq_lane_s32(a, b, lane) simde_vmulq_lane_s32((a), (b), (lane)) +#endif + 
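/* Editor's note -- illustrative only, not part of the imported simde sources:
 * a minimal usage sketch of the lane-wise multiply fallback defined above.
 * On real A32V7/A64V8 targets the macro maps straight to vmulq_lane_s32,
 * while elsewhere the portable scalar loop above is used.  The sketch assumes
 * the amalgamated header is reachable as "simde/arm/neon.h" (i.e. include/ is
 * on the include path) and the input values are made up for the example; the
 * simde_vld1q_s32 / simde_vld1_s32 loaders appear later in this same header,
 * and simde_vst1q_s32 comes from the accompanying st1 section.
 *
 *   #include "simde/arm/neon.h"
 *   #include <stdio.h>
 *
 *   int main(void) {
 *       int32_t a_vals[4] = {1, 2, 3, 4};
 *       int32_t b_vals[2] = {10, 20};
 *       simde_int32x4_t a = simde_vld1q_s32(a_vals);
 *       simde_int32x2_t b = simde_vld1_s32(b_vals);
 *       // multiply every element of a by lane 1 of b (i.e. by 20)
 *       simde_int32x4_t r = simde_vmulq_lane_s32(a, b, 1);
 *       int32_t out[4];
 *       simde_vst1q_s32(out, r);
 *       printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // 20 40 60 80
 *       return 0;
 *   }
 */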
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmulq_lane_u16(simde_uint16x8_t a, simde_uint16x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_u16(a, b, lane) vmulq_lane_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_u16 + #define vmulq_lane_u16(a, b, lane) simde_vmulq_lane_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmulq_lane_u32(simde_uint32x4_t a, simde_uint32x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmulq_lane_u32(a, b, lane) vmulq_lane_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_lane_u32 + #define vmulq_lane_u32(a, b, lane) simde_vmulq_lane_u32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmulq_laneq_f32(simde_float32x4_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_f32(a, b, lane) vmulq_laneq_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_f32 + #define vmulq_laneq_f32(a, b, lane) simde_vmulq_laneq_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmulq_laneq_f64(simde_float64x2_t a, simde_float64x2_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_f64(a, b, lane) vmulq_laneq_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_f64 + #define vmulq_laneq_f64(a, b, lane) simde_vmulq_laneq_f64((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmulq_laneq_s16(simde_int16x8_t a, simde_int16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + 
return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_s16(a, b, lane) vmulq_laneq_s16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_s16 + #define vmulq_laneq_s16(a, b, lane) simde_vmulq_laneq_s16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmulq_laneq_s32(simde_int32x4_t a, simde_int32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_s32(a, b, lane) vmulq_laneq_s32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_s32 + #define vmulq_laneq_s32(a, b, lane) simde_vmulq_laneq_s32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmulq_laneq_u16(simde_uint16x8_t a, simde_uint16x8_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_u16(a, b, lane) vmulq_laneq_u16((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_u16 + #define vmulq_laneq_u16(a, b, lane) simde_vmulq_laneq_u16((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmulq_laneq_u32(simde_uint32x4_t a, simde_uint32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmulq_laneq_u32(a, b, lane) vmulq_laneq_u32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_laneq_u32 + #define vmulq_laneq_u32(a, b, lane) simde_vmulq_laneq_u32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmul_laneq_f32(simde_float32x2_t a, simde_float32x4_t b, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + simde_float32x4_private b_ = simde_float32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_laneq_f32(a, b, lane) vmul_laneq_f32((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_f32 + #define vmul_laneq_f32(a, b, lane) simde_vmul_laneq_f32((a), (b), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmul_laneq_f64(simde_float64x1_t a, simde_float64x2_t b, const 
int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + simde_float64x2_private b_ = simde_float64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] * b_.values[lane]; + } + + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmul_laneq_f64(a, b, lane) vmul_laneq_f64((a), (b), (lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_laneq_f64 + #define vmul_laneq_f64(a, b, lane) simde_vmul_laneq_f64((a), (b), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MUL_LANE_H) */ +/* :: End simde/arm/neon/mul_lane.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* simde_vfmad_lane_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmad_lane_f64(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_lane_f64(a, b, v, lane)) + #else + #define simde_vfmad_lane_f64(a, b, v, lane) vfmad_lane_f64((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmad_lane_f64(a, b, v, lane) \ + simde_vget_lane_f64( \ + simde_vadd_f64( \ + simde_vdup_n_f64(a), \ + simde_vdup_n_f64(simde_vmuld_lane_f64(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmad_lane_f64 + #define vfmad_lane_f64(a, b, v, lane) simde_vfmad_lane_f64(a, b, v, lane) +#endif + +/* simde_vfmad_laneq_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmad_laneq_f64(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_laneq_f64(a, b, v, lane)) + #else + #define simde_vfmad_laneq_f64(a, b, v, lane) vfmad_laneq_f64((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmad_laneq_f64(a, b, v, lane) \ + simde_vget_lane_f64( \ + simde_vadd_f64( \ + simde_vdup_n_f64(a), \ + simde_vdup_n_f64(simde_vmuld_laneq_f64(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmad_laneq_f64 + #define vfmad_laneq_f64(a, b, v, lane) simde_vfmad_laneq_f64(a, b, v, lane) +#endif + +/* simde_vfmas_lane_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmas_lane_f32(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_lane_f32(a, b, v, lane)) + #else + #define simde_vfmas_lane_f32(a, b, v, lane) vfmas_lane_f32((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmas_lane_f32(a, b, v, lane) \ + simde_vget_lane_f32( \ + simde_vadd_f32( \ + simde_vdup_n_f32(a), \ + simde_vdup_n_f32(simde_vmuls_lane_f32(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmas_lane_f32 + #define vfmas_lane_f32(a, b, v, lane) simde_vfmas_lane_f32(a, b, v, lane) +#endif + +/* simde_vfmas_laneq_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(__clang__) && 
!SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vfmas_laneq_f32(a, b, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_laneq_f32(a, b, v, lane)) + #else + #define simde_vfmas_laneq_f32(a, b, v, lane) vfmas_laneq_f32((a), (b), (v), (lane)) + #endif +#else + #define simde_vfmas_laneq_f32(a, b, v, lane) \ + simde_vget_lane_f32( \ + simde_vadd_f32( \ + simde_vdup_n_f32(a), \ + simde_vdup_n_f32(simde_vmuls_laneq_f32(b, v, lane)) \ + ), \ + 0 \ + ) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmas_laneq_f32 + #define vfmas_laneq_f32(a, b, v, lane) simde_vfmas_laneq_f32(a, b, v, lane) +#endif + +/* simde_vfma_lane_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfma_lane_f32(a, b, v, lane) vfma_lane_f32(a, b, v, lane) +#else + #define simde_vfma_lane_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_lane_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_lane_f32 + #define vfma_lane_f32(a, b, v, lane) simde_vfma_lane_f32(a, b, v, lane) +#endif + +/* simde_vfma_lane_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfma_lane_f64(a, b, v, lane) vfma_lane_f64((a), (b), (v), (lane)) +#else + #define simde_vfma_lane_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_lane_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_lane_f64 + #define vfma_lane_f64(a, b, v, lane) simde_vfma_lane_f64(a, b, v, lane) +#endif + +/* simde_vfma_laneq_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfma_laneq_f32(a, b, v, lane) vfma_laneq_f32((a), (b), (v), (lane)) +#else + #define simde_vfma_laneq_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_laneq_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_laneq_f32 + #define vfma_laneq_f32(a, b, v, lane) simde_vfma_laneq_f32(a, b, v, lane) +#endif + +/* simde_vfma_laneq_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfma_laneq_f64(a, b, v, lane) vfma_laneq_f64((a), (b), (v), (lane)) +#else + #define simde_vfma_laneq_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_laneq_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfma_laneq_f64 + #define vfma_laneq_f64(a, b, v, lane) simde_vfma_laneq_f64(a, b, v, lane) +#endif + +/* simde_vfmaq_lane_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfmaq_lane_f64(a, b, v, lane) vfmaq_lane_f64((a), (b), (v), (lane)) +#else + #define simde_vfmaq_lane_f64(a, b, v, lane) simde_vaddq_f64(a, simde_vmulq_lane_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_lane_f64 + #define vfmaq_lane_f64(a, b, v, lane) simde_vfmaq_lane_f64(a, b, v, lane) +#endif + +/* simde_vfmaq_lane_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfmaq_lane_f32(a, b, v, lane) vfmaq_lane_f32((a), (b), (v), (lane)) +#else + #define simde_vfmaq_lane_f32(a, b, v, lane) simde_vaddq_f32(a, simde_vmulq_lane_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_lane_f32 + #define vfmaq_lane_f32(a, b, v, lane) 
simde_vfmaq_lane_f32(a, b, v, lane) +#endif + +/* simde_vfmaq_laneq_f32 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfmaq_laneq_f32(a, b, v, lane) vfmaq_laneq_f32((a), (b), (v), (lane)) +#else + #define simde_vfmaq_laneq_f32(a, b, v, lane) \ + simde_vaddq_f32(a, simde_vmulq_laneq_f32(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_laneq_f32 + #define vfmaq_laneq_f32(a, b, v, lane) simde_vfmaq_laneq_f32(a, b, v, lane) +#endif + +/* simde_vfmaq_laneq_f64 */ +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #define simde_vfmaq_laneq_f64(a, b, v, lane) vfmaq_laneq_f64((a), (b), (v), (lane)) +#else + #define simde_vfmaq_laneq_f64(a, b, v, lane) \ + simde_vaddq_f64(a, simde_vmulq_laneq_f64(b, v, lane)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vfmaq_laneq_f64 + #define vfmaq_laneq_f64(a, b, v, lane) simde_vfmaq_laneq_f64(a, b, v, lane) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_FMA_LANE_H) */ +/* :: End simde/arm/neon/fma_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/fma_n.h :: */ +/* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, copy, +* modify, merge, publish, distribute, sublicense, and/or sell copies +* of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. 
+* +* Copyright: +* 2021 Evan Nemerson +*/ + +#if !defined(SIMDE_ARM_NEON_FMA_N_H) +#define SIMDE_ARM_NEON_FMA_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + return vfma_n_f32(a, b, c); + #else + return simde_vfma_f32(a, b, simde_vdup_n_f32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfma_n_f32 + #define vfma_n_f32(a, b, c) simde_vfma_n_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vfma_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vfma_n_f64(a, b, c); + #else + return simde_vfma_f64(a, b, simde_vdup_n_f64(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfma_n_f64 + #define vfma_n_f64(a, b, c) simde_vfma_n_f64(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vfmaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + return vfmaq_n_f32(a, b, c); + #else + return simde_vfmaq_f32(a, b, simde_vdupq_n_f32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfmaq_n_f32 + #define vfmaq_n_f32(a, b, c) simde_vfmaq_n_f32(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vfmaq_n_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vfmaq_n_f64(a, b, c); + #else + return simde_vfmaq_f64(a, b, simde_vdupq_n_f64(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vfmaq_n_f64 + #define vfmaq_n_f64(a, b, c) simde_vfmaq_n_f64(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_CMLA_H) */ +/* :: End simde/arm/neon/fma_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/hadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without 
limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* TODO: the 128-bit versions only require AVX-512 because of the final + * conversions from larger types down to smaller ones. We could get + * the same results from AVX/AVX2 instructions with some shuffling + * to extract the low half of each input element to the low half + * of a 256-bit vector, then cast that to a 128-bit vector. */ + +#if !defined(SIMDE_ARM_NEON_HADD_H) +#define SIMDE_ARM_NEON_HADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vhadd_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhadd_s8(a, b); + #else + return simde_vmovn_s16(simde_vshrq_n_s16(simde_vaddl_s8(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhadd_s8 + #define vhadd_s8(a, b) simde_vhadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vhadd_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhadd_s16(a, b); + #else + return simde_vmovn_s32(simde_vshrq_n_s32(simde_vaddl_s16(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhadd_s16 + #define vhadd_s16(a, b) simde_vhadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vhadd_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhadd_s32(a, b); + #else + return simde_vmovn_s64(simde_vshrq_n_s64(simde_vaddl_s32(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhadd_s32 + #define vhadd_s32(a, b) simde_vhadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhadd_u8(a, b); + #else + return simde_vmovn_u16(simde_vshrq_n_u16(simde_vaddl_u8(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhadd_u8 + #define vhadd_u8(a, b) simde_vhadd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhadd_u16(a, b); + #else + return simde_vmovn_u32(simde_vshrq_n_u32(simde_vaddl_u16(a, b), 1)); + #endif 
+} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhadd_u16 + #define vhadd_u16(a, b) simde_vhadd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhadd_u32(a, b); + #else + return simde_vmovn_u64(simde_vshrq_n_u64(simde_vaddl_u32(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhadd_u32 + #define vhadd_u32(a, b) simde_vhadd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhaddq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + r_.m128i = _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_add_epi16(_mm256_cvtepi8_epi16(a_.m128i), _mm256_cvtepi8_epi16(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) + HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhaddq_s8 + #define vhaddq_s8(a, b) simde_vhaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhaddq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_add_epi32(_mm256_cvtepi16_epi32(a_.m128i), _mm256_cvtepi16_epi32(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) + HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhaddq_s16 + #define vhaddq_s16(a, b) simde_vhaddq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhaddq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_add_epi64(_mm256_cvtepi32_epi64(a_.m128i), _mm256_cvtepi32_epi64(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) + HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhaddq_s32 + #define vhaddq_s32(a, b) simde_vhaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhaddq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = 
simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + r_.m128i = _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_add_epi16(_mm256_cvtepu8_epi16(a_.m128i), _mm256_cvtepu8_epi16(b_.m128i)), 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t lo = + wasm_u16x8_shr(wasm_i16x8_add(wasm_u16x8_extend_low_u8x16(a_.v128), + wasm_u16x8_extend_low_u8x16(b_.v128)), + 1); + v128_t hi = + wasm_u16x8_shr(wasm_i16x8_add(wasm_u16x8_extend_high_u8x16(a_.v128), + wasm_u16x8_extend_high_u8x16(b_.v128)), + 1); + r_.v128 = wasm_i8x16_shuffle(lo, hi, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhaddq_u8 + #define vhaddq_u8(a, b) simde_vhaddq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhaddq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_add_epi32(_mm256_cvtepu16_epi32(a_.m128i), _mm256_cvtepu16_epi32(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhaddq_u16 + #define vhaddq_u16(a, b) simde_vhaddq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhaddq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_add_epi64(_mm256_cvtepu32_epi64(a_.m128i), _mm256_cvtepu32_epi64(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) + HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhaddq_u32 + #define vhaddq_u32(a, b) simde_vhaddq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_HADD_H) */ +/* :: End simde/arm/neon/hadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/hsub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the 
rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* TODO: the 128-bit versions only require AVX-512 because of the final + * conversions from larger types down to smaller ones. We could get + * the same results from AVX/AVX2 instructions with some shuffling + * to extract the low half of each input element to the low half + * of a 256-bit vector, then cast that to a 128-bit vector. */ + +#if !defined(SIMDE_ARM_NEON_HSUB_H) +#define SIMDE_ARM_NEON_HSUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vhsub_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsub_s8(a, b); + #else + return simde_vmovn_s16(simde_vshrq_n_s16(simde_vsubl_s8(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsub_s8 + #define vhsub_s8(a, b) simde_vhsub_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vhsub_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsub_s16(a, b); + #else + return simde_vmovn_s32(simde_vshrq_n_s32(simde_vsubl_s16(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsub_s16 + #define vhsub_s16(a, b) simde_vhsub_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vhsub_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsub_s32(a, b); + #else + return simde_vmovn_s64(simde_vshrq_n_s64(simde_vsubl_s32(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsub_s32 + #define vhsub_s32(a, b) simde_vhsub_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vhsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsub_u8(a, b); + #else + return simde_vmovn_u16(simde_vshrq_n_u16(simde_vsubl_u8(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsub_u8 + #define vhsub_u8(a, b) simde_vhsub_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vhsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsub_u16(a, b); + #else + return simde_vmovn_u32(simde_vshrq_n_u32(simde_vsubl_u16(a, b), 1)); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsub_u16 + #define vhsub_u16(a, b) simde_vhsub_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vhsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsub_u32(a, b); + #else + return simde_vmovn_u64(simde_vshrq_n_u64(simde_vsubl_u32(a, b), 1)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsub_u32 + #define vhsub_u32(a, b) simde_vhsub_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vhsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsubq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + r_.m128i = _mm256_cvtepi16_epi8(_mm256_srai_epi16(_mm256_sub_epi16(_mm256_cvtepi8_epi16(a_.m128i), _mm256_cvtepi8_epi16(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (HEDLEY_STATIC_CAST(int16_t, a_.values[i]) - HEDLEY_STATIC_CAST(int16_t, b_.values[i])) >> 1); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsubq_s8 + #define vhsubq_s8(a, b) simde_vhsubq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vhsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsubq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi32_epi16(_mm256_srai_epi32(_mm256_sub_epi32(_mm256_cvtepi16_epi32(a_.m128i), _mm256_cvtepi16_epi32(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (HEDLEY_STATIC_CAST(int32_t, a_.values[i]) - HEDLEY_STATIC_CAST(int32_t, b_.values[i])) >> 1); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsubq_s16 + #define vhsubq_s16(a, b) simde_vhsubq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vhsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsubq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi64_epi32(_mm256_srai_epi64(_mm256_sub_epi64(_mm256_cvtepi32_epi64(a_.m128i), _mm256_cvtepi32_epi64(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int64_t, a_.values[i]) - HEDLEY_STATIC_CAST(int64_t, b_.values[i])) >> 1); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsubq_s32 + #define vhsubq_s32(a, b) simde_vhsubq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vhsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsubq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = 
simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + r_.m128i = _mm256_cvtepi16_epi8(_mm256_srli_epi16(_mm256_sub_epi16(_mm256_cvtepu8_epi16(a_.m128i), _mm256_cvtepu8_epi16(b_.m128i)), 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t lo = + wasm_u16x8_shr(wasm_i16x8_sub(wasm_u16x8_extend_low_u8x16(a_.v128), + wasm_u16x8_extend_low_u8x16(b_.v128)), + 1); + v128_t hi = + wasm_u16x8_shr(wasm_i16x8_sub(wasm_u16x8_extend_high_u8x16(a_.v128), + wasm_u16x8_extend_high_u8x16(b_.v128)), + 1); + r_.v128 = wasm_i8x16_shuffle(lo, hi, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint16_t, b_.values[i])) >> 1); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsubq_u8 + #define vhsubq_u8(a, b) simde_vhsubq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vhsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsubq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi32_epi16(_mm256_srli_epi32(_mm256_sub_epi32(_mm256_cvtepu16_epi32(a_.m128i), _mm256_cvtepu16_epi32(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint32_t, b_.values[i])) >> 1); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsubq_u16 + #define vhsubq_u16(a, b) simde_vhsubq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vhsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vhsubq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm256_cvtepi64_epi32(_mm256_srli_epi64(_mm256_sub_epi64(_mm256_cvtepu32_epi64(a_.m128i), _mm256_cvtepu32_epi64(b_.m128i)), 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint64_t, b_.values[i])) >> 1); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vhsubq_u32 + #define vhsubq_u32(a, b) simde_vhsubq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_HSUB_H) */ +/* :: End simde/arm/neon/hsub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the 
rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_LD1_H) +#define SIMDE_ARM_NEON_LD1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vld1_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld1_f16(ptr); + #else + simde_float16x4_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f16 + #define vld1_f16(a) simde_vld1_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vld1_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_f32(ptr); + #else + simde_float32x2_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f32 + #define vld1_f32(a) simde_vld1_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vld1_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(1)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1_f64(ptr); + #else + simde_float64x1_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld1_f64 + #define vld1_f64(a) simde_vld1_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vld1_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_s8(ptr); + #else + simde_int8x8_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s8 + #define vld1_s8(a) simde_vld1_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vld1_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_s16(ptr); + #else + simde_int16x4_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s16 + #define vld1_s16(a) simde_vld1_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vld1_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vld1_s32(ptr); + #else + simde_int32x2_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s32 + #define vld1_s32(a) simde_vld1_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vld1_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_s64(ptr); + #else + simde_int64x1_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s64 + #define vld1_s64(a) simde_vld1_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vld1_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_u8(ptr); + #else + simde_uint8x8_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u8 + #define vld1_u8(a) simde_vld1_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vld1_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_u16(ptr); + #else + simde_uint16x4_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u16 + #define vld1_u16(a) simde_vld1_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vld1_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_u32(ptr); + #else + simde_uint32x2_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u32 + #define vld1_u32(a) simde_vld1_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vld1_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(1)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_u64(ptr); + #else + simde_uint64x1_private r_; + simde_memcpy(&r_, ptr, sizeof(r_)); + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u64 + #define vld1_u64(a) simde_vld1_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vld1q_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld1q_f16(ptr); + #else + simde_float16x8_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f16 + #define vld1q_f16(a) simde_vld1q_f16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vld1q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_f32(ptr); + #else + simde_float32x4_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f32 + #define vld1q_f32(a) simde_vld1q_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t 
+simde_vld1q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(ptr); + #else + simde_float64x2_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld1q_f64 + #define vld1q_f64(a) simde_vld1q_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vld1q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_s8(ptr); + #else + simde_int8x16_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s8 + #define vld1q_s8(a) simde_vld1q_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vld1q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_s16(ptr); + #else + simde_int16x8_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s16 + #define vld1q_s16(a) simde_vld1q_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vld1q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_s32(ptr); + #else + simde_int32x4_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s32 + #define vld1q_s32(a) simde_vld1q_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vld1q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_s64(ptr); + #else + simde_int64x2_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s64 + #define vld1q_s64(a) simde_vld1q_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vld1q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_u8(ptr); + #else + simde_uint8x16_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u8 + #define vld1q_u8(a) simde_vld1q_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vld1q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_u16(ptr); + #else + simde_uint16x8_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef 
vld1q_u16 + #define vld1q_u16(a) simde_vld1q_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vld1q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_u32(ptr); + #else + simde_uint32x4_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u32 + #define vld1q_u32(a) simde_vld1q_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vld1q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_u64(ptr); + #else + simde_uint64x2_private r_; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_load(ptr); + #else + simde_memcpy(&r_, ptr, sizeof(r_)); + #endif + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u64 + #define vld1q_u64(a) simde_vld1q_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1_H) */ +/* :: End simde/arm/neon/ld1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1_dup.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_LD1_DUP_H) +#define SIMDE_ARM_NEON_LD1_DUP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vld1_dup_f32(simde_float32 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_f32(ptr); + #else + return simde_vdup_n_f32(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_f32 + #define vld1_dup_f32(a) simde_vld1_dup_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vld1_dup_f64(simde_float64 const * ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1_dup_f64(ptr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde_vreinterpret_f64_s64(vld1_dup_s64(HEDLEY_REINTERPRET_CAST(int64_t const*, ptr))); + #else + return simde_vdup_n_f64(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_f64 + #define vld1_dup_f64(a) simde_vld1_dup_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vld1_dup_s8(int8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_s8(ptr); + #else + return simde_vdup_n_s8(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_s8 + #define vld1_dup_s8(a) simde_vld1_dup_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vld1_dup_s16(int16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_s16(ptr); + #else + return simde_vdup_n_s16(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_s16 + #define vld1_dup_s16(a) simde_vld1_dup_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vld1_dup_s32(int32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_s32(ptr); + #else + return simde_vdup_n_s32(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_s32 + #define vld1_dup_s32(a) simde_vld1_dup_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vld1_dup_s64(int64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_s64(ptr); + #else + return simde_vdup_n_s64(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_s64 + #define vld1_dup_s64(a) simde_vld1_dup_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vld1_dup_u8(uint8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_u8(ptr); + #else + return simde_vdup_n_u8(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_u8 + #define vld1_dup_u8(a) simde_vld1_dup_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vld1_dup_u16(uint16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_u16(ptr); + #else + return simde_vdup_n_u16(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_u16 + #define vld1_dup_u16(a) simde_vld1_dup_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vld1_dup_u32(uint32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vld1_dup_u32(ptr); + #else + return simde_vdup_n_u32(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_u32 + #define vld1_dup_u32(a) simde_vld1_dup_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vld1_dup_u64(uint64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1_dup_u64(ptr); + #else + return simde_vdup_n_u64(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_dup_u64 + #define vld1_dup_u64(a) simde_vld1_dup_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vld1q_dup_f32(simde_float32 const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_f32(ptr); + #elif \ + defined(SIMDE_X86_SSE_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_float32x4_private r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_load_ps1(ptr); + #else + r_.v128 = wasm_v128_load32_splat(ptr); + #endif + + return simde_float32x4_from_private(r_); + #else + return simde_vdupq_n_f32(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_f32 + #define vld1q_dup_f32(a) simde_vld1q_dup_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vld1q_dup_f64(simde_float64 const * ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_dup_f64(ptr); + #else + return simde_vdupq_n_f64(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_f64 + #define vld1q_dup_f64(a) simde_vld1q_dup_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vld1q_dup_s8(int8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_s8(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int8x16_private r_; + + r_.v128 = wasm_v128_load8_splat(ptr); + + return simde_int8x16_from_private(r_); + #else + return simde_vdupq_n_s8(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_s8 + #define vld1q_dup_s8(a) simde_vld1q_dup_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vld1q_dup_s16(int16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_s16(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int16x8_private r_; + + r_.v128 = wasm_v128_load16_splat(ptr); + + return simde_int16x8_from_private(r_); + #else + return simde_vdupq_n_s16(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_s16 + #define vld1q_dup_s16(a) simde_vld1q_dup_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vld1q_dup_s32(int32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_s32(ptr); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr))); + #else + r_.v128 = wasm_v128_load32_splat(ptr); + #endif + + return simde_int32x4_from_private(r_); + #else + return simde_vdupq_n_s32(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_s32 + #define vld1q_dup_s32(a) simde_vld1q_dup_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vld1q_dup_s64(int64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_s64(ptr); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + 
simde_int64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi64x(*ptr); + #else + r_.v128 = wasm_v128_load64_splat(ptr); + #endif + + return simde_int64x2_from_private(r_); + #else + return simde_vdupq_n_s64(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_s64 + #define vld1q_dup_s64(a) simde_vld1q_dup_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vld1q_dup_u8(uint8_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_u8(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint8x16_private r_; + + r_.v128 = wasm_v128_load8_splat(ptr); + + return simde_uint8x16_from_private(r_); + #else + return simde_vdupq_n_u8(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_u8 + #define vld1q_dup_u8(a) simde_vld1q_dup_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vld1q_dup_u16(uint16_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_u16(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint16x8_private r_; + + r_.v128 = wasm_v128_load16_splat(ptr); + + return simde_uint16x8_from_private(r_); + #else + return simde_vdupq_n_u16(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_u16 + #define vld1q_dup_u16(a) simde_vld1q_dup_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vld1q_dup_u32(uint32_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_u32(ptr); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint32x4_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_castps_si128(_mm_load_ps1(HEDLEY_REINTERPRET_CAST(float const *, ptr))); + #else + r_.v128 = wasm_v128_load32_splat(ptr); + #endif + + return simde_uint32x4_from_private(r_); + #else + return simde_vdupq_n_u32(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_u32 + #define vld1q_dup_u32(a) simde_vld1q_dup_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vld1q_dup_u64(uint64_t const * ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld1q_dup_u64(ptr); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint64x2_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_set1_epi64x(*HEDLEY_REINTERPRET_CAST(int64_t const *, ptr)); + #else + r_.v128 = wasm_v128_load64_splat(ptr); + #endif + + return simde_uint64x2_from_private(r_); + #else + return simde_vdupq_n_u64(*ptr); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_dup_u64 + #define vld1q_dup_u64(a) simde_vld1q_dup_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1_DUP_H) */ +/* :: End simde/arm/neon/ld1_dup.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do 
so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_LD1_LANE_H) +#define SIMDE_ARM_NEON_LD1_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t simde_vld1_lane_s8(int8_t const *ptr, simde_int8x8_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int8x8_private r = simde_int8x8_to_private(src); + r.values[lane] = *ptr; + return simde_int8x8_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_s8(ptr, src, lane) vld1_lane_s8(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_s8 + #define vld1_lane_s8(ptr, src, lane) simde_vld1_lane_s8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t simde_vld1_lane_s16(int16_t const *ptr, simde_int16x4_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4_private r = simde_int16x4_to_private(src); + r.values[lane] = *ptr; + return simde_int16x4_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_s16(ptr, src, lane) vld1_lane_s16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_s16 + #define vld1_lane_s16(ptr, src, lane) simde_vld1_lane_s16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t simde_vld1_lane_s32(int32_t const *ptr, simde_int32x2_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_private r = simde_int32x2_to_private(src); + r.values[lane] = *ptr; + return simde_int32x2_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_s32(ptr, src, lane) vld1_lane_s32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_s32 + #define vld1_lane_s32(ptr, src, lane) simde_vld1_lane_s32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t simde_vld1_lane_s64(int64_t const *ptr, simde_int64x1_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_int64x1_private r = simde_int64x1_to_private(src); + r.values[lane] = *ptr; + return simde_int64x1_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_s64(ptr, src, lane) vld1_lane_s64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_s64 + #define vld1_lane_s64(ptr, src, lane) simde_vld1_lane_s64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t simde_vld1_lane_u8(uint8_t const *ptr, simde_uint8x8_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + 
simde_uint8x8_private r = simde_uint8x8_to_private(src); + r.values[lane] = *ptr; + return simde_uint8x8_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_u8(ptr, src, lane) vld1_lane_u8(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_u8 + #define vld1_lane_u8(ptr, src, lane) simde_vld1_lane_u8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t simde_vld1_lane_u16(uint16_t const *ptr, simde_uint16x4_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x4_private r = simde_uint16x4_to_private(src); + r.values[lane] = *ptr; + return simde_uint16x4_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_u16(ptr, src, lane) vld1_lane_u16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_u16 + #define vld1_lane_u16(ptr, src, lane) simde_vld1_lane_u16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t simde_vld1_lane_u32(uint32_t const *ptr, simde_uint32x2_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x2_private r = simde_uint32x2_to_private(src); + r.values[lane] = *ptr; + return simde_uint32x2_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_u32(ptr, src, lane) vld1_lane_u32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_u32 + #define vld1_lane_u32(ptr, src, lane) simde_vld1_lane_u32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t simde_vld1_lane_u64(uint64_t const *ptr, simde_uint64x1_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_uint64x1_private r = simde_uint64x1_to_private(src); + r.values[lane] = *ptr; + return simde_uint64x1_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_u64(ptr, src, lane) vld1_lane_u64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_u64 + #define vld1_lane_u64(ptr, src, lane) simde_vld1_lane_u64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t simde_vld1_lane_f32(simde_float32_t const *ptr, simde_float32x2_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_private r = simde_float32x2_to_private(src); + r.values[lane] = *ptr; + return simde_float32x2_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1_lane_f32(ptr, src, lane) vld1_lane_f32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_f32 + #define vld1_lane_f32(ptr, src, lane) simde_vld1_lane_f32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t simde_vld1_lane_f64(simde_float64_t const *ptr, simde_float64x1_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x1_private r = simde_float64x1_to_private(src); + r.values[lane] = *ptr; + return simde_float64x1_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld1_lane_f64(ptr, src, lane) vld1_lane_f64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld1_lane_f64 + #define vld1_lane_f64(ptr, src, lane) simde_vld1_lane_f64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t simde_vld1q_lane_s8(int8_t const *ptr, simde_int8x16_t src, + const int 
lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_int8x16_private r = simde_int8x16_to_private(src); + r.values[lane] = *ptr; + return simde_int8x16_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_s8(ptr, src, lane) vld1q_lane_s8(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_s8 + #define vld1q_lane_s8(ptr, src, lane) simde_vld1q_lane_s8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t simde_vld1q_lane_s16(int16_t const *ptr, simde_int16x8_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8_private r = simde_int16x8_to_private(src); + r.values[lane] = *ptr; + return simde_int16x8_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_s16(ptr, src, lane) vld1q_lane_s16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_s16 + #define vld1q_lane_s16(ptr, src, lane) simde_vld1q_lane_s16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t simde_vld1q_lane_s32(int32_t const *ptr, simde_int32x4_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_private r = simde_int32x4_to_private(src); + r.values[lane] = *ptr; + return simde_int32x4_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_s32(ptr, src, lane) vld1q_lane_s32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_s32 + #define vld1q_lane_s32(ptr, src, lane) simde_vld1q_lane_s32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t simde_vld1q_lane_s64(int64_t const *ptr, simde_int64x2_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int64x2_private r = simde_int64x2_to_private(src); + r.values[lane] = *ptr; + return simde_int64x2_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_s64(ptr, src, lane) vld1q_lane_s64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_s64 + #define vld1q_lane_s64(ptr, src, lane) simde_vld1q_lane_s64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t simde_vld1q_lane_u8(uint8_t const *ptr, simde_uint8x16_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_uint8x16_private r = simde_uint8x16_to_private(src); + r.values[lane] = *ptr; + return simde_uint8x16_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_u8(ptr, src, lane) vld1q_lane_u8(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_u8 + #define vld1q_lane_u8(ptr, src, lane) simde_vld1q_lane_u8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t simde_vld1q_lane_u16(uint16_t const *ptr, simde_uint16x8_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x8_private r = simde_uint16x8_to_private(src); + r.values[lane] = *ptr; + return simde_uint16x8_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_u16(ptr, src, lane) vld1q_lane_u16(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_u16 + #define vld1q_lane_u16(ptr, src, lane) simde_vld1q_lane_u16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t simde_vld1q_lane_u32(uint32_t 
const *ptr, simde_uint32x4_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4_private r = simde_uint32x4_to_private(src); + r.values[lane] = *ptr; + return simde_uint32x4_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_u32(ptr, src, lane) vld1q_lane_u32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_u32 + #define vld1q_lane_u32(ptr, src, lane) simde_vld1q_lane_u32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t simde_vld1q_lane_u64(uint64_t const *ptr, simde_uint64x2_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint64x2_private r = simde_uint64x2_to_private(src); + r.values[lane] = *ptr; + return simde_uint64x2_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_u64(ptr, src, lane) vld1q_lane_u64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_u64 + #define vld1q_lane_u64(ptr, src, lane) simde_vld1q_lane_u64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t simde_vld1q_lane_f32(simde_float32_t const *ptr, simde_float32x4_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x4_private r = simde_float32x4_to_private(src); + r.values[lane] = *ptr; + return simde_float32x4_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vld1q_lane_f32(ptr, src, lane) vld1q_lane_f32(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_f32 + #define vld1q_lane_f32(ptr, src, lane) simde_vld1q_lane_f32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t simde_vld1q_lane_f64(simde_float64_t const *ptr, simde_float64x2_t src, + const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x2_private r = simde_float64x2_to_private(src); + r.values[lane] = *ptr; + return simde_float64x2_from_private(r); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vld1q_lane_f64(ptr, src, lane) vld1q_lane_f64(ptr, src, lane) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld1q_lane_f64 + #define vld1q_lane_f64(ptr, src, lane) simde_vld1q_lane_f64((ptr), (src), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1_LANE_H) */ +/* :: End simde/arm/neon/ld1_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1_x2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_LD1_X2_H) +#define SIMDE_ARM_NEON_LD1_X2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x2_t +simde_vld1_f32_x2(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_f32_x2(ptr); + #else + simde_float32x2_private a_[2]; + for (size_t i = 0; i < 4; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_float32x2x2_t s_ = { { simde_float32x2_from_private(a_[0]), + simde_float32x2_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f32_x2 + #define vld1_f32_x2(a) simde_vld1_f32_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x2_t +simde_vld1_f64_x2(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_f64_x2(ptr); + #else + simde_float64x1_private a_[2]; + for (size_t i = 0; i < 2; i++) { + a_[i].values[0] = ptr[i]; + } + simde_float64x1x2_t s_ = { { simde_float64x1_from_private(a_[0]), + simde_float64x1_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f64_x2 + #define vld1_f64_x2(a) simde_vld1_f64_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x2_t +simde_vld1_s8_x2(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s8_x2(ptr); + #else + simde_int8x8_private a_[2]; + for (size_t i = 0; i < 16; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_int8x8x2_t s_ = { { simde_int8x8_from_private(a_[0]), + simde_int8x8_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s8_x2 + #define vld1_s8_x2(a) simde_vld1_s8_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x2_t +simde_vld1_s16_x2(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s16_x2(ptr); + #else + simde_int16x4_private a_[2]; + for (size_t i = 0; i < 8; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + 
simde_int16x4x2_t s_ = { { simde_int16x4_from_private(a_[0]), + simde_int16x4_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s16_x2 + #define vld1_s16_x2(a) simde_vld1_s16_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x2_t +simde_vld1_s32_x2(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s32_x2(ptr); + #else + simde_int32x2_private a_[2]; + for (size_t i = 0; i < 4; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_int32x2x2_t s_ = { { simde_int32x2_from_private(a_[0]), + simde_int32x2_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s32_x2 + #define vld1_s32_x2(a) simde_vld1_s32_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x2_t +simde_vld1_s64_x2(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s64_x2(ptr); + #else + simde_int64x1_private a_[2]; + for (size_t i = 0; i < 2; i++) { + a_[i].values[0] = ptr[i]; + } + simde_int64x1x2_t s_ = { { simde_int64x1_from_private(a_[0]), + simde_int64x1_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s64_x2 + #define vld1_s64_x2(a) simde_vld1_s64_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x2_t +simde_vld1_u8_x2(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u8_x2(ptr); + #else + simde_uint8x8_private a_[2]; + for (size_t i = 0; i < 16; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_uint8x8x2_t s_ = { { simde_uint8x8_from_private(a_[0]), + simde_uint8x8_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u8_x2 + #define vld1_u8_x2(a) simde_vld1_u8_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x2_t +simde_vld1_u16_x2(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u16_x2(ptr); + #else + simde_uint16x4_private a_[2]; + for (size_t i = 0; i < 8; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_uint16x4x2_t s_ = { { simde_uint16x4_from_private(a_[0]), + simde_uint16x4_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u16_x2 + #define vld1_u16_x2(a) simde_vld1_u16_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x2_t +simde_vld1_u32_x2(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + 
(!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u32_x2(ptr); + #else + simde_uint32x2_private a_[2]; + for (size_t i = 0; i < 4; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_uint32x2x2_t s_ = { { simde_uint32x2_from_private(a_[0]), + simde_uint32x2_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u32_x2 + #define vld1_u32_x2(a) simde_vld1_u32_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x2_t +simde_vld1_u64_x2(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u64_x2(ptr); + #else + simde_uint64x1_private a_[2]; + for (size_t i = 0; i < 2; i++) { + a_[i].values[0] = ptr[i]; + } + simde_uint64x1x2_t s_ = { { simde_uint64x1_from_private(a_[0]), + simde_uint64x1_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u64_x2 + #define vld1_u64_x2(a) simde_vld1_u64_x2((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1_X2_H) */ +/* :: End simde/arm/neon/ld1_x2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1_x3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_LD1_X3_H) +#define SIMDE_ARM_NEON_LD1_X3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x3_t +simde_vld1_f32_x3(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(6)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_f32_x3(ptr); + #else + simde_float32x2_private a_[3]; + for (size_t i = 0; i < 6; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_float32x2x3_t s_ = { { simde_float32x2_from_private(a_[0]), + simde_float32x2_from_private(a_[1]), + simde_float32x2_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f32_x3 + #define vld1_f32_x3(a) simde_vld1_f32_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x3_t +simde_vld1_f64_x3(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(3)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_f64_x3(ptr); + #else + simde_float64x1_private a_[3]; + for (size_t i = 0; i < 3; i++) { + a_[i].values[0] = ptr[i]; + } + simde_float64x1x3_t s_ = { { simde_float64x1_from_private(a_[0]), + simde_float64x1_from_private(a_[1]), + simde_float64x1_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f64_x3 + #define vld1_f64_x3(a) simde_vld1_f64_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x3_t +simde_vld1_s8_x3(int8_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s8_x3(ptr); + #else + simde_int8x8_private a_[3]; + for (size_t i = 0; i < 24; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_int8x8x3_t s_ = { { simde_int8x8_from_private(a_[0]), + simde_int8x8_from_private(a_[1]), + simde_int8x8_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s8_x3 + #define vld1_s8_x3(a) simde_vld1_s8_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x3_t +simde_vld1_s16_x3(int16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s16_x3(ptr); + #else + simde_int16x4_private a_[3]; + for (size_t i = 0; i < 12; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_int16x4x3_t s_ = { { simde_int16x4_from_private(a_[0]), + simde_int16x4_from_private(a_[1]), + simde_int16x4_from_private(a_[2]) } }; + return s_; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s16_x3 + #define vld1_s16_x3(a) simde_vld1_s16_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x3_t +simde_vld1_s32_x3(int32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s32_x3(ptr); + #else + simde_int32x2_private a_[3]; + for (size_t i = 0; i < 6; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_int32x2x3_t s_ = { { simde_int32x2_from_private(a_[0]), + simde_int32x2_from_private(a_[1]), + simde_int32x2_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s32_x3 + #define vld1_s32_x3(a) simde_vld1_s32_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x3_t +simde_vld1_s64_x3(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s64_x3(ptr); + #else + simde_int64x1_private a_[3]; + for (size_t i = 0; i < 3; i++) { + a_[i].values[0] = ptr[i]; + } + simde_int64x1x3_t s_ = { { simde_int64x1_from_private(a_[0]), + simde_int64x1_from_private(a_[1]), + simde_int64x1_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s64_x3 + #define vld1_s64_x3(a) simde_vld1_s64_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x3_t +simde_vld1_u8_x3(uint8_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u8_x3(ptr); + #else + simde_uint8x8_private a_[3]; + for (size_t i = 0; i < 24; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_uint8x8x3_t s_ = { { simde_uint8x8_from_private(a_[0]), + simde_uint8x8_from_private(a_[1]), + simde_uint8x8_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u8_x3 + #define vld1_u8_x3(a) simde_vld1_u8_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x3_t +simde_vld1_u16_x3(uint16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u16_x3(ptr); + #else + simde_uint16x4_private a_[3]; + for (size_t i = 0; i < 12; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_uint16x4x3_t s_ = { { simde_uint16x4_from_private(a_[0]), + simde_uint16x4_from_private(a_[1]), + simde_uint16x4_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u16_x3 + #define vld1_u16_x3(a) simde_vld1_u16_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x3_t +simde_vld1_u32_x3(uint32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && 
\ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u32_x3(ptr); + #else + simde_uint32x2_private a_[3]; + for (size_t i = 0; i < 6; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_uint32x2x3_t s_ = { { simde_uint32x2_from_private(a_[0]), + simde_uint32x2_from_private(a_[1]), + simde_uint32x2_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u32_x3 + #define vld1_u32_x3(a) simde_vld1_u32_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x3_t +simde_vld1_u64_x3(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u64_x3(ptr); + #else + simde_uint64x1_private a_[3]; + for (size_t i = 0; i < 3; i++) { + a_[i].values[0] = ptr[i]; + } + simde_uint64x1x3_t s_ = { { simde_uint64x1_from_private(a_[0]), + simde_uint64x1_from_private(a_[1]), + simde_uint64x1_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u64_x3 + #define vld1_u64_x3(a) simde_vld1_u64_x3((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1_X3_H) */ +/* :: End simde/arm/neon/ld1_x3.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1_x4.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_LD1_X4_H) +#define SIMDE_ARM_NEON_LD1_X4_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x4_t +simde_vld1_f32_x4(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_f32_x4(ptr); + #else + simde_float32x2_private a_[4]; + for (size_t i = 0; i < 8; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), + simde_float32x2_from_private(a_[1]), + simde_float32x2_from_private(a_[2]), + simde_float32x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f32_x4 + #define vld1_f32_x4(a) simde_vld1_f32_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x4_t +simde_vld1_f64_x4(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_f64_x4(ptr); + #else + simde_float64x1_private a_[4]; + for (size_t i = 0; i < 4; i++) { + a_[i].values[0] = ptr[i]; + } + simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), + simde_float64x1_from_private(a_[1]), + simde_float64x1_from_private(a_[2]), + simde_float64x1_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_f64_x4 + #define vld1_f64_x4(a) simde_vld1_f64_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x4_t +simde_vld1_s8_x4(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s8_x4(ptr); + #else + simde_int8x8_private a_[4]; + for (size_t i = 0; i < 32; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), + simde_int8x8_from_private(a_[1]), + simde_int8x8_from_private(a_[2]), + simde_int8x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s8_x4 + #define vld1_s8_x4(a) simde_vld1_s8_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x4_t +simde_vld1_s16_x4(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s16_x4(ptr); + #else + simde_int16x4_private a_[4]; + for (size_t i = 0; i < 16; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_int16x4x4_t s_ = { { 
simde_int16x4_from_private(a_[0]), + simde_int16x4_from_private(a_[1]), + simde_int16x4_from_private(a_[2]), + simde_int16x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s16_x4 + #define vld1_s16_x4(a) simde_vld1_s16_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x4_t +simde_vld1_s32_x4(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s32_x4(ptr); + #else + simde_int32x2_private a_[4]; + for (size_t i = 0; i < 8; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), + simde_int32x2_from_private(a_[1]), + simde_int32x2_from_private(a_[2]), + simde_int32x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s32_x4 + #define vld1_s32_x4(a) simde_vld1_s32_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x4_t +simde_vld1_s64_x4(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_s64_x4(ptr); + #else + simde_int64x1_private a_[4]; + for (size_t i = 0; i < 4; i++) { + a_[i].values[0] = ptr[i]; + } + simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), + simde_int64x1_from_private(a_[1]), + simde_int64x1_from_private(a_[2]), + simde_int64x1_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_s64_x4 + #define vld1_s64_x4(a) simde_vld1_s64_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x4_t +simde_vld1_u8_x4(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u8_x4(ptr); + #else + simde_uint8x8_private a_[4]; + for (size_t i = 0; i < 32; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), + simde_uint8x8_from_private(a_[1]), + simde_uint8x8_from_private(a_[2]), + simde_uint8x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u8_x4 + #define vld1_u8_x4(a) simde_vld1_u8_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x4_t +simde_vld1_u16_x4(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u16_x4(ptr); + #else + simde_uint16x4_private a_[4]; + for (size_t i = 0; i < 16; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), + simde_uint16x4_from_private(a_[1]), + simde_uint16x4_from_private(a_[2]), + simde_uint16x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u16_x4 + 
#define vld1_u16_x4(a) simde_vld1_u16_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x4_t +simde_vld1_u32_x4(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u32_x4(ptr); + #else + simde_uint32x2_private a_[4]; + for (size_t i = 0; i < 8; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), + simde_uint32x2_from_private(a_[1]), + simde_uint32x2_from_private(a_[2]), + simde_uint32x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u32_x4 + #define vld1_u32_x4(a) simde_vld1_u32_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x4_t +simde_vld1_u64_x4(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1_u64_x4(ptr); + #else + simde_uint64x1_private a_[4]; + for (size_t i = 0; i < 4; i++) { + a_[i].values[0] = ptr[i]; + } + simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), + simde_uint64x1_from_private(a_[1]), + simde_uint64x1_from_private(a_[2]), + simde_uint64x1_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1_u64_x4 + #define vld1_u64_x4(a) simde_vld1_u64_x4((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1_X4_H) */ +/* :: End simde/arm/neon/ld1_x4.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1q_x2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_LD1Q_X2_H) +#define SIMDE_ARM_NEON_LD1Q_X2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x2_t +simde_vld1q_f32_x2(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_f32_x2(ptr); + #else + simde_float32x4_private a_[2]; + for (size_t i = 0; i < 8; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_float32x4x2_t s_ = { { simde_float32x4_from_private(a_[0]), + simde_float32x4_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f32_x2 + #define vld1q_f32_x2(a) simde_vld1q_f32_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x2_t +simde_vld1q_f64_x2(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_f64_x2(ptr); + #else + simde_float64x2_private a_[2]; + for (size_t i = 0; i < 4; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_float64x2x2_t s_ = { { simde_float64x2_from_private(a_[0]), + simde_float64x2_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f64_x2 + #define vld1q_f64_x2(a) simde_vld1q_f64_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x2_t +simde_vld1q_s8_x2(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s8_x2(ptr); + #else + simde_int8x16_private a_[2]; + for (size_t i = 0; i < 32; i++) { + a_[i / 16].values[i % 16] = ptr[i]; + } + simde_int8x16x2_t s_ = { { simde_int8x16_from_private(a_[0]), + simde_int8x16_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s8_x2 + #define vld1q_s8_x2(a) simde_vld1q_s8_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x2_t +simde_vld1q_s16_x2(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s16_x2(ptr); + #else + simde_int16x8_private a_[2]; + for (size_t i = 0; i < 16; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_int16x8x2_t s_ = { { simde_int16x8_from_private(a_[0]), + simde_int16x8_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s16_x2 + #define vld1q_s16_x2(a) 
simde_vld1q_s16_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x2_t +simde_vld1q_s32_x2(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s32_x2(ptr); + #else + simde_int32x4_private a_[2]; + for (size_t i = 0; i < 8; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_int32x4x2_t s_ = { { simde_int32x4_from_private(a_[0]), + simde_int32x4_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s32_x2 + #define vld1q_s32_x2(a) simde_vld1q_s32_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x2_t +simde_vld1q_s64_x2(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s64_x2(ptr); + #else + simde_int64x2_private a_[2]; + for (size_t i = 0; i < 4; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_int64x2x2_t s_ = { { simde_int64x2_from_private(a_[0]), + simde_int64x2_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s64_x2 + #define vld1q_s64_x2(a) simde_vld1q_s64_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x2_t +simde_vld1q_u8_x2(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u8_x2(ptr); + #else + simde_uint8x16_private a_[2]; + for (size_t i = 0; i < 32; i++) { + a_[i / 16].values[i % 16] = ptr[i]; + } + simde_uint8x16x2_t s_ = { { simde_uint8x16_from_private(a_[0]), + simde_uint8x16_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u8_x2 + #define vld1q_u8_x2(a) simde_vld1q_u8_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x2_t +simde_vld1q_u16_x2(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u16_x2(ptr); + #else + simde_uint16x8_private a_[2]; + for (size_t i = 0; i < 16; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_uint16x8x2_t s_ = { { simde_uint16x8_from_private(a_[0]), + simde_uint16x8_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u16_x2 + #define vld1q_u16_x2(a) simde_vld1q_u16_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x2_t +simde_vld1q_u32_x2(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u32_x2(ptr); + #else + simde_uint32x4_private a_[2]; + for (size_t i = 0; i < 8; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } 
+ simde_uint32x4x2_t s_ = { { simde_uint32x4_from_private(a_[0]), + simde_uint32x4_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u32_x2 + #define vld1q_u32_x2(a) simde_vld1q_u32_x2((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x2_t +simde_vld1q_u64_x2(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u64_x2(ptr); + #else + simde_uint64x2_private a_[2]; + for (size_t i = 0; i < 4; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_uint64x2x2_t s_ = { { simde_uint64x2_from_private(a_[0]), + simde_uint64x2_from_private(a_[1]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u64_x2 + #define vld1q_u64_x2(a) simde_vld1q_u64_x2((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1Q_X2_H) */ +/* :: End simde/arm/neon/ld1q_x2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1q_x3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_LD1Q_X3_H) +#define SIMDE_ARM_NEON_LD1Q_X3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x3_t +simde_vld1q_f32_x3(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(12)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_f32_x3(ptr); + #else + simde_float32x4_private a_[3]; + for (size_t i = 0; i < 12; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_float32x4x3_t s_ = { { simde_float32x4_from_private(a_[0]), + simde_float32x4_from_private(a_[1]), + simde_float32x4_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f32_x3 + #define vld1q_f32_x3(a) simde_vld1q_f32_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x3_t +simde_vld1q_f64_x3(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(6)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_f64_x3(ptr); + #else + simde_float64x2_private a_[3]; + for (size_t i = 0; i < 6; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_float64x2x3_t s_ = { { simde_float64x2_from_private(a_[0]), + simde_float64x2_from_private(a_[1]), + simde_float64x2_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f64_x3 + #define vld1q_f64_x3(a) simde_vld1q_f64_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x3_t +simde_vld1q_s8_x3(int8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s8_x3(ptr); + #else + simde_int8x16_private a_[3]; + for (size_t i = 0; i < 48; i++) { + a_[i / 16].values[i % 16] = ptr[i]; + } + simde_int8x16x3_t s_ = { { simde_int8x16_from_private(a_[0]), + simde_int8x16_from_private(a_[1]), + simde_int8x16_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s8_x3 + #define vld1q_s8_x3(a) simde_vld1q_s8_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x3_t +simde_vld1q_s16_x3(int16_t const ptr[HEDLEY_ARRAY_PARAM(12)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s16_x3(ptr); + #else + simde_int16x8_private a_[3]; + for (size_t i = 0; i < 24; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_int16x8x3_t s_ = { { simde_int16x8_from_private(a_[0]), + simde_int16x8_from_private(a_[1]), + simde_int16x8_from_private(a_[2]) } }; + return s_; + 
#endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s16_x3 + #define vld1q_s16_x3(a) simde_vld1q_s16_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x3_t +simde_vld1q_s32_x3(int32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s32_x3(ptr); + #else + simde_int32x4_private a_[3]; + for (size_t i = 0; i < 12; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_int32x4x3_t s_ = { { simde_int32x4_from_private(a_[0]), + simde_int32x4_from_private(a_[1]), + simde_int32x4_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s32_x3 + #define vld1q_s32_x3(a) simde_vld1q_s32_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x3_t +simde_vld1q_s64_x3(int64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s64_x3(ptr); + #else + simde_int64x2_private a_[3]; + for (size_t i = 0; i < 6; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_int64x2x3_t s_ = { { simde_int64x2_from_private(a_[0]), + simde_int64x2_from_private(a_[1]), + simde_int64x2_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s64_x3 + #define vld1q_s64_x3(a) simde_vld1q_s64_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x3_t +simde_vld1q_u8_x3(uint8_t const ptr[HEDLEY_ARRAY_PARAM(48)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u8_x3(ptr); + #else + simde_uint8x16_private a_[3]; + for (size_t i = 0; i < 48; i++) { + a_[i / 16].values[i % 16] = ptr[i]; + } + simde_uint8x16x3_t s_ = { { simde_uint8x16_from_private(a_[0]), + simde_uint8x16_from_private(a_[1]), + simde_uint8x16_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u8_x3 + #define vld1q_u8_x3(a) simde_vld1q_u8_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x3_t +simde_vld1q_u16_x3(uint16_t const ptr[HEDLEY_ARRAY_PARAM(24)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u16_x3(ptr); + #else + simde_uint16x8_private a_[3]; + for (size_t i = 0; i < 24; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + simde_uint16x8x3_t s_ = { { simde_uint16x8_from_private(a_[0]), + simde_uint16x8_from_private(a_[1]), + simde_uint16x8_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u16_x3 + #define vld1q_u16_x3(a) simde_vld1q_u16_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x3_t +simde_vld1q_u32_x3(uint32_t const ptr[HEDLEY_ARRAY_PARAM(6)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || 
(HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u32_x3(ptr); + #else + simde_uint32x4_private a_[3]; + for (size_t i = 0; i < 12; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_uint32x4x3_t s_ = { { simde_uint32x4_from_private(a_[0]), + simde_uint32x4_from_private(a_[1]), + simde_uint32x4_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u32_x3 + #define vld1q_u32_x3(a) simde_vld1q_u32_x3((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x3_t +simde_vld1q_u64_x3(uint64_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u64_x3(ptr); + #else + simde_uint64x2_private a_[3]; + for (size_t i = 0; i < 6; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_uint64x2x3_t s_ = { { simde_uint64x2_from_private(a_[0]), + simde_uint64x2_from_private(a_[1]), + simde_uint64x2_from_private(a_[2]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u64_x3 + #define vld1q_u64_x3(a) simde_vld1q_u64_x3((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1Q_X3_H) */ +/* :: End simde/arm/neon/ld1q_x3.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld1q_x4.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_LD1Q_X4_H) +#define SIMDE_ARM_NEON_LD1Q_X4_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x4_t +simde_vld1q_f32_x4(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_f32_x4(ptr); + #else + simde_float32x4_private a_[4]; + for (size_t i = 0; i < 16; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), + simde_float32x4_from_private(a_[1]), + simde_float32x4_from_private(a_[2]), + simde_float32x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f32_x4 + #define vld1q_f32_x4(a) simde_vld1q_f32_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x4_t +simde_vld1q_f64_x4(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_f64_x4(ptr); + #else + simde_float64x2_private a_[4]; + for (size_t i = 0; i < 8; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), + simde_float64x2_from_private(a_[1]), + simde_float64x2_from_private(a_[2]), + simde_float64x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_f64_x4 + #define vld1q_f64_x4(a) simde_vld1q_f64_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x4_t +simde_vld1q_s8_x4(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s8_x4(ptr); + #else + simde_int8x16_private a_[4]; + for (size_t i = 0; i < 64; i++) { + a_[i / 16].values[i % 16] = ptr[i]; + } + simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), + simde_int8x16_from_private(a_[1]), + simde_int8x16_from_private(a_[2]), + simde_int8x16_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_s8_x4 + #define vld1q_s8_x4(a) simde_vld1q_s8_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x4_t +simde_vld1q_s16_x4(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_s16_x4(ptr); + #else + simde_int16x8_private a_[4]; + for (size_t i = 0; i < 32; i++) { + a_[i / 8].values[i % 8] = ptr[i]; + } + 
simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]),
+                               simde_int16x8_from_private(a_[1]),
+                               simde_int16x8_from_private(a_[2]),
+                               simde_int16x8_from_private(a_[3]) } };
+    return s_;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld1q_s16_x4
+  #define vld1q_s16_x4(a) simde_vld1q_s16_x4((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4x4_t
+simde_vld1q_s32_x4(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) {
+  #if \
+      defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
+      (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
+    return vld1q_s32_x4(ptr);
+  #else
+    simde_int32x4_private a_[4];
+    for (size_t i = 0; i < 16; i++) {
+      a_[i / 4].values[i % 4] = ptr[i];
+    }
+    simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]),
+                               simde_int32x4_from_private(a_[1]),
+                               simde_int32x4_from_private(a_[2]),
+                               simde_int32x4_from_private(a_[3]) } };
+    return s_;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld1q_s32_x4
+  #define vld1q_s32_x4(a) simde_vld1q_s32_x4((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2x4_t
+simde_vld1q_s64_x4(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) {
+  #if \
+      defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
+      (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
+    return vld1q_s64_x4(ptr);
+  #else
+    simde_int64x2_private a_[4];
+    for (size_t i = 0; i < 8; i++) {
+      a_[i / 2].values[i % 2] = ptr[i];
+    }
+    simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]),
+                               simde_int64x2_from_private(a_[1]),
+                               simde_int64x2_from_private(a_[2]),
+                               simde_int64x2_from_private(a_[3]) } };
+    return s_;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld1q_s64_x4
+  #define vld1q_s64_x4(a) simde_vld1q_s64_x4((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x16x4_t
+simde_vld1q_u8_x4(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) {
+  #if \
+      defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
+      (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
+    return vld1q_u8_x4(ptr);
+  #else
+    simde_uint8x16_private a_[4];
+    for (size_t i = 0; i < 64; i++) {
+      a_[i / 16].values[i % 16] = ptr[i];
+    }
+    simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]),
+                                simde_uint8x16_from_private(a_[1]),
+                                simde_uint8x16_from_private(a_[2]),
+                                simde_uint8x16_from_private(a_[3]) } };
+    return s_;
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vld1q_u8_x4
+  #define vld1q_u8_x4(a) simde_vld1q_u8_x4((a))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x8x4_t
+simde_vld1q_u16_x4(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) {
+  #if \
+      defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
+      (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \
+      (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
+    return vld1q_u16_x4(ptr);
+  #else
+    simde_uint16x8_private a_[4];
+    for (size_t i = 0; i < 32; i++) {
+      a_[i / 8].values[i % 8] = ptr[i];
+    }
+    simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]),
+                                simde_uint16x8_from_private(a_[1]),
+                                simde_uint16x8_from_private(a_[2]),
+                                simde_uint16x8_from_private(a_[3]) } };
+    return s_;
+  #endif
+}
+#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u16_x4 + #define vld1q_u16_x4(a) simde_vld1q_u16_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x4_t +simde_vld1q_u32_x4(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u32_x4(ptr); + #else + simde_uint32x4_private a_[4]; + for (size_t i = 0; i < 16; i++) { + a_[i / 4].values[i % 4] = ptr[i]; + } + simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), + simde_uint32x4_from_private(a_[1]), + simde_uint32x4_from_private(a_[2]), + simde_uint32x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u32_x4 + #define vld1q_u32_x4(a) simde_vld1q_u32_x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x4_t +simde_vld1q_u64_x4(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + return vld1q_u64_x4(ptr); + #else + simde_uint64x2_private a_[4]; + for (size_t i = 0; i < 8; i++) { + a_[i / 2].values[i % 2] = ptr[i]; + } + simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), + simde_uint64x2_from_private(a_[1]), + simde_uint64x2_from_private(a_[2]), + simde_uint64x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld1q_u64_x4 + #define vld1q_u64_x4(a) simde_vld1q_u64_x4((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD1Q_X4_H) */ +/* :: End simde/arm/neon/ld1q_x4.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_LD2_H) +#define SIMDE_ARM_NEON_LD2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/uzp.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_UZP_H) && !defined(SIMDE_BUG_INTEL_857088) +#define SIMDE_ARM_NEON_UZP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x2_t +simde_vuzp_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_f32(a, b); + #else + simde_float32x2x2_t r = { { simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_f32 + #define vuzp_f32(a, b) simde_vuzp_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x2_t +simde_vuzp_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_s8(a, b); + #else + simde_int8x8x2_t r = { { simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_s8 + #define vuzp_s8(a, b) simde_vuzp_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x2_t +simde_vuzp_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_s16(a, b); + #else + simde_int16x4x2_t r = { { simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_s16 + #define vuzp_s16(a, b) simde_vuzp_s16((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x2_t +simde_vuzp_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_s32(a, b); + #else + simde_int32x2x2_t r = { { simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_s32 + #define vuzp_s32(a, b) simde_vuzp_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x2_t +simde_vuzp_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_u8(a, b); + #else + simde_uint8x8x2_t r = { { simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_u8 + #define vuzp_u8(a, b) simde_vuzp_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x2_t +simde_vuzp_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_u16(a, b); + #else + simde_uint16x4x2_t r = { { simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_u16 + #define vuzp_u16(a, b) simde_vuzp_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x2_t +simde_vuzp_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzp_u32(a, b); + #else + simde_uint32x2x2_t r = { { simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_u32 + #define vuzp_u32(a, b) simde_vuzp_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x2_t +simde_vuzpq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_f32(a, b); + #else + simde_float32x4x2_t r = { { simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_f32 + #define vuzpq_f32(a, b) simde_vuzpq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x2_t +simde_vuzpq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_s8(a, b); + #else + simde_int8x16x2_t r = { { simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_s8 + #define vuzpq_s8(a, b) simde_vuzpq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x2_t +simde_vuzpq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_s16(a, b); + #else + simde_int16x8x2_t r = { { simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_s16 + #define vuzpq_s16(a, b) simde_vuzpq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x2_t +simde_vuzpq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_s32(a, b); + #else + simde_int32x4x2_t r = { { simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_s32 + #define vuzpq_s32(a, b) simde_vuzpq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x2_t +simde_vuzpq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_u8(a, b); + #else + simde_uint8x16x2_t r 
= { { simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_u8 + #define vuzpq_u8(a, b) simde_vuzpq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x2_t +simde_vuzpq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_u16(a, b); + #else + simde_uint16x8x2_t r = { { simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_u16 + #define vuzpq_u16(a, b) simde_vuzpq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x2_t +simde_vuzpq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vuzpq_u32(a, b); + #else + simde_uint32x4x2_t r = { { simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_u32 + #define vuzpq_u32(a, b) simde_vuzpq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_UZP_H) */ +/* :: End simde/arm/neon/uzp.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x2_t +simde_vld2_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_s8(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t a = wasm_v128_load(ptr); + simde_int8x16_private q_; + q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + simde_int8x16_t q = simde_int8x16_from_private(q_); + + simde_int8x8x2_t u = { + simde_vget_low_s8(q), + simde_vget_high_s8(q) + }; + return u; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_int8x16_private a_ = simde_int8x16_to_private(simde_vld1q_s8(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + simde_int8x8x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_int8x8_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int8x8x2_t r = { { + simde_int8x8_from_private(r_[0]), + simde_int8x8_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_s8 + #define vld2_s8(a) simde_vld2_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x2_t +simde_vld2_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_s16(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_int16x8_private a_ = simde_int16x8_to_private(simde_vld1q_s16(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7); + simde_int16x4x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ + #endif + simde_int16x4_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for 
(size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_POP + #endif + + simde_int16x4x2_t r = { { + simde_int16x4_from_private(r_[0]), + simde_int16x4_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_s16 + #define vld2_s16(a) simde_vld2_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x2_t +simde_vld2_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_s32(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_int32x4_private a_ = simde_int32x4_to_private(simde_vld1q_s32(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3); + simde_int32x2x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_int32x2_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int32x2x2_t r = { { + simde_int32x2_from_private(r_[0]), + simde_int32x2_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_s32 + #define vld2_s32(a) simde_vld2_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x2_t +simde_vld2_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_s64(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_int64x2_private a_ = simde_int64x2_to_private(simde_vld1q_s64(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1); + simde_int64x1x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_int64x1_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int64x1x2_t r = { { + simde_int64x1_from_private(r_[0]), + simde_int64x1_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_s64 + #define vld2_s64(a) simde_vld2_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x2_t +simde_vld2_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_u8(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t a = wasm_v128_load(ptr); + simde_uint8x16_private q_; + q_.v128 = wasm_i8x16_shuffle(a, a, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + simde_uint8x16_t q = simde_uint8x16_from_private(q_); + + simde_uint8x8x2_t u = { + simde_vget_low_u8(q), + simde_vget_high_u8(q) + }; + return u; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_uint8x16_private a_ = simde_uint8x16_to_private(simde_vld1q_u8(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + simde_uint8x8x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_uint8x8_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) 
/ sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint8x8x2_t r = { { + simde_uint8x8_from_private(r_[0]), + simde_uint8x8_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_u8 + #define vld2_u8(a) simde_vld2_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x2_t +simde_vld2_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_u16(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_uint16x8_private a_ = simde_uint16x8_to_private(simde_vld1q_u16(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7); + simde_uint16x4x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ + #endif + simde_uint16x4_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_POP + #endif + + simde_uint16x4x2_t r = { { + simde_uint16x4_from_private(r_[0]), + simde_uint16x4_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_u16 + #define vld2_u16(a) simde_vld2_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x2_t +simde_vld2_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_u32(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_uint32x4_private a_ = simde_uint32x4_to_private(simde_vld1q_u32(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3); + simde_uint32x2x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_uint32x2_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint32x2x2_t r = { { + simde_uint32x2_from_private(r_[0]), + simde_uint32x2_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_u32 + #define vld2_u32(a) simde_vld2_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x2_t +simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_u64(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_uint64x2_private a_ = simde_uint64x2_to_private(simde_vld1q_u64(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1); + simde_uint64x1x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_uint64x1_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint64x1x2_t r = { { + simde_uint64x1_from_private(r_[0]), + simde_uint64x1_from_private(r_[1]), + } }; + + 
return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_u64 + #define vld2_u64(a) simde_vld2_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x2_t +simde_vld2_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2_f32(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_float32x4_private a_ = simde_float32x4_to_private(simde_vld1q_f32(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 0, 2, 1, 3); + simde_float32x2x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_float32x2_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float32x2x2_t r = { { + simde_float32x2_from_private(r_[0]), + simde_float32x2_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2_f32 + #define vld2_f32(a) simde_vld2_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x2_t +simde_vld2_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld2_f64(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) + simde_float64x2_private a_ = simde_float64x2_to_private(simde_vld1q_f64(ptr)); + a_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, a_.values, 0, 1); + simde_float64x1x2_t r; + simde_memcpy(&r, &a_, sizeof(r)); + return r; + #else + simde_float64x1_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float64x1x2_t r = { { + simde_float64x1_from_private(r_[0]), + simde_float64x1_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld2_f64 + #define vld2_f64(a) simde_vld2_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x2_t +simde_vld2q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_s8(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_s8( + simde_vld1q_s8(&(ptr[0])), + simde_vld1q_s8(&(ptr[16])) + ); + #else + simde_int8x16_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int8x16x2_t r = { { + simde_int8x16_from_private(r_[0]), + simde_int8x16_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_s8 + #define vld2q_s8(a) simde_vld2q_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x2_t +simde_vld2q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_s32(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_s32( + simde_vld1q_s32(&(ptr[0])), + simde_vld1q_s32(&(ptr[4])) + ); + #else + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ + #endif + simde_int32x4_private r_[2]; + + for 
(size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_POP + #endif + + simde_int32x4x2_t r = { { + simde_int32x4_from_private(r_[0]), + simde_int32x4_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_s32 + #define vld2q_s32(a) simde_vld2q_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x2_t +simde_vld2q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_s16(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_s16( + simde_vld1q_s16(&(ptr[0])), + simde_vld1q_s16(&(ptr[8])) + ); + #else + simde_int16x8_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int16x8x2_t r = { { + simde_int16x8_from_private(r_[0]), + simde_int16x8_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_s16 + #define vld2q_s16(a) simde_vld2q_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x2_t +simde_vld2q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld2q_s64(ptr); + #else + simde_int64x2_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int64x2x2_t r = { { + simde_int64x2_from_private(r_[0]), + simde_int64x2_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_s64 + #define vld2q_s64(a) simde_vld2q_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x2_t +simde_vld2q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_u8(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_u8( + simde_vld1q_u8(&(ptr[ 0])), + simde_vld1q_u8(&(ptr[16])) + ); + #else + simde_uint8x16_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint8x16x2_t r = { { + simde_uint8x16_from_private(r_[0]), + simde_uint8x16_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_u8 + #define vld2q_u8(a) simde_vld2q_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x2_t +simde_vld2q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_u16(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_u16( + simde_vld1q_u16(&(ptr[0])), + simde_vld1q_u16(&(ptr[8])) + ); + #else + simde_uint16x8_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; 
+ } + } + + simde_uint16x8x2_t r = { { + simde_uint16x8_from_private(r_[0]), + simde_uint16x8_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_u16 + #define vld2q_u16(a) simde_vld2q_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x2_t +simde_vld2q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_u32(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_u32( + simde_vld1q_u32(&(ptr[0])), + simde_vld1q_u32(&(ptr[4])) + ); + #else + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ + #endif + simde_uint32x4_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_POP + #endif + + simde_uint32x4x2_t r = { { + simde_uint32x4_from_private(r_[0]), + simde_uint32x4_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_u32 + #define vld2q_u32(a) simde_vld2q_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x2_t +simde_vld2q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld2q_u64(ptr); + #else + simde_uint64x2_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint64x2x2_t r = { { + simde_uint64x2_from_private(r_[0]), + simde_uint64x2_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_u64 + #define vld2q_u64(a) simde_vld2q_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x2_t +simde_vld2q_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld2q_f32(ptr); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return + simde_vuzpq_f32( + simde_vld1q_f32(&(ptr[0])), + simde_vld1q_f32(&(ptr[4])) + ); + #else + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ + #endif + simde_float32x4_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) && HEDLEY_GCC_VERSION_CHECK(12,0,0) + HEDLEY_DIAGNOSTIC_POP + #endif + + simde_float32x4x2_t r = { { + simde_float32x4_from_private(r_[0]), + simde_float32x4_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld2q_f32 + #define vld2q_f32(a) simde_vld2q_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x2_t +simde_vld2q_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld2q_f64(ptr); + #else + simde_float64x2_private r_[2]; + + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_[0])) ; 
i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float64x2x2_t r = { { + simde_float64x2_from_private(r_[0]), + simde_float64x2_from_private(r_[1]), + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld2q_f64 + #define vld2q_f64(a) simde_vld2q_f64((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD2_H) */ +/* :: End simde/arm/neon/ld2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher + */ + +#if !defined(SIMDE_ARM_NEON_LD3_H) +#define SIMDE_ARM_NEON_LD3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x3_t +simde_vld3_f32(simde_float32 const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_f32(ptr); + #else + simde_float32x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float32x2x3_t r = { { + simde_float32x2_from_private(r_[0]), + simde_float32x2_from_private(r_[1]), + simde_float32x2_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_f32 + #define vld3_f32(a) simde_vld3_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x3_t +simde_vld3_f64(simde_float64 const *ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld3_f64(ptr); + #else + simde_float64x1_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float64x1x3_t r = { { + simde_float64x1_from_private(r_[0]), + simde_float64x1_from_private(r_[1]), + simde_float64x1_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3_f64 + #define vld3_f64(a) simde_vld3_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x3_t +simde_vld3_s8(int8_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_s8(ptr); + #else + simde_int8x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int8x8x3_t r = { { + simde_int8x8_from_private(r_[0]), + simde_int8x8_from_private(r_[1]), + simde_int8x8_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_s8 + #define vld3_s8(a) simde_vld3_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x3_t +simde_vld3_s16(int16_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_s16(ptr); + #else + simde_int16x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int16x4x3_t r = { { + simde_int16x4_from_private(r_[0]), + simde_int16x4_from_private(r_[1]), + simde_int16x4_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_s16 + #define vld3_s16(a) simde_vld3_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x3_t +simde_vld3_s32(int32_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
return vld3_s32(ptr); + #else + simde_int32x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int32x2x3_t r = { { + simde_int32x2_from_private(r_[0]), + simde_int32x2_from_private(r_[1]), + simde_int32x2_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_s32 + #define vld3_s32(a) simde_vld3_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x3_t +simde_vld3_s64(int64_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_s64(ptr); + #else + simde_int64x1_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int64x1x3_t r = { { + simde_int64x1_from_private(r_[0]), + simde_int64x1_from_private(r_[1]), + simde_int64x1_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3_s64 + #define vld3_s64(a) simde_vld3_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x3_t +simde_vld3_u8(uint8_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_u8(ptr); + #else + simde_uint8x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint8x8x3_t r = { { + simde_uint8x8_from_private(r_[0]), + simde_uint8x8_from_private(r_[1]), + simde_uint8x8_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_u8 + #define vld3_u8(a) simde_vld3_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x3_t +simde_vld3_u16(uint16_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_u16(ptr); + #else + simde_uint16x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint16x4x3_t r = { { + simde_uint16x4_from_private(r_[0]), + simde_uint16x4_from_private(r_[1]), + simde_uint16x4_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_u16 + #define vld3_u16(a) simde_vld3_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x3_t +simde_vld3_u32(uint32_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_u32(ptr); + #else + simde_uint32x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint32x2x3_t r = { { + simde_uint32x2_from_private(r_[0]), + simde_uint32x2_from_private(r_[1]), + simde_uint32x2_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3_u32 + #define vld3_u32(a) simde_vld3_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x3_t +simde_vld3_u64(uint64_t const *ptr) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3_u64(ptr); + #else + simde_uint64x1_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint64x1x3_t r = { { + simde_uint64x1_from_private(r_[0]), + simde_uint64x1_from_private(r_[1]), + simde_uint64x1_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3_u64 + #define vld3_u64(a) simde_vld3_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x3_t +simde_vld3q_f32(simde_float32 const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_f32(ptr); + #else + simde_float32x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float32x4x3_t r = { { + simde_float32x4_from_private(r_[0]), + simde_float32x4_from_private(r_[1]), + simde_float32x4_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_f32 + #define vld3q_f32(a) simde_vld3q_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x3_t +simde_vld3q_f64(simde_float64 const *ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld3q_f64(ptr); + #else + simde_float64x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float64x2x3_t r = { { + simde_float64x2_from_private(r_[0]), + simde_float64x2_from_private(r_[1]), + simde_float64x2_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_f64 + #define vld3q_f64(a) simde_vld3q_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x3_t +simde_vld3q_s8(int8_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_s8(ptr); + #else + simde_int8x16_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int8x16x3_t r = { { + simde_int8x16_from_private(r_[0]), + simde_int8x16_from_private(r_[1]), + simde_int8x16_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_s8 + #define vld3q_s8(a) simde_vld3q_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x3_t +simde_vld3q_s16(int16_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_s16(ptr); + #else + simde_int16x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int16x8x3_t r = { { + simde_int16x8_from_private(r_[0]), + simde_int16x8_from_private(r_[1]), + simde_int16x8_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_s16 + #define vld3q_s16(a) simde_vld3q_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_int32x4x3_t +simde_vld3q_s32(int32_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_s32(ptr); + #else + simde_int32x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int32x4x3_t r = { { + simde_int32x4_from_private(r_[0]), + simde_int32x4_from_private(r_[1]), + simde_int32x4_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_s32 + #define vld3q_s32(a) simde_vld3q_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x3_t +simde_vld3q_s64(int64_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld3q_s64(ptr); + #else + simde_int64x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_int64x2x3_t r = { { + simde_int64x2_from_private(r_[0]), + simde_int64x2_from_private(r_[1]), + simde_int64x2_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_s64 + #define vld3q_s64(a) simde_vld3q_s64((a)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x3_t +simde_vld3q_u8(uint8_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_u8(ptr); + #else + simde_uint8x16_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint8x16x3_t r = { { + simde_uint8x16_from_private(r_[0]), + simde_uint8x16_from_private(r_[1]), + simde_uint8x16_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_u8 + #define vld3q_u8(a) simde_vld3q_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x3_t +simde_vld3q_u16(uint16_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_u16(ptr); + #else + simde_uint16x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint16x8x3_t r = { { + simde_uint16x8_from_private(r_[0]), + simde_uint16x8_from_private(r_[1]), + simde_uint16x8_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_u16 + #define vld3q_u16(a) simde_vld3q_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x3_t +simde_vld3q_u32(uint32_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld3q_u32(ptr); + #else + simde_uint32x4_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint32x4x3_t r = { { + simde_uint32x4_from_private(r_[0]), + simde_uint32x4_from_private(r_[1]), + simde_uint32x4_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_u32 + #define vld3q_u32(a) 
simde_vld3q_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x3_t +simde_vld3q_u64(uint64_t const *ptr) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld3q_u64(ptr); + #else + simde_uint64x2_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_uint64x2x3_t r = { { + simde_uint64x2_from_private(r_[0]), + simde_uint64x2_from_private(r_[1]), + simde_uint64x2_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld3q_u64 + #define vld3q_u64(a) simde_vld3q_u64((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD3_H) */ +/* :: End simde/arm/neon/ld3.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld4.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher + */ + +#if !defined(SIMDE_ARM_NEON_LD4_H) +#define SIMDE_ARM_NEON_LD4_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x4_t +simde_vld4_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_f32(ptr); + #else + simde_float32x2_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_float32x2_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_float32x2x4_t s_ = { { simde_float32x2_from_private(a_[0]), simde_float32x2_from_private(a_[1]), + simde_float32x2_from_private(a_[2]), simde_float32x2_from_private(a_[3]) } }; + return (s_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_f32 + #define vld4_f32(a) simde_vld4_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x4_t +simde_vld4_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld4_f64(ptr); + #else + simde_float64x1_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_float64x1_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_float64x1x4_t s_ = { { simde_float64x1_from_private(a_[0]), simde_float64x1_from_private(a_[1]), + simde_float64x1_from_private(a_[2]), simde_float64x1_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_f64 + #define vld4_f64(a) simde_vld4_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x4_t +simde_vld4_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_s8(ptr); + #else + simde_int8x8_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int8x8_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int8x8x4_t s_ = { { simde_int8x8_from_private(a_[0]), simde_int8x8_from_private(a_[1]), + simde_int8x8_from_private(a_[2]), simde_int8x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_s8 + #define vld4_s8(a) simde_vld4_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x4_t +simde_vld4_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_s16(ptr); + #else + simde_int16x4_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int16x4_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int16x4x4_t s_ = { { simde_int16x4_from_private(a_[0]), simde_int16x4_from_private(a_[1]), + simde_int16x4_from_private(a_[2]), simde_int16x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_s16 + #define vld4_s16(a) simde_vld4_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x4_t +simde_vld4_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_s32(ptr); + #else + simde_int32x2_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int32x2_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int32x2x4_t s_ = { { simde_int32x2_from_private(a_[0]), simde_int32x2_from_private(a_[1]), + 
simde_int32x2_from_private(a_[2]), simde_int32x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_s32 + #define vld4_s32(a) simde_vld4_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x4_t +simde_vld4_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_s64(ptr); + #else + simde_int64x1_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int64x1_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int64x1x4_t s_ = { { simde_int64x1_from_private(a_[0]), simde_int64x1_from_private(a_[1]), + simde_int64x1_from_private(a_[2]), simde_int64x1_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_s64 + #define vld4_s64(a) simde_vld4_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x4_t +simde_vld4_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_u8(ptr); + #else + simde_uint8x8_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint8x8_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint8x8x4_t s_ = { { simde_uint8x8_from_private(a_[0]), simde_uint8x8_from_private(a_[1]), + simde_uint8x8_from_private(a_[2]), simde_uint8x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_u8 + #define vld4_u8(a) simde_vld4_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x4_t +simde_vld4_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_u16(ptr); + #else + simde_uint16x4_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint16x4_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint16x4x4_t s_ = { { simde_uint16x4_from_private(a_[0]), simde_uint16x4_from_private(a_[1]), + simde_uint16x4_from_private(a_[2]), simde_uint16x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_u16 + #define vld4_u16(a) simde_vld4_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x4_t +simde_vld4_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_u32(ptr); + #else + simde_uint32x2_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint32x2_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint32x2x4_t s_ = { { simde_uint32x2_from_private(a_[0]), simde_uint32x2_from_private(a_[1]), + simde_uint32x2_from_private(a_[2]), simde_uint32x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_u32 + #define vld4_u32(a) simde_vld4_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x4_t +simde_vld4_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4_u64(ptr); + #else + simde_uint64x1_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint64x1_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint64x1x4_t s_ = { { simde_uint64x1_from_private(a_[0]), simde_uint64x1_from_private(a_[1]), + simde_uint64x1_from_private(a_[2]), simde_uint64x1_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_u64 + #define vld4_u64(a) simde_vld4_u64((a)) +#endif + 
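+/* Editorial note, not part of upstream SIMDe: the simde_vld4_* functions
+ * above perform de-interleaving loads, splitting 4-way interleaved memory
+ * into four separate vectors (val[0] .. val[3]).  A minimal usage sketch,
+ * assuming a hypothetical 8-pixel interleaved RGBA buffer and using only
+ * the declarations from this header:
+ *
+ *   uint8_t rgba[32];                           // R0 G0 B0 A0 R1 G1 B1 A1 ...
+ *   simde_uint8x8x4_t px = simde_vld4_u8(rgba); // de-interleave 8 pixels
+ *   simde_uint8x8_t red   = px.val[0];          // R0 .. R7
+ *   simde_uint8x8_t alpha = px.val[3];          // A0 .. A7
+ */
+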
+SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x4_t +simde_vld4q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_f32(ptr); + #else + simde_float32x4_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_float32x4_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_float32x4x4_t s_ = { { simde_float32x4_from_private(a_[0]), simde_float32x4_from_private(a_[1]), + simde_float32x4_from_private(a_[2]), simde_float32x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_f32 + #define vld4q_f32(a) simde_vld4q_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x4_t +simde_vld4q_f64(simde_float64 const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld4q_f64(ptr); + #else + simde_float64x2_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_float64x2_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_float64x2x4_t s_ = { { simde_float64x2_from_private(a_[0]), simde_float64x2_from_private(a_[1]), + simde_float64x2_from_private(a_[2]), simde_float64x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_f64 + #define vld4q_f64(a) simde_vld4q_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x4_t +simde_vld4q_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_s8(ptr); + #else + simde_int8x16_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int8x16_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int8x16x4_t s_ = { { simde_int8x16_from_private(a_[0]), simde_int8x16_from_private(a_[1]), + simde_int8x16_from_private(a_[2]), simde_int8x16_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_s8 + #define vld4q_s8(a) simde_vld4q_s8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x4_t +simde_vld4q_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_s16(ptr); + #else + simde_int16x8_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int16x8_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int16x8x4_t s_ = { { simde_int16x8_from_private(a_[0]), simde_int16x8_from_private(a_[1]), + simde_int16x8_from_private(a_[2]), simde_int16x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_s16 + #define vld4q_s16(a) simde_vld4q_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x4_t +simde_vld4q_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_s32(ptr); + #else + simde_int32x4_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int32x4_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int32x4x4_t s_ = { { simde_int32x4_from_private(a_[0]), simde_int32x4_from_private(a_[1]), + simde_int32x4_from_private(a_[2]), simde_int32x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_s32 + #define vld4q_s32(a) simde_vld4q_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x4_t +simde_vld4q_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld4q_s64(ptr); + #else + 
simde_int64x2_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_int64x2_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_int64x2x4_t s_ = { { simde_int64x2_from_private(a_[0]), simde_int64x2_from_private(a_[1]), + simde_int64x2_from_private(a_[2]), simde_int64x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_s64 + #define vld4q_s64(a) simde_vld4q_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x4_t +simde_vld4q_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(64)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_u8(ptr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + // Let a, b, c, d be the 4 uint8x16 to return, they are laid out in memory: + // [a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, b3, c3, d3, + // a4, b4, c4, d4, a5, b5, c5, d5, a6, b6, c6, d6, a7, b7, c7, d7, + // a8, b8, c8, d8, a9, b9, c9, d9, a10, b10, c10, d10, a11, b11, c11, d11, + // a12, b12, c12, d12, a13, b13, c13, d13, a14, b14, c14, d14, a15, b15, c15, d15] + v128_t a_ = wasm_v128_load(&ptr[0]); + v128_t b_ = wasm_v128_load(&ptr[16]); + v128_t c_ = wasm_v128_load(&ptr[32]); + v128_t d_ = wasm_v128_load(&ptr[48]); + + v128_t a_low_b_low = wasm_i8x16_shuffle(a_, b_, 0, 4, 8, 12, 16, 20, 24, 28, + 1, 5, 9, 13, 17, 21, 25, 29); + v128_t a_high_b_high = wasm_i8x16_shuffle(c_, d_, 0, 4, 8, 12, 16, 20, 24, + 28, 1, 5, 9, 13, 17, 21, 25, 29); + v128_t a = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 0, 1, 2, 3, 4, 5, + 6, 7, 16, 17, 18, 19, 20, 21, 22, 23); + v128_t b = wasm_i8x16_shuffle(a_low_b_low, a_high_b_high, 8, 9, 10, 11, 12, + 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31); + + v128_t c_low_d_low = wasm_i8x16_shuffle(a_, b_, 2, 6, 10, 14, 18, 22, 26, + 30, 3, 7, 11, 15, 19, 23, 27, 31); + v128_t c_high_d_high = wasm_i8x16_shuffle(c_, d_, 2, 6, 10, 14, 18, 22, 26, + 30, 3, 7, 11, 15, 19, 23, 27, 31); + v128_t c = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 0, 1, 2, 3, 4, 5, + 6, 7, 16, 17, 18, 19, 20, 21, 22, 23); + v128_t d = wasm_i8x16_shuffle(c_low_d_low, c_high_d_high, 8, 9, 10, 11, 12, + 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31); + + simde_uint8x16_private r_[4]; + r_[0].v128 = a; + r_[1].v128 = b; + r_[2].v128 = c; + r_[3].v128 = d; + simde_uint8x16x4_t s_ = {{simde_uint8x16_from_private(r_[0]), + simde_uint8x16_from_private(r_[1]), + simde_uint8x16_from_private(r_[2]), + simde_uint8x16_from_private(r_[3])}}; + return s_; + #else + simde_uint8x16_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint8x16_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint8x16x4_t s_ = { { simde_uint8x16_from_private(a_[0]), simde_uint8x16_from_private(a_[1]), + simde_uint8x16_from_private(a_[2]), simde_uint8x16_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_u8 + #define vld4q_u8(a) simde_vld4q_u8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x4_t +simde_vld4q_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_u16(ptr); + #else + simde_uint16x8_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint16x8_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint16x8x4_t s_ = { { simde_uint16x8_from_private(a_[0]), simde_uint16x8_from_private(a_[1]), + simde_uint16x8_from_private(a_[2]), simde_uint16x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_u16 + #define vld4q_u16(a) simde_vld4q_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x4_t +simde_vld4q_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vld4q_u32(ptr); + #else + simde_uint32x4_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint32x4_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint32x4x4_t s_ = { { simde_uint32x4_from_private(a_[0]), simde_uint32x4_from_private(a_[1]), + simde_uint32x4_from_private(a_[2]), simde_uint32x4_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_u32 + #define vld4q_u32(a) simde_vld4q_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x4_t +simde_vld4q_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld4q_u64(ptr); + #else + simde_uint64x2_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_uint64x2_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_uint64x2x4_t s_ = { { simde_uint64x2_from_private(a_[0]), simde_uint64x2_from_private(a_[1]), + simde_uint64x2_from_private(a_[2]), simde_uint64x2_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_u64 + #define vld4q_u64(a) simde_vld4q_u64((a)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD4_H) */ +/* :: End simde/arm/neon/ld4.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/ld4_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Evan Nemerson + */ + +/* In older versions of clang, __builtin_neon_vld4_lane_v would + * generate a diagnostic for most variants (those which didn't + * use signed 8-bit integers). I believe this was fixed by + * 78ad22e0cc6390fcd44b2b7b5132f1b960ff975d. + * + * Since we have to use macros (due to the immediate-mode parameter) + * we can't just disable it once in this file; we have to use statement + * exprs and push / pop the stack for each macro. 
*/ + +#if !defined(SIMDE_ARM_NEON_LD4_LANE_H) +#define SIMDE_ARM_NEON_LD4_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x4_t +simde_vld4_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x8x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int8x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int8x8_private tmp_ = simde_int8x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int8x8_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_s8(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s8(ptr, src, lane)) + #else + #define simde_vld4_lane_s8(ptr, src, lane) vld4_lane_s8(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_s8 + #define vld4_lane_s8(ptr, src, lane) simde_vld4_lane_s8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x4_t +simde_vld4_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int16x4_private tmp_ = simde_int16x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int16x4_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_s16(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s16(ptr, src, lane)) + #else + #define simde_vld4_lane_s16(ptr, src, lane) vld4_lane_s16(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_s16 + #define vld4_lane_s16(ptr, src, lane) simde_vld4_lane_s16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x4_t +simde_vld4_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int32x2_private tmp_ = simde_int32x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int32x2_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_s32(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s32(ptr, src, lane)) + #else + #define simde_vld4_lane_s32(ptr, src, lane) vld4_lane_s32(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_s32 + #define vld4_lane_s32(ptr, src, lane) simde_vld4_lane_s32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1x4_t +simde_vld4_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_int64x1x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int64x1_private tmp_ = simde_int64x1_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = 
simde_int64x1_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_s64(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_s64(ptr, src, lane)) + #else + #define simde_vld4_lane_s64(ptr, src, lane) vld4_lane_s64(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_s64 + #define vld4_lane_s64(ptr, src, lane) simde_vld4_lane_s64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x4_t +simde_vld4_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x8x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint8x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint8x8_private tmp_ = simde_uint8x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint8x8_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_u8(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u8(ptr, src, lane)) + #else + #define simde_vld4_lane_u8(ptr, src, lane) vld4_lane_u8(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_u8 + #define vld4_lane_u8(ptr, src, lane) simde_vld4_lane_u8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x4_t +simde_vld4_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint16x4_private tmp_ = simde_uint16x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint16x4_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_u16(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u16(ptr, src, lane)) + #else + #define simde_vld4_lane_u16(ptr, src, lane) vld4_lane_u16(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_u16 + #define vld4_lane_u16(ptr, src, lane) simde_vld4_lane_u16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x4_t +simde_vld4_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x2x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint32x2_private tmp_ = simde_uint32x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint32x2_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_u32(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u32(ptr, src, lane)) + #else + #define simde_vld4_lane_u32(ptr, src, lane) vld4_lane_u32(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_u32 + #define vld4_lane_u32(ptr, src, lane) simde_vld4_lane_u32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1x4_t 
+simde_vld4_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_uint64x1x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint64x1_private tmp_ = simde_uint64x1_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint64x1_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_u64(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_u64(ptr, src, lane)) + #else + #define simde_vld4_lane_u64(ptr, src, lane) vld4_lane_u64(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_u64 + #define vld4_lane_u64(ptr, src, lane) simde_vld4_lane_u64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x4_t +simde_vld4_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_float32x2_private tmp_ = simde_float32x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float32x2_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_f32(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f32(ptr, src, lane)) + #else + #define simde_vld4_lane_f32(ptr, src, lane) vld4_lane_f32(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_f32 + #define vld4_lane_f32(ptr, src, lane) simde_vld4_lane_f32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1x4_t +simde_vld4_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x1x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_float64x1_private tmp_ = simde_float64x1_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float64x1_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4_lane_f64(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f64(ptr, src, lane)) + #else + #define simde_vld4_lane_f64(ptr, src, lane) vld4_lane_f64(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4_lane_f64 + #define vld4_lane_f64(ptr, src, lane) simde_vld4_lane_f64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x4_t +simde_vld4q_lane_s8(int8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x16x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_int8x16x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int8x16_private tmp_ = simde_int8x16_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int8x16_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_s8(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, 
vld4q_lane_s8(ptr, src, lane)) + #else + #define simde_vld4q_lane_s8(ptr, src, lane) vld4q_lane_s8(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_s8 + #define vld4q_lane_s8(ptr, src, lane) simde_vld4q_lane_s8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x4_t +simde_vld4q_lane_s16(int16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x8x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int16x8_private tmp_ = simde_int16x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int16x8_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_s16(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s16(ptr, src, lane)) + #else + #define simde_vld4q_lane_s16(ptr, src, lane) vld4q_lane_s16(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_s16 + #define vld4q_lane_s16(ptr, src, lane) simde_vld4q_lane_s16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x4_t +simde_vld4q_lane_s32(int32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int32x4_private tmp_ = simde_int32x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int32x4_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_s32(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s32(ptr, src, lane)) + #else + #define simde_vld4q_lane_s32(ptr, src, lane) vld4q_lane_s32(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_s32 + #define vld4q_lane_s32(ptr, src, lane) simde_vld4q_lane_s32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2x4_t +simde_vld4q_lane_s64(int64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x2x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int64x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_int64x2_private tmp_ = simde_int64x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_int64x2_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_s64(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_s64(ptr, src, lane)) + #else + #define simde_vld4q_lane_s64(ptr, src, lane) vld4q_lane_s64(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_s64 + #define vld4q_lane_s64(ptr, src, lane) simde_vld4q_lane_s64((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x4_t +simde_vld4q_lane_u8(uint8_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x16x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_uint8x16x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint8x16_private tmp_ = simde_uint8x16_to_private(src.val[i]); + 
tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint8x16_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_u8(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u8(ptr, src, lane)) + #else + #define simde_vld4q_lane_u8(ptr, src, lane) vld4q_lane_u8(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_u8 + #define vld4q_lane_u8(ptr, src, lane) simde_vld4q_lane_u8((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x4_t +simde_vld4q_lane_u16(uint16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x8x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x8x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint16x8_private tmp_ = simde_uint16x8_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint16x8_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_u16(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u16(ptr, src, lane)) + #else + #define simde_vld4q_lane_u16(ptr, src, lane) vld4q_lane_u16(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_u16 + #define vld4q_lane_u16(ptr, src, lane) simde_vld4q_lane_u16((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x4_t +simde_vld4q_lane_u32(uint32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint32x4_private tmp_ = simde_uint32x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint32x4_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_u32(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u32(ptr, src, lane)) + #else + #define simde_vld4q_lane_u32(ptr, src, lane) vld4q_lane_u32(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_u32 + #define vld4q_lane_u32(ptr, src, lane) simde_vld4q_lane_u32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2x4_t +simde_vld4q_lane_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint64x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_uint64x2_private tmp_ = simde_uint64x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_uint64x2_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_u64(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_u64(ptr, src, lane)) + #else + #define simde_vld4q_lane_u64(ptr, src, lane) vld4q_lane_u64(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_u64 + #define vld4q_lane_u64(ptr, src, lane) simde_vld4q_lane_u64((ptr), 
(src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x4_t +simde_vld4q_lane_f32(simde_float32_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x4x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_float32x4_private tmp_ = simde_float32x4_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float32x4_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_f32(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f32(ptr, src, lane)) + #else + #define simde_vld4q_lane_f32(ptr, src, lane) vld4q_lane_f32(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_f32 + #define vld4q_lane_f32(ptr, src, lane) simde_vld4q_lane_f32((ptr), (src), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2x4_t +simde_vld4q_lane_f64(simde_float64_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x2x4_t src, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x2x4_t r; + + for (size_t i = 0 ; i < 4 ; i++) { + simde_float64x2_private tmp_ = simde_float64x2_to_private(src.val[i]); + tmp_.values[lane] = ptr[i]; + r.val[i] = simde_float64x2_from_private(tmp_); + } + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) + #define simde_vld4q_lane_f64(ptr, src, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f64(ptr, src, lane)) + #else + #define simde_vld4q_lane_f64(ptr, src, lane) vld4q_lane_f64(ptr, src, lane) + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vld4q_lane_f64 + #define vld4q_lane_f64(ptr, src, lane) simde_vld4q_lane_f64((ptr), (src), (lane)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_LD4_LANE_H) */ +/* :: End simde/arm/neon/ld4_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/max.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MAX_H) +#define SIMDE_ARM_NEON_MAX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmax_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF); + #else + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + #endif + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_f32 + #define vmax_f32(a, b) simde_vmax_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmax_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmax_f64(a, b); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN); + #else + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + #endif + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmax_f64 + #define vmax_f64(a, b) simde_vmax_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmax_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s8(simde_vcgt_s8(a, b), a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_s8 + #define vmax_s8(a, b) simde_vmax_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmax_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s16(simde_vcgt_s16(a, b), a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_s16 + #define vmax_s16(a, b) simde_vmax_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmax_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s32(simde_vcgt_s32(a, b), a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_s32 + #define vmax_s32(a, b) simde_vmax_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_x_vmax_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s64(simde_vcgt_s64(a, b), a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_int64x1_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_u8(simde_vcgt_u8(a, b), a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_u8 + #define vmax_u8(a, b) simde_vmax_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_u16(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE) + return simde_vbsl_u16(simde_vcgt_u16(a, b), a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.m64 = _mm_add_pi16(b_.m64, _mm_subs_pu16(a_.m64, b_.m64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_u16 + #define vmax_u16(a, b) simde_vmax_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmax_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_u32(simde_vcgt_u32(a, b), a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmax_u32 + #define vmax_u32(a, b) simde_vmax_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_x_vmax_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_u64(simde_vcgt_u64(a, b), a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_uint64x1_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return + vec_sel( + b, + a, + vec_orc( + vec_cmpgt(a, b), + vec_cmpeq(a, a) + ) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) cmpres = vec_cmpeq(a, a); + return + vec_sel( + b, + a, + vec_or( + vec_cmpgt(a, b), + vec_nor(cmpres, cmpres) + ) + ); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS) + r_.m128 = _mm_max_ps(a_.m128, b_.m128); + #elif defined(SIMDE_X86_SSE_NATIVE) + __m128 m = _mm_or_ps(_mm_cmpneq_ps(a_.m128, a_.m128), _mm_cmpgt_ps(a_.m128, b_.m128)); + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_blendv_ps(b_.m128, a_.m128, m); + #else + r_.m128 = + _mm_or_ps( + _mm_and_ps(m, a_.m128), + _mm_andnot_ps(m, b_.m128) + ); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_max(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NANF); + #else + r_.values[i] = (a_.values[i] > b_.values[i]) ? 
a_.values[i] : b_.values[i]; + #endif + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_f32 + #define vmaxq_f32(a, b) simde_vmaxq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmaxq_f64(a, b); + #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS) + return vec_max(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS) + r_.m128d = _mm_max_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128d m = _mm_or_pd(_mm_cmpneq_pd(a_.m128d, a_.m128d), _mm_cmpgt_pd(a_.m128d, b_.m128d)); + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_blendv_pd(b_.m128d, a_.m128d, m); + #else + r_.m128d = + _mm_or_pd( + _mm_and_pd(m, a_.m128d), + _mm_andnot_pd(m, b_.m128d) + ); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_max(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + r_.values[i] = (a_.values[i] >= b_.values[i]) ? a_.values[i] : ((a_.values[i] < b_.values[i]) ? b_.values[i] : SIMDE_MATH_NAN); + #else + r_.values[i] = (a_.values[i] > b_.values[i]) ? a_.values[i] : b_.values[i]; + #endif + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxq_f64 + #define vmaxq_f64(a, b) simde_vmaxq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_max_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a_.m128i, b_.m128i); + r_.m128i = _mm_or_si128(_mm_and_si128(m, a_.m128i), _mm_andnot_si128(m, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_max(a_.v128, b_.v128); + #endif + + return simde_int8x16_from_private(r_); + #else + return simde_vbslq_s8(simde_vcgtq_s8(a, b), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_s8 + #define vmaxq_s8(a, b) simde_vmaxq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_max_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_max(a_.v128, b_.v128); + #endif + + return 
simde_int16x8_from_private(r_); + #else + return simde_vbslq_s16(simde_vcgtq_s16(a, b), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_s16 + #define vmaxq_s16(a, b) simde_vmaxq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #elif \ + defined(SIMDE_X86_SSE4_1_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_max_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_max(a_.v128, b_.v128); + #endif + + return simde_int32x4_from_private(r_); + #else + return simde_vbslq_s32(simde_vcgtq_s32(a, b), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_s32 + #define vmaxq_s32(a, b) simde_vmaxq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_x_vmaxq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #else + return simde_vbslq_s64(simde_vcgtq_s64(a, b), a, b); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_max_epu8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_max(a_.v128, b_.v128); + #endif + + return simde_uint8x16_from_private(r_); + #else + return simde_vbslq_u8(simde_vcgtq_u8(a, b), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_u8 + #define vmaxq_u8(a, b) simde_vmaxq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_max_epu16(a_.m128i, b_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.m128i = _mm_add_epi16(b_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_max(a_.v128, b_.v128); + #endif + + return simde_uint16x8_from_private(r_); + #else + return simde_vbslq_u16(simde_vcgtq_u16(a, b), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_u16 + #define vmaxq_u16(a, b) simde_vmaxq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t 
+simde_vmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmaxq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #elif \ + defined(SIMDE_X86_SSE4_1_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_max_epu32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_max(a_.v128, b_.v128); + #endif + + return simde_uint32x4_from_private(r_); + #else + return simde_vbslq_u32(simde_vcgtq_u32(a, b), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxq_u32 + #define vmaxq_u32(a, b) simde_vmaxq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_x_vmaxq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_max(a, b); + #else + return simde_vbslq_u64(simde_vcgtq_u64(a, b), a, b); + #endif +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MAX_H) */ +/* :: End simde/arm/neon/max.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/maxnm.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MAXNM_H) +#define SIMDE_ARM_NEON_MAXNM_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmaxnm_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) + return vmaxnm_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fmaxf) + r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]); + #else + if (a_.values[i] > b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] < b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxnm_f32 + #define vmaxnm_f32(a, b) simde_vmaxnm_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmaxnm_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmaxnm_f64(a, b); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fmax) + r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]); + #else + if (a_.values[i] > b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] < b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxnm_f64 + #define vmaxnm_f64(a, b) simde_vmaxnm_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmaxnmq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) + return vmaxnmq_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_max(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) + #if !defined(SIMDE_FAST_NANS) + __m128 r = _mm_max_ps(a_.m128, b_.m128); + __m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128); + r = _mm_andnot_ps(bnan, r); + r = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan)); + r_.m128 = r; + #else + r_.m128 = _mm_max_ps(a_.m128, b_.m128); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.v128 = wasm_f32x4_max(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fmaxf) + r_.values[i] = simde_math_fmaxf(a_.values[i], b_.values[i]); + #else + if (a_.values[i] > b_.values[i]) { + r_.values[i] = 
a_.values[i]; + } else if (a_.values[i] < b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmaxnmq_f32 + #define vmaxnmq_f32(a, b) simde_vmaxnmq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmaxnmq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmaxnmq_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_max(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + #if !defined(SIMDE_FAST_NANS) + __m128d r = _mm_max_pd(a_.m128d, b_.m128d); + __m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d); + r = _mm_andnot_pd(bnan, r); + r = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan)); + r_.m128d = r; + #else + r_.m128d = _mm_max_pd(a_.m128d, b_.m128d); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.v128 = wasm_f64x2_max(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fmax) + r_.values[i] = simde_math_fmax(a_.values[i], b_.values[i]); + #else + if (a_.values[i] > b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] < b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxnmq_f64 + #define vmaxnmq_f64(a, b) simde_vmaxnmq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MAXNM_H) */ +/* :: End simde/arm/neon/maxnm.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/maxv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MAXV_H) +#define SIMDE_ARM_NEON_MAXV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vmaxv_f32(simde_float32x2_t a) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + + r = -SIMDE_MATH_INFINITYF; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_f32 + #define vmaxv_f32(v) simde_vmaxv_f32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vmaxv_s8(simde_int8x8_t a) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_s8(a); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + + r = INT8_MIN; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_s8 + #define vmaxv_s8(v) simde_vmaxv_s8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vmaxv_s16(simde_int16x4_t a) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_s16(a); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + + r = INT16_MIN; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_s16 + #define vmaxv_s16(v) simde_vmaxv_s16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vmaxv_s32(simde_int32x2_t a) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_s32(a); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + + r = INT32_MIN; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_s32 + #define vmaxv_s32(v) simde_vmaxv_s32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vmaxv_u8(simde_uint8x8_t a) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_u8(a); + #else + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_u8 + #define vmaxv_u8(v) simde_vmaxv_u8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vmaxv_u16(simde_uint16x4_t a) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_u16(a); + #else + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? 
a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_u16 + #define vmaxv_u16(v) simde_vmaxv_u16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vmaxv_u32(simde_uint32x2_t a) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxv_u32(a); + #else + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxv_u32 + #define vmaxv_u32(v) simde_vmaxv_u32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vmaxvq_f32(simde_float32x4_t a) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + + r = -SIMDE_MATH_INFINITYF; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_f32 + #define vmaxvq_f32(v) simde_vmaxvq_f32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vmaxvq_f64(simde_float64x2_t a) { + simde_float64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + + r = -SIMDE_MATH_INFINITY; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_f64 + #define vmaxvq_f64(v) simde_vmaxvq_f64(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vmaxvq_s8(simde_int8x16_t a) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_s8(a); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + + r = INT8_MIN; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_s8 + #define vmaxvq_s8(v) simde_vmaxvq_s8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vmaxvq_s16(simde_int16x8_t a) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_s16(a); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + r = INT16_MIN; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_s16 + #define vmaxvq_s16(v) simde_vmaxvq_s16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vmaxvq_s32(simde_int32x4_t a) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_s32(a); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + r = INT32_MIN; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? 
a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_s32 + #define vmaxvq_s32(v) simde_vmaxvq_s32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vmaxvq_u8(simde_uint8x16_t a) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_u8(a); + #else + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_u8 + #define vmaxvq_u8(v) simde_vmaxvq_u8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vmaxvq_u16(simde_uint16x8_t a) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_u16(a); + #else + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_u16 + #define vmaxvq_u16(v) simde_vmaxvq_u16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vmaxvq_u32(simde_uint32x4_t a) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vmaxvq_u32(a); + #else + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + r = 0; + SIMDE_VECTORIZE_REDUCTION(max:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] > r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmaxvq_u32 + #define vmaxvq_u32(v) simde_vmaxvq_u32(v) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MAXV_H) */ +/* :: End simde/arm/neon/maxv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/min.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MIN_H) +#define SIMDE_ARM_NEON_MIN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmin_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_f32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) + simde_float32x2_t r = simde_vbsl_f32(simde_vcgt_f32(b, a), a, b); + + #if !defined(SIMDE_FAST_NANS) + r = simde_vbsl_f32(simde_vceq_f32(a, a), simde_vbsl_f32(simde_vceq_f32(b, b), r, b), a); + #endif + + return r; + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + if (simde_math_isnanf(a_.values[i])) { + r_.values[i] = a_.values[i]; + } else if (simde_math_isnanf(b_.values[i])) { + r_.values[i] = b_.values[i]; + } else { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #else + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + #endif + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_f32 + #define vmin_f32(a, b) simde_vmin_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmin_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmin_f64(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) + simde_float64x1_t r = simde_vbsl_f64(simde_vcgt_f64(b, a), a, b); + + #if !defined(SIMDE_FAST_NANS) + r = simde_vbsl_f64(simde_vceq_f64(a, a), simde_vbsl_f64(simde_vceq_f64(b, b), r, b), a); + #endif + + return r; + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + if (simde_math_isnan(a_.values[i])) { + r_.values[i] = a_.values[i]; + } else if (simde_math_isnan(b_.values[i])) { + r_.values[i] = b_.values[i]; + } else { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #else + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + #endif + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmin_f64 + #define vmin_f64(a, b) simde_vmin_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmin_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s8(simde_vcgt_s8(b, a), a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_s8 + #define vmin_s8(a, b) simde_vmin_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmin_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s16(simde_vcgt_s16(b, a), a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + /* signed 16-bit min via compare-and-select: keep a where a < b, else b */ + __m64 m = _mm_cmpgt_pi16(b_.m64, a_.m64); + r_.m64 = _mm_or_si64(_mm_and_si64(m, a_.m64), _mm_andnot_si64(m, b_.m64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_s16 + #define vmin_s16(a, b) simde_vmin_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmin_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s32(simde_vcgt_s32(b, a), a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_s32 + #define vmin_s32(a, b) simde_vmin_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_x_vmin_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_s64(simde_vcgt_s64(b, a), a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_int64x1_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_u8(simde_vcgt_u8(b, a), a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ?
a_.values[i] : b_.values[i]; + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_u8 + #define vmin_u8(a, b) simde_vmin_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_u16(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && !defined(SIMDE_X86_SSE2_NATIVE) + return simde_vbsl_u16(simde_vcgt_u16(b, a), a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.m64 = _mm_sub_pi16(a_.m64, _mm_subs_pu16(a_.m64, b_.m64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_u16 + #define vmin_u16(a, b) simde_vmin_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmin_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_u32(simde_vcgt_u32(b, a), a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmin_u32 + #define vmin_u32(a, b) simde_vmin_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_x_vmin_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vbsl_u64(simde_vcgt_u64(b, a), a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; + } + + return simde_uint64x1_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vminq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_f32(a, b); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS) + return vec_min(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_FAST_NANS) + r_.m128 = _mm_min_ps(a_.m128, b_.m128); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_blendv_ps(_mm_set1_ps(SIMDE_MATH_NANF), _mm_min_ps(a_.m128, b_.m128), _mm_cmpord_ps(a_.m128, b_.m128)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + if (simde_math_isnanf(a_.values[i])) { + r_.values[i] = a_.values[i]; + } else if (simde_math_isnanf(b_.values[i])) { + r_.values[i] = b_.values[i]; + } else { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #else + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + #endif + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_f32 + #define vminq_f32(a, b) simde_vminq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vminq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vminq_f64(a, b); + #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && defined(SIMDE_FAST_NANS) + return vec_min(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_FAST_NANS) + r_.m128d = _mm_min_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_blendv_pd(_mm_set1_pd(SIMDE_MATH_NAN), _mm_min_pd(a_.m128d, b_.m128d), _mm_cmpord_pd(a_.m128d, b_.m128d)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if !defined(SIMDE_FAST_NANS) + if (simde_math_isnan(a_.values[i])) { + r_.values[i] = a_.values[i]; + } else if (simde_math_isnan(b_.values[i])) { + r_.values[i] = b_.values[i]; + } else { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #else + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; + #endif + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminq_f64 + #define vminq_f64(a, b) simde_vminq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vminq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_s8 + #define vminq_s8(a, b) simde_vminq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vminq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_min_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_s16 + #define vminq_s16(a, b) simde_vminq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vminq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_s32 + #define vminq_s32(a, b) simde_vminq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_x_vminq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_min_epi64(a_.m128i, b_.m128i); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_min_epu8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_u8 + #define vminq_u8(a, b) simde_vminq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epu16(a_.m128i, b_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.m128i = _mm_sub_epi16(a_.m128i, _mm_subs_epu16(a_.m128i, b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? 
a_.values[i] : b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_u16 + #define vminq_u16(a, b) simde_vminq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vminq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vminq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epu32(a_.m128i, b_.m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i i32_min = _mm_set1_epi32(INT32_MIN); + const __m128i difference = _mm_sub_epi32(a_.m128i, b_.m128i); + __m128i m = + _mm_cmpeq_epi32( + /* _mm_subs_epu32(a_.sse_m128i, b_.sse_m128i) */ + _mm_and_si128( + difference, + _mm_xor_si128( + _mm_cmpgt_epi32( + _mm_xor_si128(difference, i32_min), + _mm_xor_si128(a_.m128i, i32_min) + ), + _mm_set1_epi32(~INT32_C(0)) + ) + ), + _mm_setzero_si128() + ); + r_.m128i = + _mm_or_si128( + _mm_and_si128(m, a_.m128i), + _mm_andnot_si128(m, b_.m128i) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u32x4_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminq_u32 + #define vminq_u32(a, b) simde_vminq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_x_vminq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_min(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] < b_.values[i]) ? a_.values[i] : b_.values[i]; + } + + return simde_uint64x2_from_private(r_); + #endif +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MIN_H) */ +/* :: End simde/arm/neon/min.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/minnm.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MINNM_H) +#define SIMDE_ARM_NEON_MINNM_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vminnm_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) + return vminnm_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fminf) + r_.values[i] = simde_math_fminf(a_.values[i], b_.values[i]); + #else + if (a_.values[i] < b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] > b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminnm_f32 + #define vminnm_f32(a, b) simde_vminnm_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vminnm_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vminnm_f64(a, b); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a), + b_ = simde_float64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fmin) + r_.values[i] = simde_math_fmin(a_.values[i], b_.values[i]); + #else + if (a_.values[i] < b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] > b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminnm_f64 + #define vminnm_f64(a, b) simde_vminnm_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vminnmq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (__ARM_NEON_FP >= 6) + return vminnmq_f32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS) + return simde_vbslq_f32(simde_vcleq_f32(a, b), a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_min(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE_NATIVE) + #if !defined(SIMDE_FAST_NANS) + __m128 r = _mm_min_ps(a_.m128, b_.m128); + __m128 bnan = _mm_cmpunord_ps(b_.m128, b_.m128); + r = _mm_andnot_ps(bnan, r); + r_.m128 = _mm_or_ps(r, _mm_and_ps(a_.m128, bnan)); + #else + r_.m128 = _mm_min_ps(a_.m128, b_.m128); 
+ #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.v128 = wasm_f32x4_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fminf) + r_.values[i] = simde_math_fminf(a_.values[i], b_.values[i]); + #else + if (a_.values[i] < b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] > b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vminnmq_f32 + #define vminnmq_f32(a, b) simde_vminnmq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vminnmq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vminnmq_f64(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_NANS) + return simde_vbslq_f64(simde_vcleq_f64(a, b), a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_min(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + #if !defined(SIMDE_FAST_NANS) + __m128d r = _mm_min_pd(a_.m128d, b_.m128d); + __m128d bnan = _mm_cmpunord_pd(b_.m128d, b_.m128d); + r = _mm_andnot_pd(bnan, r); + r_.m128d = _mm_or_pd(r, _mm_and_pd(a_.m128d, bnan)); + #else + r_.m128d = _mm_min_pd(a_.m128d, b_.m128d); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.v128 = wasm_f64x2_min(a_.v128, b_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if defined(simde_math_fmin) + r_.values[i] = simde_math_fmin(a_.values[i], b_.values[i]); + #else + if (a_.values[i] < b_.values[i]) { + r_.values[i] = a_.values[i]; + } else if (a_.values[i] > b_.values[i]) { + r_.values[i] = b_.values[i]; + } else if (a_.values[i] == a_.values[i]) { + r_.values[i] = a_.values[i]; + } else { + r_.values[i] = b_.values[i]; + } + #endif + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminnmq_f64 + #define vminnmq_f64(a, b) simde_vminnmq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MINNM_H) */ +/* :: End simde/arm/neon/minnm.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/minv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MINV_H) +#define SIMDE_ARM_NEON_MINV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vminv_f32(simde_float32x2_t a) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + + r = SIMDE_MATH_INFINITYF; + #if defined(SIMDE_FAST_NANS) + SIMDE_VECTORIZE_REDUCTION(min:r) + #else + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + #if defined(SIMDE_FAST_NANS) + r = a_.values[i] < r ? a_.values[i] : r; + #else + r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i])); + #endif + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_f32 + #define vminv_f32(v) simde_vminv_f32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vminv_s8(simde_int8x8_t a) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_s8(a); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + + r = INT8_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_s8 + #define vminv_s8(v) simde_vminv_s8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vminv_s16(simde_int16x4_t a) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_s16(a); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + + r = INT16_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_s16 + #define vminv_s16(v) simde_vminv_s16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vminv_s32(simde_int32x2_t a) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_s32(a); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + + r = INT32_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? 
a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_s32 + #define vminv_s32(v) simde_vminv_s32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vminv_u8(simde_uint8x8_t a) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_u8(a); + #else + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + + r = UINT8_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_u8 + #define vminv_u8(v) simde_vminv_u8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vminv_u16(simde_uint16x4_t a) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_u16(a); + #else + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + + r = UINT16_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_u16 + #define vminv_u16(v) simde_vminv_u16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vminv_u32(simde_uint32x2_t a) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminv_u32(a); + #else + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + + r = UINT32_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminv_u32 + #define vminv_u32(v) simde_vminv_u32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vminvq_f32(simde_float32x4_t a) { + simde_float32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + + r = SIMDE_MATH_INFINITYF; + #if defined(SIMDE_FAST_NANS) + SIMDE_VECTORIZE_REDUCTION(min:r) + #else + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + #if defined(SIMDE_FAST_NANS) + r = a_.values[i] < r ? a_.values[i] : r; + #else + r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? r : a_.values[i])); + #endif + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_f32 + #define vminvq_f32(v) simde_vminvq_f32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vminvq_f64(simde_float64x2_t a) { + simde_float64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + + r = SIMDE_MATH_INFINITY; + #if defined(SIMDE_FAST_NANS) + SIMDE_VECTORIZE_REDUCTION(min:r) + #else + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + #if defined(SIMDE_FAST_NANS) + r = a_.values[i] < r ? a_.values[i] : r; + #else + r = (a_.values[i] < r) ? a_.values[i] : ((a_.values[i] >= r) ? r : ((a_.values[i] == a_.values[i]) ? 
r : a_.values[i])); + #endif + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_f64 + #define vminvq_f64(v) simde_vminvq_f64(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vminvq_s8(simde_int8x16_t a) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_s8(a); + #else + simde_int8x16_private a_ = simde_int8x16_to_private(a); + + r = INT8_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_s8 + #define vminvq_s8(v) simde_vminvq_s8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vminvq_s16(simde_int16x8_t a) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_s16(a); + #else + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + r = INT16_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_s16 + #define vminvq_s16(v) simde_vminvq_s16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vminvq_s32(simde_int32x4_t a) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_s32(a); + #else + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + r = INT32_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_s32 + #define vminvq_s32(v) simde_vminvq_s32(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vminvq_u8(simde_uint8x16_t a) { + uint8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_u8(a); + #else + simde_uint8x16_private a_ = simde_uint8x16_to_private(a); + + r = UINT8_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_u8 + #define vminvq_u8(v) simde_vminvq_u8(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vminvq_u16(simde_uint16x8_t a) { + uint16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_u16(a); + #else + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + r = UINT16_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_u16 + #define vminvq_u16(v) simde_vminvq_u16(v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vminvq_u32(simde_uint32x4_t a) { + uint32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vminvq_u32(a); + #else + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + r = UINT32_MAX; + SIMDE_VECTORIZE_REDUCTION(min:r) + for (size_t i = 0 ; i < (sizeof(a_.values) / sizeof(a_.values[0])) ; i++) { + r = a_.values[i] < r ? 
a_.values[i] : r; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vminvq_u32 + #define vminvq_u32(v) simde_vminvq_u32(v) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MINV_H) */ +/* :: End simde/arm/neon/minv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mla.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MLA_H) +#define SIMDE_ARM_NEON_MLA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmla_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_f32(a, b, c); + #else + return simde_vadd_f32(simde_vmul_f32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_f32 + #define vmla_f32(a, b, c) simde_vmla_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmla_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmla_f64(a, b, c); + #else + return simde_vadd_f64(simde_vmul_f64(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmla_f64 + #define vmla_f64(a, b, c) simde_vmla_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmla_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_s8(a, b, c); + #else + return simde_vadd_s8(simde_vmul_s8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_s8 + #define vmla_s8(a, b, c) simde_vmla_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmla_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_s16(a, b, c); + #else + return 
simde_vadd_s16(simde_vmul_s16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_s16 + #define vmla_s16(a, b, c) simde_vmla_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmla_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_s32(a, b, c); + #else + return simde_vadd_s32(simde_vmul_s32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_s32 + #define vmla_s32(a, b, c) simde_vmla_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmla_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_u8(a, b, c); + #else + return simde_vadd_u8(simde_vmul_u8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_u8 + #define vmla_u8(a, b, c) simde_vmla_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmla_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_u16(a, b, c); + #else + return simde_vadd_u16(simde_vmul_u16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_u16 + #define vmla_u16(a, b, c) simde_vmla_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmla_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_u32(a, b, c); + #else + return simde_vadd_u32(simde_vmul_u32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_u32 + #define vmla_u32(a, b, c) simde_vmla_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmlaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_f32(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_madd(b, c, a); + #elif \ + defined(SIMDE_X86_FMA_NATIVE) + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b), + c_ = simde_float32x4_to_private(c); + + #if defined(SIMDE_X86_FMA_NATIVE) + r_.m128 = _mm_fmadd_ps(b_.m128, c_.m128, a_.m128); + #endif + + return simde_float32x4_from_private(r_); + #else + return simde_vaddq_f32(simde_vmulq_f32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_f32 + #define vmlaq_f32(a, b, c) simde_vmlaq_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmlaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlaq_f64(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_madd(b, c, a); + #elif \ + defined(SIMDE_X86_FMA_NATIVE) + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b), + c_ = simde_float64x2_to_private(c); + + #if defined(SIMDE_X86_FMA_NATIVE) + r_.m128d = _mm_fmadd_pd(b_.m128d, c_.m128d, a_.m128d); + #endif + + return simde_float64x2_from_private(r_); + #else + return simde_vaddq_f64(simde_vmulq_f64(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlaq_f64 + #define vmlaq_f64(a, b, c) simde_vmlaq_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmlaq_s8(simde_int8x16_t a, 
simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_s8(a, b, c); + #else + return simde_vaddq_s8(simde_vmulq_s8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_s8 + #define vmlaq_s8(a, b, c) simde_vmlaq_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlaq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_s16(a, b, c); + #else + return simde_vaddq_s16(simde_vmulq_s16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_s16 + #define vmlaq_s16(a, b, c) simde_vmlaq_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlaq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_s32(a, b, c); + #else + return simde_vaddq_s32(simde_vmulq_s32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_s32 + #define vmlaq_s32(a, b, c) simde_vmlaq_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmlaq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_u8(a, b, c); + #else + return simde_vaddq_u8(simde_vmulq_u8(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_u8 + #define vmlaq_u8(a, b, c) simde_vmlaq_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlaq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_u16(a, b, c); + #else + return simde_vaddq_u16(simde_vmulq_u16(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_u16 + #define vmlaq_u16(a, b, c) simde_vmlaq_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlaq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_u32(a, b, c); + #else + return simde_vaddq_u32(simde_vmulq_u32(b, c), a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_u32 + #define vmlaq_u32(a, b, c) simde_vmlaq_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLA_H) */ +/* :: End simde/arm/neon/mla.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mla_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MLA_LANE_H) +#define SIMDE_ARM_NEON_MLA_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmla_lane_f32(a, b, v, lane) vmla_lane_f32((a), (b), (v), (lane)) +#else + #define simde_vmla_lane_f32(a, b, v, lane) simde_vmla_f32((a), (b), simde_vdup_lane_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_lane_f32 + #define vmla_lane_f32(a, b, v, lane) simde_vmla_lane_f32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmla_lane_s16(a, b, v, lane) vmla_lane_s16((a), (b), (v), (lane)) +#else + #define simde_vmla_lane_s16(a, b, v, lane) simde_vmla_s16((a), (b), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_lane_s16 + #define vmla_lane_s16(a, b, v, lane) simde_vmla_lane_s16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmla_lane_s32(a, b, v, lane) vmla_lane_s32((a), (b), (v), (lane)) +#else + #define simde_vmla_lane_s32(a, b, v, lane) simde_vmla_s32((a), (b), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_lane_s32 + #define vmla_lane_s32(a, b, v, lane) simde_vmla_lane_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmla_lane_u16(a, b, v, lane) vmla_lane_u16((a), (b), (v), (lane)) +#else + #define simde_vmla_lane_u16(a, b, v, lane) simde_vmla_u16((a), (b), simde_vdup_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_lane_u16 + #define vmla_lane_u16(a, b, v, lane) simde_vmla_lane_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmla_lane_u32(a, b, v, lane) vmla_lane_u32((a), (b), (v), (lane)) +#else + #define simde_vmla_lane_u32(a, b, v, lane) simde_vmla_u32((a), (b), simde_vdup_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_lane_u32 + #define vmla_lane_u32(a, b, v, lane) simde_vmla_lane_u32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlaq_lane_f32(a, b, v, lane) vmlaq_lane_f32((a), (b), (v), (lane)) +#else + #define simde_vmlaq_lane_f32(a, b, v, lane) simde_vmlaq_f32((a), (b), simde_vdupq_lane_f32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_lane_f32 + #define vmlaq_lane_f32(a, b, v, lane) simde_vmlaq_lane_f32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlaq_lane_s16(a, b, v, lane) vmlaq_lane_s16((a), (b), (v), (lane)) +#else + #define simde_vmlaq_lane_s16(a, b, v, lane) simde_vmlaq_s16((a), (b), simde_vdupq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_lane_s16 + #define 
vmlaq_lane_s16(a, b, v, lane) simde_vmlaq_lane_s16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlaq_lane_s32(a, b, v, lane) vmlaq_lane_s32((a), (b), (v), (lane)) +#else + #define simde_vmlaq_lane_s32(a, b, v, lane) simde_vmlaq_s32((a), (b), simde_vdupq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_lane_s32 + #define vmlaq_lane_s32(a, b, v, lane) simde_vmlaq_lane_s32((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlaq_lane_u16(a, b, v, lane) vmlaq_lane_u16((a), (b), (v), (lane)) +#else + #define simde_vmlaq_lane_u16(a, b, v, lane) simde_vmlaq_u16((a), (b), simde_vdupq_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_lane_u16 + #define vmlaq_lane_u16(a, b, v, lane) simde_vmlaq_lane_u16((a), (b), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlaq_lane_u32(a, b, v, lane) vmlaq_lane_u32((a), (b), (v), (lane)) +#else + #define simde_vmlaq_lane_u32(a, b, v, lane) simde_vmlaq_u32((a), (b), simde_vdupq_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_lane_u32 + #define vmlaq_lane_u32(a, b, v, lane) simde_vmlaq_lane_u32((a), (b), (v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLA_LANE_H) */ +/* :: End simde/arm/neon/mla_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mla_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MLA_N_H) +#define SIMDE_ARM_NEON_MLA_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mul_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MUL_N_H) +#define SIMDE_ARM_NEON_MUL_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmul_n_f32(simde_float32x2_t a, simde_float32 b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_n_f32(a, b); + #else + return simde_vmul_f32(a, simde_vdup_n_f32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_n_f32 + #define vmul_n_f32(a, b) simde_vmul_n_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmul_n_f64(simde_float64x1_t a, simde_float64 b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmul_n_f64(a, b); + #else + return simde_vmul_f64(a, simde_vdup_n_f64(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmul_n_f64 + #define vmul_n_f64(a, b) simde_vmul_n_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmul_n_s16(simde_int16x4_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_n_s16(a, b); + #else + return simde_vmul_s16(a, simde_vdup_n_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_n_s16 + #define vmul_n_s16(a, b) simde_vmul_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmul_n_s32(simde_int32x2_t a, int32_t b) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_n_s32(a, b); + #else + return simde_vmul_s32(a, simde_vdup_n_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_n_s32 + #define vmul_n_s32(a, b) simde_vmul_n_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmul_n_u16(simde_uint16x4_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_n_u16(a, b); + #else + return simde_vmul_u16(a, simde_vdup_n_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_n_u16 + #define vmul_n_u16(a, b) simde_vmul_n_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmul_n_u32(simde_uint32x2_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmul_n_u32(a, b); + #else + return simde_vmul_u32(a, simde_vdup_n_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmul_n_u32 + #define vmul_n_u32(a, b) simde_vmul_n_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmulq_n_f32(simde_float32x4_t a, simde_float32 b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_n_f32(a, b); + #else + return simde_vmulq_f32(a, simde_vdupq_n_f32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_n_f32 + #define vmulq_n_f32(a, b) simde_vmulq_n_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmulq_n_f64(simde_float64x2_t a, simde_float64 b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmulq_n_f64(a, b); + #else + return simde_vmulq_f64(a, simde_vdupq_n_f64(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmulq_n_f64 + #define vmulq_n_f64(a, b) simde_vmulq_n_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmulq_n_s16(simde_int16x8_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_n_s16(a, b); + #else + return simde_vmulq_s16(a, simde_vdupq_n_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_n_s16 + #define vmulq_n_s16(a, b) simde_vmulq_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmulq_n_s32(simde_int32x4_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_n_s32(a, b); + #else + return simde_vmulq_s32(a, simde_vdupq_n_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_n_s32 + #define vmulq_n_s32(a, b) simde_vmulq_n_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmulq_n_u16(simde_uint16x8_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_n_u16(a, b); + #else + return simde_vmulq_u16(a, simde_vdupq_n_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_n_u16 + #define vmulq_n_u16(a, b) simde_vmulq_n_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmulq_n_u32(simde_uint32x4_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmulq_n_u32(a, b); + #else + return simde_vmulq_u32(a, simde_vdupq_n_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmulq_n_u32 + #define vmulq_n_u32(a, b) simde_vmulq_n_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MUL_N_H) */ +/* :: End simde/arm/neon/mul_n.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmla_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_n_f32(a, b, c); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_n_f32 + #define vmla_n_f32(a, b, c) simde_vmla_n_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmla_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_n_s16(a, b, c); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) && !defined(SIMDE_BUG_GCC_100762) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_n_s16 + #define vmla_n_s16(a, b, c) simde_vmla_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmla_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_n_s32(a, b, c); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_n_s32 + #define vmla_n_s32(a, b, c) simde_vmla_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmla_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_n_u16(a, b, c); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_n_u16 + #define vmla_n_u16(a, b, c) simde_vmla_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmla_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmla_n_u32(a, b, c); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmla_n_u32 + #define vmla_n_u32(a, b, c) simde_vmla_n_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmlaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_n_f32(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + return simde_vaddq_f32(simde_vmulq_n_f32(b, c), a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_n_f32 + #define vmlaq_n_f32(a, b, c) simde_vmlaq_n_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlaq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_n_s16(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + return simde_vaddq_s16(simde_vmulq_n_s16(b, c), a); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_n_s16 + #define vmlaq_n_s16(a, b, c) simde_vmlaq_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlaq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_n_s32(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + return simde_vaddq_s32(simde_vmulq_n_s32(b, c), a); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_n_s32 + #define vmlaq_n_s32(a, b, c) simde_vmlaq_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlaq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_n_u16(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + return simde_vaddq_u16(simde_vmulq_n_u16(b, c), a); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = 
simde_uint16x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_n_u16 + #define vmlaq_n_u16(a, b, c) simde_vmlaq_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlaq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_n_u32(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + return simde_vaddq_u32(simde_vmulq_n_u32(b, c), a); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (b_.values * c) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c) + a_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlaq_n_u32 + #define vmlaq_n_u32(a, b, c) simde_vmlaq_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLA_N_H) */ +/* :: End simde/arm/neon/mla_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlal.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MLAL_H) +#define SIMDE_ARM_NEON_MLAL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_s8(a, b, c); + #else + return simde_vmlaq_s16(a, simde_vmovl_s8(b), simde_vmovl_s8(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_s8 + #define vmlal_s8(a, b, c) simde_vmlal_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_s16(a, b, c); + #else + return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vmovl_s16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_s16 + #define vmlal_s16(a, b, c) simde_vmlal_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_s32(a, b, c); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(simde_vmovl_s32(b)), + c_ = simde_int64x2_to_private(simde_vmovl_s32(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_s32 + #define vmlal_s32(a, b, c) simde_vmlal_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_u8(a, b, c); + #else + return simde_vmlaq_u16(a, simde_vmovl_u8(b), simde_vmovl_u8(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_u8 + #define vmlal_u8(a, b, c) simde_vmlal_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlal_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_u16(a, b, c); + #else + return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vmovl_u16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_u16 + #define vmlal_u16(a, b, c) simde_vmlal_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_u32(a, b, c); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)), + c_ = simde_uint64x2_to_private(simde_vmovl_u32(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * 
c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_u32 + #define vmlal_u32(a, b, c) simde_vmlal_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLAL_H) */ +/* :: End simde/arm/neon/mlal.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlal_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) +#define SIMDE_ARM_NEON_MLAL_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlal_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_s8(a, b, c); + #else + return simde_vmlaq_s16(a, simde_vmovl_high_s8(b), simde_vmovl_high_s8(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_s8 + #define vmlal_high_s8(a, b, c) simde_vmlal_high_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_s16(a, b, c); + #else + return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_s16 + #define vmlal_high_s16(a, b, c) simde_vmlal_high_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_s32(a, b, c); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), + c_ = simde_int64x2_to_private(simde_vmovl_high_s32(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_s32 + #define vmlal_high_s32(a, b, c) simde_vmlal_high_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlal_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_u8(a, b, c); + #else + return simde_vmlaq_u16(a, simde_vmovl_high_u8(b), simde_vmovl_high_u8(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_u8 + #define vmlal_high_u8(a, b, c) simde_vmlal_high_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlal_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_u16(a, b, c); + #else + return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vmovl_high_u16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_u16 + #define vmlal_high_u16(a, b, c) simde_vmlal_high_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlal_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_u32(a, b, c); + #else + simde_uint64x2_private + r_, + a_ = 
simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), + c_ = simde_uint64x2_to_private(simde_vmovl_high_u32(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_u32 + #define vmlal_high_u32(a, b, c) simde_vmlal_high_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_H) */ +/* :: End simde/arm/neon/mlal_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlal_high_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H) +#define SIMDE_ARM_NEON_MLAL_HIGH_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_n_s16(a, b, c); + #else + return simde_vmlaq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_n_s16 + #define vmlal_high_n_s16(a, b, c) simde_vmlal_high_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_n_s32(a, b, c); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), + c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_n_s32 + #define vmlal_high_n_s32(a, b, c) simde_vmlal_high_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlal_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_n_u16(a, b, c); + #else + return simde_vmlaq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_n_u16 + #define vmlal_high_n_u16(a, b, c) simde_vmlal_high_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlal_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlal_high_n_u32(a, b, c); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), + c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_high_n_u32 + #define vmlal_high_n_u32(a, b, c) simde_vmlal_high_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLAL_HIGH_N_H) */ +/* :: End simde/arm/neon/mlal_high_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* 
e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlal_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLAL_LANE_H) +#define SIMDE_ARM_NEON_MLAL_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlal_lane_s16(a, b, v, lane) vmlal_lane_s16((a), (b), (v), (lane)) +#else + #define simde_vmlal_lane_s16(a, b, v, lane) simde_vmlal_s16((a), (b), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_lane_s16 + #define vmlal_lane_s16(a, b, c, lane) simde_vmlal_lane_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlal_lane_s32(a, b, v, lane) vmlal_lane_s32((a), (b), (v), (lane)) +#else + #define simde_vmlal_lane_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_lane_s32 + #define vmlal_lane_s32(a, b, c, lane) simde_vmlal_lane_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlal_lane_u16(a, b, v, lane) vmlal_lane_u16((a), (b), (v), (lane)) +#else + #define simde_vmlal_lane_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_lane_u16 + #define vmlal_lane_u16(a, b, c, lane) simde_vmlal_lane_u16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlal_lane_u32(a, b, v, lane) vmlal_lane_u32((a), (b), (v), (lane)) +#else + #define simde_vmlal_lane_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_lane_u32 + #define vmlal_lane_u32(a, b, c, lane) simde_vmlal_lane_u32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlal_laneq_s16(a, b, v, lane) vmlal_laneq_s16((a), (b), (v), (lane)) +#else + #define simde_vmlal_laneq_s16(a, b, v, lane) simde_vmlal_s16((a), (b), 
simde_vdup_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_laneq_s16 + #define vmlal_laneq_s16(a, b, c, lane) simde_vmlal_laneq_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlal_laneq_s32(a, b, v, lane) vmlal_laneq_s32((a), (b), (v), (lane)) +#else + #define simde_vmlal_laneq_s32(a, b, v, lane) simde_vmlal_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_laneq_s32 + #define vmlal_laneq_s32(a, b, c, lane) simde_vmlal_laneq_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlal_laneq_u16(a, b, v, lane) vmlal_laneq_u16((a), (b), (v), (lane)) +#else + #define simde_vmlal_laneq_u16(a, b, v, lane) simde_vmlal_u16((a), (b), simde_vdup_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_laneq_u16 + #define vmlal_laneq_u16(a, b, c, lane) simde_vmlal_laneq_u16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlal_laneq_u32(a, b, v, lane) vmlal_laneq_u32((a), (b), (v), (lane)) +#else + #define simde_vmlal_laneq_u32(a, b, v, lane) simde_vmlal_u32((a), (b), simde_vdup_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlal_laneq_u32 + #define vmlal_laneq_u32(a, b, c, lane) simde_vmlal_laneq_u32((a), (b), (c), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLAL_LANE_H) */ +/* :: End simde/arm/neon/mlal_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlal_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLAL_N_H) +#define SIMDE_ARM_NEON_MLAL_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlal_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_s16(a, b, c); + #else + return simde_vmlaq_s32(a, simde_vmovl_s16(b), simde_vdupq_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_s16 + #define vmlal_n_s16(a, b, c) simde_vmlal_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlal_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_s32(a, b, c); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(simde_vmovl_s32(b)), + c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_s32 + #define vmlal_n_s32(a, b, c) simde_vmlal_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlal_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_u16(a, b, c); + #else + return simde_vmlaq_u32(a, simde_vmovl_u16(b), simde_vdupq_n_u32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_u16 + #define vmlal_n_u16(a, b, c) simde_vmlal_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlal_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlal_n_u32(a, b, c); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vmovl_u32(b)), + c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = (b_.values * c_.values) + a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] * c_.values[i]) + a_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlal_n_u32 + #define vmlal_n_u32(a, b, c) simde_vmlal_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLAL_N_H) */ +/* :: End simde/arm/neon/mlal_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mls.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person 
+ * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLS_H) +#define SIMDE_ARM_NEON_MLS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmls_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_f32(a, b, c); + #else + return simde_vsub_f32(a, simde_vmul_f32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_f32 + #define vmls_f32(a, b, c) simde_vmls_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vmls_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmls_f64(a, b, c); + #else + return simde_vsub_f64(a, simde_vmul_f64(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_f64 + #define vmls_f64(a, b, c) simde_vmls_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vmls_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_s8(a, b, c); + #else + return simde_vsub_s8(a, simde_vmul_s8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_s8 + #define vmls_s8(a, b, c) simde_vmls_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmls_s16(simde_int16x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_s16(a, b, c); + #else + return simde_vsub_s16(a, simde_vmul_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_s16 + #define vmls_s16(a, b, c) simde_vmls_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmls_s32(simde_int32x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_s32(a, b, c); + #else + return simde_vsub_s32(a, simde_vmul_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_s32 + #define vmls_s32(a, b, c) simde_vmls_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vmls_u8(simde_uint8x8_t a, simde_uint8x8_t b, 
simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_u8(a, b, c); + #else + return simde_vsub_u8(a, simde_vmul_u8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_u8 + #define vmls_u8(a, b, c) simde_vmls_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmls_u16(simde_uint16x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_u16(a, b, c); + #else + return simde_vsub_u16(a, simde_vmul_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_u16 + #define vmls_u16(a, b, c) simde_vmls_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmls_u32(simde_uint32x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_u32(a, b, c); + #else + return simde_vsub_u32(a, simde_vmul_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_u32 + #define vmls_u32(a, b, c) simde_vmls_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmlsq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_f32(a, b, c); + #elif defined(SIMDE_X86_FMA_NATIVE) + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b), + c_ = simde_float32x4_to_private(c); + r_.m128 = _mm_fnmadd_ps(b_.m128, c_.m128, a_.m128); + return simde_float32x4_from_private(r_); + #else + return simde_vsubq_f32(a, simde_vmulq_f32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_f32 + #define vmlsq_f32(a, b, c) simde_vmlsq_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vmlsq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsq_f64(a, b, c); + #elif defined(SIMDE_X86_FMA_NATIVE) + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b), + c_ = simde_float64x2_to_private(c); + r_.m128d = _mm_fnmadd_pd(b_.m128d, c_.m128d, a_.m128d); + return simde_float64x2_from_private(r_); + #else + return simde_vsubq_f64(a, simde_vmulq_f64(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_f64 + #define vmlsq_f64(a, b, c) simde_vmlsq_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmlsq_s8(simde_int8x16_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_s8(a, b, c); + #else + return simde_vsubq_s8(a, simde_vmulq_s8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_s8 + #define vmlsq_s8(a, b, c) simde_vmlsq_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlsq_s16(simde_int16x8_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_s16(a, b, c); + #else + return simde_vsubq_s16(a, simde_vmulq_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_s16 + #define vmlsq_s16(a, b, c) simde_vmlsq_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsq_s32(simde_int32x4_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_s32(a, b, c); + #else + return 
simde_vsubq_s32(a, simde_vmulq_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_s32 + #define vmlsq_s32(a, b, c) simde_vmlsq_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmlsq_u8(simde_uint8x16_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_u8(a, b, c); + #else + return simde_vsubq_u8(a, simde_vmulq_u8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_u8 + #define vmlsq_u8(a, b, c) simde_vmlsq_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlsq_u16(simde_uint16x8_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_u16(a, b, c); + #else + return simde_vsubq_u16(a, simde_vmulq_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_u16 + #define vmlsq_u16(a, b, c) simde_vmlsq_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsq_u32(simde_uint32x4_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_u32(a, b, c); + #else + return simde_vsubq_u32(a, simde_vmulq_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_u32 + #define vmlsq_u32(a, b, c) simde_vmlsq_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLS_H) */ +/* :: End simde/arm/neon/mls.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mls_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLS_N_H) +#define SIMDE_ARM_NEON_MLS_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vmls_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32 c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_n_f32(a, b, c); + #else + return simde_vmls_f32(a, b, simde_vdup_n_f32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_n_f32 + #define vmls_n_f32(a, b, c) simde_vmls_n_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vmls_n_s16(simde_int16x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_n_s16(a, b, c); + #else + return simde_vmls_s16(a, b, simde_vdup_n_s16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_n_s16 + #define vmls_n_s16(a, b, c) simde_vmls_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vmls_n_s32(simde_int32x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_n_s32(a, b, c); + #else + return simde_vmls_s32(a, b, simde_vdup_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_n_s32 + #define vmls_n_s32(a, b, c) simde_vmls_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vmls_n_u16(simde_uint16x4_t a, simde_uint16x4_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_n_u16(a, b, c); + #else + return simde_vmls_u16(a, b, simde_vdup_n_u16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_n_u16 + #define vmls_n_u16(a, b, c) simde_vmls_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vmls_n_u32(simde_uint32x2_t a, simde_uint32x2_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmls_n_u32(a, b, c); + #else + return simde_vmls_u32(a, b, simde_vdup_n_u32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmls_n_u32 + #define vmls_n_u32(a, b, c) simde_vmls_n_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vmlsq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32 c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_n_f32(a, b, c); + #else + return simde_vmlsq_f32(a, b, simde_vdupq_n_f32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_n_f32 + #define vmlsq_n_f32(a, b, c) simde_vmlsq_n_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlsq_n_s16(simde_int16x8_t a, simde_int16x8_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_n_s16(a, b, c); + #else + return simde_vmlsq_s16(a, b, simde_vdupq_n_s16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_n_s16 + #define vmlsq_n_s16(a, b, c) simde_vmlsq_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsq_n_s32(simde_int32x4_t a, simde_int32x4_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
+ return vmlsq_n_s32(a, b, c); + #else + return simde_vmlsq_s32(a, b, simde_vdupq_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_n_s32 + #define vmlsq_n_s32(a, b, c) simde_vmlsq_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlsq_n_u16(simde_uint16x8_t a, simde_uint16x8_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_n_u16(a, b, c); + #else + return simde_vmlsq_u16(a, b, simde_vdupq_n_u16(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_n_u16 + #define vmlsq_n_u16(a, b, c) simde_vmlsq_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsq_n_u32(simde_uint32x4_t a, simde_uint32x4_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsq_n_u32(a, b, c); + #else + return simde_vmlsq_u32(a, b, simde_vdupq_n_u32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsq_n_u32 + #define vmlsq_n_u32(a, b, c) simde_vmlsq_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLS_N_H) */ +/* :: End simde/arm/neon/mls_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlsl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLSL_H) +#define SIMDE_ARM_NEON_MLSL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlsl_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_s8(a, b, c); + #else + return simde_vsubq_s16(a, simde_vmull_s8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_s8 + #define vmlsl_s8(a, b, c) simde_vmlsl_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_s16(a, b, c); + #else + return simde_vsubq_s32(a, simde_vmull_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_s16 + #define vmlsl_s16(a, b, c) simde_vmlsl_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_s32(a, b, c); + #else + return simde_vsubq_s64(a, simde_vmull_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_s32 + #define vmlsl_s32(a, b, c) simde_vmlsl_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlsl_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_u8(a, b, c); + #else + return simde_vsubq_u16(a, simde_vmull_u8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_u8 + #define vmlsl_u8(a, b, c) simde_vmlsl_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsl_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_u16(a, b, c); + #else + return simde_vsubq_u32(a, simde_vmull_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_u16 + #define vmlsl_u16(a, b, c) simde_vmlsl_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlsl_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_u32(a, b, c); + #else + return simde_vsubq_u64(a, simde_vmull_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_u32 + #define vmlsl_u32(a, b, c) simde_vmlsl_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLSL_H) */ +/* :: End simde/arm/neon/mlsl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlsl_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the 
Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_H) +#define SIMDE_ARM_NEON_MLSL_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mull_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MULL_HIGH_H) +#define SIMDE_ARM_NEON_MULL_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmull_high_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmull_high_s8(a, b); + #else + return simde_vmulq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_high_s8 + #define vmull_high_s8(a, b) simde_vmull_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmull_high_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmull_high_s16(a, b); + #else + return simde_vmulq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_high_s16 + #define vmull_high_s16(a, b) simde_vmull_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmull_high_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmull_high_s32(a, b); + #else + return simde_x_vmulq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_high_s32 + #define vmull_high_s32(a, b) simde_vmull_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmull_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmull_high_u8(a, b); + #else + return simde_vmulq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_high_u8 + #define vmull_high_u8(a, b) simde_vmull_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmull_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmull_high_u16(a, b); + #else + return simde_vmulq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_high_u16 + #define vmull_high_u16(a, b) simde_vmull_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmull_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmull_high_u32(a, b); + #else + return simde_x_vmulq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_high_u32 + #define vmull_high_u32(a, b) simde_vmull_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MULL_HIGH_H) */ +/* :: End simde/arm/neon/mull_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmlsl_high_s8(simde_int16x8_t a, simde_int8x16_t b, simde_int8x16_t c) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_s8(a, b, c); + #else + return simde_vsubq_s16(a, simde_vmull_high_s8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_s8 + #define vmlsl_high_s8(a, b, c) simde_vmlsl_high_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_s16(a, b, c); + #else + return simde_vsubq_s32(a, simde_vmull_high_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_s16 + #define vmlsl_high_s16(a, b, c) simde_vmlsl_high_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_s32(a, b, c); + #else + return simde_vsubq_s64(a, simde_vmull_high_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_s32 + #define vmlsl_high_s32(a, b, c) simde_vmlsl_high_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmlsl_high_u8(simde_uint16x8_t a, simde_uint8x16_t b, simde_uint8x16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_u8(a, b, c); + #else + return simde_vsubq_u16(a, simde_vmull_high_u8(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_u8 + #define vmlsl_high_u8(a, b, c) simde_vmlsl_high_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsl_high_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_u16(a, b, c); + #else + return simde_vsubq_u32(a, simde_vmull_high_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_u16 + #define vmlsl_high_u16(a, b, c) simde_vmlsl_high_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlsl_high_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_u32(a, b, c); + #else + return simde_vsubq_u64(a, simde_vmull_high_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_u32 + #define vmlsl_high_u32(a, b, c) simde_vmlsl_high_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_H) */ +/* :: End simde/arm/neon/mlsl_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlsl_high_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_MLSL_HIGH_N_H) +#define SIMDE_ARM_NEON_MLSL_HIGH_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsl_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_n_s16(a, b, c); + #else + return simde_vmlsq_s32(a, simde_vmovl_high_s16(b), simde_vdupq_n_s32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_n_s16 + #define vmlsl_high_n_s16(a, b, c) simde_vmlsl_high_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlsl_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_n_s32(a, b, c); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(simde_vmovl_high_s32(b)), + c_ = simde_int64x2_to_private(simde_vdupq_n_s64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - (b_.values * c_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - (b_.values[i] * c_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_n_s32 + #define vmlsl_high_n_s32(a, b, c) simde_vmlsl_high_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsl_high_n_u16(simde_uint32x4_t a, simde_uint16x8_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_n_u16(a, b, c); + #else + return simde_vmlsq_u32(a, simde_vmovl_high_u16(b), simde_vdupq_n_u32(c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_n_u16 + #define vmlsl_high_n_u16(a, b, c) simde_vmlsl_high_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlsl_high_n_u32(simde_uint64x2_t a, simde_uint32x4_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmlsl_high_n_u32(a, b, c); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(simde_vmovl_high_u32(b)), + c_ = simde_uint64x2_to_private(simde_vdupq_n_u64(c)); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - (b_.values * c_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - 
(b_.values[i] * c_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_high_n_u32 + #define vmlsl_high_n_u32(a, b, c) simde_vmlsl_high_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLSL_HIGH_N_H) */ +/* :: End simde/arm/neon/mlsl_high_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlsl_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLSL_LANE_H) +#define SIMDE_ARM_NEON_MLSL_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsl_lane_s16(a, b, v, lane) vmlsl_lane_s16((a), (b), (v), (lane)) +#else + #define simde_vmlsl_lane_s16(a, b, v, lane) simde_vmlsl_s16((a), (b), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_lane_s16 + #define vmlsl_lane_s16(a, b, c, lane) simde_vmlsl_lane_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsl_lane_s32(a, b, v, lane) vmlsl_lane_s32((a), (b), (v), (lane)) +#else + #define simde_vmlsl_lane_s32(a, b, v, lane) simde_vmlsl_s32((a), (b), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_lane_s32 + #define vmlsl_lane_s32(a, b, c, lane) simde_vmlsl_lane_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsl_lane_u16(a, b, v, lane) vmlsl_lane_u16((a), (b), (v), (lane)) +#else + #define simde_vmlsl_lane_u16(a, b, v, lane) simde_vmlsl_u16((a), (b), simde_vdup_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_lane_u16 + #define vmlsl_lane_u16(a, b, c, lane) simde_vmlsl_lane_u16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmlsl_lane_u32(a, b, v, lane) vmlsl_lane_u32((a), (b), (v), (lane)) +#else + #define simde_vmlsl_lane_u32(a, b, v, lane) 
simde_vmlsl_u32((a), (b), simde_vdup_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_lane_u32 + #define vmlsl_lane_u32(a, b, c, lane) simde_vmlsl_lane_u32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsl_laneq_s16(a, b, v, lane) vmlsl_laneq_s16((a), (b), (v), (lane)) +#else + #define simde_vmlsl_laneq_s16(a, b, v, lane) simde_vmlsl_s16((a), (b), simde_vdup_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_laneq_s16 + #define vmlsl_laneq_s16(a, b, c, lane) simde_vmlsl_laneq_s16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsl_laneq_s32(a, b, v, lane) vmlsl_laneq_s32((a), (b), (v), (lane)) +#else + #define simde_vmlsl_laneq_s32(a, b, v, lane) simde_vmlsl_s32((a), (b), simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_laneq_s32 + #define vmlsl_laneq_s32(a, b, c, lane) simde_vmlsl_laneq_s32((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsl_laneq_u16(a, b, v, lane) vmlsl_laneq_u16((a), (b), (v), (lane)) +#else + #define simde_vmlsl_laneq_u16(a, b, v, lane) simde_vmlsl_u16((a), (b), simde_vdup_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_laneq_u16 + #define vmlsl_laneq_u16(a, b, c, lane) simde_vmlsl_laneq_u16((a), (b), (c), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmlsl_laneq_u32(a, b, v, lane) vmlsl_laneq_u32((a), (b), (v), (lane)) +#else + #define simde_vmlsl_laneq_u32(a, b, v, lane) simde_vmlsl_u32((a), (b), simde_vdup_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmlsl_laneq_u32 + #define vmlsl_laneq_u32(a, b, c, lane) simde_vmlsl_laneq_u32((a), (b), (c), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLSL_LANE_H) */ +/* :: End simde/arm/neon/mlsl_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mlsl_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MLSL_N_H) +#define SIMDE_ARM_NEON_MLSL_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mull_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_MULL_N_H) +#define SIMDE_ARM_NEON_MULL_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmull_n_s16(simde_int16x4_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_n_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vmulq_n_s32(simde_vmovl_s16(a), b); + #else + simde_int32x4_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av; + SIMDE_CONVERT_VECTOR_(av, a_.values); + r_.values = av * b; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) * HEDLEY_STATIC_CAST(int32_t, b); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_n_s16 + #define vmull_n_s16(a, b) simde_vmull_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmull_n_s32(simde_int32x2_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_n_s32(a, b); + #else + simde_int64x2_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av; + SIMDE_CONVERT_VECTOR_(av, a_.values); + r_.values = av * b; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) * 
HEDLEY_STATIC_CAST(int64_t, b); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_n_s32 + #define vmull_n_s32(a, b) simde_vmull_n_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmull_n_u16(simde_uint16x4_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_n_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vmulq_n_u32(simde_vmovl_u16(a), b); + #else + simde_uint32x4_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100761) + __typeof__(r_.values) av; + SIMDE_CONVERT_VECTOR_(av, a_.values); + r_.values = av * b; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint32_t, b); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_n_u16 + #define vmull_n_u16(a, b) simde_vmull_n_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmull_n_u32(simde_uint32x2_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmull_n_u32(a, b); + #else + simde_uint64x2_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) av; + SIMDE_CONVERT_VECTOR_(av, a_.values); + r_.values = av * b; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) * HEDLEY_STATIC_CAST(uint64_t, b); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_n_u32 + #define vmull_n_u32(a, b) simde_vmull_n_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MULL_H) */ +/* :: End simde/arm/neon/mull_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmlsl_n_s16(simde_int32x4_t a, simde_int16x4_t b, int16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_s16(a, b, c); + #else + return simde_vsubq_s32(a, simde_vmull_n_s16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_s16 + #define vmlsl_n_s16(a, b, c) simde_vmlsl_n_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vmlsl_n_s32(simde_int64x2_t a, simde_int32x2_t b, int32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_s32(a, b, c); + #else + return simde_vsubq_s64(a, simde_vmull_n_s32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_s32 + #define vmlsl_n_s32(a, b, c) simde_vmlsl_n_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmlsl_n_u16(simde_uint32x4_t a, simde_uint16x4_t b, uint16_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_u16(a, b, c); + #else + return simde_vsubq_u32(a, simde_vmull_n_u16(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_u16 
+ #define vmlsl_n_u16(a, b, c) simde_vmlsl_n_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vmlsl_n_u32(simde_uint64x2_t a, simde_uint32x2_t b, uint32_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlsl_n_u32(a, b, c); + #else + return simde_vsubq_u64(a, simde_vmull_n_u32(b, c)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmlsl_n_u32 + #define vmlsl_n_u32(a, b, c) simde_vmlsl_n_u32((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MLSL_N_H) */ +/* :: End simde/arm/neon/mlsl_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/movn_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) +#define SIMDE_ARM_NEON_MOVN_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_s16(r, a); + #else + return simde_vcombine_s8(r, simde_vmovn_s16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_s16 + #define vmovn_high_s16(r, a) simde_vmovn_high_s16((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_s32(r, a); + #else + return simde_vcombine_s16(r, simde_vmovn_s32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_s32 + #define vmovn_high_s32(r, a) simde_vmovn_high_s32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_s64(r, a); + #else + return simde_vcombine_s32(r, simde_vmovn_s64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_s64 + #define vmovn_high_s64(r, a) simde_vmovn_high_s64((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_u16(r, a); + #else + return simde_vcombine_u8(r, simde_vmovn_u16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_u16 + #define vmovn_high_u16(r, a) simde_vmovn_high_u16((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_u32(r, a); + #else + return simde_vcombine_u16(r, simde_vmovn_u32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_u32 + #define vmovn_high_u32(r, a) simde_vmovn_high_u32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vmovn_high_u64(r, a); + #else + return simde_vcombine_u32(r, simde_vmovn_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmovn_high_u64 + #define vmovn_high_u64(r, a) simde_vmovn_high_u64((r), (a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MOVN_HIGH_H) */ +/* :: End simde/arm/neon/movn_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* 
e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/mull_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_MULL_LANE_H) +#define SIMDE_ARM_NEON_MULL_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmull_lane_s16(a, v, lane) vmull_lane_s16((a), (v), (lane)) +#else + #define simde_vmull_lane_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_lane_s16 + #define vmull_lane_s16(a, v, lane) simde_vmull_lane_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmull_lane_s32(a, v, lane) vmull_lane_s32((a), (v), (lane)) +#else + #define simde_vmull_lane_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_lane_s32 + #define vmull_lane_s32(a, v, lane) simde_vmull_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmull_lane_u16(a, v, lane) vmull_lane_u16((a), (v), (lane)) +#else + #define simde_vmull_lane_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_lane_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_lane_u16 + #define vmull_lane_u16(a, v, lane) simde_vmull_lane_u16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vmull_lane_u32(a, v, lane) vmull_lane_u32((a), (v), (lane)) +#else + #define simde_vmull_lane_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_lane_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vmull_lane_u32 + #define vmull_lane_u32(a, v, lane) simde_vmull_lane_u32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmull_laneq_s16(a, v, lane) vmull_laneq_s16((a), (v), (lane)) +#else + #define simde_vmull_laneq_s16(a, v, lane) simde_vmull_s16((a), simde_vdup_laneq_s16((v), (lane))) 
+#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_laneq_s16 + #define vmull_laneq_s16(a, v, lane) simde_vmull_laneq_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmull_laneq_s32(a, v, lane) vmull_laneq_s32((a), (v), (lane)) +#else + #define simde_vmull_laneq_s32(a, v, lane) simde_vmull_s32((a), simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_laneq_s32 + #define vmull_laneq_s32(a, v, lane) simde_vmull_laneq_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmull_laneq_u16(a, v, lane) vmull_laneq_u16((a), (v), (lane)) +#else + #define simde_vmull_laneq_u16(a, v, lane) simde_vmull_u16((a), simde_vdup_laneq_u16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_laneq_u16 + #define vmull_laneq_u16(a, v, lane) simde_vmull_laneq_u16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vmull_laneq_u32(a, v, lane) vmull_laneq_u32((a), (v), (lane)) +#else + #define simde_vmull_laneq_u32(a, v, lane) simde_vmull_u32((a), simde_vdup_laneq_u32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vmull_laneq_u32 + #define vmull_laneq_u32(a, v, lane) simde_vmull_laneq_u32((a), (v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_MULL_LANE_H) */ +/* :: End simde/arm/neon/mull_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/neg.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_NEG_H) +#define SIMDE_ARM_NEON_NEG_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vnegd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(9,0,0)) + return vnegd_s64(a); + #else + return -a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vnegd_s64 + #define vnegd_s64(a) simde_vnegd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vneg_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vneg_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vneg_f32 + #define vneg_f32(a) simde_vneg_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vneg_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vneg_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vneg_f64 + #define vneg_f64(a) simde_vneg_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vneg_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vneg_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vneg_s8 + #define vneg_s8(a) simde_vneg_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vneg_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vneg_s16(a); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vneg_s16 + #define vneg_s16(a) simde_vneg_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vneg_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vneg_s32(a); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_GCC_100762) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vneg_s32 + #define vneg_s32(a) simde_vneg_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vneg_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vneg_s64(a); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vnegd_s64(a_.values[i]); + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vneg_s64 + #define vneg_s64(a) simde_vneg_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vnegq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vnegq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return vec_neg(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_neg(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128 = _mm_castsi128_ps(_mm_xor_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), _mm_castps_si128(a_.m128))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vnegq_f32 + #define vnegq_f32(a) simde_vnegq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vnegq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vnegq_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return vec_neg(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_neg(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_castsi128_pd(_mm_xor_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), _mm_castpd_si128(a_.m128d))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vnegq_f64 + #define vnegq_f64(a) simde_vnegq_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vnegq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vnegq_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return vec_neg(a); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_neg(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi8(_mm_setzero_si128(), a_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = 
-a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vnegq_s8 + #define vnegq_s8(a) simde_vnegq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vnegq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vnegq_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return vec_neg(a); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_neg(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi16(_mm_setzero_si128(), a_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vnegq_s16 + #define vnegq_s16(a) simde_vnegq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vnegq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vnegq_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return vec_neg(a); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_neg(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi32(_mm_setzero_si128(), a_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = -(a_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vnegq_s32 + #define vnegq_s32(a) simde_vnegq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vnegq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vnegq_s64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return vec_neg(a); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_neg(a_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi64(_mm_setzero_si128(), a_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = -a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vnegd_s64(a_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vnegq_s64 + #define vnegq_s64(a) simde_vnegq_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_NEG_H) */ +/* :: End simde/arm/neon/neg.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/orn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * 
obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ORN_H) +#define SIMDE_ARM_NEON_ORN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/orr.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_ORR_H) +#define SIMDE_ARM_NEON_ORR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vorr_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_s8 + #define vorr_s8(a, b) simde_vorr_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vorr_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_s16 + #define vorr_s16(a, b) simde_vorr_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vorr_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_s32 + #define vorr_s32(a, b) simde_vorr_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vorr_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_s64 + #define vorr_s64(a, b) simde_vorr_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vorr_u8(simde_uint8x8_t a, 
simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_u8 + #define vorr_u8(a, b) simde_vorr_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vorr_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_u16 + #define vorr_u16(a, b) simde_vorr_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vorr_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_u32 + #define vorr_u32(a, b) simde_vorr_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vorr_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorr_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_or_si64(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorr_u64 + #define vorr_u64(a, b) simde_vorr_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vorrq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_or(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_s8 + #define vorrq_s8(a, b) simde_vorrq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vorrq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_or(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_s16 + #define vorrq_s16(a, b) simde_vorrq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vorrq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_or(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_s32 + #define vorrq_s32(a, b) simde_vorrq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vorrq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_or(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_s64 + #define vorrq_s64(a, b) simde_vorrq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vorrq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vorrq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_or(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_u8 + #define vorrq_u8(a, b) simde_vorrq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vorrq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_or(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_u16 + #define vorrq_u16(a, b) simde_vorrq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vorrq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_or(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_u32 + #define vorrq_u32(a, b) simde_vorrq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vorrq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorrq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_or(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_or_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | b_.values[i]; + } + #endif + + return 
simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorrq_u64 + #define vorrq_u64(a, b) simde_vorrq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ORR_H) */ +/* :: End simde/arm/neon/orr.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vorn_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_s8(a, b); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_s8 + #define vorn_s8(a, b) simde_vorn_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vorn_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_s16(a, b); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_s16 + #define vorn_s16(a, b) simde_vorn_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vorn_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_s32(a, b); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_s32 + #define vorn_s32(a, b) simde_vorn_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vorn_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_s64(a, b); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_s64 + #define vorn_s64(a, b) simde_vorn_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vorn_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_u8(a, b); + #else + 
simde_uint8x8_private + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_u8 + #define vorn_u8(a, b) simde_vorn_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vorn_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_u16(a, b); + #else + simde_uint16x4_private + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_u16 + #define vorn_u16(a, b) simde_vorn_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vorn_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_u32(a, b); + #else + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_u32 + #define vorn_u32(a, b) simde_vorn_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vorn_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vorn_u64(a, b); + #else + simde_uint64x1_private + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vorn_u64 + #define vorn_u64(a, b) simde_vorn_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vornq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_s8 + #define vornq_s8(a, b) simde_vornq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_int16x8_t +simde_vornq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_s16 + #define vornq_s16(a, b) simde_vornq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vornq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_s32 + #define vornq_s32(a, b) simde_vornq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vornq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_s64 + #define vornq_s64(a, b) simde_vornq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vornq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_uint8x16_private + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_u8 + #define vornq_u8(a, b) simde_vornq_u8((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vornq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_uint16x8_private + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_u16 + #define vornq_u16(a, b) simde_vornq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vornq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_uint32x4_private + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_u32 + #define vornq_u32(a, b) simde_vornq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vornq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vornq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_orc(a, b); + #else + simde_uint64x2_private + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, a_.m128i, 0xf3); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values | ~(b_.values); + #else + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] | ~b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vornq_u64 + #define vornq_u64(a, b) simde_vornq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ORN_H) */ +/* :: End simde/arm/neon/orn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/padal.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: 
+ * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_PADAL_H) +#define SIMDE_ARM_NEON_PADAL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vpadal_s8(simde_int16x4_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadal_s8(a, b); + #else + return simde_vadd_s16(a, simde_vpaddl_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadal_s8 + #define vpadal_s8(a, b) simde_vpadal_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vpadal_s16(simde_int32x2_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadal_s16(a, b); + #else + return simde_vadd_s32(a, simde_vpaddl_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadal_s16 + #define vpadal_s16(a, b) simde_vpadal_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vpadal_s32(simde_int64x1_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadal_s32(a, b); + #else + return simde_vadd_s64(a, simde_vpaddl_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadal_s32 + #define vpadal_s32(a, b) simde_vpadal_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vpadal_u8(simde_uint16x4_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadal_u8(a, b); + #else + return simde_vadd_u16(a, simde_vpaddl_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadal_u8 + #define vpadal_u8(a, b) simde_vpadal_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vpadal_u16(simde_uint32x2_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadal_u16(a, b); + #else + return simde_vadd_u32(a, simde_vpaddl_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadal_u16 + #define vpadal_u16(a, b) simde_vpadal_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vpadal_u32(simde_uint64x1_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadal_u32(a, b); + #else + return simde_vadd_u64(a, simde_vpaddl_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadal_u32 + #define vpadal_u32(a, b) simde_vpadal_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t 
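+/* The q-suffixed pairwise add-and-accumulate-long variants below follow the
+ * same pattern as the 64-bit ones above: each adjacent pair of lanes in b is
+ * widened, summed, and accumulated into the corresponding lane of a.
+ * A rough scalar sketch of lane 0 for the s8 case (illustrative only):
+ *   r[0] = a[0] + (int16_t) b[0] + (int16_t) b[1]
+ */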
+simde_vpadalq_s8(simde_int16x8_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadalq_s8(a, b); + #else + return simde_vaddq_s16(a, simde_vpaddlq_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadalq_s8 + #define vpadalq_s8(a, b) simde_vpadalq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vpadalq_s16(simde_int32x4_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadalq_s16(a, b); + #else + return simde_vaddq_s32(a, simde_vpaddlq_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadalq_s16 + #define vpadalq_s16(a, b) simde_vpadalq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vpadalq_s32(simde_int64x2_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadalq_s32(a, b); + #else + return simde_vaddq_s64(a, simde_vpaddlq_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadalq_s32 + #define vpadalq_s32(a, b) simde_vpadalq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vpadalq_u8(simde_uint16x8_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadalq_u8(a, b); + #else + return simde_vaddq_u16(a, simde_vpaddlq_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadalq_u8 + #define vpadalq_u8(a, b) simde_vpadalq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vpadalq_u16(simde_uint32x4_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadalq_u16(a, b); + #else + return simde_vaddq_u32(a, simde_vpaddlq_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadalq_u16 + #define vpadalq_u16(a, b) simde_vpadalq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vpadalq_u32(simde_uint64x2_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpadalq_u32(a, b); + #else + return simde_vaddq_u64(a, simde_vpaddlq_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpadalq_u32 + #define vpadalq_u32(a, b) simde_vpadalq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_PADAL_H */ +/* :: End simde/arm/neon/padal.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/pmax.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_PMAX_H) +#define SIMDE_ARM_NEON_PMAX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vpmaxs_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxs_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + return (a_.values[0] > a_.values[1]) ? a_.values[0] : a_.values[1]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpmaxs_f32 + #define vpmaxs_f32(a) simde_vpmaxs_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vpmaxqd_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxqd_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + return (a_.values[0] > a_.values[1]) ? 
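+    /* Fold the two lanes down to the larger one; e.g. an input holding
+     * { 1.5, 4.0 } yields 4.0 (values chosen purely for illustration). */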
a_.values[0] : a_.values[1]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpmaxqd_f64 + #define vpmaxqd_f64(a) simde_vpmaxqd_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vpmax_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_f32(a, b); + #else + return simde_vmax_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_f32 + #define vpmax_f32(a, b) simde_vpmax_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vpmax_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_s8(a, b); + #else + return simde_vmax_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_s8 + #define vpmax_s8(a, b) simde_vpmax_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vpmax_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_s16(a, b); + #else + return simde_vmax_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_s16 + #define vpmax_s16(a, b) simde_vpmax_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vpmax_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_s32(a, b); + #else + return simde_vmax_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_s32 + #define vpmax_s32(a, b) simde_vpmax_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vpmax_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_u8(a, b); + #else + return simde_vmax_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_u8 + #define vpmax_u8(a, b) simde_vpmax_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vpmax_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_u16(a, b); + #else + return simde_vmax_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_u16 + #define vpmax_u16(a, b) simde_vpmax_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vpmax_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmax_u32(a, b); + #else + return simde_vmax_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmax_u32 + #define vpmax_u32(a, b) simde_vpmax_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vpmaxq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_f32(a, b); + #else + return simde_vmaxq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_f32 + #define vpmaxq_f32(a, b) simde_vpmaxq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vpmaxq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_f64(a, b); 
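+  /* The portable fallbacks below express a pairwise max as an unzip followed
+   * by an element-wise max: vuzp1q gathers the even-indexed lanes of {a, b},
+   * vuzp2q gathers the odd-indexed lanes, and the lane-wise maximum of the
+   * two reproduces max(a[0],a[1]), ..., max(b[n-2],b[n-1]).
+   * A rough 4-lane illustration (values for illustration only):
+   *   a = {1,9,3,4}, b = {5,6,7,8}
+   *   uzp1 -> {1,3,5,7}, uzp2 -> {9,4,6,8}, max -> {9,4,6,8} (= vpmaxq(a, b)) */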
+ #else + return simde_vmaxq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_f64 + #define vpmaxq_f64(a, b) simde_vpmaxq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vpmaxq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_s8(a, b); + #else + return simde_vmaxq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_s8 + #define vpmaxq_s8(a, b) simde_vpmaxq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vpmaxq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_s16(a, b); + #else + return simde_vmaxq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_s16 + #define vpmaxq_s16(a, b) simde_vpmaxq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vpmaxq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_s32(a, b); + #else + return simde_vmaxq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_s32 + #define vpmaxq_s32(a, b) simde_vpmaxq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vpmaxq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_u8(a, b); + #else + return simde_vmaxq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_u8 + #define vpmaxq_u8(a, b) simde_vpmaxq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vpmaxq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_u16(a, b); + #else + return simde_vmaxq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_u16 + #define vpmaxq_u16(a, b) simde_vpmaxq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vpmaxq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmaxq_u32(a, b); + #else + return simde_vmaxq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmaxq_u32 + #define vpmaxq_u32(a, b) simde_vpmaxq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_PMAX_H) */ +/* :: End simde/arm/neon/pmax.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/pmin.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial 
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_PMIN_H) +#define SIMDE_ARM_NEON_PMIN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vpmins_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpmins_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + return (a_.values[0] < a_.values[1]) ? a_.values[0] : a_.values[1]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpmins_f32 + #define vpmins_f32(a) simde_vpmins_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vpminqd_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminqd_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + return (a_.values[0] < a_.values[1]) ? 
a_.values[0] : a_.values[1]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpminqd_f64 + #define vpminqd_f64(a) simde_vpminqd_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vpmin_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_f32(a, b); + #else + return simde_vmin_f32(simde_vuzp1_f32(a, b), simde_vuzp2_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_f32 + #define vpmin_f32(a, b) simde_vpmin_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vpmin_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_s8(a, b); + #else + return simde_vmin_s8(simde_vuzp1_s8(a, b), simde_vuzp2_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_s8 + #define vpmin_s8(a, b) simde_vpmin_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vpmin_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_s16(a, b); + #else + return simde_vmin_s16(simde_vuzp1_s16(a, b), simde_vuzp2_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_s16 + #define vpmin_s16(a, b) simde_vpmin_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vpmin_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_s32(a, b); + #else + return simde_vmin_s32(simde_vuzp1_s32(a, b), simde_vuzp2_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_s32 + #define vpmin_s32(a, b) simde_vpmin_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vpmin_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_u8(a, b); + #else + return simde_vmin_u8(simde_vuzp1_u8(a, b), simde_vuzp2_u8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_u8 + #define vpmin_u8(a, b) simde_vpmin_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vpmin_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_u16(a, b); + #else + return simde_vmin_u16(simde_vuzp1_u16(a, b), simde_vuzp2_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_u16 + #define vpmin_u16(a, b) simde_vpmin_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vpmin_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vpmin_u32(a, b); + #else + return simde_vmin_u32(simde_vuzp1_u32(a, b), simde_vuzp2_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpmin_u32 + #define vpmin_u32(a, b) simde_vpmin_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vpminq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_f32(a, b); + #elif defined(SIMDE_X86_SSE3_NATIVE) + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_X86_SSE3_NATIVE) + __m128 e = _mm_shuffle_ps(a_.m128, b_.m128, _MM_SHUFFLE(2, 0, 2, 0)); + __m128 o = _mm_shuffle_ps(a_.m128, b_.m128, _MM_SHUFFLE(3, 1, 3, 1)); + r_.m128 = _mm_min_ps(e, o); + #endif + + return simde_float32x4_from_private(r_); 
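+    /* The two shuffles above play the role of vuzp1q/vuzp2q: the (2,0,2,0)
+     * shuffle collects the even-indexed lanes {a0, a2, b0, b2} and (3,1,3,1)
+     * the odd-indexed lanes {a1, a3, b1, b3}, so _mm_min_ps(e, o) yields the
+     * pairwise minima. E.g. (values for illustration) a = {4,1,2,3},
+     * b = {9,8,7,6} gives e = {4,2,9,7}, o = {1,3,8,6}, min = {1,2,8,6}. */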
+ #else + return simde_vminq_f32(simde_vuzp1q_f32(a, b), simde_vuzp2q_f32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_f32 + #define vpminq_f32(a, b) simde_vpminq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vpminq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_f64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128d e = _mm_unpacklo_pd(a_.m128d, b_.m128d); + __m128d o = _mm_unpackhi_pd(a_.m128d, b_.m128d); + r_.m128d = _mm_min_pd(e, o); + #endif + + return simde_float64x2_from_private(r_); + #else + return simde_vminq_f64(simde_vuzp1q_f64(a, b), simde_vuzp2q_f64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vpminq_f64 + #define vpminq_f64(a, b) simde_vpminq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vpminq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_s8(a, b); + #else + return simde_vminq_s8(simde_vuzp1q_s8(a, b), simde_vuzp2q_s8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_s8 + #define vpminq_s8(a, b) simde_vpminq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vpminq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_s16(a, b); + #else + return simde_vminq_s16(simde_vuzp1q_s16(a, b), simde_vuzp2q_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_s16 + #define vpminq_s16(a, b) simde_vpminq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vpminq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_s32(a, b); + #else + return simde_vminq_s32(simde_vuzp1q_s32(a, b), simde_vuzp2q_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_s32 + #define vpminq_s32(a, b) simde_vpminq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vpminq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_u8(a, b); + #else + return simde_vminq_u8(simde_vuzp1q_u8(a, b), simde_vuzp2q_u8(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_u8 + #define vpminq_u8(a, b) simde_vpminq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vpminq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_u16(a, b); + #else + return simde_vminq_u16(simde_vuzp1q_u16(a, b), simde_vuzp2q_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_u16 + #define vpminq_u16(a, b) simde_vpminq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vpminq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vpminq_u32(a, b); + #else + return simde_vminq_u32(simde_vuzp1q_u32(a, b), simde_vuzp2q_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vpminq_u32 + #define vpminq_u32(a, b) simde_vpminq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_PMIN_H) */ +/* :: 
End simde/arm/neon/pmin.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qabs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_QABS_H) +#define SIMDE_ARM_NEON_QABS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqabsb_s8(int8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqabsb_s8(a); + #else + return a == INT8_MIN ? INT8_MAX : (a < 0 ? -a : a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqabsb_s8 + #define vqabsb_s8(a) simde_vqabsb_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqabsh_s16(int16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqabsh_s16(a); + #else + return a == INT16_MIN ? INT16_MAX : (a < 0 ? -a : a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqabsh_s16 + #define vqabsh_s16(a) simde_vqabsh_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqabss_s32(int32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqabss_s32(a); + #else + return a == INT32_MIN ? INT32_MAX : (a < 0 ? -a : a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqabss_s32 + #define vqabss_s32(a) simde_vqabss_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqabsd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqabsd_s64(a); + #else + return a == INT64_MIN ? INT64_MAX : (a < 0 ? 
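+  /* Saturating absolute value: the only input whose plain negation would
+   * overflow is the most negative value, which is clamped to the maximum.
+   * For example vqabsb_s8(-128) == 127, while vqabsb_s8(-5) == 5. */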
-a : a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqabsd_s64 + #define vqabsd_s64(a) simde_vqabsd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqabs_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqabs_s8(a); + #else + simde_int8x8_t tmp = simde_vabs_s8(a); + return simde_vadd_s8(tmp, simde_vshr_n_s8(tmp, 7)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqabs_s8 + #define vqabs_s8(a) simde_vqabs_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqabs_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqabs_s16(a); + #else + simde_int16x4_t tmp = simde_vabs_s16(a); + return simde_vadd_s16(tmp, simde_vshr_n_s16(tmp, 15)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqabs_s16 + #define vqabs_s16(a) simde_vqabs_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqabs_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqabs_s32(a); + #else + simde_int32x2_t tmp = simde_vabs_s32(a); + return simde_vadd_s32(tmp, simde_vshr_n_s32(tmp, 31)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqabs_s32 + #define vqabs_s32(a) simde_vqabs_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqabs_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqabs_s64(a); + #else + simde_int64x1_t tmp = simde_vabs_s64(a); + return simde_vadd_s64(tmp, simde_vshr_n_s64(tmp, 63)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqabs_s64 + #define vqabs_s64(a) simde_vqabs_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqabsq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqabsq_s8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(simde_vabsq_s8(a)); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epu8(a_.m128i, _mm_set1_epi8(INT8_MAX)); + #else + r_.m128i = + _mm_add_epi8( + a_.m128i, + _mm_cmpgt_epi8(_mm_setzero_si128(), a_.m128i) + ); + #endif + + return simde_int8x16_from_private(r_); + #else + simde_int8x16_t tmp = simde_vabsq_s8(a); + return + simde_vbslq_s8( + simde_vreinterpretq_u8_s8(simde_vshrq_n_s8(tmp, 7)), + simde_vmvnq_s8(tmp), + tmp + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqabsq_s8 + #define vqabsq_s8(a) simde_vqabsq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqabsq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqabsq_s16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(simde_vabsq_s16(a)); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epu16(a_.m128i, _mm_set1_epi16(INT16_MAX)); + #else + r_.m128i = + _mm_add_epi16( + a_.m128i, + _mm_srai_epi16(a_.m128i, 15) + ); + #endif + + return simde_int16x8_from_private(r_); + #else + simde_int16x8_t tmp = simde_vabsq_s16(a); + return + simde_vbslq_s16( + simde_vreinterpretq_u16_s16(simde_vshrq_n_s16(tmp, 15)), + simde_vmvnq_s16(tmp), + tmp + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqabsq_s16 + #define vqabsq_s16(a) simde_vqabsq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqabsq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqabsq_s32(a); + #elif 
defined(SIMDE_X86_SSE2_NATIVE) + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(simde_vabsq_s32(a)); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_min_epu32(a_.m128i, _mm_set1_epi32(INT32_MAX)); + #else + r_.m128i = + _mm_add_epi32( + a_.m128i, + _mm_srai_epi32(a_.m128i, 31) + ); + #endif + + return simde_int32x4_from_private(r_); + #else + simde_int32x4_t tmp = simde_vabsq_s32(a); + return + simde_vbslq_s32( + simde_vreinterpretq_u32_s32(simde_vshrq_n_s32(tmp, 31)), + simde_vmvnq_s32(tmp), + tmp + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqabsq_s32 + #define vqabsq_s32(a) simde_vqabsq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqabsq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqabsq_s64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(simde_vabsq_s64(a)); + + #if defined(SIMDE_X86_SSE4_2_NATIVE) + r_.m128i = + _mm_add_epi64( + a_.m128i, + _mm_cmpgt_epi64(_mm_setzero_si128(), a_.m128i) + ); + #else + r_.m128i = + _mm_add_epi64( + a_.m128i, + _mm_shuffle_epi32( + _mm_srai_epi32(a_.m128i, 31), + _MM_SHUFFLE(3, 3, 1, 1) + ) + ); + #endif + + return simde_int64x2_from_private(r_); + #else + simde_int64x2_t tmp = simde_vabsq_s64(a); + return + simde_vbslq_s64( + simde_vreinterpretq_u64_s64(simde_vshrq_n_s64(tmp, 63)), + simde_vreinterpretq_s64_s32(simde_vmvnq_s32(simde_vreinterpretq_s32_s64(tmp))), + tmp + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqabsq_s64 + #define vqabsq_s64(a) simde_vqabsq_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QABS_H) */ +/* :: End simde/arm/neon/qabs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QADD_H) +#define SIMDE_ARM_NEON_QADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqaddb_s8(int8_t a, int8_t b) { + return simde_math_adds_i8(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqaddb_s8 + #define vqaddb_s8(a, b) simde_vqaddb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqaddh_s16(int16_t a, int16_t b) { + return simde_math_adds_i16(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqaddh_s16 + #define vqaddh_s16(a, b) simde_vqaddh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqadds_s32(int32_t a, int32_t b) { + return simde_math_adds_i32(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqadds_s32 + #define vqadds_s32(a, b) simde_vqadds_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqaddd_s64(int64_t a, int64_t b) { + return simde_math_adds_i64(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqaddd_s64 + #define vqaddd_s64(a, b) simde_vqaddd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqaddb_u8(uint8_t a, uint8_t b) { + return simde_math_adds_u8(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqaddb_u8 + #define vqaddb_u8(a, b) simde_vqaddb_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqaddh_u16(uint16_t a, uint16_t b) { + return simde_math_adds_u16(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqaddh_u16 + #define vqaddh_u16(a, b) simde_vqaddh_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqadds_u32(uint32_t a, uint32_t b) { + return simde_math_adds_u32(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqadds_u32 + #define vqadds_u32(a, b) simde_vqadds_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqaddd_u64(uint64_t a, uint64_t b) { + return simde_math_adds_u64(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqaddd_u64 + #define vqaddd_u64(a, b) simde_vqaddd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqadd_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_adds_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + uint8_t au SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint8_t bu SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint8_t ru SIMDE_VECTOR(8) = au + bu; + 
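+      /* Branchless signed saturation, done in unsigned arithmetic to avoid
+       * undefined signed overflow: after the shift-add below, au holds the
+       * value to saturate to (INT8_MAX when a >= 0, INT8_MIN when a < 0,
+       * modulo 2^8). The mask m is all-ones for lanes whose signed sum ru
+       * did not overflow, so each lane picks ru or the saturated value.
+       * Rough scalar check (illustrative): a = 100, b = 100 -> ru = 200,
+       * which wraps to -56 as signed, so the lane saturates to 127. */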
+ au = (au >> 7) + INT8_MAX; + + uint8_t m SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddb_s8(a_.values[i], b_.values[i]); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_s8 + #define vqadd_s8(a, b) simde_vqadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqadd_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_adds_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + uint16_t au SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint16_t bu SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint16_t ru SIMDE_VECTOR(8) = au + bu; + + au = (au >> 15) + INT16_MAX; + + uint16_t m SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddh_s16(a_.values[i], b_.values[i]); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_s16 + #define vqadd_s16(a, b) simde_vqadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqadd_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + uint32_t au SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint32_t bu SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint32_t ru SIMDE_VECTOR(8) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqadds_s32(a_.values[i], b_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_s32 + #define vqadd_s32(a, b) simde_vqadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqadd_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + uint64_t au SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint64_t 
bu SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint64_t ru SIMDE_VECTOR(8) = au + bu; + + au = (au >> 63) + INT64_MAX; + + uint64_t m SIMDE_VECTOR(8) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_s64 + #define vqadd_s64(a, b) simde_vqadd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_adds_pu8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddb_u8(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_u8 + #define vqadd_u8(a, b) simde_vqadd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_adds_pu16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddh_u16(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_u16 + #define vqadd_u16(a, b) simde_vqadd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && !defined(SIMDE_BUG_GCC_100762) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqadds_u32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_u32 + #define vqadd_u32(a, b) simde_vqadd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqadd_u64(simde_uint64x1_t a, 
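+/* For the unsigned variants the saturation test is simpler: the wrapped sum
+ * r = a + b is smaller than a exactly when the addition overflowed, and that
+ * comparison produces an all-ones lane mask, so OR-ing it in clamps the lane
+ * to the type maximum. Illustrative 8-bit case: 200 + 100 wraps to 44, and
+ * 44 < 200, so the lane becomes 0xFF (255). */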
simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqadd_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqadd_u64 + #define vqadd_u64(a, b) simde_vqadd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SCALAR) + uint8_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint8_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint8_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 7) + INT8_MAX; + + uint8_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddb_s8(a_.values[i], b_.values[i]); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_s8 + #define vqaddq_s8(a, b) simde_vqaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SCALAR) + uint16_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint16_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint16_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 15) + INT16_MAX; + + uint16_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddh_s16(a_.values[i], b_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_s16 + #define vqaddq_s16(a, b) simde_vqaddq_s16((a), 
(b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/56544654/501126 */ + const __m128i int_max = _mm_set1_epi32(INT32_MAX); + + /* normal result (possibly wraps around) */ + const __m128i sum = _mm_add_epi32(a_.m128i, b_.m128i); + + /* If result saturates, it has the same sign as both a and b */ + const __m128i sign_bit = _mm_srli_epi32(a_.m128i, 31); /* shift sign to lowest bit */ + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i overflow = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, sum, 0x42); + #else + const __m128i sign_xor = _mm_xor_si128(a_.m128i, b_.m128i); + const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.m128i, sum)); + #endif + + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_mask_add_epi32(sum, _mm_movepi32_mask(overflow), int_max, sign_bit); + #else + const __m128i saturated = _mm_add_epi32(int_max, sign_bit); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(sum), + _mm_castsi128_ps(saturated), + _mm_castsi128_ps(overflow) + ) + ); + #else + const __m128i overflow_mask = _mm_srai_epi32(overflow, 31); + r_.m128i = + _mm_or_si128( + _mm_and_si128(overflow_mask, saturated), + _mm_andnot_si128(overflow_mask, sum) + ); + #endif + #endif + #elif defined(SIMDE_VECTOR_SCALAR) + uint32_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint32_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint32_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqadds_s32(a_.values[i], b_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_s32 + #define vqaddq_s32(a, b) simde_vqaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqaddq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + /* https://stackoverflow.com/a/56544654/501126 */ + const __m128i int_max = _mm_set1_epi64x(INT64_MAX); + + /* normal result (possibly wraps around) */ + const __m128i sum = _mm_add_epi64(a_.m128i, b_.m128i); + + /* If result saturates, it has the same sign as both a and b */ + const __m128i sign_bit = _mm_srli_epi64(a_.m128i, 63); /* shift sign to lowest bit */ + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i overflow = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, sum, 0x42); + #else + const __m128i sign_xor = _mm_xor_si128(a_.m128i, b_.m128i); + const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.m128i, sum)); + #endif + 
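+    /* Signed overflow happened iff a and b share a sign that differs from
+     * the sign of the wrapped sum; the xor/andnot pair above (or the 0x42
+     * ternary-logic form) leaves that condition in the top bit of each lane.
+     * E.g. (illustrative) INT64_MAX + 1 flips the sign, so that lane is
+     * replaced by the saturated value built from int_max and sign_bit. */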
+ #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.m128i = _mm_mask_add_epi64(sum, _mm_movepi64_mask(overflow), int_max, sign_bit); + #else + const __m128i saturated = _mm_add_epi64(int_max, sign_bit); + + r_.m128i = + _mm_castpd_si128( + _mm_blendv_pd( + _mm_castsi128_pd(sum), + _mm_castsi128_pd(saturated), + _mm_castsi128_pd(overflow) + ) + ); + #endif + #elif defined(SIMDE_VECTOR_SCALAR) + uint64_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint64_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint64_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 63) + INT64_MAX; + + uint64_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_s64 + #define vqaddq_s64(a, b) simde_vqaddq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epu8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddb_u8(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_u8 + #define vqaddq_u8(a, b) simde_vqaddq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epu16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddh_u16(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_u16 + #define vqaddq_u16(a, b) simde_vqaddq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__AVX512VL__) + __m128i notb = _mm_ternarylogic_epi32(b_.m128i, b_.m128i, b_.m128i, 0x0f); + #else + __m128i notb = _mm_xor_si128(b_.m128i, _mm_set1_epi32(~INT32_C(0))); + #endif + r_.m128i = + _mm_add_epi32( + b_.m128i, + _mm_min_epu32( + a_.m128i, + notb + ) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i sum = _mm_add_epi32(a_.m128i, b_.m128i); + const __m128i i32min = _mm_set1_epi32(INT32_MIN); + a_.m128i = _mm_xor_si128(a_.m128i, i32min); + r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, _mm_xor_si128(i32min, sum)), sum); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqadds_u32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_u32 + #define vqaddq_u32(a, b) simde_vqaddq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vqaddq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqaddd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqaddq_u64 + #define vqaddq_u64(a, b) simde_vqaddq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QADD_H) */ +/* :: End simde/arm/neon/qadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qdmulh.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULH_H) +#define SIMDE_ARM_NEON_QDMULH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qdmull.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +/* Implementation notes (seanptmaher): + * + * It won't overflow during the multiplication, it'll ever only double + * the bit length, we only care about the overflow during the shift, + * so do the multiplication, then the shift with saturation + */ + +#if !defined(SIMDE_ARM_NEON_QDMULL_H) +#define SIMDE_ARM_NEON_QDMULL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqdmullh_s16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmullh_s16(a, b); + #else + int32_t mul = (HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b)); + return (simde_math_labs(mul) & (1 << 30)) ? ((mul < 0) ? INT32_MIN : INT32_MAX) : mul << 1; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmullh_s16 + #define vqdmullh_s16(a, b) simde_vqdmullh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqdmulls_s32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmulls_s32(a, b); + #else + int64_t mul = (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b)); + return ((a > 0 ? a : -a) & (HEDLEY_STATIC_CAST(int64_t, 1) << 62)) ? 
((mul < 0) ? INT64_MIN : INT64_MAX) : mul << 1; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulls_s16 + #define vqdmulls_s16(a, b) simde_vqdmulls_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmull_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmull_s16(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int32x4_private r_; + simde_int16x8_private v_ = simde_int16x8_to_private(simde_vcombine_s16(a, b)); + + const v128_t lo = wasm_i32x4_extend_low_i16x8(v_.v128); + const v128_t hi = wasm_i32x4_extend_high_i16x8(v_.v128); + + const v128_t product = wasm_i32x4_mul(lo, hi); + const v128_t uflow = wasm_i32x4_lt(product, wasm_i32x4_splat(-INT32_C(0x40000000))); + const v128_t oflow = wasm_i32x4_gt(product, wasm_i32x4_splat( INT32_C(0x3FFFFFFF))); + r_.v128 = wasm_i32x4_shl(product, 1); + r_.v128 = wasm_v128_bitselect(wasm_i32x4_splat(INT32_MIN), r_.v128, uflow); + r_.v128 = wasm_v128_bitselect(wasm_i32x4_splat(INT32_MAX), r_.v128, oflow); + + return simde_int32x4_from_private(r_); + #else + simde_int32x4_private r_; + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmullh_s16(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmull_s16 + #define vqdmull_s16(a, b) simde_vqdmull_s16((a), (b)) +#endif +SIMDE_FUNCTION_ATTRIBUTES + +simde_int64x2_t +simde_vqdmull_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmull_s32(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int64x2_private r_; + simde_int32x4_private v_ = simde_int32x4_to_private(simde_vcombine_s32(a, b)); + + const v128_t lo = wasm_i64x2_extend_low_i32x4(v_.v128); + const v128_t hi = wasm_i64x2_extend_high_i32x4(v_.v128); + + const v128_t product = wasm_i64x2_mul(lo, hi); + const v128_t uflow = wasm_i64x2_lt(product, wasm_i64x2_splat(-INT64_C(0x4000000000000000))); + const v128_t oflow = wasm_i64x2_gt(product, wasm_i64x2_splat( INT64_C(0x3FFFFFFFFFFFFFFF))); + r_.v128 = wasm_i64x2_shl(product, 1); + r_.v128 = wasm_v128_bitselect(wasm_i64x2_splat(INT64_MIN), r_.v128, uflow); + r_.v128 = wasm_v128_bitselect(wasm_i64x2_splat(INT64_MAX), r_.v128, oflow); + + return simde_int64x2_from_private(r_); + #else + simde_int64x2_private r_; + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmulls_s32(a_.values[i], b_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmull_s32 + #define vqdmull_s32(a, b) simde_vqdmull_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULL_H) */ +/* :: End simde/arm/neon/qdmull.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqdmulhs_s32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqdmulhs_s32(a, b); + #else + int64_t tmp = simde_vqdmulls_s32(a, b); 
+ return HEDLEY_STATIC_CAST(int32_t, tmp >> 32); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhs_s32 + #define vqdmulhs_s32(a) simde_vqdmulhs_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqdmulh_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmulh_s16(a, b); + #else + simde_int16x4_private r_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !(HEDLEY_GCC_VERSION_CHECK(12,1,0) && defined(SIMDE_ARCH_ZARCH)) + simde_int16x8_private tmp_ = + simde_int16x8_to_private( + simde_vreinterpretq_s16_s32( + simde_vqdmull_s16(a, b) + ) + ); + + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); + #else + simde_int32x4_private tmp = simde_int32x4_to_private(simde_vqdmull_s16(a, b)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, tmp.values[i] >> 16); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_s16 + #define vqdmulh_s16(a, b) simde_vqdmulh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqdmulh_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmulh_s32(a, b); + #else + simde_int32x2_private r_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !(HEDLEY_GCC_VERSION_CHECK(12,1,0) && defined(SIMDE_ARCH_ZARCH)) + simde_int32x4_private tmp_ = + simde_int32x4_to_private( + simde_vreinterpretq_s32_s64( + simde_vqdmull_s32(a, b) + ) + ); + + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqdmulhs_s32(a_.values[i], b_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_s32 + #define vqdmulh_s32(a, b) simde_vqdmulh_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqdmulhq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmulhq_s16(a, b); + #else + return simde_vcombine_s16(simde_vqdmulh_s16(simde_vget_low_s16(a), simde_vget_low_s16(b)), + simde_vqdmulh_s16(simde_vget_high_s16(a), simde_vget_high_s16(b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_s16 + #define vqdmulhq_s16(a, b) simde_vqdmulhq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqdmulhq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqdmulhq_s32(a, b); + #else + return simde_vcombine_s32(simde_vqdmulh_s32(simde_vget_low_s32(a), simde_vget_low_s32(b)), + simde_vqdmulh_s32(simde_vget_high_s32(a), simde_vget_high_s32(b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_s32 + #define vqdmulhq_s32(a, b) simde_vqdmulhq_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULH_H) */ +/* :: End simde/arm/neon/qdmulh.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qdmulh_lane.h :: */ +/* SPDX-License-Identifier: MIT + * 
+ * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULH_LANE_H) +#define SIMDE_ARM_NEON_QDMULH_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qdmulh_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QDMULH_N_H) +#define SIMDE_ARM_NEON_QDMULH_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulh_n_s16(a, b) vqdmulh_n_s16((a), (b)) +#else + #define simde_vqdmulh_n_s16(a, b) simde_vqdmulh_s16((a), simde_vdup_n_s16(b)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_n_s16 + #define vqdmulh_n_s16(a, b) simde_vqdmulh_n_s16((a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulh_n_s32(a, b) vqdmulh_n_s32((a), (b)) +#else + #define simde_vqdmulh_n_s32(a, b) simde_vqdmulh_s32((a), simde_vdup_n_s32(b)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_n_s32 + #define vqdmulh_n_s32(a, b) simde_vqdmulh_n_s32((a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulhq_n_s16(a, b) vqdmulhq_n_s16((a), (b)) +#else + #define simde_vqdmulhq_n_s16(a, b) simde_vqdmulhq_s16((a), simde_vdupq_n_s16(b)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_n_s16 + #define vqdmulhq_n_s16(a, b) simde_vqdmulhq_n_s16((a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulhq_n_s32(a, b) vqdmulhq_n_s32((a), (b)) +#else + #define simde_vqdmulhq_n_s32(a, b) simde_vqdmulhq_s32((a), simde_vdupq_n_s32(b)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_n_s32 + #define vqdmulhq_n_s32(a, b) simde_vqdmulhq_n_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULH_N_H) */ +/* :: End simde/arm/neon/qdmulh_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulh_lane_s16(a, v, lane) vqdmulh_lane_s16((a), (v), (lane)) +#else + #define simde_vqdmulh_lane_s16(a, v, lane) \ + simde_vqdmulh_n_s16((a), simde_vget_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_lane_s16 + #define vqdmulh_lane_s16(a, v, lane) simde_vqdmulh_lane_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulh_lane_s32(a, v, lane) vqdmulh_lane_s32((a), (v), (lane)) +#else + #define simde_vqdmulh_lane_s32(a, v, lane) \ + simde_vqdmulh_n_s32((a), simde_vget_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_lane_s32 + #define vqdmulh_lane_s32(a, v, lane) simde_vqdmulh_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulhq_lane_s16(a, v, lane) vqdmulhq_lane_s16((a), (v), (lane)) +#else + #define simde_vqdmulhq_lane_s16(a, v, lane) \ + simde_vqdmulhq_n_s16((a), simde_vget_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_lane_s16 + #define vqdmulhq_lane_s16(a, v, lane) simde_vqdmulhq_lane_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqdmulhq_lane_s32(a, v, lane) vqdmulhq_lane_s32((a), (v), (lane)) +#else + 
#define simde_vqdmulhq_lane_s32(a, v, lane) \ + simde_vqdmulhq_n_s32((a), simde_vget_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_lane_s32 + #define vqdmulhq_lane_s32(a, v, lane) simde_vqdmulhq_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulh_laneq_s16(a, v, lane) vqdmulh_laneq_s16((a), (v), (lane)) +#else + #define simde_vqdmulh_laneq_s16(a, v, lane) \ + simde_vqdmulh_n_s16((a), simde_vgetq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_laneq_s16 + #define vqdmulh_laneq_s16(a, v, lane) simde_vqdmulh_laneq_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulh_laneq_s32(a, v, lane) vqdmulh_laneq_s32((a), (v), (lane)) +#else + #define simde_vqdmulh_laneq_s32(a, v, lane) \ + simde_vqdmulh_n_s32((a), simde_vgetq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulh_laneq_s32 + #define vqdmulh_laneq_s32(a, v, lane) simde_vqdmulh_laneq_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulhq_laneq_s16(a, v, lane) vqdmulhq_laneq_s16((a), (v), (lane)) +#else + #define simde_vqdmulhq_laneq_s16(a, v, lane) \ + simde_vqdmulhq_n_s16((a), simde_vgetq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_laneq_s16 + #define vqdmulhq_laneq_s16(a, v, lane) simde_vqdmulhq_laneq_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqdmulhq_laneq_s32(a, v, lane) vqdmulhq_laneq_s32((a), (v), (lane)) +#else + #define simde_vqdmulhq_laneq_s32(a, v, lane) \ + simde_vqdmulhq_n_s32((a), simde_vgetq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhq_laneq_s32 + #define vqdmulhq_laneq_s32(a, v, lane) simde_vqdmulhq_laneq_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vqdmulhs_lane_s32(a, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqdmulhs_lane_s32((a), (v), (lane))) + #else + #define simde_vqdmulhs_lane_s32(a, v, lane) vqdmulhs_lane_s32(a, v, lane) + #endif +#else + #define simde_vqdmulhs_lane_s32(a, v, lane) \ + simde_vqdmulhs_s32((a), simde_vget_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhs_lane_s32 + #define vqdmulhs_lane_s32(a, v, lane) simde_vqdmulhs_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vqdmulhs_laneq_s32(a, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqdmulhs_laneq_s32((a), (v), (lane))) + #else + #define simde_vqdmulhs_laneq_s32(a, v, lane) vqdmulhs_laneq_s32(a, v, lane) + #endif +#else + #define simde_vqdmulhs_laneq_s32(a, v, lane) \ + simde_vqdmulhs_s32((a), simde_vgetq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqdmulhs_laneq_s32 + #define vqdmulhs_laneq_s32(a, v, lane) simde_vqdmulhs_laneq_s32((a), (v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QDMULH_LANE_H) */ +/* :: End simde/arm/neon/qdmulh_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* 
e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qrdmulh.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QRDMULH_H) +#define SIMDE_ARM_NEON_QRDMULH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqrdmulhh_s16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqrdmulhh_s16(a, b); + #else + return HEDLEY_STATIC_CAST(int16_t, (((1 << 15) + ((HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b)))) << 1)) >> 16) & 0xffff); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhh_s16 + #define vqrdmulhh_s16(a, b) simde_vqrdmulhh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqrdmulhs_s32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqrdmulhs_s32(a, b); + #else + return HEDLEY_STATIC_CAST(int32_t, (((HEDLEY_STATIC_CAST(int64_t, 1) << 31) + ((HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b)))) << 1)) >> 32) & 0xffffffff); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhs_s32 + #define vqrdmulhs_s32(a, b) simde_vqrdmulhs_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqrdmulh_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulh_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_s16 + #define vqrdmulh_s16(a, b) simde_vqrdmulh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqrdmulh_s32(simde_int32x2_t a, 
simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulh_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_s32 + #define vqrdmulh_s32(a, b) simde_vqrdmulh_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqrdmulhq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulhq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + /* https://github.com/WebAssembly/simd/pull/365 */ + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqrdmulhq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i y = _mm_mulhrs_epi16(a_.m128i, b_.m128i); + __m128i tmp = _mm_cmpeq_epi16(y, _mm_set1_epi16(INT16_MAX)); + r_.m128i = _mm_xor_si128(y, tmp); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i prod_lo = _mm_mullo_epi16(a_.m128i, b_.m128i); + const __m128i prod_hi = _mm_mulhi_epi16(a_.m128i, b_.m128i); + const __m128i tmp = + _mm_add_epi16( + _mm_avg_epu16( + _mm_srli_epi16(prod_lo, 14), + _mm_setzero_si128() + ), + _mm_add_epi16(prod_hi, prod_hi) + ); + r_.m128i = + _mm_xor_si128( + tmp, + _mm_cmpeq_epi16(_mm_set1_epi16(INT16_MAX), tmp) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_s16 + #define vqrdmulhq_s16(a, b) simde_vqrdmulhq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqrdmulhq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulhq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_s32 + #define vqrdmulhq_s32(a, b) simde_vqrdmulhq_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRDMULH_H) */ +/* :: End simde/arm/neon/qrdmulh.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qrdmulh_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_QRDMULH_LANE_H) +#define SIMDE_ARM_NEON_QRDMULH_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vqrdmulhs_lane_s32(a, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqrdmulhs_lane_s32((a), (v), (lane))) + #else + #define simde_vqrdmulhs_lane_s32(a, v, lane) vqrdmulhs_lane_s32((a), (v), (lane)) + #endif +#else + #define simde_vqrdmulhs_lane_s32(a, v, lane) simde_vqrdmulhs_s32((a), simde_vget_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhs_lane_s32 + #define vqrdmulhs_lane_s32(a, v, lane) simde_vqrdmulhs_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) + #define simde_vqrdmulhs_laneq_s32(a, v, lane) \ + SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vqrdmulhs_laneq_s32((a), (v), (lane))) + #else + #define simde_vqrdmulhs_laneq_s32(a, v, lane) vqrdmulhs_laneq_s32((a), (v), (lane)) + #endif +#else + #define simde_vqrdmulhs_laneq_s32(a, v, lane) simde_vqrdmulhs_s32((a), simde_vgetq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhs_laneq_s32 + #define vqrdmulhs_laneq_s32(a, v, lane) simde_vqrdmulhs_laneq_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrdmulh_lane_s16(a, v, lane) vqrdmulh_lane_s16((a), (v), (lane)) +#else + #define simde_vqrdmulh_lane_s16(a, v, lane) simde_vqrdmulh_s16((a), simde_vdup_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_lane_s16 + #define vqrdmulh_lane_s16(a, v, lane) simde_vqrdmulh_lane_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrdmulh_lane_s32(a, v, lane) vqrdmulh_lane_s32((a), (v), (lane)) +#else + #define simde_vqrdmulh_lane_s32(a, v, lane) simde_vqrdmulh_s32((a), simde_vdup_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_lane_s32 + #define vqrdmulh_lane_s32(a, v, lane) simde_vqrdmulh_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrdmulhq_lane_s16(a, v, lane) vqrdmulhq_lane_s16((a), (v), (lane)) +#else + #define simde_vqrdmulhq_lane_s16(a, v, lane) 
simde_vqrdmulhq_s16((a), simde_vdupq_lane_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_lane_s16 + #define vqrdmulhq_lane_s16(a, v, lane) simde_vqrdmulhq_lane_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrdmulhq_lane_s32(a, v, lane) vqrdmulhq_lane_s32((a), (v), (lane)) +#else + #define simde_vqrdmulhq_lane_s32(a, v, lane) simde_vqrdmulhq_s32((a), simde_vdupq_lane_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_lane_s32 + #define vqrdmulhq_lane_s32(a, v, lane) simde_vqrdmulhq_lane_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrdmulh_laneq_s16(a, v, lane) vqrdmulh_laneq_s16((a), (v), (lane)) +#else + #define simde_vqrdmulh_laneq_s16(a, v, lane) simde_vqrdmulh_s16((a), simde_vdup_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_laneq_s16 + #define vqrdmulh_laneq_s16(a, v, lane) simde_vqrdmulh_laneq_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrdmulh_laneq_s32(a, v, lane) vqrdmulh_laneq_s32((a), (v), (lane)) +#else + #define simde_vqrdmulh_laneq_s32(a, v, lane) simde_vqrdmulh_s32((a), simde_vdup_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_laneq_s32 + #define vqrdmulh_laneq_s32(a, v, lane) simde_vqrdmulh_laneq_s32((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrdmulhq_laneq_s16(a, v, lane) vqrdmulhq_laneq_s16((a), (v), (lane)) +#else + #define simde_vqrdmulhq_laneq_s16(a, v, lane) simde_vqrdmulhq_s16((a), simde_vdupq_laneq_s16((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_laneq_s16 + #define vqrdmulhq_laneq_s16(a, v, lane) simde_vqrdmulhq_laneq_s16((a), (v), (lane)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrdmulhq_laneq_s32(a, v, lane) vqrdmulhq_laneq_s32((a), (v), (lane)) +#else + #define simde_vqrdmulhq_laneq_s32(a, v, lane) simde_vqrdmulhq_s32((a), simde_vdupq_laneq_s32((v), (lane))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_laneq_s32 + #define vqrdmulhq_laneq_s32(a, v, lane) simde_vqrdmulhq_laneq_s32((a), (v), (lane)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRDMULH_LANE_H) */ +/* :: End simde/arm/neon/qrdmulh_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qrdmulh_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QRDMULH_N_H) +#define SIMDE_ARM_NEON_QRDMULH_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqrdmulh_n_s16(simde_int16x4_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulh_n_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_n_s16 + #define vqrdmulh_n_s16(a, b) simde_vqrdmulh_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqrdmulh_n_s32(simde_int32x2_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulh_n_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulh_n_s32 + #define vqrdmulh_n_s32(a, b) simde_vqrdmulh_n_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqrdmulhq_n_s16(simde_int16x8_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulhq_n_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhh_s16(a_.values[i], b); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_n_s16 + #define vqrdmulhq_n_s16(a, b) simde_vqrdmulhq_n_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqrdmulhq_n_s32(simde_int32x4_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrdmulhq_n_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrdmulhs_s32(a_.values[i], b); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrdmulhq_n_s32 + #define vqrdmulhq_n_s32(a, b) simde_vqrdmulhq_n_s32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRDMULH_H) */ +/* :: End simde/arm/neon/qrdmulh_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/arm/neon/qrshrn_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QRSHRN_N_H) +#define SIMDE_ARM_NEON_QRSHRN_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rshr_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_RSHR_N_H) +#define SIMDE_ARM_NEON_RSHR_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/tst.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_TST_H) +#define SIMDE_ARM_NEON_TST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vtstd_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vtstd_s64(a, b)); + #else + return ((a & b) != 0) ? 
UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtstd_s64 + #define vtstd_s64(a, b) simde_vtstd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vtstd_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint64_t, vtstd_u64(a, b)); + #else + return ((a & b) != 0) ? UINT64_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtstd_u64 + #define vtstd_u64(a, b) simde_vtstd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vtstq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtstq_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvnq_u8(simde_vceqzq_s8(simde_vandq_s8(a, b))); + #else + simde_int8x16_private + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + simde_uint8x16_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i8x16_splat(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtstq_s8 + #define vtstq_s8(a, b) simde_vtstq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vtstq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtstq_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvnq_u16(simde_vceqzq_s16(simde_vandq_s16(a, b))); + #else + simde_int16x8_private + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + simde_uint16x8_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i16x8_splat(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtstq_s16 + #define vtstq_s16(a, b) simde_vtstq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vtstq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtstq_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvnq_u32(simde_vceqzq_s32(simde_vandq_s32(a, b))); + #else + simde_int32x4_private + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + simde_uint32x4_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i32x4_splat(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtstq_s32 + #define vtstq_s32(a, b) simde_vtstq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vtstq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtstq_s64(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vceqzq_u64(simde_vceqzq_s64(simde_vandq_s64(a, b))); + #else + simde_int64x2_private + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + simde_uint64x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vtstd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtstq_s64 + #define vtstq_s64(a, b) simde_vtstq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vtstq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtstq_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvnq_u8(simde_vceqzq_u8(simde_vandq_u8(a, b))); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i8x16_splat(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtstq_u8 + #define vtstq_u8(a, b) simde_vtstq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vtstq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtstq_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvnq_u16(simde_vceqzq_u16(simde_vandq_u16(a, b))); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i16x8_splat(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtstq_u16 + #define vtstq_u16(a, b) simde_vtstq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vtstq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtstq_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvnq_u32(simde_vceqzq_u32(simde_vandq_u32(a, b))); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_ne(wasm_v128_and(a_.v128, b_.v128), wasm_i32x4_splat(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtstq_u32 + #define vtstq_u32(a, b) simde_vtstq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vtstq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtstq_u64(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vceqzq_u64(simde_vceqzq_u64(simde_vandq_u64(a, b))); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vtstd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtstq_u64 + #define vtstq_u64(a, b) simde_vtstq_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtst_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtst_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvn_u8(simde_vceqz_s8(simde_vand_s8(a, b))); + #else + simde_int8x8_private + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + simde_uint8x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtst_s8 + #define vtst_s8(a, b) simde_vtst_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vtst_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtst_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvn_u16(simde_vceqz_s16(simde_vand_s16(a, b))); + #else + simde_int16x4_private + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + simde_uint16x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtst_s16 + #define vtst_s16(a, b) simde_vtst_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vtst_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtst_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvn_u32(simde_vceqz_s32(simde_vand_s32(a, b))); + #else + simde_int32x2_private + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtst_s32 + #define vtst_s32(a, b) simde_vtst_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vtst_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtst_s64(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vceqz_u64(simde_vceqz_s64(simde_vand_s64(a, b))); + #else + simde_int64x1_private + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vtstd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtst_s64 + #define vtst_s64(a, b) simde_vtst_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtst_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtst_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvn_u8(simde_vceqz_u8(simde_vand_u8(a, b))); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT8_MAX : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtst_u8 + #define vtst_u8(a, b) simde_vtst_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vtst_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtst_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvn_u16(simde_vceqz_u16(simde_vand_u16(a, b))); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? 
UINT16_MAX : 0; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtst_u16 + #define vtst_u16(a, b) simde_vtst_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vtst_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtst_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmvn_u32(simde_vceqz_u32(simde_vand_u32(a, b))); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((a_.values[i] & b_.values[i]) != 0) ? UINT32_MAX : 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtst_u32 + #define vtst_u32(a, b) simde_vtst_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vtst_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtst_u64(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vceqz_u64(simde_vceqz_u64(simde_vand_u64(a, b))); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values & b_.values) != 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vtstd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtst_u64 + #define vtst_u64(a, b) simde_vtst_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_TST_H) */ +/* :: End simde/arm/neon/tst.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_x_vrshrs_n_s32(int32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return (a >> ((n == 32) ? 31 : n)) + ((a & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); +} + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_x_vrshrs_n_u32(uint32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return ((n == 32) ? 0 : (a >> n)) + ((a & (UINT32_C(1) << (n - 1))) != 0); +} + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vrshrd_n_s64(int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return (a >> ((n == 64) ? 63 : n)) + ((a & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrshrd_n_s64(a, n) vrshrd_n_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrshrd_n_s64 + #define vrshrd_n_s64(a, n) simde_vrshrd_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vrshrd_n_u64(uint64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return ((n == 64) ? 
0 : (a >> n)) + ((a & (UINT64_C(1) << (n - 1))) != 0); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrshrd_n_u64(a, n) vrshrd_n_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrshrd_n_u64 + #define vrshrd_n_u64(a, n) simde_vrshrd_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrshrq_n_s8 (const simde_int8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_int8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_s8(a, n) vrshrq_n_s8((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_s8(a, n) simde_vsubq_s8(simde_vshrq_n_s8((a), (n)), simde_vreinterpretq_s8_u8( \ + simde_vtstq_u8(simde_vreinterpretq_u8_s8(a), \ + simde_vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1)))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_s8 + #define vrshrq_n_s8(a, n) simde_vrshrq_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vrshrq_n_s16 (const simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_s16(a, n) vrshrq_n_s16((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_s16(a, n) simde_vsubq_s16(simde_vshrq_n_s16((a), (n)), simde_vreinterpretq_s16_u16( \ + simde_vtstq_u16(simde_vreinterpretq_u16_s16(a), \ + simde_vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1)))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_s16 + #define vrshrq_n_s16(a, n) simde_vrshrq_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vrshrq_n_s32 (const simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >> ((n == 32) ? 
31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_s32(a, n) vrshrq_n_s32((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_s32(a, n) simde_vsubq_s32(simde_vshrq_n_s32((a), (n)), \ + simde_vreinterpretq_s32_u32(simde_vtstq_u32(simde_vreinterpretq_u32_s32(a), \ + simde_vdupq_n_u32(UINT32_C(1) << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_s32 + #define vrshrq_n_s32(a, n) simde_vrshrq_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vrshrq_n_s64 (const simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >> ((n == 64) ? 63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); + } + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_s64(a, n) vrshrq_n_s64((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_s64(a, n) simde_vsubq_s64(simde_vshrq_n_s64((a), (n)), \ + simde_vreinterpretq_s64_u64(simde_vtstq_u64(simde_vreinterpretq_u64_s64(a), \ + simde_vdupq_n_u64(UINT64_C(1) << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_s64 + #define vrshrq_n_s64(a, n) simde_vrshrq_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrshrq_n_u8 (const simde_uint8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_uint8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_u8(a, n) vrshrq_n_u8((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_u8(a, n) simde_vsubq_u8(simde_vshrq_n_u8((a), (n)), \ + simde_vtstq_u8((a), simde_vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_u8 + #define vrshrq_n_u8(a, n) simde_vrshrq_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vrshrq_n_u16 (const simde_uint16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_u16(a, n) vrshrq_n_u16((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_u16(a, n) simde_vsubq_u16(simde_vshrq_n_u16((a), (n)), \ + simde_vtstq_u16((a), simde_vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_u16 + #define vrshrq_n_u16(a, n) simde_vrshrq_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vrshrq_n_u32 (const simde_uint32x4_t a, const int 
n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((n == 32) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT32_C(1) << (n - 1))) != 0); + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_u32(a, n) vrshrq_n_u32((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_u32(a, n) simde_vsubq_u32(simde_vshrq_n_u32((a), (n)), \ + simde_vtstq_u32((a), simde_vdupq_n_u32(UINT32_C(1) << ((n) - 1)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_u32 + #define vrshrq_n_u32(a, n) simde_vrshrq_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vrshrq_n_u64 (const simde_uint64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((n == 64) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT64_C(1) << (n - 1))) != 0); + } + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrq_n_u64(a, n) vrshrq_n_u64((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshrq_n_u64(a, n) simde_vsubq_u64(simde_vshrq_n_u64((a), (n)), \ + simde_vtstq_u64((a), simde_vdupq_n_u64(UINT64_C(1) << ((n) - 1)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrq_n_u64 + #define vrshrq_n_u64(a, n) simde_vrshrq_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrshr_n_s8 (const simde_int8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_s8(a, n) vrshr_n_s8((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_s8(a, n) simde_vsub_s8(simde_vshr_n_s8((a), (n)), simde_vreinterpret_s8_u8( \ + simde_vtst_u8(simde_vreinterpret_u8_s8(a), \ + simde_vdup_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1)))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_s8 + #define vrshr_n_s8(a, n) simde_vrshr_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vrshr_n_s16 (const simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_s16(a, n) vrshr_n_s16((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_s16(a, n) simde_vsub_s16(simde_vshr_n_s16((a), (n)), simde_vreinterpret_s16_u16( \ + simde_vtst_u16(simde_vreinterpret_u16_s16(a), \ + simde_vdup_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1)))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_s16 + 
#define vrshr_n_s16(a, n) simde_vrshr_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vrshr_n_s32 (const simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); + } + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_s32(a, n) vrshr_n_s32((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_s32(a, n) simde_vsub_s32(simde_vshr_n_s32((a), (n)), \ + simde_vreinterpret_s32_u32(simde_vtst_u32(simde_vreinterpret_u32_s32(a), \ + simde_vdup_n_u32(UINT32_C(1) << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_s32 + #define vrshr_n_s32(a, n) simde_vrshr_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vrshr_n_s64 (const simde_int64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] >> ((n == 64) ? 63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); + } + + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_s64(a, n) vrshr_n_s64((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_s64(a, n) simde_vsub_s64(simde_vshr_n_s64((a), (n)), \ + simde_vreinterpret_s64_u64(simde_vtst_u64(simde_vreinterpret_u64_s64(a), \ + simde_vdup_n_u64(UINT64_C(1) << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_s64 + #define vrshr_n_s64(a, n) simde_vrshr_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrshr_n_u8 (const simde_uint8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_uint8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_u8(a, n) vrshr_n_u8((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_u8(a, n) simde_vsub_u8(simde_vshr_n_u8((a), (n)), \ + simde_vtst_u8((a), simde_vdup_n_u8(HEDLEY_STATIC_CAST(uint8_t, 1 << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_u8 + #define vrshr_n_u8(a, n) simde_vrshr_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vrshr_n_u16 (const simde_uint16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); + } + + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_u16(a, n) vrshr_n_u16((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_u16(a, n) 
simde_vsub_u16(simde_vshr_n_u16((a), (n)), \ + simde_vtst_u16((a), simde_vdup_n_u16(HEDLEY_STATIC_CAST(uint16_t, 1 << ((n) - 1))))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_u16 + #define vrshr_n_u16(a, n) simde_vrshr_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vrshr_n_u32 (const simde_uint32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((n == 32) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT32_C(1) << (n - 1))) != 0); + } + + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_u32(a, n) vrshr_n_u32((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_u32(a, n) simde_vsub_u32(simde_vshr_n_u32((a), (n)), \ + simde_vtst_u32((a), simde_vdup_n_u32(UINT32_C(1) << ((n) - 1)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_u32 + #define vrshr_n_u32(a, n) simde_vrshr_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vrshr_n_u64 (const simde_uint64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((n == 64) ? 0 : (a_.values[i] >> n)) + ((a_.values[i] & (UINT64_C(1) << (n - 1))) != 0); + } + + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshr_n_u64(a, n) vrshr_n_u64((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vrshr_n_u64(a, n) simde_vsub_u64(simde_vshr_n_u64((a), (n)), \ + simde_vtst_u64((a), simde_vdup_n_u64(UINT64_C(1) << ((n) - 1)))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshr_n_u64 + #define vrshr_n_u64(a, n) simde_vrshr_n_u64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RSHR_N_H) */ +/* :: End simde/arm/neon/rshr_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qmovn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QMOVN_H) +#define SIMDE_ARM_NEON_QMOVN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqmovnh_s16(int16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovnh_s16(a); + #else + return (a > INT8_MAX) ? INT8_MAX : ((a < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovnh_s16 + #define vqmovnh_s16(a) simde_vqmovnh_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqmovns_s32(int32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovns_s32(a); + #else + return (a > INT16_MAX) ? INT16_MAX : ((a < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovns_s32 + #define vqmovns_s32(a) simde_vqmovns_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqmovnd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovnd_s64(a); + #else + return (a > INT32_MAX) ? INT32_MAX : ((a < INT32_MIN) ? INT32_MIN : HEDLEY_STATIC_CAST(int32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovnd_s64 + #define vqmovnd_s64(a) simde_vqmovnd_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqmovnh_u16(uint16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovnh_u16(a); + #else + return (a > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovnh_u16 + #define vqmovnh_u16(a) simde_vqmovnh_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqmovns_u32(uint32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovns_u32(a); + #else + return (a > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovns_u32 + #define vqmovns_u32(a) simde_vqmovns_u32((a)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqmovnd_u64(uint64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovnd_u64(a); + #else + return (a > UINT32_MAX) ? 
UINT32_MAX : HEDLEY_STATIC_CAST(uint32_t, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovnd_u64 + #define vqmovnd_u64(a) simde_vqmovnd_u64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqmovn_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovn_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_s16(simde_vmaxq_s16(simde_vdupq_n_s16(INT8_MIN), simde_vminq_s16(simde_vdupq_n_s16(INT8_MAX), a))); + #else + simde_int8x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovnh_s16(a_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovn_s16 + #define vqmovn_s16(a) simde_vqmovn_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqmovn_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovn_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_s32(simde_vmaxq_s32(simde_vdupq_n_s32(INT16_MIN), simde_vminq_s32(simde_vdupq_n_s32(INT16_MAX), a))); + #else + simde_int16x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovns_s32(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovn_s32 + #define vqmovn_s32(a) simde_vqmovn_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqmovn_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovn_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_s64(simde_x_vmaxq_s64(simde_vdupq_n_s64(INT32_MIN), simde_x_vminq_s64(simde_vdupq_n_s64(INT32_MAX), a))); + #else + simde_int32x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovnd_s64(a_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovn_s64 + #define vqmovn_s64(a) simde_vqmovn_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqmovn_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovn_u16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_u16(simde_vminq_u16(a, simde_vdupq_n_u16(UINT8_MAX))); + #else + simde_uint8x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovnh_u16(a_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovn_u16 + #define vqmovn_u16(a) simde_vqmovn_u16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqmovn_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovn_u32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_u32(simde_vminq_u32(a, simde_vdupq_n_u32(UINT16_MAX))); + #else + simde_uint16x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovns_u32(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovn_u32 + #define vqmovn_u32(a) simde_vqmovn_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqmovn_u64(simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovn_u64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_u64(simde_x_vminq_u64(a, simde_vdupq_n_u64(UINT32_MAX))); + #else + simde_uint32x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovnd_u64(a_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovn_u64 + #define vqmovn_u64(a) simde_vqmovn_u64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QMOVN_H) */ +/* :: End simde/arm/neon/qmovn.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrns_n_s32(a, n) vqrshrns_n_s32(a, n) +#else + #define simde_vqrshrns_n_s32(a, n) simde_vqmovns_s32(simde_x_vrshrs_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrns_n_s32 + #define vqrshrns_n_s32(a, n) simde_vqrshrns_n_s32(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrns_n_u32(a, n) vqrshrns_n_u32(a, n) +#else + #define simde_vqrshrns_n_u32(a, n) simde_vqmovns_u32(simde_x_vrshrs_n_u32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrns_n_u32 + #define vqrshrns_n_u32(a, n) simde_vqrshrns_n_u32(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrnd_n_s64(a, n) vqrshrnd_n_s64(a, n) +#else + #define simde_vqrshrnd_n_s64(a, n) simde_vqmovnd_s64(simde_vrshrd_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrnd_n_s64 + #define vqrshrnd_n_s64(a, n) simde_vqrshrnd_n_s64(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrnd_n_u64(a, n) vqrshrnd_n_u64(a, n) +#else + #define simde_vqrshrnd_n_u64(a, n) simde_vqmovnd_u64(simde_vrshrd_n_u64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrnd_n_u64 + #define vqrshrnd_n_u64(a, n) simde_vqrshrnd_n_u64(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrn_n_s16(a, n) vqrshrn_n_s16((a), (n)) +#else + #define simde_vqrshrn_n_s16(a, n) simde_vqmovn_s16(simde_vrshrq_n_s16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_n_s16 + #define vqrshrn_n_s16(a, n) simde_vqrshrn_n_s16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrn_n_s32(a, n) vqrshrn_n_s32((a), (n)) +#else + #define simde_vqrshrn_n_s32(a, n) simde_vqmovn_s32(simde_vrshrq_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_n_s32 + #define vqrshrn_n_s32(a, n) simde_vqrshrn_n_s32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrn_n_s64(a, n) vqrshrn_n_s64((a), (n)) +#else + #define simde_vqrshrn_n_s64(a, n) simde_vqmovn_s64(simde_vrshrq_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef 
vqrshrn_n_s64 + #define vqrshrn_n_s64(a, n) simde_vqrshrn_n_s64((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrn_n_u16(a, n) vqrshrn_n_u16((a), (n)) +#else + #define simde_vqrshrn_n_u16(a, n) simde_vqmovn_u16(simde_vrshrq_n_u16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_n_u16 + #define vqrshrn_n_u16(a, n) simde_vqrshrn_n_u16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrn_n_u32(a, n) vqrshrn_n_u32((a), (n)) +#else + #define simde_vqrshrn_n_u32(a, n) simde_vqmovn_u32(simde_vrshrq_n_u32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_n_u32 + #define vqrshrn_n_u32(a, n) simde_vqrshrn_n_u32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrn_n_u64(a, n) vqrshrn_n_u64((a), (n)) +#else + #define simde_vqrshrn_n_u64(a, n) simde_vqmovn_u64(simde_vrshrq_n_u64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_n_u64 + #define vqrshrn_n_u64(a, n) simde_vqrshrn_n_u64((a), (n)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRSHRN_N_H) */ +/* :: End simde/arm/neon/qrshrn_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qrshrun_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QRSHRUN_N_H) +#define SIMDE_ARM_NEON_QRSHRUN_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qmovun.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QMOVUN_H) +#define SIMDE_ARM_NEON_QMOVUN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqmovunh_s16(int16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint8_t, vqmovunh_s16(a)); + #else + return (a > UINT8_MAX) ? UINT8_MAX : ((a < 0) ? 0 : HEDLEY_STATIC_CAST(uint8_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovunh_s16 + #define vqmovunh_s16(a) simde_vqmovunh_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqmovuns_s32(int32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint16_t, vqmovuns_s32(a)); + #else + return (a > UINT16_MAX) ? UINT16_MAX : ((a < 0) ? 0 : HEDLEY_STATIC_CAST(uint16_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovuns_s32 + #define vqmovuns_s32(a) simde_vqmovuns_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqmovund_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(uint32_t, vqmovund_s64(a)); + #else + return (a > UINT32_MAX) ? UINT32_MAX : ((a < 0) ? 
0 : HEDLEY_STATIC_CAST(uint32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovund_s64 + #define vqmovund_s64(a) simde_vqmovund_s64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqmovun_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovun_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_u16(simde_vreinterpretq_u16_s16(simde_vmaxq_s16(simde_vdupq_n_s16(0), simde_vminq_s16(simde_vdupq_n_s16(UINT8_MAX), a)))); + #else + simde_uint8x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovunh_s16(a_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovun_s16 + #define vqmovun_s16(a) simde_vqmovun_s16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqmovun_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovun_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_u32(simde_vreinterpretq_u32_s32(simde_vmaxq_s32(simde_vdupq_n_s32(0), simde_vminq_s32(simde_vdupq_n_s32(UINT16_MAX), a)))); + #else + simde_uint16x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovuns_s32(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovun_s32 + #define vqmovun_s32(a) simde_vqmovun_s32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqmovun_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqmovun_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE > 0 + return simde_vmovn_u64(simde_vreinterpretq_u64_s64(simde_x_vmaxq_s64(simde_vdupq_n_s64(0), simde_x_vminq_s64(simde_vdupq_n_s64(UINT32_MAX), a)))); + #else + simde_uint32x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqmovund_s64(a_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqmovun_s64 + #define vqmovun_s64(a) simde_vqmovun_s64((a)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QMOVUN_H) */ +/* :: End simde/arm/neon/qmovun.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshruns_n_s32(a, n) vqrshruns_n_s32(a, n) +#else + #define simde_vqrshruns_n_s32(a, n) simde_vqmovuns_s32(simde_x_vrshrs_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshruns_n_s32 + #define vqrshruns_n_s32(a, n) simde_vqrshruns_n_s32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrund_n_s64(a, n) vqrshrund_n_s64(a, n) +#else + #define simde_vqrshrund_n_s64(a, n) simde_vqmovund_s64(simde_vrshrd_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrund_n_s64 + #define vqrshrund_n_s64(a, n) simde_vqrshrund_n_s64((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrun_n_s16(a, n) vqrshrun_n_s16((a), (n)) 
+#else + #define simde_vqrshrun_n_s16(a, n) simde_vqmovun_s16(simde_vrshrq_n_s16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrun_n_s16 + #define vqrshrun_n_s16(a, n) simde_vqrshrun_n_s16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrun_n_s32(a, n) vqrshrun_n_s32((a), (n)) +#else + #define simde_vqrshrun_n_s32(a, n) simde_vqmovun_s32(simde_vrshrq_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrun_n_s32 + #define vqrshrun_n_s32(a, n) simde_vqrshrun_n_s32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqrshrun_n_s64(a, n) vqrshrun_n_s64((a), (n)) +#else + #define simde_vqrshrun_n_s64(a, n) simde_vqmovun_s64(simde_vrshrq_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshrun_n_s64 + #define vqrshrun_n_s64(a, n) simde_vqrshrun_n_s64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRSHRUN_N_H) */ +/* :: End simde/arm/neon/qrshrun_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qmovn_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QMOVN_HIGH_H) +#define SIMDE_ARM_NEON_QMOVN_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqmovn_high_s16(simde_int8x8_t r, simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovn_high_s16(r, a); + #else + return simde_vcombine_s8(r, simde_vqmovn_s16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovn_high_s16 + #define vqmovn_high_s16(r, a) simde_vqmovn_high_s16((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqmovn_high_s32(simde_int16x4_t r, simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovn_high_s32(r, a); + #else + return simde_vcombine_s16(r, simde_vqmovn_s32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovn_high_s32 + #define vqmovn_high_s32(r, a) simde_vqmovn_high_s32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqmovn_high_s64(simde_int32x2_t r, simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovn_high_s64(r, a); + #else + return simde_vcombine_s32(r, simde_vqmovn_s64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovn_high_s64 + #define vqmovn_high_s64(r, a) simde_vqmovn_high_s64((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqmovn_high_u16(simde_uint8x8_t r, simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovn_high_u16(r, a); + #else + return simde_vcombine_u8(r, simde_vqmovn_u16(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovn_high_u16 + #define vqmovn_high_u16(r, a) simde_vqmovn_high_u16((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqmovn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovn_high_u32(r, a); + #else + return simde_vcombine_u16(r, simde_vqmovn_u32(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovn_high_u32 + #define vqmovn_high_u32(r, a) simde_vqmovn_high_u32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqmovn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqmovn_high_u64(r, a); + #else + return simde_vcombine_u32(r, simde_vqmovn_u64(a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqmovn_high_u64 + #define vqmovn_high_u64(r, a) simde_vqmovn_high_u64((r), (a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QMOVN_HIGH_H) */ +/* :: End simde/arm/neon/qmovn_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qneg.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * 
restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_QNEG_H) +#define SIMDE_ARM_NEON_QNEG_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE) || 1 +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqnegb_s8(int8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqnegb_s8(a); + #else + return a == INT8_MIN ? INT8_MAX : -a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqnegb_s8 + #define vqnegb_s8(a) simde_vqnegb_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqnegh_s16(int16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqnegh_s16(a); + #else + return a == INT16_MIN ? INT16_MAX : -a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqnegh_s16 + #define vqnegh_s16(a) simde_vqnegh_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqnegs_s32(int32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqnegs_s32(a); + #else + return a == INT32_MIN ? INT32_MAX : -a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqnegs_s32 + #define vqnegs_s32(a) simde_vqnegs_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqnegd_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqnegd_s64(a); + #else + return a == INT64_MIN ? INT64_MAX : -a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqnegd_s64 + #define vqnegd_s64(a) simde_vqnegd_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqneg_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqneg_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) + return simde_vneg_s8(simde_vmax_s8(a, simde_vdup_n_s8(INT8_MIN + 1))); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT8_MIN) ? 
INT8_MAX : -(a_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqneg_s8 + #define vqneg_s8(a) simde_vqneg_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqneg_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqneg_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) + return simde_vneg_s16(simde_vmax_s16(a, simde_vdup_n_s16(INT16_MIN + 1))); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT16_MIN) ? INT16_MAX : -(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqneg_s16 + #define vqneg_s16(a) simde_vqneg_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqneg_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqneg_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(64) + return simde_vneg_s32(simde_vmax_s32(a, simde_vdup_n_s32(INT32_MIN + 1))); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT32_MIN) ? INT32_MAX : -(a_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqneg_s32 + #define vqneg_s32(a) simde_vqneg_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqneg_s64(simde_int64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqneg_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vneg_s64(simde_x_vmax_s64(a, simde_vdup_n_s64(INT64_MIN + 1))); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT64_MIN) ? INT64_MAX : -(a_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqneg_s64 + #define vqneg_s64(a) simde_vqneg_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqnegq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqnegq_s8(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vnegq_s8(simde_vmaxq_s8(a, simde_vdupq_n_s8(INT8_MIN + 1))); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT8_MIN) ? INT8_MAX : -(a_.values[i]); + } + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqnegq_s8 + #define vqnegq_s8(a) simde_vqnegq_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqnegq_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqnegq_s16(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vnegq_s16(simde_vmaxq_s16(a, simde_vdupq_n_s16(INT16_MIN + 1))); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT16_MIN) ? 
INT16_MAX : -(a_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqnegq_s16 + #define vqnegq_s16(a) simde_vqnegq_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqnegq_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqnegq_s32(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vnegq_s32(simde_vmaxq_s32(a, simde_vdupq_n_s32(INT32_MIN + 1))); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT32_MIN) ? INT32_MAX : -(a_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqnegq_s32 + #define vqnegq_s32(a) simde_vqnegq_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqnegq_s64(simde_int64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqnegq_s64(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vnegq_s64(simde_x_vmaxq_s64(a, simde_vdupq_n_s64(INT64_MIN + 1))); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] == INT64_MIN) ? INT64_MAX : -(a_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqnegq_s64 + #define vqnegq_s64(a) simde_vqnegq_s64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QNEG_H) */ +/* :: End simde/arm/neon/qneg.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qsub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_QSUB_H) +#define SIMDE_ARM_NEON_QSUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqsubb_s8(int8_t a, int8_t b) { + return simde_math_subs_i8(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubb_s8 + #define vqsubb_s8(a, b) simde_vqsubb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqsubh_s16(int16_t a, int16_t b) { + return simde_math_subs_i16(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubh_s16 + #define vqsubh_s16(a, b) simde_vqsubh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqsubs_s32(int32_t a, int32_t b) { + return simde_math_subs_i32(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubs_s32 + #define vqsubs_s32(a, b) simde_vqsubs_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqsubd_s64(int64_t a, int64_t b) { + return simde_math_subs_i64(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubd_s64 + #define vqsubd_s64(a, b) simde_vqsubd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqsubb_u8(uint8_t a, uint8_t b) { + return simde_math_subs_u8(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubb_u8 + #define vqsubb_u8(a, b) simde_vqsubb_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqsubh_u16(uint16_t a, uint16_t b) { + return simde_math_subs_u16(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubh_u16 + #define vqsubh_u16(a, b) simde_vqsubh_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqsubs_u32(uint32_t a, uint32_t b) { + return simde_math_subs_u32(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubs_u32 + #define vqsubs_u32(a, b) simde_vqsubs_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqsubd_u64(uint64_t a, uint64_t b) { + return simde_math_subs_u64(a, b); +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqsubd_u64 + #define vqsubd_u64(a, b) simde_vqsubd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqsub_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_subs_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT8_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate 
>> 7; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubb_s8(a_.values[i], b_.values[i]); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_s8 + #define vqsub_s8(a, b) simde_vqsub_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqsub_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_subs_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT16_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 15; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubh_s16(a_.values[i], b_.values[i]); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_s16 + #define vqsub_s16(a, b) simde_vqsub_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqsub_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT32_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 31; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubs_s32(a_.values[i], b_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_s32 + #define vqsub_s32(a, b) simde_vqsub_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqsub_s64(simde_int64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT64_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 63; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_s64 + 
#define vqsub_s64(a, b) simde_vqsub_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqsub_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_subs_pu8(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubb_u8(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_u8 + #define vqsub_u8(a, b) simde_vqsub_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqsub_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_subs_pu16(a_.m64, b_.m64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubh_u16(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_u16 + #define vqsub_u16(a, b) simde_vqsub_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqsub_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubs_u32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_u32 + #define vqsub_u32(a, b) simde_vqsub_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqsub_u64(simde_uint64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsub_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a), + b_ = simde_uint64x1_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsub_u64 + #define vqsub_u64(a, b) simde_vqsub_u64((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqsubq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_subs(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_sub_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_subs_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT8_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 7; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubb_s8(a_.values[i], b_.values[i]); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_s8 + #define vqsubq_s8(a, b) simde_vqsubq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqsubq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_subs(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_sub_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_subs_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT16_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 15; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubh_s16(a_.values[i], b_.values[i]); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_s16 + #define vqsubq_s16(a, b) simde_vqsubq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqsubq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_subs(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + const __m128i diff_sat = _mm_xor_si128(_mm_set1_epi32(INT32_MAX), _mm_cmpgt_epi32(b_.m128i, a_.m128i)); + const __m128i diff = _mm_sub_epi32(a_.m128i, b_.m128i); + + const __m128i t = _mm_xor_si128(diff_sat, diff); + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(diff), + _mm_castsi128_ps(diff_sat), + _mm_castsi128_ps(t) + ) + ); + #else + r_.m128i = _mm_xor_si128(diff, _mm_and_si128(t, _mm_srai_epi32(t, 31))); + #endif + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT32_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 31; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubs_s32(a_.values[i], b_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_s32 + #define vqsubq_s32(a, b) simde_vqsubq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqsubq_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.values) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (b_.values > a_.values) ^ INT64_MAX); + const __typeof__(r_.values) diff = a_.values - b_.values; + const __typeof__(r_.values) saturate = diff_sat ^ diff; + const __typeof__(r_.values) m = saturate >> 63; + r_.values = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubd_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_s64 + #define vqsubq_s64(a, b) simde_vqsubq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqsubq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_subs(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_sub_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_subs_epu8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values <= a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubb_u8(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_u8 + #define vqsubq_u8(a, b) simde_vqsubq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqsubq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_subs(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_sub_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_subs_epu16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values <= a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 
0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubh_u16(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_u16 + #define vqsubq_u16(a, b) simde_vqsubq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqsubq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_subs(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + const __m128i i32_min = _mm_set1_epi32(INT32_MIN); + const __m128i difference = _mm_sub_epi32(a_.m128i, b_.m128i); + r_.m128i = + _mm_and_si128( + difference, + _mm_xor_si128( + _mm_cmpgt_epi32( + _mm_xor_si128(difference, i32_min), + _mm_xor_si128(a_.m128i, i32_min) + ), + _mm_set1_epi32(~INT32_C(0)) + ) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubs_u32(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_u32 + #define vqsubq_u32(a, b) simde_vqsubq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vqsubq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqsubq_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values - b_.values; + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (r_.values <= a_.values)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqsubd_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqsubq_u64 + #define vqsubq_u64(a, b) simde_vqsubq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QSUB_H) */ +/* :: End simde/arm/neon/qsub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qshl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_QSHL_H) +#define SIMDE_ARM_NEON_QSHL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqshlb_s8(int8_t a, int8_t b) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshlb_s8(a, b); + #else + if (b < -7) + b = -7; + + if (b <= 0) { + r = a >> -b; + } else if (b < 7) { + r = HEDLEY_STATIC_CAST(int8_t, a << b); + if ((r >> b) != a) { + r = (a < 0) ? INT8_MIN : INT8_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT8_MIN : INT8_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlb_s8 + #define vqshlb_s8(a, b) simde_vqshlb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqshlh_s16(int16_t a, int16_t b) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshlh_s16(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 < -15) + b8 = -15; + + if (b8 <= 0) { + r = a >> -b8; + } else if (b8 < 15) { + r = HEDLEY_STATIC_CAST(int16_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT16_MIN : INT16_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT16_MIN : INT16_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlh_s16 + #define vqshlh_s16(a, b) simde_vqshlh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqshls_s32(int32_t a, int32_t b) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshls_s32(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 < -31) + b8 = -31; + + if (b8 <= 0) { + r = a >> -b8; + } else if (b8 < 31) { + r = HEDLEY_STATIC_CAST(int32_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT32_MIN : INT32_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT32_MIN : INT32_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshls_s32 + #define vqshls_s32(a, b) simde_vqshls_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqshld_s64(int64_t a, int64_t b) { + int64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqshld_s64(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 < -63) + b8 = -63; + + if (b8 <= 0) { + r = a >> -b8; + } else if (b8 < 63) { + r = HEDLEY_STATIC_CAST(int64_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT64_MIN : INT64_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? 
INT64_MIN : INT64_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshld_s64 + #define vqshld_s64(a, b) simde_vqshld_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqshlb_u8(uint8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshlb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + /* https://github.com/llvm/llvm-project/commit/f0a78bdfdc6d56b25e0081884580b3960a3c2429 */ + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshlb_u8(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshlb_u8(a, b); + #endif + #else + uint8_t r; + + if (b < -7) + b = -7; + + if (b <= 0) { + r = a >> -b; + } else if (b < 7) { + r = HEDLEY_STATIC_CAST(uint8_t, a << b); + if ((r >> b) != a) { + r = UINT8_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT8_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlb_u8 + #define vqshlb_u8(a, b) simde_vqshlb_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqshlh_u16(uint16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshlh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshlh_u16(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshlh_u16(a, b); + #endif + #else + uint16_t r; + + if (b < -15) + b = -15; + + if (b <= 0) { + r = a >> -b; + } else if (b < 15) { + r = HEDLEY_STATIC_CAST(uint16_t, a << b); + if ((r >> b) != a) { + r = UINT16_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT16_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlh_u16 + #define vqshlh_u16(a, b) simde_vqshlh_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqshls_u32(uint32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshls_u32(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshls_u32(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshls_u32(a, b); + #endif + #else + uint32_t r; + + if (b < -31) + b = -31; + + if (b <= 0) { + r = HEDLEY_STATIC_CAST(uint32_t, a >> -b); + } else if (b < 31) { + r = a << b; + if ((r >> b) != a) { + r = UINT32_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT32_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshls_u32 + #define vqshls_u32(a, b) simde_vqshls_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqshld_u64(uint64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqshld_u64(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqshld_u64(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqshld_u64(a, b); + #endif + #else + uint64_t r; + + if (b < -63) + b = -63; + + if (b <= 0) { + r = a >> -b; + } else if (b < 63) { + 
r = HEDLEY_STATIC_CAST(uint64_t, a << b); + if ((r >> b) != a) { + r = UINT64_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT64_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshldb_u64 + #define vqshld_u64(a, b) simde_vqshld_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s8 + #define vqshl_s8(a, b) simde_vqshl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s16 + #define vqshl_s16(a, b) simde_vqshl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s32 + #define vqshl_s32(a, b) simde_vqshl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_s64 + #define vqshl_s64(a, b) simde_vqshl_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + simde_int8x8_private + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u8 + #define vqshl_u8(a, b) 
simde_vqshl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_int16x4_private + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u16 + #define vqshl_u16(a, b) simde_vqshl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_int32x2_private + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u32 + #define vqshl_u32(a, b) simde_vqshl_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshl_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + simde_int64x1_private + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_u64 + #define vqshl_u64(a, b) simde_vqshl_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s8 + #define vqshlq_s8(a, b) simde_vqshlq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s16 + #define vqshlq_s16(a, b) simde_vqshlq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return 
vqshlq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s32 + #define vqshlq_s32(a, b) simde_vqshlq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_s64 + #define vqshlq_s64(a, b) simde_vqshlq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + simde_int8x16_private + b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u8 + #define vqshlq_u8(a, b) simde_vqshlq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_int16x8_private + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshlh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u16 + #define vqshlq_u16(a, b) simde_vqshlq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_int32x4_private + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u32 + #define vqshlq_u32(a, b) simde_vqshlq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vqshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqshlq_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + simde_int64x2_private + b_ = simde_int64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 
0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_u64 + #define vqshlq_u64(a, b) simde_vqshlq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QSHL_H) */ +/* :: End simde/arm/neon/qshl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qshlu_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Atharva Nimbalkar + */ + +#if !defined(SIMDE_ARM_NEON_QSHLU_N_H) +#define SIMDE_ARM_NEON_QSHLU_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if defined(SIMDE_WASM_SIMD128_NATIVE) +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqshlub_n_s8(int8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + uint8_t r = HEDLEY_STATIC_CAST(uint8_t, a << n); + r |= (((r >> n) != HEDLEY_STATIC_CAST(uint8_t, a)) ? UINT8_MAX : 0); + return (a < 0) ? 0 : r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlub_n_s8(a, n) HEDLEY_STATIC_CAST(uint8_t, vqshlub_n_s8(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlub_n_s8 + #define vqshlub_n_s8(a, n) simde_vqshlub_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqshlus_n_s32(int32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + uint32_t r = HEDLEY_STATIC_CAST(uint32_t, a << n); + r |= (((r >> n) != HEDLEY_STATIC_CAST(uint32_t, a)) ? UINT32_MAX : 0); + return (a < 0) ? 
0 : r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlus_n_s32(a, n) HEDLEY_STATIC_CAST(uint32_t, vqshlus_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlus_n_s32 + #define vqshlus_n_s32(a, n) simde_vqshlus_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqshlud_n_s64(int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + uint32_t r = HEDLEY_STATIC_CAST(uint32_t, a << n); + r |= (((r >> n) != HEDLEY_STATIC_CAST(uint32_t, a)) ? UINT32_MAX : 0); + return (a < 0) ? 0 : r; +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlud_n_s64(a, n) HEDLEY_STATIC_CAST(uint64_t, vqshlud_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlud_n_s64 + #define vqshlud_n_s64(a, n) simde_vqshlud_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqshlu_n_s8(simde_int8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int16x8_private + R_, + A_ = simde_int16x8_to_private(simde_vmovl_s8(a)); + + const v128_t shifted = wasm_i16x8_shl(A_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + R_.v128 = wasm_i16x8_min(shifted, wasm_i16x8_const_splat(UINT8_MAX)); + R_.v128 = wasm_i16x8_max(R_.v128, wasm_i16x8_const_splat(0)); + + return simde_vmovn_u16(simde_vreinterpretq_u16_s16( simde_int16x8_from_private(R_))); + #else + simde_int8x8_private a_ = simde_int8x8_to_private(a); + simde_uint8x8_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint8_t, a_.values[i])) ? UINT8_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 
0 : r_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlu_n_s8(a, n) vqshlu_n_s8(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlu_n_s8 + #define vqshlu_n_s8(a, n) simde_vqshlu_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqshlu_n_s16(simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int32x4_private + R_, + A_ = simde_int32x4_to_private(simde_vmovl_s16(a)); + + const v128_t shifted = wasm_i32x4_shl(A_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + R_.v128 = wasm_i32x4_min(shifted, wasm_i32x4_const_splat(UINT16_MAX)); + R_.v128 = wasm_i32x4_max(R_.v128, wasm_i32x4_const_splat(0)); + + return simde_vmovn_u32(simde_vreinterpretq_u32_s32( simde_int32x4_from_private(R_))); + #else + simde_int16x4_private a_ = simde_int16x4_to_private(a); + simde_uint16x4_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint16_t, a_.values[i])) ? UINT16_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 
0 : r_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlu_n_s16(a, n) vqshlu_n_s16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlu_n_s16 + #define vqshlu_n_s16(a, n) simde_vqshlu_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqshlu_n_s32(simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + simde_int64x2_private + R_, + A_ = simde_int64x2_to_private(simde_vmovl_s32(a)); + + const v128_t max = wasm_i64x2_const_splat(UINT32_MAX); + + const v128_t shifted = wasm_i64x2_shl(A_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + R_.v128 = wasm_v128_bitselect(shifted, max, wasm_i64x2_gt(max, shifted)); + R_.v128 = wasm_v128_and(R_.v128, wasm_i64x2_gt(R_.v128, wasm_i64x2_const_splat(0))); + + return simde_vmovn_u64(simde_vreinterpretq_u64_s64( simde_int64x2_from_private(R_))); + #else + simde_int32x2_private a_ = simde_int32x2_to_private(a); + simde_uint32x2_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint32_t, a_.values[i])) ? UINT32_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 
0 : r_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlu_n_s32(a, n) vqshlu_n_s32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlu_n_s32 + #define vqshlu_n_s32(a, n) simde_vqshlu_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqshlu_n_s64(simde_int64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint64x2_private + R_, + A_ = simde_uint64x2_to_private(simde_vreinterpretq_u64_s64(simde_vcombine_s64(a, a))); + + R_.v128 = wasm_i64x2_shl(A_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + const v128_t overflow = wasm_i64x2_ne(A_.v128, wasm_u64x2_shr(R_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + R_.v128 = wasm_v128_or(R_.v128, overflow); + R_.v128 = wasm_v128_andnot(R_.v128, wasm_i64x2_shr(A_.v128, 63)); + + return simde_vget_low_u64(simde_uint64x2_from_private(R_)); + #else + simde_int64x1_private a_ = simde_int64x1_to_private(a); + simde_uint64x1_private r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint64_t, a_.values[i])) ? UINT64_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 0 : r_.values[i]; + } + #endif + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlu_n_s64(a, n) vqshlu_n_s64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlu_n_s64 + #define vqshlu_n_s64(a, n) simde_vqshlu_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqshluq_n_s8(simde_int8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_int8x16_private a_ = simde_int8x16_to_private(a); + simde_uint8x16_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + const v128_t overflow = wasm_i8x16_ne(a_.v128, wasm_u8x16_shr(r_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + r_.v128 = wasm_v128_or(r_.v128, overflow); + r_.v128 = wasm_v128_andnot(r_.v128, wasm_i8x16_shr(a_.v128, 7)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint8_t, a_.values[i])) ? UINT8_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 
0 : r_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshluq_n_s8(a, n) vqshluq_n_s8(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshluq_n_s8 + #define vqshluq_n_s8(a, n) simde_vqshluq_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqshluq_n_s16(simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_uint16x8_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + const v128_t overflow = wasm_i16x8_ne(a_.v128, wasm_u16x8_shr(r_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + r_.v128 = wasm_v128_or(r_.v128, overflow); + r_.v128 = wasm_v128_andnot(r_.v128, wasm_i16x8_shr(a_.v128, 15)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint16_t, a_.values[i])) ? UINT16_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 0 : r_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshluq_n_s16(a, n) vqshluq_n_s16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshluq_n_s16 + #define vqshluq_n_s16(a, n) simde_vqshluq_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqshluq_n_s32(simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_uint32x4_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + const v128_t overflow = wasm_i32x4_ne(a_.v128, wasm_u32x4_shr(r_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + r_.v128 = wasm_v128_or(r_.v128, overflow); + r_.v128 = wasm_v128_andnot(r_.v128, wasm_i32x4_shr(a_.v128, 31)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint32_t, a_.values[i])) ? UINT32_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 
0 : r_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshluq_n_s32(a, n) vqshluq_n_s32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshluq_n_s32 + #define vqshluq_n_s32(a, n) simde_vqshluq_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vqshluq_n_s64(simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_uint64x2_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shl(a_.v128, HEDLEY_STATIC_CAST(uint32_t, n)); + const v128_t overflow = wasm_i64x2_ne(a_.v128, wasm_u64x2_shr(r_.v128, HEDLEY_STATIC_CAST(uint32_t, n))); + r_.v128 = wasm_v128_or(r_.v128, overflow); + r_.v128 = wasm_v128_andnot(r_.v128, wasm_i64x2_shr(a_.v128, 63)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(r_.values) shifted = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values) << n; + + __typeof__(r_.values) overflow = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (shifted >> n) != HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), a_.values)); + + r_.values = (shifted & ~overflow) | overflow; + + r_.values &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (a_.values >= 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i] << n); + r_.values[i] |= (((r_.values[i] >> n) != HEDLEY_STATIC_CAST(uint64_t, a_.values[i])) ? UINT64_MAX : 0); + r_.values[i] = (a_.values[i] < 0) ? 0 : r_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshluq_n_s64(a, n) vqshluq_n_s64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshluq_n_s64 + #define vqshluq_n_s64(a, n) simde_vqshluq_n_s64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QSHLU_N_H) */ +/* :: End simde/arm/neon/qshlu_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qshrn_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_QSHRN_N_H) +#define SIMDE_ARM_NEON_QSHRN_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshrns_n_s32(a, n) vqshrns_n_s32(a, n) +#else + #define simde_vqshrns_n_s32(a, n) simde_vqmovns_s32(simde_x_vshrs_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshrns_n_s32 + #define vqshrns_n_s32(a, n) simde_vqshrns_n_s32(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshrns_n_u32(a, n) vqshrns_n_u32(a, n) +#else + #define simde_vqshrns_n_u32(a, n) simde_vqmovns_u32(simde_x_vshrs_n_u32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshrns_n_u32 + #define vqshrns_n_u32(a, n) simde_vqshrns_n_u32(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshrnd_n_s64(a, n) vqshrnd_n_s64(a, n) +#else + #define simde_vqshrnd_n_s64(a, n) simde_vqmovnd_s64(simde_vshrd_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshrnd_n_s64 + #define vqshrnd_n_s64(a, n) simde_vqshrnd_n_s64(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshrnd_n_u64(a, n) vqshrnd_n_u64(a, n) +#else + #define simde_vqshrnd_n_u64(a, n) simde_vqmovnd_u64(simde_vshrd_n_u64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshrnd_n_u64 + #define vqshrnd_n_u64(a, n) simde_vqshrnd_n_u64(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrn_n_s16(a, n) vqshrn_n_s16((a), (n)) +#else + #define simde_vqshrn_n_s16(a, n) simde_vqmovn_s16(simde_vshrq_n_s16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrn_n_s16 + #define vqshrn_n_s16(a, n) simde_vqshrn_n_s16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrn_n_s32(a, n) vqshrn_n_s32((a), (n)) +#else + #define simde_vqshrn_n_s32(a, n) simde_vqmovn_s32(simde_vshrq_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrn_n_s32 + #define vqshrn_n_s32(a, n) simde_vqshrn_n_s32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrn_n_s64(a, n) vqshrn_n_s64((a), (n)) +#else + #define simde_vqshrn_n_s64(a, n) simde_vqmovn_s64(simde_vshrq_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrn_n_s64 + #define vqshrn_n_s64(a, n) simde_vqshrn_n_s64((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrn_n_u16(a, n) vqshrn_n_u16((a), (n)) +#else + #define simde_vqshrn_n_u16(a, n) simde_vqmovn_u16(simde_vshrq_n_u16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrn_n_u16 + #define vqshrn_n_u16(a, n) simde_vqshrn_n_u16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrn_n_u32(a, n) vqshrn_n_u32((a), (n)) +#else + #define simde_vqshrn_n_u32(a, n) simde_vqmovn_u32(simde_vshrq_n_u32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrn_n_u32 + #define 
vqshrn_n_u32(a, n) simde_vqshrn_n_u32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrn_n_u64(a, n) vqshrn_n_u64((a), (n)) +#else + #define simde_vqshrn_n_u64(a, n) simde_vqmovn_u64(simde_vshrq_n_u64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrn_n_u64 + #define vqshrn_n_u64(a, n) simde_vqshrn_n_u64((a), (n)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QSHRN_N_H) */ +/* :: End simde/arm/neon/qshrn_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qshrun_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_QSHRUN_N_H) +#define SIMDE_ARM_NEON_QSHRUN_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshruns_n_s32(a, n) HEDLEY_STATIC_CAST(uint16_t, vqshruns_n_s32((a), (n))) +#else + #define simde_vqshruns_n_s32(a, n) simde_vqmovuns_s32(simde_x_vshrs_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshruns_n_s32 + #define vqshruns_n_s32(a, n) simde_vqshruns_n_s32(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshrund_n_s64(a, n) HEDLEY_STATIC_CAST(uint32_t, vqshrund_n_s64((a), (n))) +#else + #define simde_vqshrund_n_s64(a, n) simde_vqmovund_s64(simde_vshrd_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshrund_n_s64 + #define vqshrund_n_s64(a, n) simde_vqshrund_n_s64(a, n) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrun_n_s16(a, n) vqshrun_n_s16((a), (n)) +#else + #define simde_vqshrun_n_s16(a, n) simde_vqmovun_s16(simde_vshrq_n_s16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrun_n_s16 + #define vqshrun_n_s16(a, n) simde_vqshrun_n_s16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrun_n_s32(a, n) vqshrun_n_s32((a), (n)) +#else + #define simde_vqshrun_n_s32(a, n) simde_vqmovun_s32(simde_vshrq_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrun_n_s32 + #define vqshrun_n_s32(a, n) simde_vqshrun_n_s32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshrun_n_s64(a, n) vqshrun_n_s64((a), (n)) +#else + #define simde_vqshrun_n_s64(a, n) simde_vqmovun_s64(simde_vshrq_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshrun_n_s64 + #define vqshrun_n_s64(a, n) simde_vqshrun_n_s64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QSHRUN_N_H) */ +/* :: End simde/arm/neon/qshrun_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qtbl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_QTBL_H) +#define SIMDE_ARM_NEON_QTBL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbl1_u8(simde_uint8x16_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl1_u8(t, idx); + #else + simde_uint8x16_private t_ = simde_uint8x16_to_private(t); + simde_uint8x8_private + r_, + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + __m128i r128 = _mm_shuffle_epi8(t_.m128i, _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(15)))); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 16) ? t_.values[idx_.values[i]] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl1_u8 + #define vqtbl1_u8(t, idx) simde_vqtbl1_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbl1_s8(simde_int8x16_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl1_s8(t, idx); + #else + return simde_vreinterpret_s8_u8(simde_vqtbl1_u8(simde_vreinterpretq_u8_s8(t), idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl1_s8 + #define vqtbl1_s8(t, idx) simde_vqtbl1_s8((t), (idx)) +#endif + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbl2_u8(simde_uint8x16x2_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl2_u8(t, idx); + #else + simde_uint8x16_private t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) }; + simde_uint8x8_private + r_, + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(31))); + __m128i r128_0 = _mm_shuffle_epi8(t_[0].m128i, idx128); + __m128i r128_1 = _mm_shuffle_epi8(t_[1].m128i, idx128); + __m128i r128 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 32) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl2_u8 + #define vqtbl2_u8(t, idx) simde_vqtbl2_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbl2_s8(simde_int8x16x2_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl2_s8(t, idx); + #else + simde_uint8x16x2_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpret_s8_u8(simde_vqtbl2_u8(t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl2_s8 + #define vqtbl2_s8(t, idx) simde_vqtbl2_s8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbl3_u8(simde_uint8x16x3_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl3_u8(t, idx); + #else + simde_uint8x16_private t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), + simde_uint8x16_to_private(t.val[2]) }; + simde_uint8x8_private + r_, + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(47))); + __m128i r128_0 = _mm_shuffle_epi8(t_[0].m128i, idx128); + __m128i r128_1 = _mm_shuffle_epi8(t_[1].m128i, idx128); + __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3)); + __m128i r128_2 = _mm_shuffle_epi8(t_[2].m128i, idx128); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(idx128, 2)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 48) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl3_u8 + #define vqtbl3_u8(t, idx) simde_vqtbl3_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbl3_s8(simde_int8x16x3_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl3_s8(t, idx); + #else + simde_uint8x16x3_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpret_s8_u8(simde_vqtbl3_u8(t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl3_s8 + #define vqtbl3_s8(t, idx) simde_vqtbl3_s8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbl4_u8(simde_uint8x16x4_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl4_u8(t, idx); + #else + simde_uint8x16_private t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), + simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) }; + simde_uint8x8_private + r_, + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(63))); + __m128i idx128_shl3 = _mm_slli_epi32(idx128, 3); + __m128i r128_0 = _mm_shuffle_epi8(t_[0].m128i, idx128); + __m128i r128_1 = _mm_shuffle_epi8(t_[1].m128i, idx128); + __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, idx128_shl3); + __m128i r128_2 = _mm_shuffle_epi8(t_[2].m128i, idx128); + __m128i r128_3 = _mm_shuffle_epi8(t_[3].m128i, idx128); + __m128i r128_23 = _mm_blendv_epi8(r128_2, r128_3, idx128_shl3); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(idx128, 2)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 64) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl4_u8 + #define vqtbl4_u8(t, idx) simde_vqtbl4_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbl4_s8(simde_int8x16x4_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl4_s8(t, idx); + #else + simde_uint8x16x4_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpret_s8_u8(simde_vqtbl4_u8(t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl4_s8 + #define vqtbl4_s8(t, idx) simde_vqtbl4_s8((t), (idx)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbl1q_u8(simde_uint8x16_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl1q_u8(t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(vec_perm(t, t, idx), vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 16)))); + #else + simde_uint8x16_private t_ = simde_uint8x16_to_private(t); + simde_uint8x16_private + r_, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m128i = _mm_shuffle_epi8(t_.m128i, _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(15)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_swizzle(t_.v128, idx_.v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 16) ? t_.values[idx_.values[i]] : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl1q_u8 + #define vqtbl1q_u8(t, idx) simde_vqtbl1q_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbl1q_s8(simde_int8x16_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl1q_s8(t, idx); + #else + return simde_vreinterpretq_s8_u8(simde_vqtbl1q_u8(simde_vreinterpretq_u8_s8(t), idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl1q_s8 + #define vqtbl1q_s8(t, idx) simde_vqtbl1q_s8((t), (idx)) +#endif + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbl2q_u8(simde_uint8x16x2_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl2q_u8(t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(vec_perm(t.val[0], t.val[1], idx), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 32)))); + #else + simde_uint8x16_private t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) }; + simde_uint8x16_private + r_, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(31))); + __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); + __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); + r_.m128i = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx_.m128i, 3)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), + wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 32) ? t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl2q_u8 + #define vqtbl2q_u8(t, idx) simde_vqtbl2q_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbl2q_s8(simde_int8x16x2_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl2q_s8(t, idx); + #else + simde_uint8x16x2_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpretq_s8_u8(simde_vqtbl2q_u8(t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl2q_s8 + #define vqtbl2q_s8(t, idx) simde_vqtbl2q_s8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbl3q_u8(simde_uint8x16x3_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl3q_u8(t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_2 = vec_perm(t.val[2], t.val[2], idx); + return vec_and(vec_sel(r_01, r_2, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 48)))); + #else + simde_uint8x16_private t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), + simde_uint8x16_to_private(t.val[2]) }; + simde_uint8x16_private + r_, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(47))); + __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); + __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); + __m128i r_01 = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx_.m128i, 3)); + __m128i r_2 = _mm_shuffle_epi8(t_[2].m128i, idx_.m128i); + r_.m128i = _mm_blendv_epi8(r_01, r_2, _mm_slli_epi32(idx_.m128i, 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), + wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), + wasm_i8x16_swizzle(t_[2].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(32)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 48) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl3q_u8 + #define vqtbl3q_u8(t, idx) simde_vqtbl3q_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbl3q_s8(simde_int8x16x3_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl3q_s8(t, idx); + #else + simde_uint8x16x3_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpretq_s8_u8(simde_vqtbl3q_u8(t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl3q_s8 + #define vqtbl3q_s8(t, idx) simde_vqtbl3q_s8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbl4q_u8(simde_uint8x16x4_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl4q_u8(t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_23 = vec_perm(t.val[2], t.val[3], idx); + return vec_and(vec_sel(r_01, r_23, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 64)))); + #else + simde_uint8x16_private t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), + simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) }; + simde_uint8x16_private + r_, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(63))); + __m128i idx_shl3 = _mm_slli_epi32(idx_.m128i, 3); + __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); + __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); + __m128i r_01 = _mm_blendv_epi8(r_0, r_1, idx_shl3); + __m128i r_2 = _mm_shuffle_epi8(t_[2].m128i, idx_.m128i); + __m128i r_3 = _mm_shuffle_epi8(t_[3].m128i, idx_.m128i); + __m128i r_23 = _mm_blendv_epi8(r_2, r_3, idx_shl3); + r_.m128i = _mm_blendv_epi8(r_01, r_23, _mm_slli_epi32(idx_.m128i, 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), + wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), + wasm_v128_or(wasm_i8x16_swizzle(t_[2].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(32))), + wasm_i8x16_swizzle(t_[3].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(48))))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 64) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : 0; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl4q_u8 + #define vqtbl4q_u8(t, idx) simde_vqtbl4q_u8((t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbl4q_s8(simde_int8x16x4_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbl4q_s8(t, idx); + #else + simde_uint8x16x4_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpretq_s8_u8(simde_vqtbl4q_u8(t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbl4q_s8 + #define vqtbl4q_s8(t, idx) simde_vqtbl4q_s8((t), (idx)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QTBL_H) */ +/* :: End simde/arm/neon/qtbl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/qtbx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_QTBX_H) +#define SIMDE_ARM_NEON_QTBX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbx1_u8(simde_uint8x8_t a, simde_uint8x16_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx1_u8(a, t, idx); + #else + simde_uint8x16_private t_ = simde_uint8x16_to_private(t); + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(15))); + __m128i r128 = _mm_shuffle_epi8(t_.m128i, idx128); + r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a_.m64), idx128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 16) ? 
t_.values[idx_.values[i]] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx1_u8 + #define vqtbx1_u8(a, t, idx) simde_vqtbx1_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbx1_s8(simde_int8x8_t a, simde_int8x16_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx1_s8(a, t, idx); + #else + return simde_vreinterpret_s8_u8(simde_vqtbx1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpretq_u8_s8(t), idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx1_s8 + #define vqtbx1_s8(a, t, idx) simde_vqtbx1_s8((a), (t), (idx)) +#endif + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbx2_u8(simde_uint8x8_t a, simde_uint8x16x2_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx2_u8(a, t, idx); + #else + simde_uint8x16_private t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) }; + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(31))); + __m128i r128_0 = _mm_shuffle_epi8(t_[0].m128i, idx128); + __m128i r128_1 = _mm_shuffle_epi8(t_[1].m128i, idx128); + __m128i r128 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3)); + r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a_.m64), idx128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 32) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx2_u8 + #define vqtbx2_u8(a, t, idx) simde_vqtbx2_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbx2_s8(simde_int8x8_t a, simde_int8x16x2_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx2_s8(a, t, idx); + #else + simde_uint8x16x2_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpret_s8_u8(simde_vqtbx2_u8(simde_vreinterpret_u8_s8(a), t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx2_s8 + #define vqtbx2_s8(a, t, idx) simde_vqtbx2_s8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbx3_u8(simde_uint8x8_t a, simde_uint8x16x3_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx3_u8(a, t, idx); + #else + simde_uint8x16_private t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) }; + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(47))); + __m128i r128_0 = _mm_shuffle_epi8(t_[0].m128i, idx128); + __m128i r128_1 = _mm_shuffle_epi8(t_[1].m128i, idx128); + __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, _mm_slli_epi32(idx128, 3)); + __m128i r128_2 = _mm_shuffle_epi8(t_[2].m128i, idx128); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(idx128, 2)); + r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a_.m64), idx128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 48) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx3_u8 + #define vqtbx3_u8(a, t, idx) simde_vqtbx3_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbx3_s8(simde_int8x8_t a, simde_int8x16x3_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx3_s8(a, t, idx); + #else + simde_uint8x16x3_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpret_s8_u8(simde_vqtbx3_u8(simde_vreinterpret_u8_s8(a), t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx3_s8 + #define vqtbx3_s8(a, t, idx) simde_vqtbx3_s8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqtbx4_u8(simde_uint8x8_t a, simde_uint8x16x4_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx4_u8(a, t, idx); + #else + simde_uint8x16_private t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) }; + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + idx_ = simde_uint8x8_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i idx128 = _mm_set1_epi64(idx_.m64); + idx128 = _mm_or_si128(idx128, _mm_cmpgt_epi8(idx128, _mm_set1_epi8(63))); + __m128i idx128_shl3 = _mm_slli_epi32(idx128, 3); + __m128i r128_0 = _mm_shuffle_epi8(t_[0].m128i, idx128); + __m128i r128_1 = _mm_shuffle_epi8(t_[1].m128i, idx128); + __m128i r128_01 = _mm_blendv_epi8(r128_0, r128_1, idx128_shl3); + __m128i r128_2 = _mm_shuffle_epi8(t_[2].m128i, idx128); + __m128i r128_3 = _mm_shuffle_epi8(t_[3].m128i, idx128); + __m128i r128_23 = _mm_blendv_epi8(r128_2, r128_3, idx128_shl3); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(idx128, 2)); + r128 = _mm_blendv_epi8(r128, _mm_set1_epi64(a_.m64), idx128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 64) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx4_u8 + #define vqtbx4_u8(a, t, idx) simde_vqtbx4_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqtbx4_s8(simde_int8x8_t a, simde_int8x16x4_t t, simde_uint8x8_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx4_s8(a, t, idx); + #else + simde_uint8x16x4_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpret_s8_u8(simde_vqtbx4_u8(simde_vreinterpret_u8_s8(a), t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx4_s8 + #define vqtbx4_s8(a, t, idx) simde_vqtbx4_s8((a), (t), (idx)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbx1q_u8(simde_uint8x16_t a, simde_uint8x16_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx1q_u8(a, t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sel(a, + vec_perm(t, t, idx), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 16)))); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + t_ = simde_uint8x16_to_private(t), + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(15))); + r_.m128i = _mm_blendv_epi8(_mm_shuffle_epi8(t_.m128i, idx_.m128i), a_.m128i, idx_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_i8x16_swizzle(t_.v128, idx_.v128), + wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(15)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 16) ? 
t_.values[idx_.values[i]] : a_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx1q_u8 + #define vqtbx1q_u8(a, t, idx) simde_vqtbx1q_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbx1q_s8(simde_int8x16_t a, simde_int8x16_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx1q_s8(a, t, idx); + #else + return simde_vreinterpretq_s8_u8(simde_vqtbx1q_u8(simde_vreinterpretq_u8_s8(a), simde_vreinterpretq_u8_s8(t), idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx1q_s8 + #define vqtbx1q_s8(a, t, idx) simde_vqtbx1q_s8((a), (t), (idx)) +#endif + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbx2q_u8(simde_uint8x16_t a, simde_uint8x16x2_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx2q_u8(a, t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sel(a, vec_perm(t.val[0], t.val[1], idx), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 32)))); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + t_[2] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]) }, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(31))); + __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); + __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); + __m128i r = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx_.m128i, 3)); + r_.m128i = _mm_blendv_epi8(r, a_.m128i, idx_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), + wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), + wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(31)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 32) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx2q_u8 + #define vqtbx2q_u8(a, t, idx) simde_vqtbx2q_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbx2q_s8(simde_int8x16_t a, simde_int8x16x2_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx2q_s8(a, t, idx); + #else + simde_uint8x16x2_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpretq_s8_u8(simde_vqtbx2q_u8(simde_vreinterpretq_u8_s8(a), t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx2q_s8 + #define vqtbx2q_s8(a, t, idx) simde_vqtbx2q_s8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbx3q_u8(simde_uint8x16_t a, simde_uint8x16x3_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx3q_u8(a, t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_2 = vec_perm(t.val[2], t.val[2], idx); + return vec_sel(a, + vec_sel(r_01, r_2, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 48)))); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + t_[3] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]) }, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(47))); + __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); + __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); + __m128i r_01 = _mm_blendv_epi8(r_0, r_1, _mm_slli_epi32(idx_.m128i, 3)); + __m128i r_2 = _mm_shuffle_epi8(t_[2].m128i, idx_.m128i); + __m128i r = _mm_blendv_epi8(r_01, r_2, _mm_slli_epi32(idx_.m128i, 2)); + r_.m128i = _mm_blendv_epi8(r, a_.m128i, idx_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), + wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), + wasm_v128_or(wasm_i8x16_swizzle(t_[2].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(32))) , + wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(47))))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 48) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx3q_u8 + #define vqtbx3q_u8(a, t, idx) simde_vqtbx3q_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbx3q_s8(simde_int8x16_t a, simde_int8x16x3_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx3q_s8(a, t, idx); + #else + simde_uint8x16x3_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpretq_s8_u8(simde_vqtbx3q_u8(simde_vreinterpretq_u8_s8(a), t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx3q_s8 + #define vqtbx3q_s8(a, t, idx) simde_vqtbx3q_s8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqtbx4q_u8(simde_uint8x16_t a, simde_uint8x16x4_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx4q_u8(a, t, idx); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_01 = vec_perm(t.val[0], t.val[1], idx); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r_23 = vec_perm(t.val[2], t.val[3], idx); + return vec_sel(a, + vec_sel(r_01, r_23, vec_cmpgt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 31)))), + vec_cmplt(idx, vec_splats(HEDLEY_STATIC_CAST(unsigned char, 64)))); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + t_[4] = { simde_uint8x16_to_private(t.val[0]), simde_uint8x16_to_private(t.val[1]), simde_uint8x16_to_private(t.val[2]), simde_uint8x16_to_private(t.val[3]) }, + idx_ = simde_uint8x16_to_private(idx); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + idx_.m128i = _mm_or_si128(idx_.m128i, _mm_cmpgt_epi8(idx_.m128i, _mm_set1_epi8(63))); + __m128i idx_shl3 = _mm_slli_epi32(idx_.m128i, 3); + __m128i r_0 = _mm_shuffle_epi8(t_[0].m128i, idx_.m128i); + __m128i r_1 = _mm_shuffle_epi8(t_[1].m128i, idx_.m128i); + __m128i r_01 = _mm_blendv_epi8(r_0, r_1, idx_shl3); + __m128i r_2 = _mm_shuffle_epi8(t_[2].m128i, idx_.m128i); + __m128i r_3 = _mm_shuffle_epi8(t_[3].m128i, idx_.m128i); + __m128i r_23 = _mm_blendv_epi8(r_2, r_3, idx_shl3); + __m128i r = _mm_blendv_epi8(r_01, r_23, _mm_slli_epi32(idx_.m128i, 2)); + r_.m128i = _mm_blendv_epi8(r, a_.m128i, idx_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_or(wasm_v128_or(wasm_v128_or(wasm_i8x16_swizzle(t_[0].v128, idx_.v128), + wasm_i8x16_swizzle(t_[1].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(16)))), + wasm_v128_or(wasm_i8x16_swizzle(t_[2].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(32))), + wasm_i8x16_swizzle(t_[3].v128, wasm_i8x16_sub(idx_.v128, wasm_i8x16_splat(48))))), + wasm_v128_and(a_.v128, wasm_u8x16_gt(idx_.v128, wasm_i8x16_splat(63)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (idx_.values[i] < 64) ? 
t_[idx_.values[i] / 16].values[idx_.values[i] & 15] : a_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx4q_u8 + #define vqtbx4q_u8(a, t, idx) simde_vqtbx4q_u8((a), (t), (idx)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqtbx4q_s8(simde_int8x16_t a, simde_int8x16x4_t t, simde_uint8x16_t idx) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqtbx4q_s8(a, t, idx); + #else + simde_uint8x16x4_t t_; + simde_memcpy(&t_, &t, sizeof(t_)); + return simde_vreinterpretq_s8_u8(simde_vqtbx4q_u8(simde_vreinterpretq_u8_s8(a), t_, idx)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqtbx4q_s8 + #define vqtbx4q_s8(a, t, idx) simde_vqtbx4q_s8((a), (t), (idx)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QTBX_H) */ +/* :: End simde/arm/neon/qtbx.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rbit.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +/* The GFNI implementation is based on Wojciech Muła's work at + * http://0x80.pl/articles/avx512-galois-field-for-bit-shuffling.html#bit-shuffling via + * https://github.com/InstLatx64/InstLatX64_Demo/blob/49c27effdfd5a45f27e0ccb6e2f3be5f27c3845d/GFNI_Demo.h#L173 */ + +#if !defined(SIMDE_ARM_NEON_RBIT_H) +#define SIMDE_ARM_NEON_RBIT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrbit_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrbit_u8(a); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_X86_GFNI_NATIVE) + __m128i tmp = _mm_movpi64_epi64(a_.m64); + tmp = _mm_gf2p8affine_epi64_epi8(tmp, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); + r_.m64 = _mm_movepi64_pi64(tmp); + #elif defined(SIMDE_X86_MMX_NATIVE) + __m64 mask; + mask = _mm_set1_pi8(0x55); + a_.m64 = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a_.m64, 1)), _mm_and_si64(mask, _mm_srli_pi16(a_.m64, 1))); + mask = _mm_set1_pi8(0x33); + a_.m64 = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a_.m64, 2)), _mm_and_si64(mask, _mm_srli_pi16(a_.m64, 2))); + mask = _mm_set1_pi8(0x0F); + r_.m64 = _mm_or_si64(_mm_andnot_si64(mask, _mm_slli_pi16(a_.m64, 4)), _mm_and_si64(mask, _mm_srli_pi16(a_.m64, 4))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) + r_.values[i] = __builtin_bitreverse8(a_.values[i]); + #else + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (((a_.values[i] * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101)) >> 32); + #endif + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrbit_u8 + #define vrbit_u8(a) simde_vrbit_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrbit_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrbit_s8(a); + #else + return simde_vreinterpret_s8_u8(simde_vrbit_u8(simde_vreinterpret_u8_s8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrbit_s8 + #define vrbit_s8(a) simde_vrbit_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrbitq_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrbitq_u8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) shift; + shift = vec_splat_u8(1); + a = vec_sel(vec_sl(a, shift), vec_sr(a, shift), vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x55))); + shift = vec_splat_u8(2); + a = vec_sel(vec_sl(a, shift), vec_sr(a, shift), vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x33))); + shift = vec_splat_u8(4); + return vec_or(vec_sl(a, shift), vec_sr(a, shift)); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + + #if defined(SIMDE_X86_GFNI_NATIVE) + r_.m128i = _mm_gf2p8affine_epi64_epi8(a_.m128i, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i mask; + mask = 
_mm_set1_epi8(0x55); + a_.m128i = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a_.m128i, 1)), _mm_and_si128(mask, _mm_srli_epi16(a_.m128i, 1))); + mask = _mm_set1_epi8(0x33); + a_.m128i = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a_.m128i, 2)), _mm_and_si128(mask, _mm_srli_epi16(a_.m128i, 2))); + mask = _mm_set1_epi8(0x0F); + r_.m128i = _mm_or_si128(_mm_andnot_si128(mask, _mm_slli_epi16(a_.m128i, 4)), _mm_and_si128(mask, _mm_srli_epi16(a_.m128i, 4))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.v128 = wasm_v128_bitselect(wasm_u8x16_shr(a_.v128, 1), wasm_i8x16_shl(a_.v128, 1), wasm_i8x16_splat(0x55)); + a_.v128 = wasm_v128_bitselect(wasm_u8x16_shr(a_.v128, 2), wasm_i8x16_shl(a_.v128, 2), wasm_i8x16_splat(0x33)); + r_.v128 = wasm_v128_or(wasm_u8x16_shr(a_.v128, 4), wasm_i8x16_shl(a_.v128, 4)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) + r_.values[i] = __builtin_bitreverse8(a_.values[i]); + #else + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, (((a_.values[i] * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101)) >> 32); + #endif + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrbitq_u8 + #define vrbitq_u8(a) simde_vrbitq_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrbitq_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrbitq_s8(a); + #else + return simde_vreinterpretq_s8_u8(simde_vrbitq_u8(simde_vreinterpretq_u8_s8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrbitq_s8 + #define vrbitq_s8(a) simde_vrbitq_s8(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RBIT_H) */ +/* :: End simde/arm/neon/rbit.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/recpe.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_RECPE_H) +#define SIMDE_ARM_NEON_RECPE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vrecpes_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecpes_f32(a); + #else + return SIMDE_FLOAT32_C(1.0) / a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecpes_f32 + #define vrecpes_f32(a) simde_vrecpes_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vrecped_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecped_f64(a); + #else + return SIMDE_FLOAT64_C(1.0) / a; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecped_f64 + #define vrecped_f64(a) simde_vrecped_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrecpe_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrecpe_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + #if defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.values[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.values[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.values[i] = simde_vrecpes_f32(a_.values[i]); + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrecpe_f32 + #define vrecpe_f32(a) simde_vrecpe_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrecpe_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecpe_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = 1.0 / a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrecped_f64(a_.values[i]); + } + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecpe_f64 + #define vrecpe_f64(a) simde_vrecpe_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrecpeq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecpeq_f64(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = 1.0 / a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrecped_f64(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecpeq_f64 + #define vrecpeq_f64(a) simde_vrecpeq_f64((a)) +#endif 
+ +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrecpeq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrecpeq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_re(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_rcp_ps(a_.m128); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.values[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.values[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.values[i] = simde_vrecpes_f32(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrecpeq_f32 + #define vrecpeq_f32(a) simde_vrecpeq_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vrecpe_u32(simde_uint32x2_t a){ + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrecpe_u32(a); + #else + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + r_; + + SIMDE_VECTORIZE + for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + if(a_.values[i] <= 0x7FFFFFFF){ + r_.values[i] = UINT32_MAX; + } else { + uint32_t a_temp = (a_.values[i] >> 23) & 511; + a_temp = a_temp * 2 + 1; + uint32_t b = (1 << 19) / a_temp; + r_.values[i] = (b+1) / 2; + r_.values[i] = r_.values[i] << 23; + } + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrecpe_u32 + #define vrecpe_u32(a) simde_vrecpe_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vrecpeq_u32(simde_uint32x4_t a){ + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrecpeq_u32(a); + #else + simde_uint32x4_private + a_ = simde_uint32x4_to_private(a), + r_; + + SIMDE_VECTORIZE + for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + if(a_.values[i] <= 0x7FFFFFFF){ + r_.values[i] = UINT32_MAX; + } else { + uint32_t a_temp = (a_.values[i] >> 23) & 511; + a_temp = a_temp * 2 + 1; + uint32_t b = (1 << 19) / a_temp; + r_.values[i] = (b+1) / 2; + r_.values[i] = r_.values[i] << 23; + } + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrecpeq_u32 + #define vrecpeq_u32(a) simde_vrecpeq_u32((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP +#endif /* !defined(SIMDE_ARM_NEON_RECPE_H) */ +/* :: End simde/arm/neon/recpe.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/recps.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to 
whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_RECPS_H) +#define SIMDE_ARM_NEON_RECPS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vrecpss_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecpss_f32(a, b); + #else + return SIMDE_FLOAT32_C(2.0) - (a * b); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecpss_f32 + #define vrecpss_f32(a, b) simde_vrecpss_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vrecpsd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecpsd_f64(a, b); + #else + return SIMDE_FLOAT64_C(2.0) - (a * b); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecpsd_f64 + #define vrecpsd_f64(a, b) simde_vrecpsd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrecps_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecps_f64(a, b); + #else + return simde_vmls_f64(simde_vdup_n_f64(SIMDE_FLOAT64_C(2.0)), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecps_f64 + #define vrecps_f64(a, b) simde_vrecps_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrecps_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrecps_f32(a, b); + #else + return simde_vmls_f32(simde_vdup_n_f32(SIMDE_FLOAT32_C(2.0)), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrecps_f32 + #define vrecps_f32(a, b) simde_vrecps_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrecpsq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrecpsq_f64(a, b); + #else + return simde_vmlsq_f64(simde_vdupq_n_f64(SIMDE_FLOAT64_C(2.0)), a, b); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrecpsq_f64 + #define vrecpsq_f64(a, b) simde_vrecpsq_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrecpsq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrecpsq_f32(a, b); + #else + return simde_vmlsq_f32(simde_vdupq_n_f32(SIMDE_FLOAT32_C(2.0)), a, b); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrecpsq_f32 + #define vrecpsq_f32(a, b) simde_vrecpsq_f32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP +#endif /* !defined(SIMDE_ARM_NEON_RECPS_H) */ +/* :: End simde/arm/neon/recps.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rev16.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_REV16_H) +#define SIMDE_ARM_NEON_REV16_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrev16_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev16_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_set_pi8(6, 7, 4, 5, 2, 3, 0, 1)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 1]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev16_s8 + #define vrev16_s8(a) simde_vrev16_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrev16_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev16_u8(a); + #else + return simde_vreinterpret_u8_s8(simde_vrev16_s8(simde_vreinterpret_s8_u8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev16_u8 + #define vrev16_u8(a) simde_vrev16_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrev16q_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev16q_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return 
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), + vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), a))); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), + vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_reve(a)))); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 1]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev16q_s8 + #define vrev16q_s8(a) simde_vrev16q_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrev16q_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev16q_u8(a); + #else + return simde_vreinterpretq_u8_s8(simde_vrev16q_s8(simde_vreinterpretq_s8_u8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev16q_u8 + #define vrev16q_u8(a) simde_vrev16q_u8(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_REV16_H) */ +/* :: End simde/arm/neon/rev16.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rev32.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_REV32_H) +#define SIMDE_ARM_NEON_REV32_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrev32_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_set_pi8(4, 5, 6, 7, 0, 1, 2, 3)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 3]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32_s8 + #define vrev32_s8(a) simde_vrev32_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vrev32_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32_s16(a); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi16(a_.m64, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 1, 0, 3, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 1]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32_s16 + #define vrev32_s16(a) simde_vrev32_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrev32_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32_u8(a); + #else + return simde_vreinterpret_u8_s8(simde_vrev32_s8(simde_vreinterpret_s8_u8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32_u8 + #define vrev32_u8(a) simde_vrev32_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vrev32_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32_u16(a); + #else + return simde_vreinterpret_u16_s16(simde_vrev32_s16(simde_vreinterpret_s16_u16(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32_u16 + #define vrev32_u16(a) simde_vrev32_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrev32q_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32q_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), + vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), a))); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), + vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(a)))); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if 
defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, + 4, 5, 6, 7, 0, 1, 2, 3)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 3]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32q_s8 + #define vrev32q_s8(a) simde_vrev32q_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vrev32q_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32q_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), + vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_reve(a)))); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, + 5, 4, 7, 6, 1, 0, 3, 2)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_shufflehi_epi16(_mm_shufflelo_epi16(a_.m128i, + (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)), + (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 1, 0, 3, 2, 5, 4, 7, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 1]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32q_s16 + #define vrev32q_s16(a) simde_vrev32q_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrev32q_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32q_u8(a); + #else + return simde_vreinterpretq_u8_s8(simde_vrev32q_s8(simde_vreinterpretq_s8_u8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32q_u8 + #define vrev32q_u8(a) simde_vrev32q_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vrev32q_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev32q_u16(a); + #else + return simde_vreinterpretq_u16_s16(simde_vrev32q_s16(simde_vreinterpretq_s16_u16(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev32q_u16 + #define vrev32q_u16(a) simde_vrev32q_u16(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_REV32_H) */ +/* :: End simde/arm/neon/rev32.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rev64.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, 
merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +/* N.B. CM: vrev64_f16 and vrev64q_f16 are omitted as + * SIMDe has no 16-bit floating point support. */ + +#if !defined(SIMDE_ARM_NEON_REV64_H) +#define SIMDE_ARM_NEON_REV64_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrev64_s8(simde_int8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_s8(a); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_set_pi8(0, 1, 2, 3, 4, 5, 6, 7)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, a_.values, 7, 6, 5, 4, 3, 2, 1, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 7]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_s8 + #define vrev64_s8(a) simde_vrev64_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vrev64_s16(simde_int16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_s16(a); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi16(a_.m64, (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, a_.values, 3, 2, 1, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 3]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_s16 + #define vrev64_s16(a) simde_vrev64_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vrev64_s32(simde_int32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_s32(a); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi16(a_.m64, (1 << 6) | (0 << 4) | (3 << 2) | (2 << 0)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, 
a_.values, 1, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 1]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_s32 + #define vrev64_s32(a) simde_vrev64_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrev64_u8(simde_uint8x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_u8(a); + #else + return simde_vreinterpret_u8_s8(simde_vrev64_s8(simde_vreinterpret_s8_u8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_u8 + #define vrev64_u8(a) simde_vrev64_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vrev64_u16(simde_uint16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_u16(a); + #else + return simde_vreinterpret_u16_s16(simde_vrev64_s16(simde_vreinterpret_s16_u16(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_u16 + #define vrev64_u16(a) simde_vrev64_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vrev64_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_u32(a); + #else + return simde_vreinterpret_u32_s32(simde_vrev64_s32(simde_vreinterpret_s32_u32(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_u32 + #define vrev64_u32(a) simde_vrev64_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrev64_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64_f32(a); + #else + return simde_vreinterpret_f32_s32(simde_vrev64_s32(simde_vreinterpret_s32_f32(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64_f32 + #define vrev64_f32(a) simde_vrev64_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrev64q_s8(simde_int8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), + vec_revb(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a))); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), + vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, + 0, 1, 2, 3, 4, 5, 6, 7)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, a_.values, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 7]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_s8 + #define vrev64q_s8(a) simde_vrev64q_s8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vrev64q_s16(simde_int16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return 
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), + vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_shuffle_epi8(a_.m128i, _mm_set_epi8(9, 8, 11, 10, 13, 12, 15, 14, + 1, 0, 3, 2, 5, 4, 7, 6)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_shufflehi_epi16(_mm_shufflelo_epi16(a_.m128i, + (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)), + (0 << 6) | (1 << 4) | (2 << 2) | (3 << 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 6, 7, 4, 5, 2, 3, 0, 1, 14, 15, 12, 13, 10, 11, 8, 9); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 3, 2, 1, 0, 7, 6, 5, 4); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 3]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_s16 + #define vrev64q_s16(a) simde_vrev64q_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vrev64q_s32(simde_int32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), + vec_reve(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_reve(a)))); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_shuffle_epi32(a_.m128i, (2 << 6) | (3 << 4) | (0 << 2) | (1 << 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, a_.v128, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, a_.values, 1, 0, 3, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i ^ 1]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_s32 + #define vrev64q_s32(a) simde_vrev64q_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrev64q_u8(simde_uint8x16_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_u8(a); + #else + return simde_vreinterpretq_u8_s8(simde_vrev64q_s8(simde_vreinterpretq_s8_u8(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_u8 + #define vrev64q_u8(a) simde_vrev64q_u8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vrev64q_u16(simde_uint16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_u16(a); + #else + return simde_vreinterpretq_u16_s16(simde_vrev64q_s16(simde_vreinterpretq_s16_u16(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_u16 + #define vrev64q_u16(a) simde_vrev64q_u16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vrev64q_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_u32(a); + #else + return simde_vreinterpretq_u32_s32(simde_vrev64q_s32(simde_vreinterpretq_s32_u32(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_u32 + #define vrev64q_u32(a) simde_vrev64q_u32(a) +#endif + 
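+/* Usage sketch (illustrative only, not an upstream SIMDe definition).
+ * It shows what the vrev64 family above computes: elements are reversed
+ * within each 64-bit doubleword, i.e. byte index ^ 7, halfword index ^ 3,
+ * word index ^ 1, matching the scalar fallbacks in the implementations
+ * above.  simde_vld1q_u32/simde_vst1q_u32 are assumed to be provided by
+ * the ld1/st1 portions of this header.
+ *
+ *   uint32_t in[4] = { 1, 2, 3, 4 };
+ *   uint32_t out[4];
+ *   simde_uint32x4_t v = simde_vld1q_u32(in);
+ *   simde_vst1q_u32(out, simde_vrev64q_u32(v));
+ *   // out is { 2, 1, 4, 3 }: the two 32-bit words inside each 64-bit
+ *   // lane are swapped (index ^ 1), while the lanes themselves stay put.
+ */
+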
+SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrev64q_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrev64q_f32(a); + #else + return simde_vreinterpretq_f32_s32(simde_vrev64q_s32(simde_vreinterpretq_s32_f32(a))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrev64q_f32 + #define vrev64q_f32(a) simde_vrev64q_f32(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_REV64_H) */ +/* :: End simde/arm/neon/rev64.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rhadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +/* Formula to average two unsigned integers without overflow is from Hacker's Delight (ISBN 978-0-321-84268-8). + * https://web.archive.org/web/20180831033349/http://hackersdelight.org/basics2.pdf#G525596 + * avg_u = (x | y) - ((x ^ y) >> 1); + * + * Formula to average two signed integers (without widening): + * avg_s = (x >> 1) + (y >> 1) + ((x | y) & 1); // use arithmetic shifts + * + * If hardware has avg_u but not avg_s then rebase input to be unsigned. + * For example: s8 (-128..127) can be converted to u8 (0..255) by adding +128. + * Idea borrowed from Intel's ARM_NEON_2_x86_SSE project. 
+ * https://github.com/intel/ARM_NEON_2_x86_SSE/blob/3c9879bf2dbef3274e0ed20f93cb8da3a2115ba1/NEON_2_SSE.h#L3171 + * avg_s8 = avg_u8(a ^ 0x80, b ^ 0x80) ^ 0x80; + */ + +#if !defined(SIMDE_ARM_NEON_RHADD_H) +#define SIMDE_ARM_NEON_RHADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrhadd_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhadd_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int8_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int8_t, 1))); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhadd_s8 + #define vrhadd_s8(a, b) simde_vrhadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vrhadd_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhadd_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi16(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, 1))), + _mm_add_pi16(_m_psrawi(a_.m64, 1), _m_psrawi(b_.m64, 1))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int16_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int16_t, 1))); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhadd_s16 + #define vrhadd_s16(a, b) simde_vrhadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vrhadd_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhadd_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi32(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, 1))), + _mm_add_pi32(_m_psradi(a_.m64, 1), _m_psradi(b_.m64, 1))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int32_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = 
(((a_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int32_t, 1))); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhadd_s32 + #define vrhadd_s32(a, b) simde_vrhadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrhadd_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhadd_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100762) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint8_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint8_t, 1))); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhadd_u8 + #define vrhadd_u8(a, b) simde_vrhadd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vrhadd_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhadd_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi16(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi16(HEDLEY_STATIC_CAST(int16_t, 1))), + _mm_add_pi16(_mm_srli_pi16(a_.m64, 1), _mm_srli_pi16(b_.m64, 1))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint16_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint16_t, 1))); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhadd_u16 + #define vrhadd_u16(a, b) simde_vrhadd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vrhadd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhadd_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_add_pi32(_m_pand(_m_por(a_.m64, b_.m64), _mm_set1_pi32(HEDLEY_STATIC_CAST(int32_t, 1))), + _mm_add_pi32(_mm_srli_pi32(a_.m64, 1), _mm_srli_pi32(b_.m64, 1))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_100760) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(uint32_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] 
= (((a_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(uint32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(uint32_t, 1))); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhadd_u32 + #define vrhadd_u32(a, b) simde_vrhadd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrhaddq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhaddq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + const __m128i msb = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, -128)); /* 0x80 */ + r_.m128i = _mm_xor_si128(_mm_avg_epu8(_mm_xor_si128(a_.m128i, msb), _mm_xor_si128(b_.m128i, msb)), msb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t msb = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, -128)); /* 0x80 */ + r_.v128 = wasm_v128_xor(wasm_u8x16_avgr(wasm_v128_xor(a_.v128, msb), wasm_v128_xor(b_.v128, msb)), msb); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int8_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int8_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int8_t, 1))); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhaddq_s8 + #define vrhaddq_s8(a, b) simde_vrhaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vrhaddq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhaddq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + const __m128i msb = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, -32768)); /* 0x8000 */ + r_.m128i = _mm_xor_si128(_mm_avg_epu16(_mm_xor_si128(a_.m128i, msb), _mm_xor_si128(b_.m128i, msb)), msb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t msb = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, -32768)); /* 0x8000 */ + r_.v128 = wasm_v128_xor(wasm_u16x8_avgr(wasm_v128_xor(a_.v128, msb), wasm_v128_xor(b_.v128, msb)), msb); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int16_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int16_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int16_t, 1))); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhaddq_s16 + #define vrhaddq_s16(a, b) simde_vrhaddq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vrhaddq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhaddq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = 
simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi32(_mm_and_si128(_mm_or_si128(a_.m128i, b_.m128i), _mm_set1_epi32(1)), + _mm_add_epi32(_mm_srai_epi32(a_.m128i, 1), _mm_srai_epi32(b_.m128i, 1))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_add(wasm_v128_and(wasm_v128_or(a_.v128, b_.v128), wasm_i32x4_splat(1)), + wasm_i32x4_add(wasm_i32x4_shr(a_.v128, 1), wasm_i32x4_shr(b_.v128, 1))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (((a_.values >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values | b_.values) & HEDLEY_STATIC_CAST(int32_t, 1))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (((a_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1)) + (b_.values[i] >> HEDLEY_STATIC_CAST(int32_t, 1))) + ((a_.values[i] | b_.values[i]) & HEDLEY_STATIC_CAST(int32_t, 1))); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhaddq_s32 + #define vrhaddq_s32(a, b) simde_vrhaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrhaddq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhaddq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_avg_epu8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_avgr(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (a_.values | b_.values) - ((a_.values ^ b_.values) >> HEDLEY_STATIC_CAST(uint8_t, 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] | b_.values[i]) - ((a_.values[i] ^ b_.values[i]) >> HEDLEY_STATIC_CAST(uint8_t, 1)); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhaddq_u8 + #define vrhaddq_u8(a, b) simde_vrhaddq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vrhaddq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhaddq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_avg_epu16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_avgr(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (a_.values | b_.values) - ((a_.values ^ b_.values) >> HEDLEY_STATIC_CAST(uint16_t, 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] | b_.values[i]) - ((a_.values[i] ^ b_.values[i]) >> HEDLEY_STATIC_CAST(uint16_t, 1)); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhaddq_u16 + #define vrhaddq_u16(a, b) simde_vrhaddq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vrhaddq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrhaddq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = 
simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi32(_mm_or_si128(a_.m128i, b_.m128i), _mm_srli_epi32(_mm_xor_si128(a_.m128i, b_.m128i), 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_sub(wasm_v128_or(a_.v128, b_.v128), wasm_u32x4_shr(wasm_v128_xor(a_.v128, b_.v128), 1)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = (a_.values | b_.values) - ((a_.values ^ b_.values) >> HEDLEY_STATIC_CAST(uint32_t, 1)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (a_.values[i] | b_.values[i]) - ((a_.values[i] ^ b_.values[i]) >> HEDLEY_STATIC_CAST(uint32_t, 1)); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrhaddq_u32 + #define vrhaddq_u32(a, b) simde_vrhaddq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RHADD_H) */ +/* :: End simde/arm/neon/rhadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rnd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RND_H) +#define SIMDE_ARM_NEON_RND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrnd_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrnd_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_truncf(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrnd_f32 + #define vrnd_f32(a) simde_vrnd_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrnd_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrnd_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_trunc(a_.values[i]); + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrnd_f64 + #define vrnd_f64(a) simde_vrnd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrndq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_trunc(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_ZERO); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_trunc_ps(a_.m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_truncf(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndq_f32 + #define vrndq_f32(a) simde_vrndq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrndq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndq_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_trunc(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_ZERO); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + r_.m128d = _mm_trunc_pd(a_.m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_trunc(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndq_f64 + #define vrndq_f64(a) simde_vrndq_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RND_H) */ +/* :: End simde/arm/neon/rnd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rndm.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and 
associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RNDM_H) +#define SIMDE_ARM_NEON_RNDM_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrndm_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndm_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_floorf(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndm_f32 + #define vrndm_f32(a) simde_vrndm_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrndm_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndm_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_floor(a_.values[i]); + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndm_f64 + #define vrndm_f64(a) simde_vrndm_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrndmq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndmq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_floor(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_NEG_INF); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_floor_ps(a_.m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_floorf(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndmq_f32 + #define vrndmq_f32(a) simde_vrndmq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrndmq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndmq_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_floor(a); + #else + simde_float64x2_private + 
r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_NEG_INF); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + r_.m128d = _mm_floor_pd(a_.m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_floor(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndmq_f64 + #define vrndmq_f64(a) simde_vrndmq_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RNDM_H) */ +/* :: End simde/arm/neon/rndm.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rndi.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RNDI_H) +#define SIMDE_ARM_NEON_RNDI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrndi_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + return vrndi_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_nearbyintf(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndi_f32 + #define vrndi_f32(a) simde_vrndi_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrndi_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + return vrndi_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_nearbyint(a_.values[i]); + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndi_f64 + #define vrndi_f64(a) simde_vrndi_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrndiq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + return vrndiq_f32(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_CUR_DIRECTION); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_nearbyintf(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndiq_f32 + #define vrndiq_f32(a) simde_vrndiq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrndiq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + return vrndiq_f64(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_CUR_DIRECTION); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_nearbyint(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndiq_f64 + #define vrndiq_f64(a) simde_vrndiq_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RNDI_H) */ +/* :: End simde/arm/neon/rndi.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rndn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, 
publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RNDN_H) +#define SIMDE_ARM_NEON_RNDN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vrndns_f32(simde_float32_t a) { + #if \ + defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ + (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && \ + (!defined(HEDLEY_GCC_VERSION) || (defined(SIMDE_ARM_NEON_A64V8_NATIVE) && HEDLEY_GCC_VERSION_CHECK(8,0,0))) + return vrndns_f32(a); + #else + return simde_math_roundevenf(a); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vrndns_f32 + #define vrndns_f32(a) simde_vrndns_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrndn_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndn_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrndns_f32(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vrndn_f32 + #define vrndn_f32(a) simde_vrndn_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrndn_f64(simde_float64x1_t a) { + #if \ + defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndn_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_roundeven(a_.values[i]); + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vrndn_f64 + #define vrndn_f64(a) simde_vrndn_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrndnq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndnq_f32(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_NEAREST_INT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrndns_f32(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vrndnq_f32 + #define vrndnq_f32(a) simde_vrndnq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t 
+simde_vrndnq_f64(simde_float64x2_t a) { + #if \ + defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndnq_f64(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_NEAREST_INT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_roundeven(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vrndnq_f64 + #define vrndnq_f64(a) simde_vrndnq_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RNDN_H) */ +/* :: End simde/arm/neon/rndn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rndp.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RNDP_H) +#define SIMDE_ARM_NEON_RNDP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrndp_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndp_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_ceilf(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndp_f32 + #define vrndp_f32(a) simde_vrndp_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrndp_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndp_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_ceil(a_.values[i]); + } + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndp_f64 + #define vrndp_f64(a) simde_vrndp_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrndpq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vrndpq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_ceil(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128 = _mm_round_ps(a_.m128, _MM_FROUND_TO_POS_INF); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_ceil_ps(a_.m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_ceilf(a_.values[i]); + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrndpq_f32 + #define vrndpq_f32(a) simde_vrndpq_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrndpq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrndpq_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_ceil(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128d = _mm_round_pd(a_.m128d, _MM_FROUND_TO_POS_INF); + #elif defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + r_.m128d = _mm_ceil_pd(a_.m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_ceil(a_.values[i]); + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrndpq_f64 + #define vrndpq_f64(a) simde_vrndpq_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RNDP_H) */ +/* :: End simde/arm/neon/rndp.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rshl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of 
this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_RSHL_H) +#define SIMDE_ARM_NEON_RSHL_H +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2017-2020 Evan Nemerson
+ *   2015-2017 John W. Ratcliff
+ *   2015      Brandon Rowlett
+ *   2015      Ken Fast
+ */
+
+#if !defined(SIMDE_X86_SSE_H)
+#define SIMDE_X86_SSE_H
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/x86/mmx.h :: */
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2017-2020 Evan Nemerson
+ */
+
+#if !defined(SIMDE_X86_MMX_H)
+#define SIMDE_X86_MMX_H
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+
+#if defined(SIMDE_X86_MMX_NATIVE)
+  #define SIMDE_X86_MMX_USE_NATIVE_TYPE
+#elif defined(SIMDE_X86_SSE_NATIVE)
+  #define SIMDE_X86_MMX_USE_NATIVE_TYPE
+#endif
+
+#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
+  #include <mmintrin.h>
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #include <arm_neon.h>
+#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+  #include <loongson-mmiintrin.h>
+#endif
+
+#include <stdint.h>
+#include <limits.h>
+
+SIMDE_BEGIN_DECLS_
+
+typedef union {
+  #if defined(SIMDE_VECTOR_SUBSCRIPT)
+    SIMDE_ALIGN_TO_8 int8_t          i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 int16_t        i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 int32_t        i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 int64_t        i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 uint8_t         u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 uint16_t       u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 uint32_t       u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 uint64_t       u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 simde_float32  f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 int_fast32_t  i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+    SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+  #else
+    SIMDE_ALIGN_TO_8 int8_t          i8[8];
+    SIMDE_ALIGN_TO_8 int16_t        i16[4];
+    SIMDE_ALIGN_TO_8 int32_t        i32[2];
+    SIMDE_ALIGN_TO_8 int64_t        i64[1];
+    SIMDE_ALIGN_TO_8 uint8_t         u8[8];
+    SIMDE_ALIGN_TO_8 uint16_t       u16[4];
+    SIMDE_ALIGN_TO_8 uint32_t       u32[2];
+    SIMDE_ALIGN_TO_8 uint64_t       u64[1];
+    SIMDE_ALIGN_TO_8 simde_float32  f32[2];
+    SIMDE_ALIGN_TO_8 int_fast32_t  i32f[8 /
sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; 
+ #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to 
UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return 
simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) 
+ return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + 
simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + 
#endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); 
+ #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) 
+# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif 
defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define _m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 
SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; 
+#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, 
SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if 
defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + #if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction 
SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. */ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 
= SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
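+      // i.e. the remnants of `a`: `value` is { b0, 0, 0, 0 }, so the add below
+      // only changes lane 0 and leaves lanes 1-3 of `a` untouched.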
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
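+/* Usage sketch (illustrative only, not part of the simde API itself): the
+ * bitwise helpers above operate on the raw bit pattern of each 32-bit lane,
+ * so the usual SSE1 trick of building a constant mask via memcpy (the same
+ * approach simde_x_mm_abs_ps uses further down) works here as well.  The
+ * helper name `simde_example_negate_ps` is hypothetical; everything it calls
+ * (simde_mm_set1_ps, simde_mm_xor_ps, simde_memcpy) is defined in this file.
+ *
+ *   static simde__m128 simde_example_negate_ps(simde__m128 v) {
+ *     uint32_t sign_u32 = UINT32_C(0x80000000);
+ *     simde_float32 sign_f32;
+ *     simde_memcpy(&sign_f32, &sign_u32, sizeof(sign_f32));
+ *     // XOR with the sign bit flips the sign of all four lanes.
+ *     return simde_mm_xor_ps(v, simde_mm_set1_ps(sign_f32));
+ *   }
+ */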
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
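A minimal sketch (again, not part of the generated header) tying simde_mm_load_ps to the mask contract documented for simde_x_mm_select_ps above: the comparison intrinsics return all-ones or all-zeros per lane, which is exactly the kind of mask select_ps asks for. Assumptions: the include/ directory from this patch is on the include path and a C11 compiler is used (for alignas); only functions defined in this file are called.

#include <stdalign.h>
#include <stdio.h>
#include "simde/x86/sse.h"

int main(void) {
    /* simde_mm_load_ps mirrors _mm_load_ps, so keep the buffers 16-byte aligned;
     * simde_mm_loadu_ps (defined below) is the unaligned counterpart. */
    alignas(16) simde_float32 xs[4] = { 1.0f, -2.0f, 3.0f, -4.0f };
    alignas(16) simde_float32 ys[4] = { 0.5f,  0.5f, 0.5f,  0.5f };

    simde__m128 x = simde_mm_load_ps(xs);
    simde__m128 y = simde_mm_load_ps(ys);

    /* Per-lane blend: take x where x > y, otherwise y. */
    simde__m128 mask   = simde_mm_cmpgt_ps(x, y);
    simde__m128 larger = simde_x_mm_select_ps(y, x, mask);

    printf("lane 0 of larger = %f\n", simde_mm_cvtss_f32(larger));  /* 1.000000 */
    return 0;
}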
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i];
+      }
+    #endif
+
+    return simde__m64_from_private(r_);
+  #endif
+}
+#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
+# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_max_ss (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_max_ss(a, b);
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+    return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+    return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0);
+      r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
+    #else
+      r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
+      r_.f32[1] = a_.f32[1];
+      r_.f32[2] = a_.f32[2];
+      r_.f32[3] = a_.f32[3];
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_min_pi16 (simde__m64 a, simde__m64 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
+    return _mm_min_pi16(a, b);
+  #else
+    simde__m64_private
+      r_,
+      a_ = simde__m64_to_private(a),
+      b_ = simde__m64_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
+      }
+    #endif
+
+    return simde__m64_from_private(r_);
+  #endif
+}
+#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
+# define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_min_ps (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_min_ps(a, b);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
+      #if defined(SIMDE_FAST_NANS)
+        r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
+      #else
+        r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32));
+      #endif
+    #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32);
+    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+      uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32);
+      r_.f32 =
+        HEDLEY_REINTERPRET_CAST(
+          __typeof__(r_.f32),
+          ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) |
+            (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m)
+          )
+        );
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
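A minimal usage sketch (illustrative only, not part of the imported header): it assumes the amalgamated header is reachable as "simde/x86/sse.h" (matching this patch's include/ layout), loads a 4x4 matrix into four simde__m128 rows, transposes it in place with the SIMDE_MM_TRANSPOSE4_PS macro defined above, and takes an element-wise difference with simde_mm_sub_ps.

/* Illustrative sketch, assuming -Iinclude so that "simde/x86/sse.h" resolves. */
#include <stdio.h>
#include "simde/x86/sse.h"

int main(void) {
  /* Four rows of a 4x4 matrix; simde_mm_set_ps takes elements high-to-low,
   * so row0 holds {0, 1, 2, 3}, row1 holds {4, 5, 6, 7}, and so on. */
  simde__m128 row0 = simde_mm_set_ps( 3.0f,  2.0f,  1.0f,  0.0f);
  simde__m128 row1 = simde_mm_set_ps( 7.0f,  6.0f,  5.0f,  4.0f);
  simde__m128 row2 = simde_mm_set_ps(11.0f, 10.0f,  9.0f,  8.0f);
  simde__m128 row3 = simde_mm_set_ps(15.0f, 14.0f, 13.0f, 12.0f);

  /* In-place 4x4 transpose using the macro defined above. */
  SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3);

  /* Element-wise difference of the first two transposed rows: {1, 1, 1, 1}. */
  simde__m128 diff = simde_mm_sub_ps(row1, row0);

  float out[4];
  simde_mm_storeu_ps(out, diff);  /* unaligned store of the four lanes */
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}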
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
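For example, a mask produced by a comparison such as + * simde_mm256_cmp_pd(x, y, SIMDE_CMP_LT_OQ) meets this requirement, since + * a comparison sets each lane to either all-zero or all-one bits. 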
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Notes from the implementer (Christopher Moore aka rosbif) + * + * I have tried to exactly reproduce the documented behaviour of the + * ARM NEON rshl and rshlq intrinsics. 
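+ *
+ * As a concrete illustration of the rounding behaviour (this is only a
+ * worked example of the formula used by simde_vrshld_s64 below, not an
+ * extra code path): for a negative shift count the bias term rounds to
+ * nearest, with ties going toward +infinity, so
+ * vrshld_s64( 5, -1) == ( 5 + (1 << 0)) >> 1 == 3
+ * vrshld_s64(-5, -1) == (-5 + (1 << 0)) >> 1 == -2
+ * whereas a plain arithmetic right shift would give 2 and -3.
+ *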
+ * This is complicated for the following reasons:- + * + * a) Negative shift counts shift right. + * + * b) Only the low byte of the shift count is used but the shift count + * is not limited to 8-bit values (-128 to 127). + * + * c) Overflow must be avoided when rounding, together with sign change + * warning/errors in the C versions. + * + * d) Intel SIMD is not nearly as complete as NEON and AltiVec. + * There were no intrisics with a vector shift count before AVX2 which + * only has 32 and 64-bit logical ones and only a 32-bit arithmetic + * one. The others need AVX512. There are no 8-bit shift intrinsics at + * all, even with a scalar shift count. It is surprising to use AVX2 + * and even AVX512 to implement a 64-bit vector operation. + * + * e) Many shift implementations, and the C standard, do not treat a + * shift count >= the object's size in bits as one would expect. + * (Personally I feel that > is silly but == can be useful.) + * + * Note that even the C17/18 standard does not define the behaviour of + * a right shift of a negative value. + * However Evan and I agree that all compilers likely to be used + * implement this as an arithmetic right shift with sign extension. + * If this is not the case it could be replaced by a logical right shift + * if negative values are complemented before and after the shift. + * + * Some of the SIMD translations may be slower than the portable code, + * particularly those for vectors with only one or two elements. + * But I had fun writing them ;-) + * + */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vrshld_s64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrshld_s64(a, b); + #else + b = HEDLEY_STATIC_CAST(int8_t, b); + return + (simde_math_llabs(b) >= 64) + ? 0 + : (b >= 0) + ? (a << b) + : ((a + (INT64_C(1) << (-b - 1))) >> -b); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrshld_s64 + #define vrshld_s64(a, b) simde_vrshld_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vrshld_u64(uint64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrshld_u64(a, HEDLEY_STATIC_CAST(int64_t, b)); + #else + b = HEDLEY_STATIC_CAST(int8_t, b); + return + (b >= 64) ? 0 : + (b >= 0) ? (a << b) : + (b >= -64) ? (((b == -64) ? 
0 : (a >> -b)) + ((a >> (-b - 1)) & 1)) : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrshld_u64 + #define vrshld_u64(a, b) simde_vrshld_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vrshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi16(zero, zero); + __m128i a128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); + __m128i a128_shr = _mm_srav_epi16(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), + _mm_srai_epi16(_mm_sub_epi16(a128_shr, ff), 1), + _mm_cmpgt_epi16(zero, b128)); + r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m256i zero = _mm256_setzero_si256(); + const __m256i ff = _mm256_cmpeq_epi32(zero, zero); + __m256i a256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(a_.m64)); + __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); + __m256i a256_shr = _mm256_srav_epi32(a256, _mm256_xor_si256(b256, ff)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srai_epi32(_mm256_sub_epi32(a256_shr, ff), 1), + _mm256_cmpgt_epi32(zero, b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); + r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, + (simde_math_abs(b_.values[i]) >= 8) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_s8 + #define vrshl_s8(a, b) simde_vrshl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vrshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i a128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i a128_shr = _mm_srav_epi32(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srai_epi32(_mm_sub_epi32(a128_shr, ff), 1), + _mm_cmpgt_epi32(zero, b128)); + r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, + (simde_math_abs(b_.values[i]) >= 16) ? 0 : + (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : + ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_s16 + #define vrshl_s16(a, b) simde_vrshl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vrshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i a128_shr = _mm_srav_epi32(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srai_epi32(_mm_sub_epi32(a128_shr, ff), 1), + _mm_cmpgt_epi32(zero, b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, + (simde_math_abs(b_.values[i]) >= 32) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_s32 + #define vrshl_s32(a, b) simde_vrshl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vrshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi64(zero, zero); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); + __m128i a128_shr = _mm_srav_epi64(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), + _mm_srai_epi64(_mm_sub_epi64(a128_shr, ff), 1), + _mm_cmpgt_epi64(zero, b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ones = _mm_set1_epi64x(1); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + __m128i maska = _mm_cmpgt_epi64(zero, a128); + __m128i b128_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); + __m128i a128_rnd = _mm_and_si128(_mm_srlv_epi64(a128, _mm_sub_epi64(b128_abs, ones)), ones); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128_abs), + _mm_add_epi64(_mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a128, maska), b128_abs), maska), a128_rnd), + _mm_cmpgt_epi64(zero, _mm_slli_epi64(b128, 56))); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrshld_s64(a_.values[i], b_.values[i]); + } + #endif + + return 
simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_s64 + #define vrshl_s64(a, b) simde_vrshl_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vrshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi16(zero, zero); + __m128i a128 = _mm_cvtepu8_epi16(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); + __m128i a128_shr = _mm_srlv_epi16(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), + _mm_srli_epi16(_mm_sub_epi16(a128_shr, ff), 1), + _mm_cmpgt_epi16(zero, b128)); + r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m256i zero = _mm256_setzero_si256(); + const __m256i ff = _mm256_cmpeq_epi32(zero, zero); + __m256i a256 = _mm256_cvtepu8_epi32(_mm_movpi64_epi64(a_.m64)); + __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); + __m256i a256_shr = _mm256_srlv_epi32(a256, _mm256_xor_si256(b256, ff)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srli_epi32(_mm256_sub_epi32(a256_shr, ff), 1), + _mm256_cmpgt_epi32(zero, b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); + r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, + (b_.values[i] >= 8) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (b_.values[i] >= -8) ? (((b_.values[i] == -8) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : + 0); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_u8 + #define vrshl_u8(a, b) simde_vrshl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vrshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i a128 = _mm_cvtepu16_epi32(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i a128_shr = _mm_srlv_epi32(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srli_epi32(_mm_sub_epi32(a128_shr, ff), 1), + _mm_cmpgt_epi32(zero, b128)); + r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, + (b_.values[i] >= 16) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (b_.values[i] >= -16) ? (((b_.values[i] == -16) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : + 0); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_u16 + #define vrshl_u16(a, b) simde_vrshl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vrshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i a128_shr = _mm_srlv_epi32(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srli_epi32(_mm_sub_epi32(a128_shr, ff), 1), + _mm_cmpgt_epi32(zero, b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = + (b_.values[i] >= 32) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (b_.values[i] >= -32) ? (((b_.values[i] == -32) ? 
0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : + 0; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_u32 + #define vrshl_u32(a, b) simde_vrshl_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vrshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshl_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + simde_int64x1_private b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi64(zero, zero); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); + __m128i a128_shr = _mm_srlv_epi64(a128, _mm_xor_si128(b128, ff)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), + _mm_srli_epi64(_mm_sub_epi64(a128_shr, ff), 1), + _mm_cmpgt_epi64(zero, b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + const __m128i ones = _mm_set1_epi64x(1); + const __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + __m128i b128_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); + __m128i a128_shr = _mm_srlv_epi64(a128, _mm_sub_epi64(b128_abs, ones)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128_abs), + _mm_srli_epi64(_mm_add_epi64(a128_shr, ones), 1), + _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b128, 56))); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrshld_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); +#endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshl_u64 + #define vrshl_u64(a, b) simde_vrshl_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vrshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed char) zero = vec_splats(HEDLEY_STATIC_CAST( signed char, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) max = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 8)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs; + + b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); + a_shr = vec_sra(a, vec_sub(b_abs, ones)); + return vec_and(vec_sel(vec_sl(a, b_abs), + vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), ones))), + vec_cmplt(b, zero)), + vec_cmplt(b_abs, max)); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + const __m256i zero = _mm256_setzero_si256(); + const __m256i ff = _mm256_cmpeq_epi16(zero, zero); + __m256i a256 = _mm256_cvtepi8_epi16(a_.m128i); + __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); 
+ __m256i a256_shr = _mm256_srav_epi16(a256, _mm256_xor_si256(b256, ff)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), + _mm256_srai_epi16(_mm256_sub_epi16(a256_shr, ff), 1), + _mm256_cmpgt_epi16(zero, b256)); + r_.m128i = _mm256_cvtepi16_epi8(r256); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, + (simde_math_abs(b_.values[i]) >= 8) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_s8 + #define vrshlq_s8(a, b) simde_vrshlq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vrshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed short) zero = vec_splats(HEDLEY_STATIC_CAST( signed short, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16 - 8)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) max = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF)); + SIMDE_POWER_ALTIVEC_VECTOR(signed short) a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs; + + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + ff); + a_shr = vec_sra(a, vec_sub(b_abs, ones)); + return vec_and(vec_sel(vec_sl(a, b_abs), + vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), ones))), + vec_cmplt(vec_sl(b, shift), zero)), + vec_cmplt(b_abs, max)); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi16(zero, zero); + __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); + __m128i a_shr = _mm_srav_epi16(a_.m128i, _mm_xor_si128(B, ff)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), + _mm_srai_epi16(_mm_sub_epi16(a_shr, ff), 1), + _mm_cmpgt_epi16(zero, B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) + const __m256i zero = _mm256_setzero_si256(); + const __m256i ff = _mm256_cmpeq_epi32(zero, zero); + __m256i a256 = _mm256_cvtepi16_epi32(a_.m128i); + __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); + b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); + __m256i a256_shr = _mm256_srav_epi32(a256, _mm256_xor_si256(b256, ff)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srai_epi32(_mm256_sub_epi32(a256_shr, ff), 1), + _mm256_cmpgt_epi32(zero, b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); + r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = 
HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, + (simde_math_abs(b_.values[i]) >= 16) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_s16 + #define vrshlq_s16(a, b) simde_vrshlq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vrshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed int) zero = vec_splats(HEDLEY_STATIC_CAST( signed int, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - 8)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF)); + SIMDE_POWER_ALTIVEC_VECTOR(signed int) a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs; + + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + ff); + a_shr = vec_sra(a, vec_sub(b_abs, ones)); + return vec_and(vec_sel(vec_sl(a, b_abs), + vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), ones))), + vec_cmplt(vec_sl(b, shift), zero)), + vec_cmplt(b_abs, max)); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); + __m128i a_shr = _mm_srav_epi32(a_.m128i, _mm_xor_si128(B, ff)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), + _mm_srai_epi32(_mm_sub_epi32(a_shr, ff), 1), + _mm_cmpgt_epi32(zero, B)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, + (simde_math_abs(b_.values[i]) >= 32) ? 0 : + (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : + ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i])); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_s32 + #define vrshlq_s32(a, b) simde_vrshlq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vrshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed long long) zero = vec_splats(HEDLEY_STATIC_CAST( signed long long, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - 8)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF)); + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs; + + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + ff); + a_shr = vec_sra(a, vec_sub(b_abs, ones)); + + HEDLEY_DIAGNOSTIC_PUSH + #if defined(SIMDE_BUG_CLANG_46770) + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif + return vec_and(vec_sel(vec_sl(a, b_abs), + vec_add(vec_sra(a_shr, ones), vec_and(a_shr, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), ones))), + vec_cmplt(vec_sl(b, shift), zero)), + vec_cmplt(b_abs, max)); + HEDLEY_DIAGNOSTIC_POP + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); + __m128i a_shr = _mm_srav_epi64(a_.m128i, _mm_xor_si128(B, ff)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), + _mm_srai_epi64(_mm_sub_epi64(a_shr, ff), 1), + _mm_cmpgt_epi64(zero, B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ones = _mm_set1_epi64x(1); + __m128i maska = _mm_cmpgt_epi64(zero, a_.m128i); + __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); + __m128i a_rnd = _mm_and_si128(_mm_srlv_epi64(a_.m128i, _mm_sub_epi64(b_abs, ones)), ones); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), + _mm_add_epi64(_mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a_.m128i, maska), b_abs), maska), a_rnd), + _mm_cmpgt_epi64(zero, _mm_slli_epi64(b_.m128i, 56))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrshld_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_s64 + #define vrshlq_s64(a, b) simde_vrshlq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vrshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_u8(a, b); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed char) zero = vec_splats(HEDLEY_STATIC_CAST( signed char, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) max = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 8)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs, b_abs_dec, a_shr; + + b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); + b_abs_dec = vec_sub(b_abs, ones); + a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); + return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), + vec_sr(vec_add(a_shr, ones), ones), + vec_cmplt(b, zero)); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + const __m256i zero = _mm256_setzero_si256(); + const __m256i ff = _mm256_cmpeq_epi32(zero, zero); + __m256i a256 = _mm256_cvtepu8_epi16(a_.m128i); + __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); + __m256i a256_shr = _mm256_srlv_epi16(a256, _mm256_xor_si256(b256, ff)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), + _mm256_srli_epi16(_mm256_sub_epi16(a256_shr, ff), 1), + _mm256_cmpgt_epi16(zero, b256)); + r_.m128i = _mm256_cvtepi16_epi8(r256); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, + (b_.values[i] >= 8) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (b_.values[i] >= -8) ? (((b_.values[i] == -8) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : + 0); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_u8 + #define vrshlq_u8(a, b) simde_vrshlq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vrshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed short) zero = vec_splats(HEDLEY_STATIC_CAST( signed short, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16 - 8)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) max = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs, b_abs_dec, a_shr; + + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + ff); + b_abs_dec = vec_sub(b_abs, ones); + a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); + return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), + vec_sr(vec_add(a_shr, ones), ones), + vec_cmplt(vec_sl(b, shift), zero)); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i zero = _mm_setzero_si128(); 
+ const __m128i ff = _mm_cmpeq_epi16(zero, zero); + __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); + __m128i a_shr = _mm_srlv_epi16(a_.m128i, _mm_xor_si128(B, ff)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), + _mm_srli_epi16(_mm_sub_epi16(a_shr, ff), 1), + _mm_cmpgt_epi16(zero, B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) + const __m256i zero = _mm256_setzero_si256(); + const __m256i ff = _mm256_cmpeq_epi32(zero, zero); + __m256i a256 = _mm256_cvtepu16_epi32(a_.m128i); + __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); + b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); + __m256i a256_shr = _mm256_srlv_epi32(a256, _mm256_xor_si256(b256, ff)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srli_epi32(_mm256_sub_epi32(a256_shr, ff), 1), + _mm256_cmpgt_epi32(zero, b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); + r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, + (b_.values[i] >= 16) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (b_.values[i] >= -16) ? (((b_.values[i] == -16) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : + 0); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_u16 + #define vrshlq_u16(a, b) simde_vrshlq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vrshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed int) zero = vec_splats(HEDLEY_STATIC_CAST( signed int, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - 8)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs, b_abs_dec, a_shr; + + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + ff); + b_abs_dec = vec_sub(b_abs, ones); + a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); + return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), + vec_sr(vec_add(a_shr, ones), ones), + vec_cmplt(vec_sl(b, shift), zero)); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi32(zero, zero); + __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); + __m128i a_shr = _mm_srlv_epi32(a_.m128i, _mm_xor_si128(B, ff)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), + _mm_srli_epi32(_mm_sub_epi32(a_shr, ff), 1), + _mm_cmpgt_epi32(zero, B)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) 
/ sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = + (b_.values[i] >= 32) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (b_.values[i] >= -32) ? (((b_.values[i] == -32) ? 0 : (a_.values[i] >> -b_.values[i])) + ((a_.values[i] >> (-b_.values[i] - 1)) & 1)) : + 0; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_u32 + #define vrshlq_u32(a, b) simde_vrshlq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vrshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrshlq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR( signed long long) zero = vec_splats(HEDLEY_STATIC_CAST( signed long long, 0)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ones = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 1)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) shift = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - 8)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) ff = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs, b_abs_dec, a_shr; + + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + ff); + b_abs_dec = vec_sub(b_abs, ones); + a_shr = vec_and(vec_sr(a, b_abs_dec), vec_cmplt(b_abs_dec, max)); + HEDLEY_DIAGNOSTIC_PUSH + #if defined(SIMDE_BUG_CLANG_46770) + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif + return vec_sel(vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, max)), + vec_sr(vec_add(a_shr, ones), ones), + vec_cmplt(vec_sl(b, shift), zero)); + HEDLEY_DIAGNOSTIC_POP + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + simde_int64x2_private b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i zero = _mm_setzero_si128(); + const __m128i ff = _mm_cmpeq_epi64(zero, zero); + __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); + __m128i a_shr = _mm_srlv_epi64(a_.m128i, _mm_xor_si128(B, ff)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), + _mm_srli_epi64(_mm_sub_epi64(a_shr, ff), 1), + _mm_cmpgt_epi64(zero, B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m128i ones = _mm_set1_epi64x(1); + __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); + __m128i a_shr = _mm_srlv_epi64(a_.m128i, _mm_sub_epi64(b_abs, ones)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), + _mm_srli_epi64(_mm_add_epi64(a_shr, ones), 1), + _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b_.m128i, 56))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vrshld_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshlq_u64 + #define vrshlq_u64(a, b) simde_vrshlq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RSHL_H) */ +/* :: End simde/arm/neon/rshl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY 
*/ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rshrn_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_RSHRN_N_H) +#define SIMDE_ARM_NEON_RSHRN_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrn_n_s16(a, n) vrshrn_n_s16((a), (n)) +#else + #define simde_vrshrn_n_s16(a, n) simde_vmovn_s16(simde_vrshrq_n_s16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrn_n_s16 + #define vrshrn_n_s16(a, n) simde_vrshrn_n_s16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrn_n_s32(a, n) vrshrn_n_s32((a), (n)) +#else + #define simde_vrshrn_n_s32(a, n) simde_vmovn_s32(simde_vrshrq_n_s32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrn_n_s32 + #define vrshrn_n_s32(a, n) simde_vrshrn_n_s32((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrn_n_s64(a, n) vrshrn_n_s64((a), (n)) +#else + #define simde_vrshrn_n_s64(a, n) simde_vmovn_s64(simde_vrshrq_n_s64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrn_n_s64 + #define vrshrn_n_s64(a, n) simde_vrshrn_n_s64((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrn_n_u16(a, n) vrshrn_n_u16((a), (n)) +#else + #define simde_vrshrn_n_u16(a, n) simde_vmovn_u16(simde_vrshrq_n_u16(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrn_n_u16 + #define vrshrn_n_u16(a, n) simde_vrshrn_n_u16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrn_n_u32(a, n) vrshrn_n_u32((a), (n)) +#else + #define simde_vrshrn_n_u32(a, n) simde_vmovn_u32(simde_vrshrq_n_u32(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrn_n_u32 + #define vrshrn_n_u32(a, n) simde_vrshrn_n_u32((a), (n)) 
+#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrshrn_n_u64(a, n) vrshrn_n_u64((a), (n)) +#else + #define simde_vrshrn_n_u64(a, n) simde_vmovn_u64(simde_vrshrq_n_u64(a, n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrshrn_n_u64 + #define vrshrn_n_u64(a, n) simde_vrshrn_n_u64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RSHRN_N_H) */ +/* :: End simde/arm/neon/rshrn_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rsqrte.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_RSQRTE_H) +#define SIMDE_ARM_NEON_RSQRTE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vrsqrtes_f32(simde_float32_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrtes_f32(a); + #else + #if defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + #if SIMDE_ACCURACY_PREFERENCE <= 0 + return (INT32_C(0x5F37624F) - (a >> 1)); + #else + simde_float32 x = a; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + return x; + #endif + #elif defined(simde_math_sqrtf) + return 1.0f / simde_math_sqrtf(a); + #else + HEDLEY_UNREACHABLE(); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrtes_f32 + #define vrsqrtes_f32(a) simde_vrsqrtes_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vrsqrted_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrted_f64(a); + #else + #if defined(SIMDE_IEEE754_STORAGE) + //https://www.mdpi.com/1099-4300/23/1/86/htm + simde_float64_t x = a; + 
simde_float64_t xhalf = SIMDE_FLOAT64_C(0.5) * x; + int64_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + ix = INT64_C(0x5FE6ED2102DCBFDA) - (ix >> 1); + simde_memcpy(&x, &ix, sizeof(x)); + x = x * (SIMDE_FLOAT64_C(1.50087895511633457) - xhalf * x * x); + x = x * (SIMDE_FLOAT64_C(1.50000057967625766) - xhalf * x * x); + return x; + #elif defined(simde_math_sqrtf) + return SIMDE_FLOAT64_C(1.0) / simde_math_sqrt(a_.values[i]); + #else + HEDLEY_UNREACHABLE(); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrted_f64 + #define vrsqrted_f64(a) simde_vrsqrted_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vrsqrte_u32(simde_uint32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrsqrte_u32(a); + #else + simde_uint32x2_private + a_ = simde_uint32x2_to_private(a), + r_; + + for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[i])) ; i++) { + if(a_.values[i] < 0x3FFFFFFF) { + r_.values[i] = UINT32_MAX; + } else { + uint32_t a_temp = (a_.values[i] >> 23) & 511; + if(a_temp < 256) { + a_temp = a_temp * 2 + 1; + } else { + a_temp = (a_temp >> 1) << 1; + a_temp = (a_temp + 1) * 2; + } + uint32_t b = 512; + while((a_temp * (b + 1) * (b + 1)) < (1 << 28)) + b = b + 1; + r_.values[i] = (b + 1) / 2; + r_.values[i] = r_.values[i] << 23; + } + } + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsqrte_u32 + #define vrsqrte_u32(a) simde_vrsqrte_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrsqrte_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrsqrte_f32(a); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a); + + #if defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.values[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.values[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsqrte_f32 + #define vrsqrte_f32(a) simde_vrsqrte_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrsqrte_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrte_f64(a); + #else + simde_float64x1_private + r_, + a_ = simde_float64x1_to_private(a); + + #if defined(SIMDE_IEEE754_STORAGE) + //https://www.mdpi.com/1099-4300/23/1/86/htm + SIMDE_VECTORIZE + for(size_t i = 0 ; i < (sizeof(r_.values)/sizeof(r_.values[0])) ; i++) { + simde_float64_t x = a_.values[i]; + simde_float64_t xhalf = SIMDE_FLOAT64_C(0.5) * x; + int64_t ix; + + 
simde_memcpy(&ix, &x, sizeof(ix)); + ix = INT64_C(0x5FE6ED2102DCBFDA) - (ix >> 1); + simde_memcpy(&x, &ix, sizeof(x)); + x = x * (SIMDE_FLOAT64_C(1.50087895511633457) - xhalf * x * x); + x = x * (SIMDE_FLOAT64_C(1.50000057967625766) - xhalf * x * x); + r_.values[i] = x; + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = SIMDE_FLOAT64_C(1.0) / simde_math_sqrt(a_.values[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde_float64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrte_f64 + #define vrsqrte_f64(a) simde_vrsqrte_f64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vrsqrteq_u32(simde_uint32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrsqrteq_u32(a); + #else + simde_uint32x4_private + a_ = simde_uint32x4_to_private(a), + r_; + + for(size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[i])) ; i++) { + if(a_.values[i] < 0x3FFFFFFF) { + r_.values[i] = UINT32_MAX; + } else { + uint32_t a_temp = (a_.values[i] >> 23) & 511; + if(a_temp < 256) { + a_temp = a_temp * 2 + 1; + } else { + a_temp = (a_temp >> 1) << 1; + a_temp = (a_temp + 1) * 2; + } + uint32_t b = 512; + while((a_temp * (b + 1) * (b + 1)) < (1 << 28)) + b = b + 1; + r_.values[i] = (b + 1) / 2; + r_.values[i] = r_.values[i] << 23; + } + } + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsqrteq_u32 + #define vrsqrteq_u32(a) simde_vrsqrteq_u32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrsqrteq_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrsqrteq_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_rsqrte(a); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a); + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_rsqrt_ps(a_.m128); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.values[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.values[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsqrteq_f32 + #define vrsqrteq_f32(a) simde_vrsqrteq_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrsqrteq_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrteq_f64(a); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a); + + #if defined(SIMDE_IEEE754_STORAGE) + 
//https://www.mdpi.com/1099-4300/23/1/86/htm + SIMDE_VECTORIZE + for(size_t i = 0 ; i < (sizeof(r_.values)/sizeof(r_.values[0])) ; i++) { + simde_float64_t x = a_.values[i]; + simde_float64_t xhalf = SIMDE_FLOAT64_C(0.5) * x; + int64_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + ix = INT64_C(0x5FE6ED2102DCBFDA) - (ix >> 1); + simde_memcpy(&x, &ix, sizeof(x)); + x = x * (SIMDE_FLOAT64_C(1.50087895511633457) - xhalf * x * x); + x = x * (SIMDE_FLOAT64_C(1.50000057967625766) - xhalf * x * x); + r_.values[i] = x; + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = SIMDE_FLOAT64_C(1.0) / simde_math_sqrt(a_.values[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrteq_f64 + #define vrsqrteq_f64(a) simde_vrsqrteq_f64((a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP +#endif /* !defined(SIMDE_ARM_NEON_RSQRTE_H) */ +/* :: End simde/arm/neon/rsqrte.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rsqrts.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_RSQRTS_H) +#define SIMDE_ARM_NEON_RSQRTS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vrsqrtss_f32(simde_float32_t a, simde_float32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrtss_f32(a, b); + #else + return SIMDE_FLOAT32_C(0.5) * (SIMDE_FLOAT32_C(3.0) - (a * b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrtss_f32 + #define vrsqrtss_f32(a, b) simde_vrsqrtss_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vrsqrtsd_f64(simde_float64_t a, simde_float64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrtsd_f64(a, b); + #else + return SIMDE_FLOAT64_C(0.5) * (SIMDE_FLOAT64_C(3.0) - (a * b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrtsd_f64 + #define vrsqrtsd_f64(a, b) simde_vrsqrtsd_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vrsqrts_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrsqrts_f32(a, b); + #else + return + simde_vmul_n_f32( + simde_vmls_f32( + simde_vdup_n_f32(SIMDE_FLOAT32_C(3.0)), + a, + b), + SIMDE_FLOAT32_C(0.5) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsqrts_f32 + #define vrsqrts_f32(a, b) simde_vrsqrts_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vrsqrts_f64(simde_float64x1_t a, simde_float64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrts_f64(a, b); + #else + return + simde_vmul_n_f64( + simde_vmls_f64( + simde_vdup_n_f64(SIMDE_FLOAT64_C(3.0)), + a, + b), + SIMDE_FLOAT64_C(0.5) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrts_f64 + #define vrsqrts_f64(a, b) simde_vrsqrts_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vrsqrtsq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vrsqrtsq_f32(a, b); + #else + return + simde_vmulq_n_f32( + simde_vmlsq_f32( + simde_vdupq_n_f32(SIMDE_FLOAT32_C(3.0)), + a, + b), + SIMDE_FLOAT32_C(0.5) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsqrtsq_f32 + #define vrsqrtsq_f32(a, b) simde_vrsqrtsq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vrsqrtsq_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vrsqrtsq_f64(a, b); + #else + return + simde_vmulq_n_f64( + simde_vmlsq_f64( + simde_vdupq_n_f64(SIMDE_FLOAT64_C(3.0)), + a, + b), + SIMDE_FLOAT64_C(0.5) + ); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsqrtsq_f64 + #define vrsqrtsq_f64(a, b) simde_vrsqrtsq_f64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP +#endif /* !defined(SIMDE_ARM_NEON_RSQRTS_H) */ +/* :: End simde/arm/neon/rsqrts.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY 
*/ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/rsra_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_RSRA_N_H) +#define SIMDE_ARM_NEON_RSRA_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* Remark: For these instructions + * 1 <= n <= data element size in bits + * so 0 <= n - 1 < data element size in bits + */ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsrad_n_s64(a, b, n) vrsrad_n_s64(a, b, n) +#else + #define simde_vrsrad_n_s64(a, b, n) simde_vaddd_s64((a), simde_vrshrd_n_s64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsrad_n_s64 + #define vrsrad_n_s64(a, b, n) simde_vrsrad_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsrad_n_u64(a, b, n) vrsrad_n_u64(a, b, n) +#else + #define simde_vrsrad_n_u64(a, b, n) simde_vaddd_u64((a), simde_vrshrd_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsrad_n_u64 + #define vrsrad_n_u64(a, b, n) simde_vrsrad_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_s8(a, b, n) vrsraq_n_s8((a), (b), (n)) +#else + #define simde_vrsraq_n_s8(a, b, n) simde_vaddq_s8((a), simde_vrshrq_n_s8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_s8 + #define vrsraq_n_s8(a, b, n) simde_vrsraq_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_s16(a, b, n) vrsraq_n_s16((a), (b), (n)) +#else + #define simde_vrsraq_n_s16(a, b, n) simde_vaddq_s16((a), simde_vrshrq_n_s16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_s16 + #define vrsraq_n_s16(a, b, n) simde_vrsraq_n_s16((a), (b), (n)) +#endif + +#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_s32(a, b, n) vrsraq_n_s32((a), (b), (n)) +#else + #define simde_vrsraq_n_s32(a, b, n) simde_vaddq_s32((a), simde_vrshrq_n_s32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_s32 + #define vrsraq_n_s32(a, b, n) simde_vrsraq_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_s64(a, b, n) vrsraq_n_s64((a), (b), (n)) +#else + #define simde_vrsraq_n_s64(a, b, n) simde_vaddq_s64((a), simde_vrshrq_n_s64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_s64 + #define vrsraq_n_s64(a, b, n) simde_vrsraq_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_u8(a, b, n) vrsraq_n_u8((a), (b), (n)) +#else + #define simde_vrsraq_n_u8(a, b, n) simde_vaddq_u8((a), simde_vrshrq_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_u8 + #define vrsraq_n_u8(a, b, n) simde_vrsraq_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_u16(a, b, n) vrsraq_n_u16((a), (b), (n)) +#else + #define simde_vrsraq_n_u16(a, b, n) simde_vaddq_u16((a), simde_vrshrq_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_u16 + #define vrsraq_n_u16(a, b, n) simde_vrsraq_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_u32(a, b, n) vrsraq_n_u32((a), (b), (n)) +#else + #define simde_vrsraq_n_u32(a, b, n) simde_vaddq_u32((a), simde_vrshrq_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_u32 + #define vrsraq_n_u32(a, b, n) simde_vrsraq_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsraq_n_u64(a, b, n) vrsraq_n_u64((a), (b), (n)) +#else + #define simde_vrsraq_n_u64(a, b, n) simde_vaddq_u64((a), simde_vrshrq_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsraq_n_u64 + #define vrsraq_n_u64(a, b, n) simde_vrsraq_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_s8(a, b, n) vrsra_n_s8((a), (b), (n)) +#else + #define simde_vrsra_n_s8(a, b, n) simde_vadd_s8((a), simde_vrshr_n_s8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_s8 + #define vrsra_n_s8(a, b, n) simde_vrsra_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_s16(a, b, n) vrsra_n_s16((a), (b), (n)) +#else + #define simde_vrsra_n_s16(a, b, n) simde_vadd_s16((a), simde_vrshr_n_s16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_s16 + #define vrsra_n_s16(a, b, n) simde_vrsra_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_s32(a, b, n) vrsra_n_s32((a), (b), (n)) +#else + #define simde_vrsra_n_s32(a, b, n) simde_vadd_s32((a), simde_vrshr_n_s32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_s32 + #define vrsra_n_s32(a, b, n) simde_vrsra_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_s64(a, b, n) vrsra_n_s64((a), (b), (n)) +#else + #define simde_vrsra_n_s64(a, b, n) simde_vadd_s64((a), simde_vrshr_n_s64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_s64 + #define 
vrsra_n_s64(a, b, n) simde_vrsra_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_u8(a, b, n) vrsra_n_u8((a), (b), (n)) +#else + #define simde_vrsra_n_u8(a, b, n) simde_vadd_u8((a), simde_vrshr_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_u8 + #define vrsra_n_u8(a, b, n) simde_vrsra_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_u16(a, b, n) vrsra_n_u16((a), (b), (n)) +#else + #define simde_vrsra_n_u16(a, b, n) simde_vadd_u16((a), simde_vrshr_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_u16 + #define vrsra_n_u16(a, b, n) simde_vrsra_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_u32(a, b, n) vrsra_n_u32((a), (b), (n)) +#else + #define simde_vrsra_n_u32(a, b, n) simde_vadd_u32((a), simde_vrshr_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_u32 + #define vrsra_n_u32(a, b, n) simde_vrsra_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vrsra_n_u64(a, b, n) vrsra_n_u64((a), (b), (n)) +#else + #define simde_vrsra_n_u64(a, b, n) simde_vadd_u64((a), simde_vrshr_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vrsra_n_u64 + #define vrsra_n_u64(a, b, n) simde_vrsra_n_u64((a), (b), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RSRA_N_H) */ +/* :: End simde/arm/neon/rsra_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/set_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_SET_LANE_H) +#define SIMDE_ARM_NEON_SET_LANE_H +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vset_lane_f32(simde_float32_t a, simde_float32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float32x2_t r; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_(vset_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_float32x2_private v_ = simde_float32x2_to_private(v); + v_.values[lane] = a; + r = simde_float32x2_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_f32 + #define vset_lane_f32(a, b, c) simde_vset_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vset_lane_f64(simde_float64_t a, simde_float64x1_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_float64x1_t r; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + r = vset_lane_f64(a, v, 0); + #else + simde_float64x1_private v_ = simde_float64x1_to_private(v); + v_.values[lane] = a; + r = simde_float64x1_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vset_lane_f64 + #define vset_lane_f64(a, b, c) simde_vset_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vset_lane_s8(int8_t a, simde_int8x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int8x8_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vset_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int8x8_private v_ = simde_int8x8_to_private(v); + v_.values[lane] = a; + r = simde_int8x8_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_s8 + #define vset_lane_s8(a, b, c) simde_vset_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vset_lane_s16(int16_t a, simde_int16x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int16x4_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vset_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int16x4_private v_ = simde_int16x4_to_private(v); + v_.values[lane] = a; + r = simde_int16x4_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_s16 + #define vset_lane_s16(a, b, c) simde_vset_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vset_lane_s32(int32_t a, simde_int32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int32x2_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vset_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int32x2_private v_ = simde_int32x2_to_private(v); + v_.values[lane] = a; + r = simde_int32x2_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_s32 + #define vset_lane_s32(a, b, c) simde_vset_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vset_lane_s64(int64_t a, simde_int64x1_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + 
simde_int64x1_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) lane; + r = vset_lane_s64(a, v, 0); + #else + simde_int64x1_private v_ = simde_int64x1_to_private(v); + v_.values[lane] = a; + r = simde_int64x1_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_s64 + #define vset_lane_s64(a, b, c) simde_vset_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vset_lane_u8(uint8_t a, simde_uint8x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint8x8_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vset_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint8x8_private v_ = simde_uint8x8_to_private(v); + v_.values[lane] = a; + r = simde_uint8x8_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_u8 + #define vset_lane_u8(a, b, c) simde_vset_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vset_lane_u16(uint16_t a, simde_uint16x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint16x4_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vset_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint16x4_private v_ = simde_uint16x4_to_private(v); + v_.values[lane] = a; + r = simde_uint16x4_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_u16 + #define vset_lane_u16(a, b, c) simde_vset_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vset_lane_u32(uint32_t a, simde_uint32x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint32x2_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vset_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint32x2_private v_ = simde_uint32x2_to_private(v); + v_.values[lane] = a; + r = simde_uint32x2_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_u32 + #define vset_lane_u32(a, b, c) simde_vset_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vset_lane_u64(uint64_t a, simde_uint64x1_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + simde_uint64x1_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) lane; + r = vset_lane_u64(a, v, 0); + #else + simde_uint64x1_private v_ = simde_uint64x1_to_private(v); + v_.values[lane] = a; + r = simde_uint64x1_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vset_lane_u64 + #define vset_lane_u64(a, b, c) simde_vset_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vsetq_lane_f32(simde_float32_t a, simde_float32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float32x4_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vsetq_lane_f32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_float32x4_private v_ = simde_float32x4_to_private(v); + v_.values[lane] = a; + r = simde_float32x4_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_f32 + #define vsetq_lane_f32(a, b, c) simde_vsetq_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vsetq_lane_f64(simde_float64_t a, 
simde_float64x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_float64x2_t r; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_(vsetq_lane_f64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_float64x2_private v_ = simde_float64x2_to_private(v); + v_.values[lane] = a; + r = simde_float64x2_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_f64 + #define vsetq_lane_f64(a, b, c) simde_vsetq_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vsetq_lane_s8(int8_t a, simde_int8x16_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_int8x16_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_16_(vsetq_lane_s8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int8x16_private v_ = simde_int8x16_to_private(v); + v_.values[lane] = a; + r = simde_int8x16_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_s8 + #define vsetq_lane_s8(a, b, c) simde_vsetq_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsetq_lane_s16(int16_t a, simde_int16x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_int16x8_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vsetq_lane_s16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int16x8_private v_ = simde_int16x8_to_private(v); + v_.values[lane] = a; + r = simde_int16x8_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_s16 + #define vsetq_lane_s16(a, b, c) simde_vsetq_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsetq_lane_s32(int32_t a, simde_int32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_int32x4_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vsetq_lane_s32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int32x4_private v_ = simde_int32x4_to_private(v); + v_.values[lane] = a; + r = simde_int32x4_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_s32 + #define vsetq_lane_s32(a, b, c) simde_vsetq_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsetq_lane_s64(int64_t a, simde_int64x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_int64x2_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vsetq_lane_s64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_int64x2_private v_ = simde_int64x2_to_private(v); + v_.values[lane] = a; + r = simde_int64x2_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_s64 + #define vsetq_lane_s64(a, b, c) simde_vsetq_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vsetq_lane_u8(uint8_t a, simde_uint8x16_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_uint8x16_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_16_(vsetq_lane_u8, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint8x16_private v_ = simde_uint8x16_to_private(v); + v_.values[lane] = a; + r = simde_uint8x16_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_u8 + #define vsetq_lane_u8(a, 
b, c) simde_vsetq_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsetq_lane_u16(uint16_t a, simde_uint16x8_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_uint16x8_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_(vsetq_lane_u16, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint16x8_private v_ = simde_uint16x8_to_private(v); + v_.values[lane] = a; + r = simde_uint16x8_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_u16 + #define vsetq_lane_u16(a, b, c) simde_vsetq_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsetq_lane_u32(uint32_t a, simde_uint32x4_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_uint32x4_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_(vsetq_lane_u32, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint32x4_private v_ = simde_uint32x4_to_private(v); + v_.values[lane] = a; + r = simde_uint32x4_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_u32 + #define vsetq_lane_u32(a, b, c) simde_vsetq_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsetq_lane_u64(uint64_t a, simde_uint64x2_t v, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_uint64x2_t r; + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_(vsetq_lane_u64, r, (HEDLEY_UNREACHABLE(), v), lane, a, v); + #else + simde_uint64x2_private v_ = simde_uint64x2_to_private(v); + v_.values[lane] = a; + r = simde_uint64x2_from_private(v_); + #endif + return r; +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsetq_lane_u64 + #define vsetq_lane_u64(a, b, c) simde_vsetq_lane_u64((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SET_LANE_H) */ +/* :: End simde/arm/neon/set_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/shl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_SHL_H) +#define SIMDE_ARM_NEON_SHL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Notes from the implementer (Christopher Moore aka rosbif) + * + * I have tried to exactly reproduce the documented behaviour of the + * ARM NEON shl and shlq intrinsics. + * This is complicated for the following reasons:- + * + * a) Negative shift counts shift right. + * + * b) Only the low byte of the shift count is used but the shift count + * is not limited to 8-bit values (-128 to 127). + * + * c) Intel SIMD is not nearly as complete as NEON and AltiVec. + * There were no intrisics with a vector shift count before AVX2 which + * only has 32 and 64-bit logical ones and only a 32-bit arithmetic + * one. The others need AVX512. There are no 8-bit shift intrinsics at + * all, even with a scalar shift count. It is surprising to use AVX2 + * and even AVX512 to implement a 64-bit vector operation. + * + * d) Many shift implementations, and the C standard, do not treat a + * shift count >= the object's size in bits as one would expect. + * (Personally I feel that > is silly but == can be useful.) + * + * Maybe it would be useful for SIMDe to have a flag enabling a fast + * implementation where the result is only guaranteed for shift counts + * conforming to the C standard. + * + * Note that even the C17/18 standard does not define the behaviour of + * a right shift of a negative value. + * However Evan and I agree that all compilers likely to be used + * implement this as an arithmetic right shift with sign extension. + * If this is not the case it could be replaced by a logical right shift + * if negative values are complemented before and after the shift. + * + * Some of the SIMD translations may be slower than the portable code, + * particularly those for vectors with only one or two elements. + * But I had fun writing them ;-) + * + */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vshld_s64 (const int64_t a, const int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vshld_s64(a, b); + #else + int8_t b_ = HEDLEY_STATIC_CAST(int8_t, b); + return + (b_ >= 0) + ? (b_ >= 64) + ? 0 + : (a << b_) + : (b_ <= -64) + ? (a >> 63) + : (a >> -b_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vshld_s64 + #define vshld_s64(a, b) simde_vshld_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vshld_u64 (const uint64_t a, const int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vshld_u64(a, HEDLEY_STATIC_CAST(int64_t, b)); + #else + int8_t b_ = HEDLEY_STATIC_CAST(int8_t, b); + return + (simde_math_llabs(b_) >= 64) + ? 0 + : (b_ >= 0) + ? 
(a << b_) + : (a >> -b_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vshld_u64 + #define vshld_u64(a, b) simde_vshld_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), + _mm_srav_epi16(a128, _mm_abs_epi16(b128)), + _mm_cmpgt_epi16(_mm_setzero_si128(), b128)); + r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m256i a256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(a_.m64)); + __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srav_epi32(a256, _mm256_abs_epi32(b256)), + _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); + r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, + (b_.values[i] >= 0) ? + (b_.values[i] >= 8) ? 0 : (a_.values[i] << b_.values[i]) : + (b_.values[i] <= -8) ? (a_.values[i] >> 7) : (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_s8 + #define vshl_s8(a, b) simde_vshl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srav_epi32(a128, _mm_abs_epi32(b128)), + _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); + r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, + (b_.values[i] >= 0) ? + (b_.values[i] >= 16) ? 0 : (a_.values[i] << b_.values[i]) : + (b_.values[i] <= -16) ? 
(a_.values[i] >> 15) : (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_s16 + #define vshl_s16(a, b) simde_vshl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srav_epi32(a128, _mm_abs_epi32(b128)), + _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = + (b_.values[i] >= 0) ? + (b_.values[i] >= 32) ? 0 : (a_.values[i] << b_.values[i]) : + (b_.values[i] <= -32) ? (a_.values[i] >> 31) : (a_.values[i] >> -b_.values[i]); + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_s32 + #define vshl_s32(a, b) simde_vshl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i zero = _mm_setzero_si128(); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), + _mm_srav_epi64(a128, _mm_sub_epi64(zero, b128)), + _mm_cmpgt_epi64(zero, b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i zero = _mm_setzero_si128(); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + __m128i maska = _mm_cmpgt_epi64(zero, a128); + __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b_abs), + _mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a128, maska), b_abs), maska), + _mm_cmpgt_epi64(zero, _mm_slli_epi64(b128, 56))); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vshld_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_s64 + #define vshl_s64(a, b) simde_vshl_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_cvtepu8_epi16(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi8_epi16(_mm_movpi64_epi64(b_.m64)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi16(a128, b128), + _mm_srlv_epi16(a128, _mm_abs_epi16(b128)), + _mm_cmpgt_epi16(_mm_setzero_si128(), b128)); + r_.m64 = _mm_movepi64_pi64(_mm_cvtepi16_epi8(r128)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m256i a256 = _mm256_cvtepu8_epi32(_mm_movpi64_epi64(a_.m64)); + __m256i b256 = _mm256_cvtepi8_epi32(_mm_movpi64_epi64(b_.m64)); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srlv_epi32(a256, _mm256_abs_epi32(b256)), + _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi32(0x0C080400)); + r_.m64 = _mm_set_pi32(simde_mm256_extract_epi32(r256, 4), simde_mm256_extract_epi32(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, + (simde_math_abs(b_.values[i]) >= 8) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_u8 + #define vshl_u8(a, b) simde_vshl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_cvtepu16_epi32(_mm_movpi64_epi64(a_.m64)); + __m128i b128 = _mm_cvtepi16_epi32(_mm_movpi64_epi64(b_.m64)); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srlv_epi32(a128, _mm_abs_epi32(b128)), + _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); + r_.m64 = _mm_movepi64_pi64(_mm_shuffle_epi8(r128, _mm_set1_epi64x(0x0D0C090805040100))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, + (simde_math_abs(b_.values[i]) >= 16) ? 0 : + (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : + (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_u16 + #define vshl_u16(a, b) simde_vshl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi32(_mm_slli_epi32(b128, 24), 24); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi32(a128, b128), + _mm_srlv_epi32(a128, _mm_abs_epi32(b128)), + _mm_cmpgt_epi32(_mm_setzero_si128(), b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = + (simde_math_abs(b_.values[i]) >= 32) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (a_.values[i] >> -b_.values[i]); + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_u32 + #define vshl_u32(a, b) simde_vshl_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshl_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + simde_int64x1_private b_ = simde_int64x1_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i zero = _mm_setzero_si128(); + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + b128 = _mm_srai_epi64(_mm_slli_epi64(b128, 56), 56); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b128), + _mm_srlv_epi64(a128, _mm_sub_epi64(zero, b128)), + _mm_cmpgt_epi64(zero, b128)); + r_.m64 = _mm_movepi64_pi64(r128); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_movpi64_epi64(a_.m64); + __m128i b128 = _mm_movpi64_epi64(b_.m64); + __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b128), _mm_set1_epi64x(0xFF)); + __m128i r128 = _mm_blendv_epi8(_mm_sllv_epi64(a128, b_abs), + _mm_srlv_epi64(a128, b_abs), + _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b128, 56))); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vshld_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x1_from_private(r_); +#endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshl_u64 + #define vshl_u64(a, b) simde_vshl_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) a_shl, a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs, b_max; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) b_mask; + b_abs = 
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); + b_max = vec_splat_u8(7); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); + #else + a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splat_u8(8))); + #endif + a_shr = vec_sra(a, vec_min(b_abs, b_max)); + b_mask = vec_cmplt(b, vec_splat_s8(0)); + return vec_sel(a_shl, a_shr, b_mask); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m256i a256 = _mm256_cvtepi8_epi16(a_.m128i); + __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), + _mm256_srav_epi16(a256, _mm256_abs_epi16(b256)), + _mm256_cmpgt_epi16(_mm256_setzero_si256(), b256)); + r_.m128i = _mm256_cvtepi16_epi8(r256); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, + (b_.values[i] >= 0) ? + (b_.values[i] >= 8) ? 0 : (a_.values[i] << b_.values[i]) : + (b_.values[i] <= -8) ? (a_.values[i] >> 7) : (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_s8 + #define vshlq_s8(a, b) simde_vshlq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) a_shl, a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs, b_max; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) b_mask; + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF))); + b_max = vec_splat_u16(15); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); + #else + a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)))); + #endif + a_shr = vec_sra(a, vec_min(b_abs, b_max)); + b_mask = vec_cmplt(vec_sl(b, vec_splat_u16(8)), vec_splat_s16(0)); + return vec_sel(a_shl, a_shr, b_mask); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), + _mm_srav_epi16(a_.m128i, _mm_abs_epi16(B)), + _mm_cmpgt_epi16(_mm_setzero_si128(), B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) + __m256i a256 = _mm256_cvtepi16_epi32(a_.m128i); + __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); + b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srav_epi32(a256, _mm256_abs_epi32(b256)), + _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); + r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, + (b_.values[i] >= 0) ? + (b_.values[i] >= 16) ? 0 : (a_.values[i] << b_.values[i]) : + (b_.values[i] <= -16) ? (a_.values[i] >> 15) : (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_s16 + #define vshlq_s16(a, b) simde_vshlq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) a_shl, a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs, b_max; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) b_mask; + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF))); + b_max = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 31)); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); + #else + a_shl = vec_and(vec_sl(a, b_abs), vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)))); + #endif + a_shr = vec_sra(a, vec_min(b_abs, b_max)); + b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 24))), + vec_splat_s32(0)); + return vec_sel(a_shl, a_shr, b_mask); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), + _mm_srav_epi32(a_.m128i, _mm_abs_epi32(B)), + _mm_cmpgt_epi32(_mm_setzero_si128(), B)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = + (b_.values[i] >= 0) ? + (b_.values[i] >= 32) ? 0 : (a_.values[i] << b_.values[i]) : + (b_.values[i] <= -32) ? 
(a_.values[i] >> 31) : (a_.values[i] >> -b_.values[i]); + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_s32 + #define vshlq_s32(a, b) simde_vshlq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) a_shl, a_shr; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs, b_max; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) b_mask; + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF))); + b_max = vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63)); + a_shl = vec_and(vec_sl(a, b_abs), vec_cmple(b_abs, b_max)); + a_shr = vec_sra(a, vec_min(b_abs, b_max)); + b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 56))), + vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))); + HEDLEY_DIAGNOSTIC_PUSH + #if defined(SIMDE_BUG_CLANG_46770) + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif + return vec_sel(a_shl, a_shr, b_mask); + HEDLEY_DIAGNOSTIC_POP + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i zero = _mm_setzero_si128(); + __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), + _mm_srav_epi64(a_.m128i, _mm_sub_epi64(zero, B)), + _mm_cmpgt_epi64(zero, B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m128i zero = _mm_setzero_si128(); + __m128i maska = _mm_cmpgt_epi64(zero, a_.m128i); + __m128i b_abs = _mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), + _mm_xor_si128(_mm_srlv_epi64(_mm_xor_si128(a_.m128i, maska), b_abs), maska), + _mm_cmpgt_epi64(zero, _mm_slli_epi64(b_.m128i, 56))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vshld_s64(a_.values[i], b_.values[i]); + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_s64 + #define vshlq_s64(a, b) simde_vshlq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) b_abs; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) b_mask; + b_abs = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_abs(b)); + b_mask = vec_cmplt(b, vec_splat_s8(0)); + return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), + vec_cmplt(b_abs, vec_splat_u8(8))); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m256i a256 = _mm256_cvtepu8_epi16(a_.m128i); + __m256i b256 = _mm256_cvtepi8_epi16(b_.m128i); + __m256i r256 = 
_mm256_blendv_epi8(_mm256_sllv_epi16(a256, b256), + _mm256_srlv_epi16(a256, _mm256_abs_epi16(b256)), + _mm256_cmpgt_epi16(_mm256_setzero_si256(), b256)); + r_.m128i = _mm256_cvtepi16_epi8(r256); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, + (simde_math_abs(b_.values[i]) >= 8) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_u8 + #define vshlq_u8(a, b) simde_vshlq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) b_abs; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) b_mask; + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0xFF))); + b_mask = vec_cmplt(vec_sl(b, vec_splat_u16(8)), vec_splat_s16(0)); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), + vec_cmple(b_abs, vec_splat_u16(15))); + #else + return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), + vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 16)))); + #endif + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i B = _mm_srai_epi16(_mm_slli_epi16(b_.m128i, 8), 8); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi16(a_.m128i, B), + _mm_srlv_epi16(a_.m128i, _mm_abs_epi16(B)), + _mm_cmpgt_epi16(_mm_setzero_si128(), B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_ARCH_AMD64) + __m256i a256 = _mm256_cvtepu16_epi32(a_.m128i); + __m256i b256 = _mm256_cvtepi16_epi32(b_.m128i); + b256 = _mm256_srai_epi32(_mm256_slli_epi32(b256, 24), 24); + __m256i r256 = _mm256_blendv_epi8(_mm256_sllv_epi32(a256, b256), + _mm256_srlv_epi32(a256, _mm256_abs_epi32(b256)), + _mm256_cmpgt_epi32(_mm256_setzero_si256(), b256)); + r256 = _mm256_shuffle_epi8(r256, _mm256_set1_epi64x(0x0D0C090805040100)); + r_.m128i = _mm_set_epi64x(simde_mm256_extract_epi64(r256, 2), simde_mm256_extract_epi64(r256, 0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, + (simde_math_abs(b_.values[i]) >= 16) ? 0 : + (b_.values[i] >= 0) ? 
(a_.values[i] << b_.values[i]) : + (a_.values[i] >> -b_.values[i])); + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_u16 + #define vshlq_u16(a, b) simde_vshlq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) b_abs; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) b_mask; + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0xFF))); + b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 24))), vec_splat_s32(0)); + return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), + vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)))); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m128i B = _mm_srai_epi32(_mm_slli_epi32(b_.m128i, 24), 24); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi32(a_.m128i, B), + _mm_srlv_epi32(a_.m128i, _mm_abs_epi32(B)), + _mm_cmpgt_epi32(_mm_setzero_si128(), B)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]); + r_.values[i] = (simde_math_abs(b_.values[i]) >= 32) ? 0 : + (b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) : + (a_.values[i] >> -b_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_u32 + #define vshlq_u32(a, b) simde_vshlq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vshlq_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) b_abs; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) b_mask; + b_abs = vec_and(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), + vec_abs(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), b))), + vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 0xFF))); + b_mask = vec_cmplt(vec_sl(b, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 56))), + vec_splats(HEDLEY_STATIC_CAST(signed long long, 0))); + HEDLEY_DIAGNOSTIC_PUSH + #if defined(SIMDE_BUG_CLANG_46770) + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif + return vec_and(vec_sel(vec_sl(a, b_abs), vec_sr(a, b_abs), b_mask), + vec_cmplt(b_abs, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)))); + HEDLEY_DIAGNOSTIC_POP + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + simde_int64x2_private b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i zero = _mm_setzero_si128(); + __m128i B = _mm_srai_epi64(_mm_slli_epi64(b_.m128i, 56), 56); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, B), + _mm_srlv_epi64(a_.m128i, _mm_sub_epi64(zero, B)), + _mm_cmpgt_epi64(zero, B)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m128i b_abs = 
_mm_and_si128(_mm_abs_epi8(b_.m128i), _mm_set1_epi64x(0xFF)); + r_.m128i = _mm_blendv_epi8(_mm_sllv_epi64(a_.m128i, b_abs), + _mm_srlv_epi64(a_.m128i, b_abs), + _mm_cmpgt_epi64(_mm_setzero_si128(), _mm_slli_epi64(b_.m128i, 56))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vshld_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshlq_u64 + #define vshlq_u64(a, b) simde_vshlq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SHL_H) */ +/* :: End simde/arm/neon/shl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/shll_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_SHLL_N_H) +#define SIMDE_ARM_NEON_SHLL_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* + * The constant range requirements for the shift amount *n* looks strange. + * The ARM Neon Intrinsics Reference states that for *_s8, 0 << n << 7. This + * does not match the actual instruction decoding in the ARM Reference manual, + * which states that the shift amount "must be equal to the source element width + * in bits" (ARM DDI 0487F.b C7-1959). So for *_s8 instructions, *n* must be 8, + * for *_s16, it must be 16, and *_s32 must be 32 (similarly for unsigned). 
+ */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vshll_n_s8 (const simde_int8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 7) { + simde_int16x8_private r_; + simde_int8x8_private a_ = simde_int8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, HEDLEY_STATIC_CAST(int16_t, a_.values[i]) << n); + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshll_n_s8(a, n) vshll_n_s8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshll_n_s8 + #define vshll_n_s8(a, n) simde_vshll_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vshll_n_s16 (const simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 15) { + simde_int32x4_private r_; + simde_int16x4_private a_ = simde_int16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, a_.values[i]) << n; + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshll_n_s16(a, n) vshll_n_s16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshll_n_s16 + #define vshll_n_s16(a, n) simde_vshll_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vshll_n_s32 (const simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 31) { + simde_int64x2_private r_; + simde_int32x2_private a_ = simde_int32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int64_t, a_.values[i]) << n; + } + + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshll_n_s32(a, n) vshll_n_s32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshll_n_s32 + #define vshll_n_s32(a, n) simde_vshll_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vshll_n_u8 (const simde_uint8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 7) { + simde_uint16x8_private r_; + simde_uint8x8_private a_ = simde_uint8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint16_t, a_.values[i]) << n); + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshll_n_u8(a, n) vshll_n_u8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshll_n_u8 + #define vshll_n_u8(a, n) simde_vshll_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vshll_n_u16 (const simde_uint16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 15) { + simde_uint32x4_private r_; + simde_uint16x4_private a_ = simde_uint16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) << n; + } + + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshll_n_u16(a, n) vshll_n_u16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshll_n_u16 + 
#define vshll_n_u16(a, n) simde_vshll_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vshll_n_u32 (const simde_uint32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 31) { + simde_uint64x2_private r_; + simde_uint32x2_private a_ = simde_uint32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) << n; + } + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshll_n_u32(a, n) vshll_n_u32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshll_n_u32 + #define vshll_n_u32(a, n) simde_vshll_n_u32((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SHLL_N_H) */ +/* :: End simde/arm/neon/shll_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/shrn_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_SHRN_N_H) +#define SIMDE_ARM_NEON_SHRN_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vshrn_n_s16 (const simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int8x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, (a_.values[i] >> n) & UINT8_MAX); + } + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrn_n_s16(a, n) vshrn_n_s16((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vshrn_n_s16(a, n) simde_vmovn_s16(simde_vshrq_n_s16((a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrn_n_s16 + #define vshrn_n_s16(a, n) simde_vshrn_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vshrn_n_s32 (const simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int16x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] >> n) & UINT16_MAX); + } + + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrn_n_s32(a, n) vshrn_n_s32((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vshrn_n_s32(a, n) simde_vmovn_s32(simde_vshrq_n_s32((a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrn_n_s32 + #define vshrn_n_s32(a, n) simde_vshrn_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vshrn_n_s64 (const simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int32x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, (a_.values[i] >> n) & UINT32_MAX); + } + + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vshrn_n_s64(a, n) vshrn_n_s64((a), (n)) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_vshrn_n_s64(a, n) simde_vmovn_s64(simde_vshrq_n_s64((a), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrn_n_s64 + #define vshrn_n_s64(a, n) simde_vshrn_n_s64((a), (n)) +#endif + +#define simde_vshrn_n_u16(a, n) \ + simde_vreinterpret_u8_s8( \ + simde_vshrn_n_s16(simde_vreinterpretq_s16_u16(a), (n))) + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #undef simde_vshrn_n_u16 + #define simde_vshrn_n_u16(a, n) vshrn_n_u16((a), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrn_n_u16 + #define vshrn_n_u16(a, n) simde_vshrn_n_u16((a), (n)) +#endif + +#define simde_vshrn_n_u32(a, 
n) \ + simde_vreinterpret_u16_s16( \ + simde_vshrn_n_s32(simde_vreinterpretq_s32_u32(a), (n))) + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #undef simde_vshrn_n_u32 + #define simde_vshrn_n_u32(a, n) vshrn_n_u32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrn_n_u32 + #define vshrn_n_u32(a, n) simde_vshrn_n_u32((a), (n)) +#endif + +#define simde_vshrn_n_u64(a, n) \ + simde_vreinterpret_u32_s32( \ + simde_vshrn_n_s64(simde_vreinterpretq_s64_u64(a), (n))) + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #undef simde_vshrn_n_u64 + #define simde_vshrn_n_u64(a, n) vshrn_n_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vshrn_n_u64 + #define vshrn_n_u64(a, n) simde_vshrn_n_u64((a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SHRN_N_H) */ +/* :: End simde/arm/neon/shrn_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/sqadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Atharva Nimbalkar + */ + +#if !defined(SIMDE_ARM_NEON_SQADD_H) +#define SIMDE_ARM_NEON_SQADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +// Workaround on ARM64 windows due to windows SDK bug +// https://developercommunity.visualstudio.com/t/In-arm64_neonh-vsqaddb_u8-vsqaddh_u16/10271747?sort=newest +#if (defined _MSC_VER) && (defined SIMDE_ARM_NEON_A64V8_NATIVE) +#undef vsqaddb_u8 +#define vsqaddb_u8(src1, src2) neon_usqadds8(__uint8ToN8_v(src1), __int8ToN8_v(src2)).n8_u8[0] +#undef vsqaddh_u16 +#define vsqaddh_u16(src1, src2) neon_usqadds16(__uint16ToN16_v(src1), __int16ToN16_v(src2)).n16_u16[0] +#undef vsqadds_u32 +#define vsqadds_u32(src1, src2) _CopyUInt32FromFloat(neon_usqadds32(_CopyFloatFromUInt32(src1), _CopyFloatFromInt32(src2))) +#undef vsqaddd_u64 +#define vsqaddd_u64(src1, src2) neon_usqadds64(__uint64ToN64_v(src1), __int64ToN64_v(src2)).n64_u64[0] +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vsqaddb_u8(uint8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_REV_365298) + return vsqaddb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); + #else + return vsqaddb_u8(a, b); + #endif + #else + int16_t r_ = HEDLEY_STATIC_CAST(int16_t, a) + HEDLEY_STATIC_CAST(int16_t, b); + return (r_ < 0) ? 0 : ((r_ > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddb_u8 + #define vsqaddb_u8(a, b) simde_vsqaddb_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vsqaddh_u16(uint16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_REV_365298) + return vsqaddh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #else + return vsqaddh_u16(a, b); + #endif + #else + int32_t r_ = HEDLEY_STATIC_CAST(int32_t, a) + HEDLEY_STATIC_CAST(int32_t, b); + return (r_ < 0) ? 0 : ((r_ > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddh_u16 + #define vsqaddh_u16(a, b) simde_vsqaddh_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vsqadds_u32(uint32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_REV_365298) + return vsqadds_u32(a, HEDLEY_STATIC_CAST(uint32_t, b)); + #else + return vsqadds_u32(a, b); + #endif + #else + int64_t r_ = HEDLEY_STATIC_CAST(int64_t, a) + HEDLEY_STATIC_CAST(int64_t, b); + return (r_ < 0) ? 0 : ((r_ > UINT32_MAX) ? UINT32_MAX : HEDLEY_STATIC_CAST(uint32_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqadds_u32 + #define vsqadds_u32(a, b) simde_vsqadds_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vsqaddd_u64(uint64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_REV_365298) + return vsqaddd_u64(a, HEDLEY_STATIC_CAST(uint64_t, b)); + #else + return vsqaddd_u64(a, b); + #endif + #else + uint64_t r_; + + if (b > 0) { + uint64_t ub = HEDLEY_STATIC_CAST(uint64_t, b); + r_ = ((UINT64_MAX - a) < ub) ? UINT64_MAX : a + ub; + } else { + uint64_t nb = HEDLEY_STATIC_CAST(uint64_t, -b); + r_ = (nb > a) ? 
0 : a - nb; + } + return r_; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddd_u64 + #define vsqaddd_u64(a, b) simde_vsqaddd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vsqadd_u8(simde_uint8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqadd_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqaddb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqadd_u8 + #define vsqadd_u8(a, b) simde_vsqadd_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vsqadd_u16(simde_uint16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqadd_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqaddh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqadd_u16 + #define vsqadd_u16(a, b) simde_vsqadd_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vsqadd_u32(simde_uint32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqadd_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqadds_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqadd_u32 + #define vsqadd_u32(a, b) simde_vsqadd_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vsqadd_u64(simde_uint64x1_t a, simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqadd_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + simde_int64x1_private b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqaddd_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqadd_u64 + #define vsqadd_u64(a, b) simde_vsqadd_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vsqaddq_u8(simde_uint8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqaddq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqaddb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddq_u8 + #define vsqaddq_u8(a, b) simde_vsqaddq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_uint16x8_t +simde_vsqaddq_u16(simde_uint16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqaddq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqaddh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddq_u16 + #define vsqaddq_u16(a, b) simde_vsqaddq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsqaddq_u32(simde_uint32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqaddq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqadds_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddq_u32 + #define vsqaddq_u32(a, b) simde_vsqaddq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsqaddq_u64(simde_uint64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsqaddq_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + simde_int64x2_private b_ = simde_int64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vsqaddd_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsqaddq_u64 + #define vsqaddq_u64(a, b) simde_vsqaddq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SQADD_H) */ +/* :: End simde/arm/neon/sqadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/sra_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_SRA_N_H) +#define SIMDE_ARM_NEON_SRA_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vsrad_n_s64(a, b, n) vsrad_n_s64((a), (b), (n)) +#else + #define simde_vsrad_n_s64(a, b, n) simde_vaddd_s64((a), simde_vshrd_n_s64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsrad_n_s64 + #define vsrad_n_s64(a, b, n) simde_vsrad_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vsrad_n_u64(a, b, n) vsrad_n_u64((a), (b), (n)) +#else + #define simde_vsrad_n_u64(a, b, n) simde_vaddd_u64((a), simde_vshrd_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsrad_n_u64 + #define vsrad_n_u64(a, b, n) simde_vsrad_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_s8(a, b, n) vsra_n_s8((a), (b), (n)) +#else + #define simde_vsra_n_s8(a, b, n) simde_vadd_s8((a), simde_vshr_n_s8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_s8 + #define vsra_n_s8(a, b, n) simde_vsra_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_s16(a, b, n) vsra_n_s16((a), (b), (n)) +#else + #define simde_vsra_n_s16(a, b, n) simde_vadd_s16((a), simde_vshr_n_s16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_s16 + #define vsra_n_s16(a, b, n) simde_vsra_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_s32(a, b, n) vsra_n_s32((a), (b), (n)) +#else + #define simde_vsra_n_s32(a, b, n) simde_vadd_s32((a), simde_vshr_n_s32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_s32 + #define vsra_n_s32(a, b, n) simde_vsra_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_s64(a, b, n) vsra_n_s64((a), (b), (n)) +#else + #define simde_vsra_n_s64(a, b, n) simde_vadd_s64((a), simde_vshr_n_s64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_s64 + #define vsra_n_s64(a, b, n) simde_vsra_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_u8(a, b, n) vsra_n_u8((a), (b), (n)) +#else + #define simde_vsra_n_u8(a, b, n) simde_vadd_u8((a), simde_vshr_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_u8 + #define vsra_n_u8(a, b, n) simde_vsra_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_u16(a, b, n) vsra_n_u16((a), (b), (n)) +#else + #define simde_vsra_n_u16(a, b, n) simde_vadd_u16((a), simde_vshr_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_u16 + #define vsra_n_u16(a, b, n) simde_vsra_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_u32(a, b, n) vsra_n_u32((a), (b), (n)) +#else + #define simde_vsra_n_u32(a, b, n) simde_vadd_u32((a), simde_vshr_n_u32((b), (n))) +#endif +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_u32 + #define vsra_n_u32(a, b, n) simde_vsra_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsra_n_u64(a, b, n) vsra_n_u64((a), (b), (n)) +#else + #define simde_vsra_n_u64(a, b, n) simde_vadd_u64((a), simde_vshr_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsra_n_u64 + #define vsra_n_u64(a, b, n) simde_vsra_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_s8(a, b, n) vsraq_n_s8((a), (b), (n)) +#else + #define simde_vsraq_n_s8(a, b, n) simde_vaddq_s8((a), simde_vshrq_n_s8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_s8 + #define vsraq_n_s8(a, b, n) simde_vsraq_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_s16(a, b, n) vsraq_n_s16((a), (b), (n)) +#else + #define simde_vsraq_n_s16(a, b, n) simde_vaddq_s16((a), simde_vshrq_n_s16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_s16 + #define vsraq_n_s16(a, b, n) simde_vsraq_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_s32(a, b, n) vsraq_n_s32((a), (b), (n)) +#else + #define simde_vsraq_n_s32(a, b, n) simde_vaddq_s32((a), simde_vshrq_n_s32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_s32 + #define vsraq_n_s32(a, b, n) simde_vsraq_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_s64(a, b, n) vsraq_n_s64((a), (b), (n)) +#else + #define simde_vsraq_n_s64(a, b, n) simde_vaddq_s64((a), simde_vshrq_n_s64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_s64 + #define vsraq_n_s64(a, b, n) simde_vsraq_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_u8(a, b, n) vsraq_n_u8((a), (b), (n)) +#else + #define simde_vsraq_n_u8(a, b, n) simde_vaddq_u8((a), simde_vshrq_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_u8 + #define vsraq_n_u8(a, b, n) simde_vsraq_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_u16(a, b, n) vsraq_n_u16((a), (b), (n)) +#else + #define simde_vsraq_n_u16(a, b, n) simde_vaddq_u16((a), simde_vshrq_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_u16 + #define vsraq_n_u16(a, b, n) simde_vsraq_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_u32(a, b, n) vsraq_n_u32((a), (b), (n)) +#else + #define simde_vsraq_n_u32(a, b, n) simde_vaddq_u32((a), simde_vshrq_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_u32 + #define vsraq_n_u32(a, b, n) simde_vsraq_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsraq_n_u64(a, b, n) vsraq_n_u64((a), (b), (n)) +#else + #define simde_vsraq_n_u64(a, b, n) simde_vaddq_u64((a), simde_vshrq_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsraq_n_u64 + #define vsraq_n_u64(a, b, n) simde_vsraq_n_u64((a), (b), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SRA_N_H) */ +/* :: End simde/arm/neon/sra_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* 
e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/sri_n.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SRI_N_H) +#define SIMDE_ARM_NEON_SRI_N_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vsrid_n_s64(a, b, n) vsrid_n_s64(a, b, n) +#else + #define simde_vsrid_n_s64(a, b, n) \ + HEDLEY_STATIC_CAST(int64_t, \ + simde_vsrid_n_u64(HEDLEY_STATIC_CAST(uint64_t, a), HEDLEY_STATIC_CAST(uint64_t, b), n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsrid_n_s64 + #define vsrid_n_s64(a, b, n) simde_vsrid_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vsrid_n_u64(a, b, n) vsrid_n_u64(a, b, n) +#else +#define simde_vsrid_n_u64(a, b, n) \ + (((a & (UINT64_C(0xffffffffffffffff) >> (64 - n) << (64 - n))) | simde_vshrd_n_u64((b), (n)))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsrid_n_u64 + #define vsrid_n_u64(a, b, n) simde_vsrid_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_s8(a, b, n) vsri_n_s8((a), (b), (n)) +#else + #define simde_vsri_n_s8(a, b, n) \ + simde_vreinterpret_s8_u8(simde_vsri_n_u8( \ + simde_vreinterpret_u8_s8((a)), simde_vreinterpret_u8_s8((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_s8 + #define vsri_n_s8(a, b, n) simde_vsri_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_u8(a, b, n) vsri_n_u8((a), (b), (n)) +#else + #define simde_vsri_n_u8(a, b, n) \ + simde_vorr_u8( \ + simde_vand_u8((a), simde_vdup_n_u8((UINT8_C(0xff) >> (8 - n) << (8 - 
n)))), \ + simde_vshr_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_u8 + #define vsri_n_u8(a, b, n) simde_vsri_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_s16(a, b, n) vsri_n_s16((a), (b), (n)) +#else + #define simde_vsri_n_s16(a, b, n) \ + simde_vreinterpret_s16_u16(simde_vsri_n_u16( \ + simde_vreinterpret_u16_s16((a)), simde_vreinterpret_u16_s16((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_s16 + #define vsri_n_s16(a, b, n) simde_vsri_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_u16(a, b, n) vsri_n_u16((a), (b), (n)) +#else + #define simde_vsri_n_u16(a, b, n) \ + simde_vorr_u16( \ + simde_vand_u16((a), simde_vdup_n_u16((UINT16_C(0xffff) >> (16 - n) << (16 - n)))), \ + simde_vshr_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_u16 + #define vsri_n_u16(a, b, n) simde_vsri_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_s32(a, b, n) vsri_n_s32((a), (b), (n)) +#else + #define simde_vsri_n_s32(a, b, n) \ + simde_vreinterpret_s32_u32(simde_vsri_n_u32( \ + simde_vreinterpret_u32_s32((a)), simde_vreinterpret_u32_s32((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_s32 + #define vsri_n_s32(a, b, n) simde_vsri_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_u32(a, b, n) vsri_n_u32((a), (b), (n)) +#else + #define simde_vsri_n_u32(a, b, n) \ + simde_vorr_u32( \ + simde_vand_u32((a), \ + simde_vdup_n_u32((UINT32_C(0xffffffff) >> (32 - n) << (32 - n)))), \ + simde_vshr_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_u32 + #define vsri_n_u32(a, b, n) simde_vsri_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_s64(a, b, n) vsri_n_s64((a), (b), (n)) +#else + #define simde_vsri_n_s64(a, b, n) \ + simde_vreinterpret_s64_u64(simde_vsri_n_u64( \ + simde_vreinterpret_u64_s64((a)), simde_vreinterpret_u64_s64((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_s64 + #define vsri_n_s64(a, b, n) simde_vsri_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsri_n_u64(a, b, n) vsri_n_u64((a), (b), (n)) +#else +#define simde_vsri_n_u64(a, b, n) \ + simde_vorr_u64( \ + simde_vand_u64((a), simde_vdup_n_u64( \ + (UINT64_C(0xffffffffffffffff) >> (64 - n) << (64 - n)))), \ + simde_vshr_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsri_n_u64 + #define vsri_n_u64(a, b, n) simde_vsri_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_s8(a, b, n) vsriq_n_s8((a), (b), (n)) +#else + #define simde_vsriq_n_s8(a, b, n) \ + simde_vreinterpretq_s8_u8(simde_vsriq_n_u8( \ + simde_vreinterpretq_u8_s8((a)), simde_vreinterpretq_u8_s8((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_s8 + #define vsriq_n_s8(a, b, n) simde_vsriq_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_u8(a, b, n) vsriq_n_u8((a), (b), (n)) +#else + #define simde_vsriq_n_u8(a, b, n) \ + simde_vorrq_u8( \ + simde_vandq_u8((a), simde_vdupq_n_u8((UINT8_C(0xff) >> (8 - n) << (8 - n)))), \ + simde_vshrq_n_u8((b), (n))) +#endif +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_u8 + #define vsriq_n_u8(a, b, n) simde_vsriq_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_s16(a, b, n) vsriq_n_s16((a), (b), (n)) +#else + #define simde_vsriq_n_s16(a, b, n) \ + simde_vreinterpretq_s16_u16(simde_vsriq_n_u16( \ + simde_vreinterpretq_u16_s16((a)), simde_vreinterpretq_u16_s16((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_s16 + #define vsriq_n_s16(a, b, n) simde_vsriq_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_u16(a, b, n) vsriq_n_u16((a), (b), (n)) +#else + #define simde_vsriq_n_u16(a, b, n) \ + simde_vorrq_u16( \ + simde_vandq_u16((a), simde_vdupq_n_u16((UINT16_C(0xffff) >> (16 - n) << (16 - n)))), \ + simde_vshrq_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_u16 + #define vsriq_n_u16(a, b, n) simde_vsriq_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_s32(a, b, n) vsriq_n_s32((a), (b), (n)) +#else + #define simde_vsriq_n_s32(a, b, n) \ + simde_vreinterpretq_s32_u32(simde_vsriq_n_u32( \ + simde_vreinterpretq_u32_s32((a)), simde_vreinterpretq_u32_s32((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_s32 + #define vsriq_n_s32(a, b, n) simde_vsriq_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_u32(a, b, n) vsriq_n_u32((a), (b), (n)) +#else + #define simde_vsriq_n_u32(a, b, n) \ + simde_vorrq_u32( \ + simde_vandq_u32((a), \ + simde_vdupq_n_u32((UINT32_C(0xffffffff) >> (32 - n) << (32 - n)))), \ + simde_vshrq_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_u32 + #define vsriq_n_u32(a, b, n) simde_vsriq_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_s64(a, b, n) vsriq_n_s64((a), (b), (n)) +#else + #define simde_vsriq_n_s64(a, b, n) \ + simde_vreinterpretq_s64_u64(simde_vsriq_n_u64( \ + simde_vreinterpretq_u64_s64((a)), simde_vreinterpretq_u64_s64((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_s64 + #define vsriq_n_s64(a, b, n) simde_vsriq_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsriq_n_u64(a, b, n) vsriq_n_u64((a), (b), (n)) +#else +#define simde_vsriq_n_u64(a, b, n) \ + simde_vorrq_u64( \ + simde_vandq_u64((a), simde_vdupq_n_u64( \ + (UINT64_C(0xffffffffffffffff) >> (64 - n) << (64 - n)))), \ + simde_vshrq_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsriq_n_u64 + #define vsriq_n_u64(a, b, n) simde_vsriq_n_u64((a), (b), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SRI_N_H) */ +/* :: End simde/arm/neon/sri_n.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_ST1_H) +#define SIMDE_ARM_NEON_ST1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1_f16(ptr, val); + #else + simde_float16x4_private val_ = simde_float16x4_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_f16 + #define vst1_f16(a, b) simde_vst1_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1_f32(ptr, val); + #else + simde_float32x2_private val_ = simde_float32x2_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_f32 + #define vst1_f32(a, b) simde_vst1_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_float64x1_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1_f64(ptr, val); + #else + simde_float64x1_private val_ = simde_float64x1_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst1_f64 + #define vst1_f64(a, b) simde_vst1_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int8x8_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_s8(ptr, val); + #else + simde_int8x8_private val_ = simde_int8x8_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_s8 + #define vst1_s8(a, b) simde_vst1_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_s16(ptr, val); + #else + simde_int16x4_private val_ = simde_int16x4_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_s16 + #define vst1_s16(a, b) simde_vst1_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_s32(ptr, val); + #else + simde_int32x2_private val_ = simde_int32x2_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_s32 + #define vst1_s32(a, b) simde_vst1_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_int64x1_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_s64(ptr, val); + #else + simde_int64x1_private val_ = simde_int64x1_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_s64 + #define vst1_s64(a, b) simde_vst1_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint8x8_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_u8(ptr, val); + #else + simde_uint8x8_private val_ = simde_uint8x8_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_u8 + #define vst1_u8(a, b) simde_vst1_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_u16(ptr, val); + #else + simde_uint16x4_private val_ = simde_uint16x4_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_u16 + #define vst1_u16(a, b) simde_vst1_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_u32(ptr, val); + #else + simde_uint32x2_private val_ = simde_uint32x2_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_u32 + #define vst1_u32(a, b) simde_vst1_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(1)], simde_uint64x1_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_u64(ptr, val); + #else + simde_uint64x1_private val_ = simde_uint64x1_to_private(val); + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_u64 + #define vst1_u64(a, b) simde_vst1_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_float16x8_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1q_f16(ptr, val); + #else + simde_float16x8_private val_ = simde_float16x8_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst1q_f16 + #define vst1q_f16(a, b) simde_vst1q_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(ptr, val); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(val, 0, ptr); + #else + simde_float32x4_private val_ = simde_float32x4_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_f32 + #define vst1q_f32(a, b) simde_vst1q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void 
+simde_vst1q_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x2_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(ptr, val); + #else + simde_float64x2_private val_ = simde_float64x2_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst1q_f64 + #define vst1q_f64(a, b) simde_vst1q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int8x16_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s8(ptr, val); + #else + simde_int8x16_private val_ = simde_int8x16_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_s8 + #define vst1q_s8(a, b) simde_vst1q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int16x8_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s16(ptr, val); + #else + simde_int16x8_private val_ = simde_int16x8_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_s16 + #define vst1q_s16(a, b) simde_vst1q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(ptr, val); + #else + simde_int32x4_private val_ = simde_int32x4_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_s32 + #define vst1q_s32(a, b) simde_vst1q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(ptr, val); + #else + simde_int64x2_private val_ = simde_int64x2_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_s64 + #define vst1q_s64(a, b) simde_vst1q_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint8x16_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_u8(ptr, val); + #else + simde_uint8x16_private val_ = simde_uint8x16_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_u8 + #define vst1q_u8(a, b) simde_vst1q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint16x8_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_u16(ptr, val); + #else + simde_uint16x8_private val_ = simde_uint16x8_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + 
#endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_u16 + #define vst1q_u16(a, b) simde_vst1q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_u32(ptr, val); + #else + simde_uint32x4_private val_ = simde_uint32x4_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_u32 + #define vst1q_u32(a, b) simde_vst1q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_u64(ptr, val); + #else + simde_uint64x2_private val_ = simde_uint64x2_to_private(val); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(ptr, val_.v128); + #else + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_u64 + #define vst1q_u64(a, b) simde_vst1q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST1_H) */ +/* :: End simde/arm/neon/st1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st1_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ST1_LANE_H) +#define SIMDE_ARM_NEON_ST1_LANE_H +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_f32(simde_float32_t *ptr, simde_float32x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x2_private val_ = simde_float32x2_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_f32 + #define vst1_lane_f32(a, b, c) simde_vst1_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_f64(simde_float64_t *ptr, simde_float64x1_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + vst1_lane_f64(ptr, val, 0); + #else + simde_float64x1_private val_ = simde_float64x1_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_f64 + #define vst1_lane_f64(a, b, c) simde_vst1_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_s8(int8_t *ptr, simde_int8x8_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x8_private val_ = simde_int8x8_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_s8 + #define vst1_lane_s8(a, b, c) simde_vst1_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_s16(int16_t *ptr, simde_int16x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x4_private val_ = simde_int16x4_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_s16 + #define vst1_lane_s16(a, b, c) simde_vst1_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_s32(int32_t *ptr, simde_int32x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x2_private val_ = simde_int32x2_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_s32 + #define vst1_lane_s32(a, b, c) simde_vst1_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_s64(int64_t *ptr, simde_int64x1_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) lane; + vst1_lane_s64(ptr, val, 0); + #else + simde_int64x1_private val_ = simde_int64x1_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_s64 + #define vst1_lane_s64(a, b, c) simde_vst1_lane_s64((a), 
(b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_u8(uint8_t *ptr, simde_uint8x8_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst1_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x8_private val_ = simde_uint8x8_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_u8 + #define vst1_lane_u8(a, b, c) simde_vst1_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_u16(uint16_t *ptr, simde_uint16x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x4_private val_ = simde_uint16x4_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_u16 + #define vst1_lane_u16(a, b, c) simde_vst1_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_u32(uint32_t *ptr, simde_uint32x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst1_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x2_private val_ = simde_uint32x2_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_u32 + #define vst1_lane_u32(a, b, c) simde_vst1_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_u64(uint64_t *ptr, simde_uint64x1_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + (void) lane; + vst1_lane_u64(ptr, val, 0); + #else + simde_uint64x1_private val_ = simde_uint64x1_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_u64 + #define vst1_lane_u64(a, b, c) simde_vst1_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_f32(simde_float32_t *ptr, simde_float32x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x4_private val_ = simde_float32x4_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_f32 + #define vst1q_lane_f32(a, b, c) simde_vst1q_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_f64(simde_float64_t *ptr, simde_float64x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_f64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float64x2_private val_ = simde_float64x2_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_f64 + #define vst1q_lane_f64(a, b, c) simde_vst1q_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_s8(int8_t *ptr, simde_int8x16_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_s8, 
HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x16_private val_ = simde_int8x16_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_s8 + #define vst1q_lane_s8(a, b, c) simde_vst1q_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_s16(int16_t *ptr, simde_int16x8_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x8_private val_ = simde_int16x8_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_s16 + #define vst1q_lane_s16(a, b, c) simde_vst1q_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_s32(int32_t *ptr, simde_int32x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x4_private val_ = simde_int32x4_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_s32 + #define vst1q_lane_s32(a, b, c) simde_vst1q_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_s64(int64_t *ptr, simde_int64x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int64x2_private val_ = simde_int64x2_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_s64 + #define vst1q_lane_s64(a, b, c) simde_vst1q_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_u8(uint8_t *ptr, simde_uint8x16_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst1q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x16_private val_ = simde_uint8x16_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_u8 + #define vst1q_lane_u8(a, b, c) simde_vst1q_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_u16(uint16_t *ptr, simde_uint16x8_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x8_private val_ = simde_uint16x8_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_u16 + #define vst1q_lane_u16(a, b, c) simde_vst1q_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_u32(uint32_t *ptr, simde_uint32x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst1q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x4_private val_ = simde_uint32x4_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_u32 + #define 
vst1q_lane_u32(a, b, c) simde_vst1q_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_u64(uint64_t *ptr, simde_uint64x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst1q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint64x2_private val_ = simde_uint64x2_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_u64 + #define vst1q_lane_u64(a, b, c) simde_vst1q_lane_u64((a), (b), (c)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST1_LANE_H) */ + +/* :: End simde/arm/neon/st1_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ST2_H) +#define SIMDE_ARM_NEON_ST2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/zip.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ZIP_H) && !defined(SIMDE_BUG_INTEL_857088) +#define SIMDE_ARM_NEON_ZIP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/zip1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ZIP1_H) +#define SIMDE_ARM_NEON_ZIP1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vzip1_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1_f32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2x2_t tmp = vzip_f32(a, b); + return tmp.val[0]; + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_f32 + #define vzip1_f32(a, b) simde_vzip1_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vzip1_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1_s8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t tmp = vzip_s8(a, b); + return tmp.val[0]; + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_s8 + #define vzip1_s8(a, b) simde_vzip1_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vzip1_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1_s16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t tmp = vzip_s16(a, b); + return tmp.val[0]; + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 4, 1, 5); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_s16 + #define vzip1_s16(a, b) simde_vzip1_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vzip1_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) 
+ return vzip1_s32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t tmp = vzip_s32(a, b); + return tmp.val[0]; + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_s32 + #define vzip1_s32(a, b) simde_vzip1_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vzip1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1_u8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8x2_t tmp = vzip_u8(a, b); + return tmp.val[0]; + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_u8 + #define vzip1_u8(a, b) simde_vzip1_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vzip1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1_u16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4x2_t tmp = vzip_u16(a, b); + return tmp.val[0]; + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 0, 4, 1, 5); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_u16 + #define vzip1_u16(a, b) simde_vzip1_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vzip1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1_u32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2x2_t tmp = vzip_u32(a, b); + return tmp.val[0]; + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpacklo_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / 
sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1_u32 + #define vzip1_u32(a, b) simde_vzip1_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vzip1q_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_f32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2x2_t tmp = vzip_f32(vget_low_f32(a), vget_low_f32(b)); + return vcombine_f32(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 4, 1, 5); + #elif defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_unpacklo_ps(a_.m128, b_.m128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 4, 1, 5); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_f32 + #define vzip1q_f32(a, b) simde_vzip1q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vzip1q_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergeh(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_unpacklo_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_f64 + #define vzip1q_f64(a, b) simde_vzip1q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vzip1q_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_s8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t tmp = vzip_s8(vget_low_s8(a), vget_low_s8(b)); + return vcombine_s8(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = 
SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_s8 + #define vzip1q_s8(a, b) simde_vzip1q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vzip1q_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_s16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t tmp = vzip_s16(vget_low_s16(a), vget_low_s16(b)); + return vcombine_s16(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_s16 + #define vzip1q_s16(a, b) simde_vzip1q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vzip1q_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_s32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t tmp = vzip_s32(vget_low_s32(a), vget_low_s32(b)); + return vcombine_s32(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 4, 1, 5); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 4, 1, 5); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_s32 + #define vzip1q_s32(a, b) simde_vzip1q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vzip1q_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergeh(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); + 
#elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_s64 + #define vzip1q_s64(a, b) simde_vzip1q_s64((a), (b)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vzip1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_u8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8x2_t tmp = vzip_u8(vget_low_u8(a), vget_low_u8(b)); + return vcombine_u8(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_u8 + #define vzip1q_u8(a, b) simde_vzip1q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vzip1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_u16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4x2_t tmp = vzip_u16(vget_low_u16(a), vget_low_u16(b)); + return vcombine_u16(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 0, 8, 1, 9, 2, 10, 3, 11); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_u16 + #define vzip1q_u16(a, b) simde_vzip1q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vzip1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_u32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2x2_t tmp = vzip_u32(vget_low_u32(a), 
vget_low_u32(b)); + return vcombine_u32(tmp.val[0], tmp.val[1]); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergeh(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 0, 4, 1, 5); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 0, 4, 1, 5); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_u32 + #define vzip1q_u32(a, b) simde_vzip1q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vzip1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip1q_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergeh(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 0, 2); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpacklo_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 0, 2); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[2 * i ] = a_.values[i]; + r_.values[2 * i + 1] = b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip1q_u64 + #define vzip1q_u64(a, b) simde_vzip1q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ZIP1_H) */ +/* :: End simde/arm/neon/zip1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/zip2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ZIP2_H) +#define SIMDE_ARM_NEON_ZIP2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vzip2_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpackhi_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_f32 + #define vzip2_f32(a, b) simde_vzip2_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vzip2_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpackhi_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_s8 + #define vzip2_s8(a, b) simde_vzip2_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vzip2_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpackhi_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 2, 6, 3, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_s16 + #define vzip2_s16(a, b) simde_vzip2_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vzip2_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = 
_mm_unpackhi_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_s32 + #define vzip2_s32(a, b) simde_vzip2_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vzip2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpackhi_pi8(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_u8 + #define vzip2_u8(a, b) simde_vzip2_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vzip2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpackhi_pi16(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.values, b_.values, 2, 6, 3, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_u16 + #define vzip2_u16(a, b) simde_vzip2_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vzip2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_unpackhi_pi32(a_.m64, b_.m64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2_u32 + #define vzip2_u32(a, b) simde_vzip2_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t 
+simde_vzip2q_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_f32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 2, 6, 3, 7); + #elif defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_unpackhi_ps(a_.m128, b_.m128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 2, 6, 3, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_f32 + #define vzip2q_f32(a, b) simde_vzip2q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vzip2q_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_f64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergel(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_unpackhi_pd(a_.m128d, b_.m128d); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_f64 + #define vzip2q_f64(a, b) simde_vzip2q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vzip2q_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_s8 + #define vzip2q_s8(a, b) simde_vzip2q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t 
+simde_vzip2q_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_s16 + #define vzip2q_s16(a, b) simde_vzip2q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vzip2q_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 2, 6, 3, 7); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 2, 6, 3, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_s32 + #define vzip2q_s32(a, b) simde_vzip2q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vzip2q_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergel(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_s64 + #define vzip2q_s64(a, b) simde_vzip2q_s64((a), (b)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vzip2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) 
+ return vzip2q_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_shuffle(a_.v128, b_.v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.values, b_.values, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_u8 + #define vzip2q_u8(a, b) simde_vzip2q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vzip2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_shuffle(a_.v128, b_.v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, b_.values, 4, 12, 5, 13, 6, 14, 7, 15); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_u16 + #define vzip2q_u16(a, b) simde_vzip2q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vzip2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_mergel(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_shuffle(a_.v128, b_.v128, 2, 6, 3, 7); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.values, b_.values, 2, 6, 3, 7); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_u32 + #define vzip2q_u32(a, b) simde_vzip2q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vzip2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vzip2q_u64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_mergel(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_shuffle(a_.v128, b_.v128, 1, 3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_unpackhi_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.values = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.values, b_.values, 1, 3); + #else + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + r_.values[(2 * i) ] = a_.values[halfway_point + i]; + r_.values[(2 * i) + 1] = b_.values[halfway_point + i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vzip2q_u64 + #define vzip2q_u64(a, b) simde_vzip2q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ZIP2_H) */ +/* :: End simde/arm/neon/zip2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x2_t +simde_vzip_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_f32(a, b); + #else + simde_float32x2x2_t r = { { simde_vzip1_f32(a, b), simde_vzip2_f32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_f32 + #define vzip_f32(a, b) simde_vzip_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x2_t +simde_vzip_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_s8(a, b); + #else + simde_int8x8x2_t r = { { simde_vzip1_s8(a, b), simde_vzip2_s8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_s8 + #define vzip_s8(a, b) simde_vzip_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x2_t +simde_vzip_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_s16(a, b); + #else + simde_int16x4x2_t r = { { simde_vzip1_s16(a, b), simde_vzip2_s16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_s16 + #define vzip_s16(a, b) simde_vzip_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x2_t +simde_vzip_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_s32(a, b); + #else + simde_int32x2x2_t r = { { simde_vzip1_s32(a, b), simde_vzip2_s32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_s32 + #define vzip_s32(a, b) simde_vzip_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x2_t +simde_vzip_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_u8(a, b); + #else + simde_uint8x8x2_t r = { { simde_vzip1_u8(a, b), simde_vzip2_u8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_u8 + #define vzip_u8(a, b) simde_vzip_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x2_t +simde_vzip_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_u16(a, b); + #else + simde_uint16x4x2_t r = { { simde_vzip1_u16(a, b), 
simde_vzip2_u16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_u16 + #define vzip_u16(a, b) simde_vzip_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x2_t +simde_vzip_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzip_u32(a, b); + #else + simde_uint32x2x2_t r = { { simde_vzip1_u32(a, b), simde_vzip2_u32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzip_u32 + #define vzip_u32(a, b) simde_vzip_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x2_t +simde_vzipq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_f32(a, b); + #else + simde_float32x4x2_t r = { { simde_vzip1q_f32(a, b), simde_vzip2q_f32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_f32 + #define vzipq_f32(a, b) simde_vzipq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x2_t +simde_vzipq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_s8(a, b); + #else + simde_int8x16x2_t r = { { simde_vzip1q_s8(a, b), simde_vzip2q_s8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_s8 + #define vzipq_s8(a, b) simde_vzipq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x2_t +simde_vzipq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_s16(a, b); + #else + simde_int16x8x2_t r = { { simde_vzip1q_s16(a, b), simde_vzip2q_s16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_s16 + #define vzipq_s16(a, b) simde_vzipq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x2_t +simde_vzipq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_s32(a, b); + #else + simde_int32x4x2_t r = { { simde_vzip1q_s32(a, b), simde_vzip2q_s32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_s32 + #define vzipq_s32(a, b) simde_vzipq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x2_t +simde_vzipq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_u8(a, b); + #else + simde_uint8x16x2_t r = { { simde_vzip1q_u8(a, b), simde_vzip2q_u8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_u8 + #define vzipq_u8(a, b) simde_vzipq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x2_t +simde_vzipq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_u16(a, b); + #else + simde_uint16x8x2_t r = { { simde_vzip1q_u16(a, b), simde_vzip2q_u16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_u16 + #define vzipq_u16(a, b) simde_vzipq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x2_t +simde_vzipq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vzipq_u32(a, b); + #else + simde_uint32x4x2_t r = { { simde_vzip1q_u32(a, b), simde_vzip2q_u32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vzipq_u32 + #define vzipq_u32(a, b) simde_vzipq_u32((a), (b)) 
+#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ZIP_H) */ +/* :: End simde/arm/neon/zip.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_f32(simde_float32_t *ptr, simde_float32x2x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_f32(ptr, val); + #else + simde_float32_t buf[4]; + simde_float32x2_private a_[2] = {simde_float32x2_to_private(val.val[0]), + simde_float32x2_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_f32 + #define vst2_f32(a, b) simde_vst2_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_f64(simde_float64_t *ptr, simde_float64x1x2_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst2_f64(ptr, val); + #else + simde_float64_t buf[2]; + simde_float64x1_private a_[2] = {simde_float64x1_to_private(val.val[0]), + simde_float64x1_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2_f64 + #define vst2_f64(a, b) simde_vst2_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_s8(int8_t *ptr, simde_int8x8x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_s8(ptr, val); + #else + int8_t buf[16]; + simde_int8x8_private a_[2] = {simde_int8x8_to_private(val.val[0]), + simde_int8x8_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_s8 + #define vst2_s8(a, b) simde_vst2_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_s16(int16_t *ptr, simde_int16x4x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_s16(ptr, val); + #else + int16_t buf[8]; + simde_int16x4_private a_[2] = {simde_int16x4_to_private(val.val[0]), + simde_int16x4_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_s16 + #define vst2_s16(a, b) simde_vst2_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_s32(int32_t *ptr, simde_int32x2x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_s32(ptr, val); + #else + int32_t buf[4]; + simde_int32x2_private a_[2] = {simde_int32x2_to_private(val.val[0]), + simde_int32x2_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_s32 + #define vst2_s32(a, b) simde_vst2_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_s64(int64_t *ptr, simde_int64x1x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_s64(ptr, val); + #else + int64_t buf[2]; + simde_int64x1_private a_[2] = {simde_int64x1_to_private(val.val[0]), + simde_int64x1_to_private(val.val[1])}; + for 
(size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_s64 + #define vst2_s64(a, b) simde_vst2_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_u8(uint8_t *ptr, simde_uint8x8x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_u8(ptr, val); + #else + uint8_t buf[16]; + simde_uint8x8_private a_[2] = {simde_uint8x8_to_private(val.val[0]), + simde_uint8x8_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_u8 + #define vst2_u8(a, b) simde_vst2_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_u16(uint16_t *ptr, simde_uint16x4x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_u16(ptr, val); + #else + uint16_t buf[8]; + simde_uint16x4_private a_[2] = {simde_uint16x4_to_private(val.val[0]), + simde_uint16x4_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_u16 + #define vst2_u16(a, b) simde_vst2_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_u32(uint32_t *ptr, simde_uint32x2x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_u32(ptr, val); + #else + uint32_t buf[4]; + simde_uint32x2_private a_[2] = {simde_uint32x2_to_private(val.val[0]), + simde_uint32x2_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_u32 + #define vst2_u32(a, b) simde_vst2_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_u64(uint64_t *ptr, simde_uint64x1x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2_u64(ptr, val); + #else + uint64_t buf[2]; + simde_uint64x1_private a_[2] = {simde_uint64x1_to_private(val.val[0]), + simde_uint64x1_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_u64 + #define vst2_u64(a, b) simde_vst2_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_f32(simde_float32_t *ptr, simde_float32x4x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_f32(ptr, val); + #else + simde_float32x4x2_t r = simde_vzipq_f32(val.val[0], val.val[1]); + simde_vst1q_f32(ptr, r.val[0]); + simde_vst1q_f32(ptr+4, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_f32 + #define vst2q_f32(a, b) simde_vst2q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_f64(simde_float64_t *ptr, simde_float64x2x2_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst2q_f64(ptr, val); + #else + simde_float64_t buf[4]; + simde_float64x2_private a_[2] = {simde_float64x2_to_private(val.val[0]), + simde_float64x2_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 
2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_f64 + #define vst2q_f64(a, b) simde_vst2q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_s8(int8_t *ptr, simde_int8x16x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_s8(ptr, val); + #else + simde_int8x16x2_t r = simde_vzipq_s8(val.val[0], val.val[1]); + simde_vst1q_s8(ptr, r.val[0]); + simde_vst1q_s8(ptr+16, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_s8 + #define vst2q_s8(a, b) simde_vst2q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_s16(int16_t *ptr, simde_int16x8x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_s16(ptr, val); + #else + simde_int16x8x2_t r = simde_vzipq_s16(val.val[0], val.val[1]); + simde_vst1q_s16(ptr, r.val[0]); + simde_vst1q_s16(ptr+8, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_s16 + #define vst2q_s16(a, b) simde_vst2q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_s32(int32_t *ptr, simde_int32x4x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_s32(ptr, val); + #else + simde_int32x4x2_t r = simde_vzipq_s32(val.val[0], val.val[1]); + simde_vst1q_s32(ptr, r.val[0]); + simde_vst1q_s32(ptr+4, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_s32 + #define vst2q_s32(a, b) simde_vst2q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_s64(int64_t *ptr, simde_int64x2x2_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst2q_s64(ptr, val); + #else + int64_t buf[4]; + simde_int64x2_private a_[2] = {simde_int64x2_to_private(val.val[0]), + simde_int64x2_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_s64 + #define vst2q_s64(a, b) simde_vst2q_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_u8(uint8_t *ptr, simde_uint8x16x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_u8(ptr, val); + #else + simde_uint8x16x2_t r = simde_vzipq_u8(val.val[0], val.val[1]); + simde_vst1q_u8(ptr, r.val[0]); + simde_vst1q_u8(ptr+16, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_u8 + #define vst2q_u8(a, b) simde_vst2q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_u16(uint16_t *ptr, simde_uint16x8x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_u16(ptr, val); + #else + simde_uint16x8x2_t r = simde_vzipq_u16(val.val[0], val.val[1]); + simde_vst1q_u16(ptr, r.val[0]); + simde_vst1q_u16(ptr+8, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_u16 + #define vst2q_u16(a, b) simde_vst2q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_u32(uint32_t *ptr, simde_uint32x4x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst2q_u32(ptr, val); + #else + simde_uint32x4x2_t r = simde_vzipq_u32(val.val[0], val.val[1]); + simde_vst1q_u32(ptr, r.val[0]); + simde_vst1q_u32(ptr+4, r.val[1]); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_u32 + #define vst2q_u32(a, b) simde_vst2q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_u64(uint64_t *ptr, 
simde_uint64x2x2_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst2q_u64(ptr, val); + #else + uint64_t buf[4]; + simde_uint64x2_private a_[2] = {simde_uint64x2_to_private(val.val[0]), + simde_uint64x2_to_private(val.val[1])}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 2 ; i++) { + buf[i] = a_[i % 2].values[i / 2]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_u64 + #define vst2q_u64(a, b) simde_vst2q_u64((a), (b)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST2_H) */ +/* :: End simde/arm/neon/st2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st2_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ST2_LANE_H) +#define SIMDE_ARM_NEON_ST2_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int8x8x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst2_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x8_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int8x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_s8 + #define vst2_lane_s8(a, b, c) simde_vst2_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int16x4x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x4_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_s16 + #define vst2_lane_s16(a, b, c) simde_vst2_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x2x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst2_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x2_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_s32 + #define vst2_lane_s32(a, b, c) simde_vst2_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x1x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + HEDLEY_STATIC_CAST(void, lane); + vst2_lane_s64(ptr, val, 0); + #else + simde_int64x1_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_s64 + #define vst2_lane_s64(a, b, c) simde_vst2_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint8x8x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst2_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x8_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_uint8x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_u8 + #define vst2_lane_u8(a, b, c) simde_vst2_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint16x4x2_t val, 
const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x4_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_uint16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_u16 + #define vst2_lane_u16(a, b, c) simde_vst2_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x2x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst2_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x2_private r; + for (size_t i = 0 ; i < 2 ; i ++) { + r = simde_uint32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_u32 + #define vst2_lane_u32(a, b, c) simde_vst2_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x1x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + HEDLEY_STATIC_CAST(void, lane); + vst2_lane_u64(ptr, val, 0); + #else + simde_uint64x1_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_uint64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_u64 + #define vst2_lane_u64(a, b, c) simde_vst2_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst2_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x2_private r; + for (size_t i = 0 ; i < 2 ; i ++) { + r = simde_float32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_f32 + #define vst2_lane_f32(a, b, c) simde_vst2_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x1x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + HEDLEY_STATIC_CAST(void, lane); + vst2_lane_f64(ptr, val, 0); + #else + simde_float64x1_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_float64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_f64 + #define vst2_lane_f64(a, b, c) simde_vst2_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int8x16x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 16) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst2q_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x16_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int8x16_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_s8 + #define vst2q_lane_s8(a, b, c) 
simde_vst2q_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int16x8x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x8_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_s16 + #define vst2q_lane_s16(a, b, c) simde_vst2q_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int32x4x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst2q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x4_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_s32 + #define vst2q_lane_s32(a, b, c) simde_vst2q_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_int64x2x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int64x2_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_int64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_s64 + #define vst2q_lane_s64(a, b, c) simde_vst2q_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint8x16x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 16) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst2q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x16_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_uint8x16_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_u8 + #define vst2q_lane_u8(a, b, c) simde_vst2q_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint16x8x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x8_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_uint16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_u16 + #define vst2q_lane_u16(a, b, c) simde_vst2q_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint32x4x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst2q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x4_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = 
simde_uint32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_u32 + #define vst2q_lane_u32(a, b, c) simde_vst2q_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint64x2_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_uint64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_u64 + #define vst2q_lane_u64(a, b, c) simde_vst2q_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x4x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst2q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x4_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_float32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_f32 + #define vst2q_lane_f32(a, b, c) simde_vst2q_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float64x2x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst2q_lane_f64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float64x2_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_float64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_f64 + #define vst2q_lane_f64(a, b, c) simde_vst2q_lane_f64((a), (b), (c)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST2_LANE_H) */ +/* :: End simde/arm/neon/st2_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher + */ + +#if !defined(SIMDE_ARM_NEON_ST3_H) +#define SIMDE_ARM_NEON_ST3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float32x2x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_f32(ptr, val); + #else + simde_float32x2_private a[3] = { simde_float32x2_to_private(val.val[0]), + simde_float32x2_to_private(val.val[1]), + simde_float32x2_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[0].values, a[1].values, 0, 2); + __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[2].values, a[0].values, 0, 3); + __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[1].values, a[2].values, 1, 3); + simde_memcpy(ptr, &r1, sizeof(r1)); + simde_memcpy(&ptr[2], &r2, sizeof(r2)); + simde_memcpy(&ptr[4], &r3, sizeof(r3)); + #else + simde_float32_t buf[6]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_f32 + #define vst3_f32(a, b) simde_vst3_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst3_f64(ptr, val); + #else + simde_float64x1_private a_[3] = { simde_float64x1_to_private(val.val[0]), + simde_float64x1_to_private(val.val[1]), + simde_float64x1_to_private(val.val[2]) }; + simde_memcpy(ptr, &a_[0].values, sizeof(a_[0].values)); + simde_memcpy(&ptr[1], &a_[1].values, sizeof(a_[1].values)); + simde_memcpy(&ptr[2], &a_[2].values, sizeof(a_[2].values)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3_f64 + #define vst3_f64(a, b) simde_vst3_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int8x8x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_s8(ptr, val); + #else + simde_int8x8_private a_[3] = { simde_int8x8_to_private(val.val[0]), + simde_int8x8_to_private(val.val[1]), + simde_int8x8_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[1].values, + 0, 8, 3, 1, 9, 4, 2, 10); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 8, r0, a_[2].values, + 0, 1, 8, 3, 4, 9, 6, 7); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[2].values, a_[1].values, + 2, 5, 11, 3, 6, 12, 4, 7); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 8, r1, a_[0].values, + 0, 11, 2, 3, 12, 5, 6, 13); + simde_memcpy(&ptr[8], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 8, 
a_[0].values, a_[2].values, + 13, 6, 0, 14, 7, 0, 15, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 8, r2, a_[1].values, + 13, 0, 1, 14, 3, 4, 15, 6); + simde_memcpy(&ptr[16], &m2, sizeof(m2)); + #else + int8_t buf[24]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_s8 + #define vst3_s8(a, b) simde_vst3_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int16x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_s16(ptr, val); + #else + simde_int16x4_private a_[3] = { simde_int16x4_to_private(val.val[0]), + simde_int16x4_to_private(val.val[1]), + simde_int16x4_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[0].values, a_[1].values, + 0, 4, 1, 0); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 8, r0, a_[2].values, + 0, 1, 4, 2); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[1].values, a_[2].values, + 1, 5, 2, 0); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 8, r1, a_[0].values, + 0, 1, 6, 2); + simde_memcpy(&ptr[4], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[2].values, a_[0].values, + 2, 7, 3, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 8, r2, a_[1].values, + 0, 1, 7, 2); + simde_memcpy(&ptr[8], &m2, sizeof(m2)); + #else + int16_t buf[12]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_s16 + #define vst3_s16(a, b) simde_vst3_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int32x2x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_s32(ptr, val); + #else + simde_int32x2_private a[3] = { simde_int32x2_to_private(val.val[0]), + simde_int32x2_to_private(val.val[1]), + simde_int32x2_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[0].values, a[1].values, 0, 2); + __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[2].values, a[0].values, 0, 3); + __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[1].values, a[2].values, 1, 3); + simde_memcpy(ptr, &r1, sizeof(r1)); + simde_memcpy(&ptr[2], &r2, sizeof(r2)); + simde_memcpy(&ptr[4], &r3, sizeof(r3)); + #else + int32_t buf[6]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_s32 + #define vst3_s32(a, b) simde_vst3_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x1x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_s64(ptr, val); + #else + simde_int64x1_private a_[3] = { simde_int64x1_to_private(val.val[0]), + simde_int64x1_to_private(val.val[1]), + simde_int64x1_to_private(val.val[2]) }; + simde_memcpy(ptr, &a_[0].values, sizeof(a_[0].values)); + simde_memcpy(&ptr[1], &a_[1].values, 
sizeof(a_[1].values)); + simde_memcpy(&ptr[2], &a_[2].values, sizeof(a_[2].values)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3_s64 + #define vst3_s64(a, b) simde_vst3_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint8x8x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_u8(ptr, val); + #else + simde_uint8x8_private a_[3] = { simde_uint8x8_to_private(val.val[0]), + simde_uint8x8_to_private(val.val[1]), + simde_uint8x8_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[1].values, + 0, 8, 3, 1, 9, 4, 2, 10); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 8, r0, a_[2].values, + 0, 1, 8, 3, 4, 9, 6, 7); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[2].values, a_[1].values, + 2, 5, 11, 3, 6, 12, 4, 7); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 8, r1, a_[0].values, + 0, 11, 2, 3, 12, 5, 6, 13); + simde_memcpy(&ptr[8], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_[0].values, a_[2].values, + 13, 6, 0, 14, 7, 0, 15, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 8, r2, a_[1].values, + 13, 0, 1, 14, 3, 4, 15, 6); + simde_memcpy(&ptr[16], &m2, sizeof(m2)); + #else + uint8_t buf[24]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_u8 + #define vst3_u8(a, b) simde_vst3_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint16x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_u16(ptr, val); + #else + simde_uint16x4_private a_[3] = { simde_uint16x4_to_private(val.val[0]), + simde_uint16x4_to_private(val.val[1]), + simde_uint16x4_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[0].values, a_[1].values, + 0, 4, 1, 0); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 8, r0, a_[2].values, + 0, 1, 4, 2); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[1].values, a_[2].values, + 1, 5, 2, 0); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 8, r1, a_[0].values, + 0, 1, 6, 2); + simde_memcpy(&ptr[4], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_[2].values, a_[0].values, + 2, 7, 3, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 8, r2, a_[1].values, + 0, 1, 7, 2); + simde_memcpy(&ptr[8], &m2, sizeof(m2)); + #else + uint16_t buf[12]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_u16 + #define vst3_u16(a, b) simde_vst3_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint32x2x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_u32(ptr, val); + #else + simde_uint32x2_private a[3] = { simde_uint32x2_to_private(val.val[0]), + simde_uint32x2_to_private(val.val[1]), + simde_uint32x2_to_private(val.val[2]) }; + 
#if defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[0].values, a[1].values, 0, 2); + __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[2].values, a[0].values, 0, 3); + __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(32, 8, a[1].values, a[2].values, 1, 3); + simde_memcpy(ptr, &r1, sizeof(r1)); + simde_memcpy(&ptr[2], &r2, sizeof(r2)); + simde_memcpy(&ptr[4], &r3, sizeof(r3)); + #else + uint32_t buf[6]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_u32 + #define vst3_u32(a, b) simde_vst3_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3_u64(ptr, val); + #else + simde_uint64x1_private a_[3] = { simde_uint64x1_to_private(val.val[0]), + simde_uint64x1_to_private(val.val[1]), + simde_uint64x1_to_private(val.val[2]) }; + simde_memcpy(ptr, &a_[0].values, sizeof(a_[0].values)); + simde_memcpy(&ptr[1], &a_[1].values, sizeof(a_[1].values)); + simde_memcpy(&ptr[2], &a_[2].values, sizeof(a_[2].values)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3_u64 + #define vst3_u64(a, b) simde_vst3_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float32x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_f32(ptr, val); + #else + simde_float32x4_private a_[3] = { simde_float32x4_to_private(val.val[0]), + simde_float32x4_to_private(val.val[1]), + simde_float32x4_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[0].values, a_[1].values, + 0, 4, 1, 0); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(32, 16, r0, a_[2].values, + 0, 1, 4, 2); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[1].values, a_[2].values, + 1, 5, 2, 0); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(32, 16, r1, a_[0].values, + 0, 1, 6, 2); + simde_memcpy(&ptr[4], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[2].values, a_[0].values, + 2, 7, 3, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(32, 16, r2, a_[1].values, + 0, 1, 7, 2); + simde_memcpy(&ptr[8], &m2, sizeof(m2)); + #else + simde_float32_t buf[12]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_f32 + #define vst3q_f32(a, b) simde_vst3q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float64x2x3_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst3q_f64(ptr, val); + #else + simde_float64x2_private a[3] = { simde_float64x2_to_private(val.val[0]), + simde_float64x2_to_private(val.val[1]), + simde_float64x2_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[0].values, a[1].values, 0, 2); + __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[2].values, a[0].values, 0, 3); + __typeof__(a[0].values) r3 
= SIMDE_SHUFFLE_VECTOR_(64, 16, a[1].values, a[2].values, 1, 3); + simde_memcpy(ptr, &r1, sizeof(r1)); + simde_memcpy(&ptr[2], &r2, sizeof(r2)); + simde_memcpy(&ptr[4], &r3, sizeof(r3)); + #else + simde_float64_t buf[6]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_f64 + #define vst3q_f64(a, b) simde_vst3q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_int8x16x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_s8(ptr, val); + #else + simde_int8x16_private a_[3] = { simde_int8x16_to_private(val.val[0]), + simde_int8x16_to_private(val.val[1]), + simde_int8x16_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[0].values, a_[1].values, + 0, 16, 6, 1, 17, 7, 2, 18, 8, 3, 19, 9, + 4, 20, 10, 5); + + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 16, r0, a_[2].values, + 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[1].values, a_[2].values, + 5, 21, 11, 6, 22, 12, 7, 23, 13, 8, 24, + 14, 9, 25, 15, 10); + + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 16, r1, r0, + 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); + simde_memcpy(&ptr[16], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[2].values, a_[0].values, + 10, 27, 0, 11, 28, 0, 12, 29, 0, 13, 30, 0, 14, 31, 0, 15); + + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 16, r2, r1, + 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); + simde_memcpy(&ptr[32], &m2, sizeof(m2)); + #else + int8_t buf[48]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_s8 + #define vst3q_s8(a, b) simde_vst3q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_int16x8x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_s16(ptr, val); + #else + simde_int16x8_private a_[3] = { simde_int16x8_to_private(val.val[0]), + simde_int16x8_to_private(val.val[1]), + simde_int16x8_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[0].values, a_[1].values, + 0, 8, 3, 1, 9, 4, 2, 10); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 16, r0, a_[2].values, + 0, 1, 8, 3, 4, 9, 6, 7); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[2].values, a_[1].values, + 2, 5, 11, 3, 6, 12, 4, 7); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 16, r1, a_[0].values, + 0, 11, 2, 3, 12, 5, 6, 13); + simde_memcpy(&ptr[8], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[0].values, a_[2].values, + 13, 6, 0, 14, 7, 0, 15, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 16, r2, a_[1].values, + 13, 0, 1, 14, 3, 4, 15, 6); + simde_memcpy(&ptr[16], &m2, sizeof(m2)); + #else + int16_t buf[24]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + 
simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_s16 + #define vst3q_s16(a, b) simde_vst3q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_int32x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_s32(ptr, val); + #else + simde_int32x4_private a_[3] = { simde_int32x4_to_private(val.val[0]), + simde_int32x4_to_private(val.val[1]), + simde_int32x4_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[0].values, a_[1].values, + 0, 4, 1, 0); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(32, 16, r0, a_[2].values, + 0, 1, 4, 2); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[1].values, a_[2].values, + 1, 5, 2, 0); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(32, 16, r1, a_[0].values, + 0, 1, 6, 2); + simde_memcpy(&ptr[4], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[2].values, a_[0].values, + 2, 7, 3, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(32, 16, r2, a_[1].values, + 0, 1, 7, 2); + simde_memcpy(&ptr[8], &m2, sizeof(m2)); + #else + int32_t buf[12]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_s32 + #define vst3q_s32(a, b) simde_vst3q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_int64x2x3_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst3q_s64(ptr, val); + #else + simde_int64x2_private a[3] = { simde_int64x2_to_private(val.val[0]), + simde_int64x2_to_private(val.val[1]), + simde_int64x2_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[0].values, a[1].values, 0, 2); + __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[2].values, a[0].values, 0, 3); + __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[1].values, a[2].values, 1, 3); + simde_memcpy(ptr, &r1, sizeof(r1)); + simde_memcpy(&ptr[2], &r2, sizeof(r2)); + simde_memcpy(&ptr[4], &r3, sizeof(r3)); + #else + int64_t buf[6]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_s64 + #define vst3q_s64(a, b) simde_vst3q_s64((a), (b)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(48)], simde_uint8x16x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_u8(ptr, val); + #else + simde_uint8x16_private a_[3] = {simde_uint8x16_to_private(val.val[0]), + simde_uint8x16_to_private(val.val[1]), + simde_uint8x16_to_private(val.val[2])}; + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t a = a_[0].v128; + v128_t b = a_[1].v128; + v128_t c = a_[2].v128; + + // r0 = [a0, b0, a6, a1, b1, a7, a2, b2, a8, a3, b3, a9, a4, b4, a10, a5] + v128_t r0 = wasm_i8x16_shuffle(a, b, 0, 16, 6, 1, 17, 7, 2, 18, 8, 3, 19, 9, + 4, 20, 10, 5); + // m0 = [a0, b0, c0, a1, b1, c1, a2, b2, c2, a3, b3, c3, a4, b4, c4, a5] + v128_t m0 = wasm_i8x16_shuffle(r0, c, 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, + 19, 12, 13, 
20, 15); + wasm_v128_store(ptr, m0); + + // r1 = [b5, c5, b11, b6, c6, b12, b7, c7, b13, b8, c8, b14, b9, c9, b15, + // b10] + v128_t r1 = wasm_i8x16_shuffle(b, c, 5, 21, 11, 6, 22, 12, 7, 23, 13, 8, 24, + 14, 9, 25, 15, 10); + // m1 = [b5, c5, a6, b6, c6, a7, b7, c7, a8, b8, c8, a9, b9, c9, a10, b10] + v128_t m1 = wasm_i8x16_shuffle(r1, r0, 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, + 27, 12, 13, 30, 15); + wasm_v128_store(ptr + 16, m1); + + // r2 = [c10, a11, X, c11, a12, X, c12, a13, X, c13, a14, X, c14, a15, X, + // c15] + v128_t r2 = wasm_i8x16_shuffle(c, a, 10, 27, 0, 11, 28, 0, 12, 29, 0, 13, + 30, 0, 14, 31, 0, 15); + // m2 = [c10, a11, b11, c11, a12, b12, c12, a13, b13, c13, a14, b14, c14, + // a15, b15, c15] + v128_t m2 = wasm_i8x16_shuffle(r2, r1, 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, + 27, 12, 13, 30, 15); + wasm_v128_store(ptr + 32, m2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[0].values, a_[1].values, + 0, 16, 6, 1, 17, 7, 2, 18, 8, 3, 19, 9, + 4, 20, 10, 5); + + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(8, 16, r0, a_[2].values, + 0, 1, 16, 3, 4, 17, 6, 7, 18, 9, 10, 19, 12, 13, 20, 15); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[1].values, a_[2].values, + 5, 21, 11, 6, 22, 12, 7, 23, 13, 8, 24, + 14, 9, 25, 15, 10); + + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(8, 16, r1, r0, + 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); + simde_memcpy(&ptr[16], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_[2].values, a_[0].values, + 10, 27, 0, 11, 28, 0, 12, 29, 0, 13, 30, 0, 14, 31, 0, 15); + + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(8, 16, r2, r1, + 0, 1, 18, 3, 4, 21, 6, 7, 24, 9, 10, 27, 12, 13, 30, 15); + simde_memcpy(&ptr[32], &m2, sizeof(m2)); + #else + uint8_t buf[48]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_u8 + #define vst3q_u8(a, b) simde_vst3q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_uint16x8x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_u16(ptr, val); + #else + simde_uint16x8_private a_[3] = { simde_uint16x8_to_private(val.val[0]), + simde_uint16x8_to_private(val.val[1]), + simde_uint16x8_to_private(val.val[2]) }; + + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[0].values, a_[1].values, + 0, 8, 3, 1, 9, 4, 2, 10); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(16, 16, r0, a_[2].values, + 0, 1, 8, 3, 4, 9, 6, 7); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[2].values, a_[1].values, + 2, 5, 11, 3, 6, 12, 4, 7); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(16, 16, r1, a_[0].values, + 0, 11, 2, 3, 12, 5, 6, 13); + simde_memcpy(&ptr[8], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_[0].values, a_[2].values, + 13, 6, 0, 14, 7, 0, 15, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(16, 16, r2, a_[1].values, + 13, 0, 1, 14, 3, 4, 15, 6); + simde_memcpy(&ptr[16], &m2, sizeof(m2)); + #else + uint16_t buf[24]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + 
simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_u16 + #define vst3q_u16(a, b) simde_vst3q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_uint32x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst3q_u32(ptr, val); + #else + simde_uint32x4_private a_[3] = { simde_uint32x4_to_private(val.val[0]), + simde_uint32x4_to_private(val.val[1]), + simde_uint32x4_to_private(val.val[2]) }; + + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a_[0].values) r0 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[0].values, a_[1].values, + 0, 4, 1, 0); + __typeof__(a_[0].values) m0 = SIMDE_SHUFFLE_VECTOR_(32, 16, r0, a_[2].values, + 0, 1, 4, 2); + simde_memcpy(ptr, &m0, sizeof(m0)); + + __typeof__(a_[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[1].values, a_[2].values, + 1, 5, 2, 0); + __typeof__(a_[0].values) m1 = SIMDE_SHUFFLE_VECTOR_(32, 16, r1, a_[0].values, + 0, 1, 6, 2); + simde_memcpy(&ptr[4], &m1, sizeof(m1)); + + __typeof__(a_[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_[2].values, a_[0].values, + 2, 7, 3, 0); + __typeof__(a_[0].values) m2 = SIMDE_SHUFFLE_VECTOR_(32, 16, r2, a_[1].values, + 0, 1, 7, 2); + simde_memcpy(&ptr[8], &m2, sizeof(m2)); + #else + uint32_t buf[12]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_u32 + #define vst3q_u32(a, b) simde_vst3q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_uint64x2x3_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst3q_u64(ptr, val); + #else + simde_uint64x2_private a[3] = { simde_uint64x2_to_private(val.val[0]), + simde_uint64x2_to_private(val.val[1]), + simde_uint64x2_to_private(val.val[2]) }; + #if defined(SIMDE_SHUFFLE_VECTOR_) + __typeof__(a[0].values) r1 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[0].values, a[1].values, 0, 2); + __typeof__(a[0].values) r2 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[2].values, a[0].values, 0, 3); + __typeof__(a[0].values) r3 = SIMDE_SHUFFLE_VECTOR_(64, 16, a[1].values, a[2].values, 1, 3); + simde_memcpy(ptr, &r1, sizeof(r1)); + simde_memcpy(&ptr[2], &r2, sizeof(r2)); + simde_memcpy(&ptr[4], &r3, sizeof(r3)); + #else + uint64_t buf[6]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_u64 + #define vst3q_u64(a, b) simde_vst3q_u64((a), (b)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST3_H) */ +/* :: End simde/arm/neon/st3.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st3_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ST3_LANE_H) +#define SIMDE_ARM_NEON_ST3_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int8x8x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst3_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x8_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int8x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_s8 + #define vst3_lane_s8(a, b, c) simde_vst3_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int16x4x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst3_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x4_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_s16 + #define vst3_lane_s16(a, b, c) simde_vst3_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int32x2x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst3_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x2_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_s32 + #define vst3_lane_s32(a, b, c) simde_vst3_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x1x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + HEDLEY_STATIC_CAST(void, lane); + vst3_lane_s64(ptr, val, 0); + #else + simde_int64x1_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_s64 + #define vst3_lane_s64(a, b, c) simde_vst3_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void 
+simde_vst3_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint8x8x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst3_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x8_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint8x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_u8 + #define vst3_lane_u8(a, b, c) simde_vst3_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint16x4x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst3_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x4_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_u16 + #define vst3_lane_u16(a, b, c) simde_vst3_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint32x2x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst3_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x2_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_u32 + #define vst3_lane_u32(a, b, c) simde_vst3_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + HEDLEY_STATIC_CAST(void, lane); + vst3_lane_u64(ptr, val, 0); + #else + simde_uint64x1_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_u64 + #define vst3_lane_u64(a, b, c) simde_vst3_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x2x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst3_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x2_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_float32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_f32 + #define vst3_lane_f32(a, b, c) simde_vst3_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x1x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + HEDLEY_STATIC_CAST(void, lane); + vst3_lane_f64(ptr, val, 0); + #else + simde_float64x1_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_float64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_f64 + #define vst3_lane_f64(a, b, c) simde_vst3_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int8x16x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst3q_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x16_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int8x16_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_s8 + #define vst3q_lane_s8(a, b, c) simde_vst3q_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int16x8x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst3q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x8_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_s16 + #define vst3q_lane_s16(a, b, c) simde_vst3q_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int32x4x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst3q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x4_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_s32 + #define vst3q_lane_s32(a, b, c) simde_vst3q_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_int64x2x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst3q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int64x2_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_int64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_s64 + #define vst3q_lane_s64(a, b, c) simde_vst3q_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint8x16x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst3q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x16_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint8x16_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_u8 + #define vst3q_lane_u8(a, b, c) simde_vst3q_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint16x8x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst3q_lane_u16, HEDLEY_UNREACHABLE(), 
lane, ptr, val); + #else + simde_uint16x8_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_u16 + #define vst3q_lane_u16(a, b, c) simde_vst3q_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint32x4x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst3q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x4_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_u32 + #define vst3q_lane_u32(a, b, c) simde_vst3q_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x2x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst3q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint64x2_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_uint64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_u64 + #define vst3q_lane_u64(a, b, c) simde_vst3q_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x4x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst3q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x4_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_float32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_f32 + #define vst3q_lane_f32(a, b, c) simde_vst3q_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float64x2x3_t val, const int lane){ + //SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst3q_lane_f64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float64x2_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_float64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_f64 + #define vst3q_lane_f64(a, b, c) simde_vst3q_lane_f64((a), (b), (c)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST3_LANE_H) */ +/* :: End simde/arm/neon/st3_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st4.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or 
sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher + */ + +#if !defined(SIMDE_ARM_NEON_ST4_H) +#define SIMDE_ARM_NEON_ST4_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_f32(simde_float32_t *ptr, simde_float32x2x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_f32(ptr, val); + #else + simde_float32_t buf[8]; + simde_float32x2_private a_[4] = { simde_float32x2_to_private(val.val[0]), simde_float32x2_to_private(val.val[1]), + simde_float32x2_to_private(val.val[2]), simde_float32x2_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_f32 + #define vst4_f32(a, b) simde_vst4_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_f64(simde_float64_t *ptr, simde_float64x1x4_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst4_f64(ptr, val); + #else + simde_float64_t buf[4]; + simde_float64x1_private a_[4] = { simde_float64x1_to_private(val.val[0]), simde_float64x1_to_private(val.val[1]), + simde_float64x1_to_private(val.val[2]), simde_float64x1_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4_f64 + #define vst4_f64(a, b) simde_vst4_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_s8(int8_t *ptr, simde_int8x8x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_s8(ptr, val); + #else + int8_t buf[32]; + simde_int8x8_private a_[4] = { simde_int8x8_to_private(val.val[0]), simde_int8x8_to_private(val.val[1]), + simde_int8x8_to_private(val.val[2]), simde_int8x8_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_s8 + #define vst4_s8(a, b) simde_vst4_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_s16(int16_t *ptr, simde_int16x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_s16(ptr, val); + #else + int16_t buf[16]; + simde_int16x4_private a_[4] = { simde_int16x4_to_private(val.val[0]), simde_int16x4_to_private(val.val[1]), + simde_int16x4_to_private(val.val[2]), simde_int16x4_to_private(val.val[3]) 
}; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_s16 + #define vst4_s16(a, b) simde_vst4_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_s32(int32_t *ptr, simde_int32x2x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_s32(ptr, val); + #else + int32_t buf[8]; + simde_int32x2_private a_[4] = { simde_int32x2_to_private(val.val[0]), simde_int32x2_to_private(val.val[1]), + simde_int32x2_to_private(val.val[2]), simde_int32x2_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_s32 + #define vst4_s32(a, b) simde_vst4_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_s64(int64_t *ptr, simde_int64x1x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_s64(ptr, val); + #else + int64_t buf[4]; + simde_int64x1_private a_[4] = { simde_int64x1_to_private(val.val[0]), simde_int64x1_to_private(val.val[1]), + simde_int64x1_to_private(val.val[2]), simde_int64x1_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4_s64 + #define vst4_s64(a, b) simde_vst4_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_u8(uint8_t *ptr, simde_uint8x8x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_u8(ptr, val); + #else + uint8_t buf[32]; + simde_uint8x8_private a_[4] = { simde_uint8x8_to_private(val.val[0]), simde_uint8x8_to_private(val.val[1]), + simde_uint8x8_to_private(val.val[2]), simde_uint8x8_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_u8 + #define vst4_u8(a, b) simde_vst4_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_u16(uint16_t *ptr, simde_uint16x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_u16(ptr, val); + #else + uint16_t buf[16]; + simde_uint16x4_private a_[4] = { simde_uint16x4_to_private(val.val[0]), simde_uint16x4_to_private(val.val[1]), + simde_uint16x4_to_private(val.val[2]), simde_uint16x4_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_u16 + #define vst4_u16(a, b) simde_vst4_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_u32(uint32_t *ptr, simde_uint32x2x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_u32(ptr, val); + #else + uint32_t buf[8]; + simde_uint32x2_private a_[4] = { simde_uint32x2_to_private(val.val[0]), simde_uint32x2_to_private(val.val[1]), + simde_uint32x2_to_private(val.val[2]), simde_uint32x2_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_u32 + #define vst4_u32(a, b) simde_vst4_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_u64(uint64_t *ptr, simde_uint64x1x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4_u64(ptr, val); + #else + uint64_t buf[4]; + simde_uint64x1_private a_[4] = { simde_uint64x1_to_private(val.val[0]), simde_uint64x1_to_private(val.val[1]), + simde_uint64x1_to_private(val.val[2]), simde_uint64x1_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4_u64 + #define vst4_u64(a, b) simde_vst4_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_f32(simde_float32_t *ptr, simde_float32x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_f32(ptr, val); + #else + simde_float32_t buf[16]; + simde_float32x4_private a_[4] = { simde_float32x4_to_private(val.val[0]), simde_float32x4_to_private(val.val[1]), + simde_float32x4_to_private(val.val[2]), simde_float32x4_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_f32 + #define vst4q_f32(a, b) simde_vst4q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_f64(simde_float64_t *ptr, simde_float64x2x4_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst4q_f64(ptr, val); + #else + simde_float64_t buf[8]; + simde_float64x2_private a_[4] = { simde_float64x2_to_private(val.val[0]), simde_float64x2_to_private(val.val[1]), + simde_float64x2_to_private(val.val[2]), simde_float64x2_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_f64 + #define vst4q_f64(a, b) simde_vst4q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_s8(int8_t *ptr, simde_int8x16x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_s8(ptr, val); + #else + int8_t buf[64]; + simde_int8x16_private a_[4] = { simde_int8x16_to_private(val.val[0]), simde_int8x16_to_private(val.val[1]), + simde_int8x16_to_private(val.val[2]), simde_int8x16_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_s8 + #define vst4q_s8(a, b) simde_vst4q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_s16(int16_t *ptr, simde_int16x8x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_s16(ptr, val); + #else + int16_t buf[32]; + simde_int16x8_private a_[4] = { simde_int16x8_to_private(val.val[0]), simde_int16x8_to_private(val.val[1]), + simde_int16x8_to_private(val.val[2]), simde_int16x8_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_s16 + #define vst4q_s16(a, b) simde_vst4q_s16((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_s32(int32_t *ptr, simde_int32x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_s32(ptr, val); + #else + int32_t buf[16]; + simde_int32x4_private a_[4] = { simde_int32x4_to_private(val.val[0]), simde_int32x4_to_private(val.val[1]), + simde_int32x4_to_private(val.val[2]), simde_int32x4_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_s32 + #define vst4q_s32(a, b) simde_vst4q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_s64(int64_t *ptr, simde_int64x2x4_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst4q_s64(ptr, val); + #else + int64_t buf[8]; + simde_int64x2_private a_[4] = { simde_int64x2_to_private(val.val[0]), simde_int64x2_to_private(val.val[1]), + simde_int64x2_to_private(val.val[2]), simde_int64x2_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_s64 + #define vst4q_s64(a, b) simde_vst4q_s64((a), (b)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_u8(uint8_t *ptr, simde_uint8x16x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_u8(ptr, val); + #else + uint8_t buf[64]; + simde_uint8x16_private a_[4] = { simde_uint8x16_to_private(val.val[0]), simde_uint8x16_to_private(val.val[1]), + simde_uint8x16_to_private(val.val[2]), simde_uint8x16_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_u8 + #define vst4q_u8(a, b) simde_vst4q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_u16(uint16_t *ptr, simde_uint16x8x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_u16(ptr, val); + #else + uint16_t buf[32]; + simde_uint16x8_private a_[4] = { simde_uint16x8_to_private(val.val[0]), simde_uint16x8_to_private(val.val[1]), + simde_uint16x8_to_private(val.val[2]), simde_uint16x8_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_u16 + #define vst4q_u16(a, b) simde_vst4q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_u32(uint32_t *ptr, simde_uint32x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst4q_u32(ptr, val); + #else + uint32_t buf[16]; + simde_uint32x4_private a_[4] = { simde_uint32x4_to_private(val.val[0]), simde_uint32x4_to_private(val.val[1]), + simde_uint32x4_to_private(val.val[2]), simde_uint32x4_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_u32 + #define vst4q_u32(a, b) simde_vst4q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_u64(uint64_t *ptr, simde_uint64x2x4_t val) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst4q_u64(ptr, val); + 
#else + uint64_t buf[8]; + simde_uint64x2_private a_[4] = { simde_uint64x2_to_private(val.val[0]), simde_uint64x2_to_private(val.val[1]), + simde_uint64x2_to_private(val.val[2]), simde_uint64x2_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_u64 + #define vst4q_u64(a, b) simde_vst4q_u64((a), (b)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST4_H) */ +/* :: End simde/arm/neon/st4.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/st4_lane.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + * 2021 Zhi An Ng (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_ST4_LANE_H) +#define SIMDE_ARM_NEON_ST4_LANE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x8x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst4_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x8_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int8x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_s8 + #define vst4_lane_s8(a, b, c) simde_vst4_lane_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x4x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst4_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x4_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_s16 + #define vst4_lane_s16(a, b, c) simde_vst4_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x2x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst4_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x2_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_s32 + #define vst4_lane_s32(a, b, c) simde_vst4_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + vst4_lane_s64(ptr, val, 0); + #else + simde_int64x1_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_s64 + #define vst4_lane_s64(a, b, c) simde_vst4_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x8x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst4_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x8_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint8x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_u8 + #define vst4_lane_u8(a, b, c) simde_vst4_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x4x4_t val, 
const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst4_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x4_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_u16 + #define vst4_lane_u16(a, b, c) simde_vst4_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x2x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst4_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x2_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_u32 + #define vst4_lane_u32(a, b, c) simde_vst4_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + vst4_lane_u64(ptr, val, 0); + #else + simde_uint64x1_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_u64 + #define vst4_lane_u64(a, b, c) simde_vst4_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst4_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x2_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_float32x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_f32 + #define vst4_lane_f32(a, b, c) simde_vst4_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 0) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + vst4_lane_f64(ptr, val, 0); + #else + simde_float64x1_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_float64x1_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_f64 + #define vst4_lane_f64(a, b, c) simde_vst4_lane_f64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_s8(int8_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int8x16x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst4q_lane_s8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int8x16_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int8x16_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_s8 + #define vst4q_lane_s8(a, b, c) simde_vst4q_lane_s8((a), (b), (c)) +#endif 
+ +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_s16(int16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int16x8x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst4q_lane_s16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int16x8_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_s16 + #define vst4q_lane_s16(a, b, c) simde_vst4q_lane_s16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_s32(int32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int32x4x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst4q_lane_s32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int32x4_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_s32 + #define vst4q_lane_s32(a, b, c) simde_vst4q_lane_s32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_s64(int64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x2x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst4q_lane_s64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_int64x2_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_int64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_s64 + #define vst4q_lane_s64(a, b, c) simde_vst4q_lane_s64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_u8(uint8_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint8x16x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_16_NO_RESULT_(vst4q_lane_u8, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint8x16_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint8x16_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_u8 + #define vst4q_lane_u8(a, b, c) simde_vst4q_lane_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_u16(uint16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint16x8x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_8_NO_RESULT_(vst4q_lane_u16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint16x8_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_u16 + #define vst4q_lane_u16(a, b, c) simde_vst4q_lane_u16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_u32(uint32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint32x4x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst4q_lane_u32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint32x4_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint32x4_to_private(val.val[i]); + ptr[i] = 
r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_u32 + #define vst4q_lane_u32(a, b, c) simde_vst4q_lane_u32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_CONSTIFY_2_NO_RESULT_(vst4q_lane_u64, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_uint64x2_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_uint64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_u64 + #define vst4q_lane_u64(a, b, c) simde_vst4q_lane_u64((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_CONSTIFY_4_NO_RESULT_(vst4q_lane_f32, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float32x4_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_float32x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_f32 + #define vst4q_lane_f32(a, b, c) simde_vst4q_lane_f32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_f64(simde_float64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x2x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + (void) lane; + vst4q_lane_f64(ptr, val, 0); + #else + simde_float64x2_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_float64x2_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_f64 + #define vst4q_lane_f64(a, b, c) simde_vst4q_lane_f64((a), (b), (c)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ST4_LANE_H) */ +/* :: End simde/arm/neon/st4_lane.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/subhn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SUBHN_H) +#define SIMDE_ARM_NEON_SUBHN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vsubhn_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubhn_s16(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_int8x8_private r_; + simde_int8x16_private tmp_ = + simde_int8x16_to_private( + simde_vreinterpretq_s8_s16( + simde_vsubq_s16(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #endif + return simde_int8x8_from_private(r_); + #else + return simde_vmovn_s16(simde_vshrq_n_s16(simde_vsubq_s16(a, b), 8)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubhn_s16 + #define vsubhn_s16(a, b) simde_vsubhn_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vsubhn_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubhn_s32(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_int16x4_private r_; + simde_int16x8_private tmp_ = + simde_int16x8_to_private( + simde_vreinterpretq_s16_s32( + simde_vsubq_s32(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6); + #endif + return simde_int16x4_from_private(r_); + #else + return simde_vmovn_s32(simde_vshrq_n_s32(simde_vsubq_s32(a, b), 16)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubhn_s32 + #define vsubhn_s32(a, b) simde_vsubhn_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vsubhn_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubhn_s64(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_int32x2_private r_; + simde_int32x4_private tmp_ = + simde_int32x4_to_private( + simde_vreinterpretq_s32_s64( + simde_vsubq_s64(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2); + #endif + return simde_int32x2_from_private(r_); + #else + return simde_vmovn_s64(simde_vshrq_n_s64(simde_vsubq_s64(a, b), 32)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef 
vsubhn_s64 + #define vsubhn_s64(a, b) simde_vsubhn_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vsubhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubhn_u16(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_uint8x8_private r_; + simde_uint8x16_private tmp_ = + simde_uint8x16_to_private( + simde_vreinterpretq_u8_u16( + simde_vsubq_u16(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7, 9, 11, 13, 15); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6, 8, 10, 12, 14); + #endif + return simde_uint8x8_from_private(r_); + #else + return simde_vmovn_u16(simde_vshrq_n_u16(simde_vsubq_u16(a, b), 8)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubhn_u16 + #define vsubhn_u16(a, b) simde_vsubhn_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vsubhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubhn_u32(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_uint16x4_private r_; + simde_uint16x8_private tmp_ = + simde_uint16x8_to_private( + simde_vreinterpretq_u16_u32( + simde_vsubq_u32(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3, 5, 7); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2, 4, 6); + #endif + return simde_uint16x4_from_private(r_); + #else + return simde_vmovn_u32(simde_vshrq_n_u32(simde_vsubq_u32(a, b), 16)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubhn_u32 + #define vsubhn_u32(a, b) simde_vsubhn_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vsubhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubhn_u64(a, b); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + simde_uint32x2_private r_; + simde_uint32x4_private tmp_ = + simde_uint32x4_to_private( + simde_vreinterpretq_u32_u64( + simde_vsubq_u64(a, b) + ) + ); + #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 1, 3); + #else + r_.values = __builtin_shufflevector(tmp_.values, tmp_.values, 0, 2); + #endif + return simde_uint32x2_from_private(r_); + #else + return simde_vmovn_u64(simde_vshrq_n_u64(simde_vsubq_u64(a, b), 32)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubhn_u64 + #define vsubhn_u64(a, b) simde_vsubhn_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBHN_H) */ +/* :: End simde/arm/neon/subhn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/subl_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, 
sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Décio Luiz Gazzoni Filho + */ + +#if !defined(SIMDE_ARM_NEON_SUBL_HIGH_H) +#define SIMDE_ARM_NEON_SUBL_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubl_high_s8(a, b); + #else + return simde_vsubq_s16(simde_vmovl_high_s8(a), simde_vmovl_high_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubl_high_s8 + #define vsubl_high_s8(a, b) simde_vsubl_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubl_high_s16(a, b); + #else + return simde_vsubq_s32(simde_vmovl_high_s16(a), simde_vmovl_high_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubl_high_s16 + #define vsubl_high_s16(a, b) simde_vsubl_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubl_high_s32(a, b); + #else + return simde_vsubq_s64(simde_vmovl_high_s32(a), simde_vmovl_high_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubl_high_s32 + #define vsubl_high_s32(a, b) simde_vsubl_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubl_high_u8(a, b); + #else + return simde_vsubq_u16(simde_vmovl_high_u8(a), simde_vmovl_high_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubl_high_u8 + #define vsubl_high_u8(a, b) simde_vsubl_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubl_high_u16(a, b); + #else + return simde_vsubq_u32(simde_vmovl_high_u16(a), simde_vmovl_high_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubl_high_u16 + #define vsubl_high_u16(a, b) simde_vsubl_high_u16((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubl_high_u32(a, b); + #else + return simde_vsubq_u64(simde_vmovl_high_u32(a), simde_vmovl_high_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubl_high_u32 + #define vsubl_high_u32(a, b) simde_vsubl_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBL_HIGH_H) */ +/* :: End simde/arm/neon/subl_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/subw.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SUBW_H) +#define SIMDE_ARM_NEON_SUBW_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubw_s8(simde_int16x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s16(a, simde_vmovl_s8(b)); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_s8 + #define vsubw_s8(a, b) simde_vsubw_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubw_s16(simde_int32x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s32(a, simde_vmovl_s16(b)); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_s16 + #define vsubw_s16(a, b) simde_vsubw_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubw_s32(simde_int64x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s64(a, simde_vmovl_s32(b)); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_s32 + #define vsubw_s32(a, b) simde_vsubw_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_u8(a, b); + #elif 
SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u16(a, simde_vmovl_u8(b)); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x8_private b_ = simde_uint8x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_u8 + #define vsubw_u8(a, b) simde_vsubw_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u32(a, simde_vmovl_u16(b)); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_u16 + #define vsubw_u16(a, b) simde_vsubw_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubw_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u64(a, simde_vmovl_u32(b)); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsubw_u32 + #define vsubw_u32(a, b) simde_vsubw_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBW_H) */ +/* :: End simde/arm/neon/subw.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/subw_high.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * 
included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) +#define SIMDE_ARM_NEON_SUBW_HIGH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vsubw_high_s8(simde_int16x8_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_s8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s16(a, simde_vmovl_high_s8(b)); + #else + simde_int16x8_private r_; + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_s8 + #define vsubw_high_s8(a, b) simde_vsubw_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vsubw_high_s16(simde_int32x4_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_s16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s32(a, simde_vmovl_high_s16(b)); + #else + simde_int32x4_private r_; + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_s16 + #define vsubw_high_s16(a, b) simde_vsubw_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vsubw_high_s32(simde_int64x2_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_s32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_s64(a, simde_vmovl_high_s32(b)); + #else + simde_int64x2_private r_; + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_int32x4_private b_ = 
simde_int32x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_s32 + #define vsubw_high_s32(a, b) simde_vsubw_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vsubw_high_u8(simde_uint16x8_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_u8(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u16(a, simde_vmovl_high_u8(b)); + #else + simde_uint16x8_private r_; + simde_uint16x8_private a_ = simde_uint16x8_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_u8 + #define vsubw_high_u8(a, b) simde_vsubw_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vsubw_high_u16(simde_uint32x4_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_u16(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u32(a, simde_vmovl_high_u16(b)); + #else + simde_uint32x4_private r_; + simde_uint32x4_private a_ = simde_uint32x4_to_private(a); + simde_uint16x8_private b_ = simde_uint16x8_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_u16 + #define vsubw_high_u16(a, b) simde_vsubw_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vsubw_high_u32(simde_uint64x2_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsubw_high_u32(a, b); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_vsubq_u64(a, simde_vmovl_high_u32(b)); + #else + simde_uint64x2_private r_; + simde_uint64x2_private a_ = simde_uint64x2_to_private(a); + simde_uint32x4_private b_ = simde_uint32x4_to_private(b); + + #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.values, b_.values); + r_.values -= a_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i + ((sizeof(b_.values) / sizeof(b_.values[0])) / 2)]; + } + 
#endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vsubw_high_u32 + #define vsubw_high_u32(a, b) simde_vsubw_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SUBW_HIGH_H) */ +/* :: End simde/arm/neon/subw_high.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/tbl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_TBL_H) +#define SIMDE_ARM_NEON_TBL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbl1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl1_u8(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(simde_vcombine_u8(a, a)), + b_ = simde_uint8x16_to_private(simde_vcombine_u8(b, b)); + + r_.v128 = wasm_i8x16_swizzle(a_.v128, b_.v128); + r_.v128 = wasm_v128_and(r_.v128, wasm_u8x16_lt(b_.v128, wasm_i8x16_splat(8))); + + return simde_vget_low_u8(simde_uint8x16_from_private(r_)); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + r_.m64 = _mm_shuffle_pi8(a_.m64, _mm_or_si64(b_.m64, _mm_cmpgt_pi8(b_.m64, _mm_set1_pi8(7)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] < 8) ? 
a_.values[b_.values[i]] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl1_u8 + #define vtbl1_u8(a, b) simde_vtbl1_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbl1_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl1_s8(a, b); + #else + return simde_vreinterpret_s8_u8(simde_vtbl1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpret_u8_s8(b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl1_s8 + #define vtbl1_s8(a, b) simde_vtbl1_s8((a), (b)) +#endif + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbl2_u8(simde_uint8x8x2_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl2_u8(a, b); + #else + simde_uint8x8_private + r_, + a_[2] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]) }, + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_set_epi64(a_[1].m64, a_[0].m64); + __m128i b128 = _mm_set1_epi64(b_.m64); + __m128i r128 = _mm_shuffle_epi8(a128, _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(15)))); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] < 16) ? a_[b_.values[i] / 8].values[b_.values[i] & 7] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl2_u8 + #define vtbl2_u8(a, b) simde_vtbl2_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbl2_s8(simde_int8x8x2_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl2_s8(a, b); + #else + simde_uint8x8x2_t a_; + simde_memcpy(&a_, &a, sizeof(a_)); + return simde_vreinterpret_s8_u8(simde_vtbl2_u8(a_, simde_vreinterpret_u8_s8(b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl2_s8 + #define vtbl2_s8(a, b) simde_vtbl2_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbl3_u8(simde_uint8x8x3_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl3_u8(a, b); + #else + simde_uint8x8_private + r_, + a_[3] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]), simde_uint8x8_to_private(a.val[2]) }, + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i b128 = _mm_set1_epi64(b_.m64); + b128 = _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(23))); + __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(a_[1].m64, a_[0].m64), b128); + __m128i r128_2 = _mm_shuffle_epi8(_mm_set1_epi64(a_[2].m64), b128); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(b128, 3)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] < 24) ? 
a_[b_.values[i] / 8].values[b_.values[i] & 7] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl3_u8 + #define vtbl3_u8(a, b) simde_vtbl3_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbl3_s8(simde_int8x8x3_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl3_s8(a, b); + #else + simde_uint8x8x3_t a_; + simde_memcpy(&a_, &a, sizeof(a_)); + return simde_vreinterpret_s8_u8(simde_vtbl3_u8(a_, simde_vreinterpret_u8_s8(b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl3_s8 + #define vtbl3_s8(a, b) simde_vtbl3_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbl4_u8(simde_uint8x8x4_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl4_u8(a, b); + #else + simde_uint8x8_private + r_, + a_[4] = { simde_uint8x8_to_private(a.val[0]), simde_uint8x8_to_private(a.val[1]), simde_uint8x8_to_private(a.val[2]), simde_uint8x8_to_private(a.val[3]) }, + b_ = simde_uint8x8_to_private(b); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i b128 = _mm_set1_epi64(b_.m64); + b128 = _mm_or_si128(b128, _mm_cmpgt_epi8(b128, _mm_set1_epi8(31))); + __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(a_[1].m64, a_[0].m64), b128); + __m128i r128_23 = _mm_shuffle_epi8(_mm_set_epi64(a_[3].m64, a_[2].m64), b128); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(b128, 3)); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (b_.values[i] < 32) ? a_[b_.values[i] / 8].values[b_.values[i] & 7] : 0; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl4_u8 + #define vtbl4_u8(a, b) simde_vtbl4_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbl4_s8(simde_int8x8x4_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbl4_s8(a, b); + #else + simde_uint8x8x4_t a_; + simde_memcpy(&a_, &a, sizeof(a_)); + return simde_vreinterpret_s8_u8(simde_vtbl4_u8(a_, simde_vreinterpret_u8_s8(b))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbl4_s8 + #define vtbl4_s8(a, b) simde_vtbl4_s8((a), (b)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_TBL_H) */ +/* :: End simde/arm/neon/tbl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/tbx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_ARM_NEON_TBX_H) +#define SIMDE_ARM_NEON_TBX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbx1_u8(simde_uint8x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx1_u8(a, b, c); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b), + c_ = simde_uint8x8_to_private(c); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_set1_epi64(a_.m64); + __m128i b128 = _mm_set1_epi64(b_.m64); + __m128i c128 = _mm_set1_epi64(c_.m64); + c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(7))); + __m128i r128 = _mm_shuffle_epi8(b128, c128); + r128 = _mm_blendv_epi8(r128, a128, c128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (c_.values[i] < 8) ? b_.values[c_.values[i]] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx1_u8 + #define vtbx1_u8(a, b, c) simde_vtbx1_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbx1_s8(simde_int8x8_t a, simde_int8x8_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx1_s8(a, b, c); + #else + return simde_vreinterpret_s8_u8(simde_vtbx1_u8(simde_vreinterpret_u8_s8(a), simde_vreinterpret_u8_s8(b), simde_vreinterpret_u8_s8(c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx1_s8 + #define vtbx1_s8(a, b, c) simde_vtbx1_s8((a), (b), (c)) +#endif + +#if !defined(SIMDE_BUG_INTEL_857088) + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbx2_u8(simde_uint8x8_t a, simde_uint8x8x2_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx2_u8(a, b, c); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_[2] = { simde_uint8x8_to_private(b.val[0]), simde_uint8x8_to_private(b.val[1]) }, + c_ = simde_uint8x8_to_private(c); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_set1_epi64(a_.m64); + __m128i b128 = _mm_set_epi64(b_[1].m64, b_[0].m64); + __m128i c128 = _mm_set1_epi64(c_.m64); + c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(15))); + __m128i r128 = _mm_shuffle_epi8(b128, c128); + r128 = _mm_blendv_epi8(r128, a128, c128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (c_.values[i] < 16) ? 
b_[c_.values[i] / 8].values[c_.values[i] & 7] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx2_u8 + #define vtbx2_u8(a, b, c) simde_vtbx2_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbx2_s8(simde_int8x8_t a, simde_int8x8x2_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx2_s8(a, b, c); + #else + simde_uint8x8x2_t b_; + simde_memcpy(&b_, &b, sizeof(b_)); + return simde_vreinterpret_s8_u8(simde_vtbx2_u8(simde_vreinterpret_u8_s8(a), + b_, + simde_vreinterpret_u8_s8(c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx2_s8 + #define vtbx2_s8(a, b, c) simde_vtbx2_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbx3_u8(simde_uint8x8_t a, simde_uint8x8x3_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx3_u8(a, b, c); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_[3] = { simde_uint8x8_to_private(b.val[0]), simde_uint8x8_to_private(b.val[1]), simde_uint8x8_to_private(b.val[2]) }, + c_ = simde_uint8x8_to_private(c); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_set1_epi64(a_.m64); + __m128i c128 = _mm_set1_epi64(c_.m64); + c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(23))); + __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(b_[1].m64, b_[0].m64), c128); + __m128i r128_2 = _mm_shuffle_epi8(_mm_set1_epi64(b_[2].m64), c128); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_2, _mm_slli_epi32(c128, 3)); + r128 = _mm_blendv_epi8(r128, a128, c128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (c_.values[i] < 24) ? 
b_[c_.values[i] / 8].values[c_.values[i] & 7] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx3_u8 + #define vtbx3_u8(a, b, c) simde_vtbx3_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbx3_s8(simde_int8x8_t a, simde_int8x8x3_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx3_s8(a, b, c); + #else + simde_uint8x8x3_t b_; + simde_memcpy(&b_, &b, sizeof(b_)); + return simde_vreinterpret_s8_u8(simde_vtbx3_u8(simde_vreinterpret_u8_s8(a), + b_, + simde_vreinterpret_u8_s8(c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx3_s8 + #define vtbx3_s8(a, b, c) simde_vtbx3_s8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtbx4_u8(simde_uint8x8_t a, simde_uint8x8x4_t b, simde_uint8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx4_u8(a, b, c); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_[4] = { simde_uint8x8_to_private(b.val[0]), simde_uint8x8_to_private(b.val[1]), simde_uint8x8_to_private(b.val[2]), simde_uint8x8_to_private(b.val[3]) }, + c_ = simde_uint8x8_to_private(c); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + __m128i a128 = _mm_set1_epi64(a_.m64); + __m128i c128 = _mm_set1_epi64(c_.m64); + c128 = _mm_or_si128(c128, _mm_cmpgt_epi8(c128, _mm_set1_epi8(31))); + __m128i r128_01 = _mm_shuffle_epi8(_mm_set_epi64(b_[1].m64, b_[0].m64), c128); + __m128i r128_23 = _mm_shuffle_epi8(_mm_set_epi64(b_[3].m64, b_[2].m64), c128); + __m128i r128 = _mm_blendv_epi8(r128_01, r128_23, _mm_slli_epi32(c128, 3)); + r128 = _mm_blendv_epi8(r128, a128, c128); + r_.m64 = _mm_movepi64_pi64(r128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = (c_.values[i] < 32) ? 
b_[c_.values[i] / 8].values[c_.values[i] & 7] : a_.values[i]; + } + #endif + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx4_u8 + #define vtbx4_u8(a, b, c) simde_vtbx4_u8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtbx4_s8(simde_int8x8_t a, simde_int8x8x4_t b, simde_int8x8_t c) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtbx4_s8(a, b, c); + #else + simde_uint8x8x4_t b_; + simde_memcpy(&b_, &b, sizeof(b_)); + return simde_vreinterpret_s8_u8(simde_vtbx4_u8(simde_vreinterpret_u8_s8(a), + b_, + simde_vreinterpret_u8_s8(c))); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtbx4_s8 + #define vtbx4_s8(a, b, c) simde_vtbx4_s8((a), (b), (c)) +#endif + +#endif /* !defined(SIMDE_BUG_INTEL_857088) */ + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_TBX_H) */ +/* :: End simde/arm/neon/tbx.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/trn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_TRN_H) && !defined(SIMDE_BUG_INTEL_857088) +#define SIMDE_ARM_NEON_TRN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/trn1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_TRN1_H) +#define SIMDE_ARM_NEON_TRN1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vtrn1_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_f32 + #define vtrn1_f32(a, b) simde_vtrn1_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtrn1_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_s8 + #define vtrn1_s8(a, b) simde_vtrn1_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vtrn1_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_s16 + #define vtrn1_s16(a, b) simde_vtrn1_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vtrn1_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_s32 + #define vtrn1_s32(a, b) simde_vtrn1_s32((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtrn1_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_u8 + #define vtrn1_u8(a, b) simde_vtrn1_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vtrn1_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_u16 + #define vtrn1_u16(a, b) simde_vtrn1_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vtrn1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_u32 + #define vtrn1_u32(a, b) simde_vtrn1_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vtrn1q_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_f32(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_f32 + #define vtrn1q_f32(a, b) simde_vtrn1q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vtrn1q_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_f64(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_float64x2_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_f64 + #define vtrn1q_f64(a, b) simde_vtrn1q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vtrn1q_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_s8 + #define vtrn1q_s8(a, b) simde_vtrn1q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vtrn1q_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_s16 + #define vtrn1q_s16(a, b) simde_vtrn1q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vtrn1q_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_s32 + #define vtrn1q_s32(a, b) simde_vtrn1q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vtrn1q_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_s64 + #define vtrn1q_s64(a, b) simde_vtrn1q_s64((a), (b)) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vtrn1q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + 
r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_u8 + #define vtrn1q_u8(a, b) simde_vtrn1q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vtrn1q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_u16 + #define vtrn1q_u16(a, b) simde_vtrn1q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vtrn1q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_u32 + #define vtrn1q_u32(a, b) simde_vtrn1q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vtrn1q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn1q_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_u64 + #define vtrn1q_u64(a, b) simde_vtrn1q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_TRN1_H) */ +/* :: End simde/arm/neon/trn1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/trn2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Sean Maher (Copyright owned by Google, LLC) + */ + +#if !defined(SIMDE_ARM_NEON_TRN2_H) +#define SIMDE_ARM_NEON_TRN2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vtrn2_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_f32(a, b); + #else + simde_float32x2_private + r_, + a_ = simde_float32x2_to_private(a), + b_ = simde_float32x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_f32 + #define vtrn2_f32(a, b) simde_vtrn2_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vtrn2_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_s8 + #define vtrn2_s8(a, b) simde_vtrn2_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vtrn2_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_s16 + #define vtrn2_s16(a, b) simde_vtrn2_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vtrn2_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + 
r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_s32 + #define vtrn2_s32(a, b) simde_vtrn2_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vtrn2_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a), + b_ = simde_uint8x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_u8 + #define vtrn2_u8(a, b) simde_vtrn2_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vtrn2_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a), + b_ = simde_uint16x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_u16 + #define vtrn2_u16(a, b) simde_vtrn2_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vtrn2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a), + b_ = simde_uint32x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_u32 + #define vtrn2_u32(a, b) simde_vtrn2_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vtrn2q_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_f32(a, b); + #else + simde_float32x4_private + r_, + a_ = simde_float32x4_to_private(a), + b_ = simde_float32x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_f32 + #define vtrn2q_f32(a, b) simde_vtrn2q_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vtrn2q_f64(simde_float64x2_t a, simde_float64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_f64(a, b); + #else + simde_float64x2_private + r_, + a_ = simde_float64x2_to_private(a), + b_ = simde_float64x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / 
sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_f64 + #define vtrn2q_f64(a, b) simde_vtrn2q_f64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vtrn2q_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_s8 + #define vtrn2q_s8(a, b) simde_vtrn2q_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vtrn2q_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_s16 + #define vtrn2q_s16(a, b) simde_vtrn2q_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vtrn2q_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_s32 + #define vtrn2q_s32(a, b) simde_vtrn2q_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vtrn2q_s64(simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_s64 + #define vtrn2q_s64(a, b) simde_vtrn2q_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vtrn2q_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_u8(a, b); + #else + 
simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a), + b_ = simde_uint8x16_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_u8 + #define vtrn2q_u8(a, b) simde_vtrn2q_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vtrn2q_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a), + b_ = simde_uint16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_u16 + #define vtrn2q_u16(a, b) simde_vtrn2q_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vtrn2q_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a), + b_ = simde_uint32x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_u32 + #define vtrn2q_u32(a, b) simde_vtrn2q_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vtrn2q_u64(simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vtrn2q_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a), + b_ = simde_uint64x2_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_u64 + #define vtrn2q_u64(a, b) simde_vtrn2q_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_TRN2_H) */ +/* :: End simde/arm/neon/trn2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2x2_t +simde_vtrn_f32(simde_float32x2_t a, simde_float32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_f32(a, b); + #else + simde_float32x2x2_t r = { { simde_vtrn1_f32(a, b), simde_vtrn2_f32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_f32 + #define vtrn_f32(a, b) simde_vtrn_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8x2_t +simde_vtrn_s8(simde_int8x8_t a, simde_int8x8_t b) { + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_s8(a, b); + #else + simde_int8x8x2_t r = { { simde_vtrn1_s8(a, b), simde_vtrn2_s8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_s8 + #define vtrn_s8(a, b) simde_vtrn_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4x2_t +simde_vtrn_s16(simde_int16x4_t a, simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_s16(a, b); + #else + simde_int16x4x2_t r = { { simde_vtrn1_s16(a, b), simde_vtrn2_s16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_s16 + #define vtrn_s16(a, b) simde_vtrn_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2x2_t +simde_vtrn_s32(simde_int32x2_t a, simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_s32(a, b); + #else + simde_int32x2x2_t r = { { simde_vtrn1_s32(a, b), simde_vtrn2_s32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_s32 + #define vtrn_s32(a, b) simde_vtrn_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8x2_t +simde_vtrn_u8(simde_uint8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_u8(a, b); + #else + simde_uint8x8x2_t r = { { simde_vtrn1_u8(a, b), simde_vtrn2_u8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_u8 + #define vtrn_u8(a, b) simde_vtrn_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4x2_t +simde_vtrn_u16(simde_uint16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_u16(a, b); + #else + simde_uint16x4x2_t r = { { simde_vtrn1_u16(a, b), simde_vtrn2_u16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_u16 + #define vtrn_u16(a, b) simde_vtrn_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2x2_t +simde_vtrn_u32(simde_uint32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrn_u32(a, b); + #else + simde_uint32x2x2_t r = { { simde_vtrn1_u32(a, b), simde_vtrn2_u32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_u32 + #define vtrn_u32(a, b) simde_vtrn_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4x2_t +simde_vtrnq_f32(simde_float32x4_t a, simde_float32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_f32(a, b); + #else + simde_float32x4x2_t r = { { simde_vtrn1q_f32(a, b), simde_vtrn2q_f32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_f32 + #define vtrnq_f32(a, b) simde_vtrnq_f32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16x2_t +simde_vtrnq_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_s8(a, b); + #else + simde_int8x16x2_t r = { { simde_vtrn1q_s8(a, b), simde_vtrn2q_s8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_s8 + #define vtrnq_s8(a, b) simde_vtrnq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8x2_t +simde_vtrnq_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_s16(a, b); + #else + simde_int16x8x2_t r = { { simde_vtrn1q_s16(a, b), simde_vtrn2q_s16(a, b) } }; + return r; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_s16 + #define vtrnq_s16(a, b) simde_vtrnq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4x2_t +simde_vtrnq_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_s32(a, b); + #else + simde_int32x4x2_t r = { { simde_vtrn1q_s32(a, b), simde_vtrn2q_s32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_s32 + #define vtrnq_s32(a, b) simde_vtrnq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16x2_t +simde_vtrnq_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_u8(a, b); + #else + simde_uint8x16x2_t r = { { simde_vtrn1q_u8(a, b), simde_vtrn2q_u8(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_u8 + #define vtrnq_u8(a, b) simde_vtrnq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8x2_t +simde_vtrnq_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_u16(a, b); + #else + simde_uint16x8x2_t r = { { simde_vtrn1q_u16(a, b), simde_vtrn2q_u16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_u16 + #define vtrnq_u16(a, b) simde_vtrnq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4x2_t +simde_vtrnq_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vtrnq_u32(a, b); + #else + simde_uint32x4x2_t r = { { simde_vtrn1q_u32(a, b), simde_vtrn2q_u32(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_u32 + #define vtrnq_u32(a, b) simde_vtrnq_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_TRN_H) */ +/* :: End simde/arm/neon/trn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/uqadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_NEON_UQADD_H) +#define SIMDE_ARM_NEON_UQADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +// Workaround on ARM64 windows due to windows SDK bug +// https://developercommunity.visualstudio.com/t/In-arm64_neonh-vsqaddb_u8-vsqaddh_u16/10271747?sort=newest +#if (defined _MSC_VER) && (defined SIMDE_ARM_NEON_A64V8_NATIVE) +#undef vuqaddh_s16 +#define vuqaddh_s16(src1, src2) neon_suqadds16(__int16ToN16_v(src1), __uint16ToN16_v(src2)).n16_i16[0] +#undef vuqadds_s32 +#define vuqadds_s32(src1, src2) _CopyInt32FromFloat(neon_suqadds32(_CopyFloatFromInt32(src1), _CopyFloatFromUInt32(src2))) +#undef vuqaddd_s64 +#define vuqaddd_s64(src1, src2) neon_suqadds64(__int64ToN64_v(src1), __uint64ToN64_v(src2)).n64_i64[0] +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vuqaddb_s8(int8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) + return vuqaddb_s8(a, HEDLEY_STATIC_CAST(int8_t, b)); + #else + return vuqaddb_s8(a, b); + #endif + #else + int16_t r_ = HEDLEY_STATIC_CAST(int16_t, a) + HEDLEY_STATIC_CAST(int16_t, b); + return (r_ < INT8_MIN) ? INT8_MIN : ((r_ > INT8_MAX) ? INT8_MAX : HEDLEY_STATIC_CAST(int8_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddb_s8 + #define vuqaddb_s8(a, b) simde_vuqaddb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vuqaddh_s16(int16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) + return vuqaddh_s16(a, HEDLEY_STATIC_CAST(int16_t, b)); + #else + return vuqaddh_s16(a, b); + #endif + #else + int32_t r_ = HEDLEY_STATIC_CAST(int32_t, a) + HEDLEY_STATIC_CAST(int32_t, b); + return (r_ < INT16_MIN) ? INT16_MIN : ((r_ > INT16_MAX) ? INT16_MAX : HEDLEY_STATIC_CAST(int16_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddh_s16 + #define vuqaddh_s16(a, b) simde_vuqaddh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vuqadds_s32(int32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) + return vuqadds_s32(a, HEDLEY_STATIC_CAST(int32_t, b)); + #else + return vuqadds_s32(a, b); + #endif + #else + int64_t r_ = HEDLEY_STATIC_CAST(int64_t, a) + HEDLEY_STATIC_CAST(int64_t, b); + return (r_ < INT32_MIN) ? INT32_MIN : ((r_ > INT32_MAX) ? INT32_MAX : HEDLEY_STATIC_CAST(int32_t, r_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadds_s32 + #define vuqadds_s32(a, b) simde_vuqadds_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vuqaddd_s64(int64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_GIT_4EC445B8) + return vuqaddd_s64(a, HEDLEY_STATIC_CAST(int64_t, b)); + #else + return vuqaddd_s64(a, b); + #endif + #else + /* TODO: I suspect there is room for improvement here. This is + * just the first thing that worked, and I don't feel like messing + * with it now. */ + int64_t r; + + if (a < 0) { + uint64_t na = HEDLEY_STATIC_CAST(uint64_t, -a); + if (na > b) { + uint64_t t = na - b; + r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) + 1)) ? INT64_MIN : -HEDLEY_STATIC_CAST(int64_t, t); + } else { + uint64_t t = b - na; + r = (t > (HEDLEY_STATIC_CAST(uint64_t, INT64_MAX) )) ? 
INT64_MAX : HEDLEY_STATIC_CAST(int64_t, t); + } + } else { + uint64_t ua = HEDLEY_STATIC_CAST(uint64_t, a); + r = ((INT64_MAX - ua) < b) ? INT64_MAX : HEDLEY_STATIC_CAST(int64_t, ua + b); + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddd_s64 + #define vuqaddd_s64(a, b) simde_vuqaddd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vuqadd_s8(simde_int8x8_t a, simde_uint8x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + simde_uint8x8_private b_ = simde_uint8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s8 + #define vuqadd_s8(a, b) simde_vuqadd_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vuqadd_s16(simde_int16x4_t a, simde_uint16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + simde_uint16x4_private b_ = simde_uint16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s16 + #define vuqadd_s16(a, b) simde_vuqadd_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vuqadd_s32(simde_int32x2_t a, simde_uint32x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + simde_uint32x2_private b_ = simde_uint32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s32 + #define vuqadd_s32(a, b) simde_vuqadd_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vuqadd_s64(simde_int64x1_t a, simde_uint64x1_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqadd_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + simde_uint64x1_private b_ = simde_uint64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqadd_s64 + #define vuqadd_s64(a, b) simde_vuqadd_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vuqaddq_s8(simde_int8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + simde_uint8x16_private b_ = simde_uint8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x16_from_private(r_); + #endif +} 
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s8 + #define vuqaddq_s8(a, b) simde_vuqaddq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vuqaddq_s16(simde_int16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + simde_uint16x8_private b_ = simde_uint16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s16 + #define vuqaddq_s16(a, b) simde_vuqaddq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vuqaddq_s32(simde_int32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + simde_uint32x4_private b_ = simde_uint32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqadds_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s32 + #define vuqaddq_s32(a, b) simde_vuqaddq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vuqaddq_s64(simde_int64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vuqaddq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + simde_uint64x2_private b_ = simde_uint64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vuqaddd_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuqaddq_s64 + #define vuqaddq_s64(a, b) simde_vuqaddq_s64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_UQADD_H) */ +/* :: End simde/arm/neon/uqadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/neon/xar.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Atharva Nimbalkar + */ + +#if !defined(SIMDE_ARM_NEON_XAR_H) +#define SIMDE_ARM_NEON_XAR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vxarq_u64(simde_uint64x2_t a, simde_uint64x2_t b, const int d) + SIMDE_REQUIRE_CONSTANT_RANGE(d, 0, 63) { + simde_uint64x2_private + r_, + t = simde_uint64x2_to_private(simde_veorq_u64(a,b)); + + SIMDE_VECTORIZE + for (size_t i=0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = ((t.values[i] >> d) | (t.values[i] << (64 - d))); + } + + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_SHA3) + #define simde_vxarq_u64(a, b, d) vxarq_u64((a), (b), (d)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__ARM_FEATURE_SHA3)) + #undef vxarq_u64 + #define vxarq_u64(a, b, d) simde_vxarq_u64((a), (b), (d)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_XAR_H) */ +/* :: End simde/arm/neon/xar.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#endif /* SIMDE_ARM_NEON_H */ +/* :: End simde/arm/neon.h :: */ diff --git a/include/simde/arm/sve.h b/include/simde/arm/sve.h new file mode 100644 index 00000000..ed39a90d --- /dev/null +++ b/include/simde/arm/sve.h @@ -0,0 +1,18581 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_H) +#define SIMDE_ARM_SVE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* TODO: SVE2 is going to be a bit awkward with this setup. We currently + * either use SVE vectors or assume that the vector length is known at + * compile-time. For CPUs which provide SVE but not SVE2 we're going + * to be getting scalable vectors, so we may need to loop through them. + * + * Currently I'm thinking we'll have a separate function for non-SVE + * types. We can call that function in a loop from an SVE version, + * and we can call it once from a resolver. + * + * Unfortunately this is going to mean a lot of boilerplate for SVE, + * which already has several variants of a lot of functions (*_z, *_m, + * etc.), plus overloaded functions in C++ and generic selectors in C. + * + * Anyways, all this means that we're going to need to always define + * the portable types. + * + * The good news is that at least we don't have to deal with + * to/from_private functions; since the no-SVE versions will only be + * called with non-SVE params. */ + +#if !defined(SIMDE_ARM_SVE_TYPES_H) +#define SIMDE_ARM_SVE_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 
100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 
0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) +# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION 
HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif 
+#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION +#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define 
HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if 
defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define 
HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if
HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define 
HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) 
__declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif 
defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) +#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if 
defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + 
HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ?
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
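To make the SIMDE_ARCH_*_CHECK helpers from simde-arch.h above a little more concrete, here is a minimal sketch (illustrative only, with arbitrary version numbers); the _CHECK macros already collapse to 0 when the architecture was not detected, so no extra defined() test is needed.

#if SIMDE_ARCH_X86_CHECK(6)
  /* 32-bit x86, i686 or newer */
#elif SIMDE_ARCH_ARM_CHECK(8, 0)
  /* ARMv8.0 or newer */
#else
  /* neither architecture was detected */
#endif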
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
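Circling back to the note at the top of simde-features.h about forcibly enabling or disabling APIs: the SIMDE_NO_NATIVE and per-ISA *_NO_NATIVE guards checked in the cascades further down are that switch. A hedged sketch of consumer-side usage follows (the include path is only an example); as the cascades suggest, a later extension that stays enabled can still imply an earlier one, so SIMDE_NO_NATIVE is the simplest way to force the portable fallbacks everywhere.

/* Build SIMDe's portable (emulated) code paths even on hardware that has
 * the native instructions, e.g. to exercise the fallbacks: */
#define SIMDE_NO_NATIVE
#include "simde/x86/sse4.2.h"   /* illustrative include path */

/* Or switch off a single family before the first SIMDe include: */
/* #define SIMDE_X86_AVX512F_NO_NATIVE */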
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
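Returning to the _mm_empty() note a couple of paragraphs above: because the emulated MMX functions deliberately never clear the MMX/x87 state themselves, a caller that mixes them with ordinary floating point has to do so. A rough sketch, assuming the usual simde_-prefixed MMX spellings (simde_mm_set1_pi16, simde_mm_add_pi16), which live elsewhere in SIMDe and are not shown in this excerpt:

#include "simde/x86/mmx.h"   /* illustrative include path */

static void example_mmx_then_float(void) {
  simde__m64 a = simde_mm_set1_pi16(3);
  simde__m64 b = simde_mm_set1_pi16(4);
  simde__m64 s = simde_mm_add_pi16(a, b);
  (void) s;            /* real code would store or otherwise use s */
  simde_mm_empty();    /* the caller, not SIMDe, clears the MMX state */
  /* ordinary floating-point work is safe from here on */
}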
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
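Back to the -Wfloat-equal note above: when a comparison really is meant to be exact (the _mm_cmpeq_ps situation described there), one way to keep the diagnostic enabled elsewhere is to disable it only around that comparison, using the helper defined above together with Hedley's push/pop. A minimal sketch with a made-up function name:

static int example_is_exactly_zero(float x) {
  int r;
  HEDLEY_DIAGNOSTIC_PUSH
  SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
  r = (x == 0.0f);   /* intentional exact comparison */
  HEDLEY_DIAGNOSTIC_POP
  return r;
}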
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
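For readers unfamiliar with the term used in the VLA note above, a conformant array parameter is simply a C99 parameter whose extent is taken from an earlier parameter; no VLA object is ever created, yet some compilers still push the declaration through their -Wvla machinery. A tiny sketch (hypothetical function name):

#include <stddef.h>
#include <stdint.h>

/* C99 conformant array parameter: 'n' documents the expected length. */
void example_sum_u8(size_t n, const uint8_t values[n]);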
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
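The ordering constraint behind the Annex K note above is worth spelling out: the conventional macro only has an effect if it is visible before the very first CRT header, which is why this header falls back to the pragma instead. In user code the usual arrangement looks like this (purely illustrative):

/* Must come before ANY #include that drags in the Microsoft CRT headers. */
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <string.h>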
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
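The two SIMDE_LCC_* wrappers defined immediately below appear designed to be used in matched pairs around the individual intrinsic call; on every other compiler they expand to nothing. A shape-only sketch (the actual E2K builtin is elided because it is target specific):

SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS
/* ... call the deprecated-but-still-fastest E2K builtin here ... */
SIMDE_LCC_REVERT_DEPRECATED_WARNINGS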
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
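A sketch of the kind of decision this value feeds, using the _GE/_LE convenience macros defined just below (illustrative only):

#if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
  /* natural 256-bit (or wider) vectors: implement the operation directly */
#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128)
  /* vectors top out at 128 bits: do the work as two 128-bit halves */
#else
  /* SIMDE_NATURAL_VECTOR_SIZE is 0: expect scalar fallback code */
#endif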
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang, IEEE 754 isn't strictly followed, therefore IEEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaguely modern architecture I'm aware of uses IEEE 754, + * so we just assume IEEE 754 for now. There is a test which verifies + * this; if that test fails somewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include <lasxintrin.h> +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include <lsxintrin.h> +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in <cmath>, + * <math.h>, compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include <stdint.h> +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include <arm_neon.h> +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include <sleef.h> prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does require linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include <sleef.h> + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like <math.h> or <cmath> has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guard + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cmath>) + #define SIMDE_MATH_HAVE_CMATH + #include <cmath> + #elif __has_include(<math.h>) + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include <cmath> + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f, + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
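+ *
+ * As a rough usage sketch (the struct, field, and pointer names below
+ * are purely illustrative, not part of the API):
+ *
+ *   struct i32x4 {
+ *     SIMDE_ALIGN_TO_16 int32_t values[4];
+ *   };
+ *   int32_t* p = SIMDE_ALIGN_ASSUME_TO(some_ptr, 16);
+ *
+ * where `some_ptr` is assumed to already point at 16-byte-aligned
+ * storage; the ASSUME macro only communicates that fact to the
+ * compiler, it does not align anything itself.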
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
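+ *
+ * As a purely illustrative example of combining these knobs, a build
+ * that wants most of the fast paths but still cares about faithful
+ * NaN handling might be compiled with something like
+ *
+ *   cc -O2 -DSIMDE_FAST_MATH -DSIMDE_NO_FAST_NANS -c foo.c
+ *
+ * (the compiler invocation and file name are hypothetical); defining
+ * SIMDE_NO_FAST_NANS overrides the blanket SIMDE_FAST_MATH for that
+ * one option.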
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
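+ *
+ * For instance (an illustrative sketch, only meaningful where the
+ * statement-expression extension above is available), comparing two
+ * floats without tripping -Wfloat-equal could look like:
+ *
+ *   int eq = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(
+ *     SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);
+ *
+ * where `a`, `b`, and `eq` are placeholder names.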
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
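+ * A freestanding build can also point the simde_memcpy/simde_memset/
+ * simde_memcmp wrappers below at its own routines by defining the macros
+ * before including any SIMDe header; a minimal sketch (my_memcpy,
+ * my_memset and my_memcmp are hypothetical user-provided functions):
+ *
+ *     #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+ *     #define simde_memset(s, c, n) my_memset((s), (c), (n))
+ *     #define simde_memcmp(s1, s2, n) my_memcmp((s1), (s2), (n))
+ *     #include "simde/arm/neon.h"
+ *
+ * Otherwise the checks below prefer the compiler builtins, then <string.h>
+ * when it is available, and only then the slow portable loops at the end
+ * of this block.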
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
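+ *
+ * For instance, a 32-bit popcount written against these wrappers looks
+ * roughly like this (sketch only; the macros are defined just below):
+ *
+ *     uint32_t c = HEDLEY_STATIC_CAST(uint32_t,
+ *       SIMDE_BUILTIN_32_(popcount)(HEDLEY_STATIC_CAST(SIMDE_BUILTIN_TYPE_32_, v)));
+ *
+ * which expands to __builtin_popcount, __builtin_popcountl or
+ * __builtin_popcountll depending on which of int/long/long long is the
+ * 32-bit type on the target.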
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
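+ *
+ * The intended pattern is to wrap a single, already-checked expression,
+ * e.g. (sketch only):
+ *
+ *     sum = SIMDE_BUG_IGNORE_SIGN_CONVERSION(a + b);
+ *
+ * so only the spurious warning on that one expression is silenced instead
+ * of disabling -Wsign-conversion for a whole translation unit.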
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-f16.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_FLOAT16_H) +#define SIMDE_FLOAT16_H + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* Portable version which should work on pretty much any compiler. + * Obviously you can't rely on compiler support for things like + * conversion to/from 32-bit floats, so make sure you always use the + * functions and macros in this file! + * + * The portable implementations are (heavily) based on CC0 code by + * Fabian Giesen: (see also + * ). + * I have basically just modified it to get rid of some UB (lots of + * aliasing, right shifting a negative value), use fixed-width types, + * and work in C. */ +#define SIMDE_FLOAT16_API_PORTABLE 1 +/* _Float16, per C standard (TS 18661-3; + * ). 
*/ +#define SIMDE_FLOAT16_API_FLOAT16 2 +/* clang >= 6.0 supports __fp16 as an interchange format on all + * targets, but only allows you to use them for arguments and return + * values on targets which have defined an ABI. We get around the + * restriction by wrapping the __fp16 in a struct, but we can't do + * that on Arm since it would break compatibility with the NEON F16 + * functions. */ +#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 +/* This is basically __fp16 as specified by Arm, where arugments and + * return values are raw __fp16 values not structs. */ +#define SIMDE_FLOAT16_API_FP16 4 + +/* Choosing an implementation. This is a bit rough, but I don't have + * any ideas on how to improve it. If you do, patches are definitely + * welcome. */ +#if !defined(SIMDE_FLOAT16_API) + #if 0 && !defined(__cplusplus) + /* I haven't found a way to detect this. It seems like defining + * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then + * checking for defined(FLT16_MAX) should work, but both gcc and + * clang will define the constants even if _Float16 is not + * supported. Ideas welcome. */ + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 + #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(SIMDE_ARM_NEON_FP16) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 + #elif defined(__FLT16_MIN__) && (defined(__clang__) && (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI + #else + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE + #endif +#endif + +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 + typedef _Float16 simde_float16; + #define SIMDE_FLOAT16_C(value) value##f16 +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI + typedef struct { __fp16 value; } simde_float16; + #if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) + #else + #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) + #endif +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 + typedef __fp16 simde_float16; + #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE + typedef struct { uint16_t value; } simde_float16; +#else + #error No 16-bit floating point API. +#endif + +#if \ + defined(SIMDE_VECTOR_OPS) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) + #define SIMDE_FLOAT16_VECTOR +#endif + +/* Reinterpret -- you *generally* shouldn't need these, they're really + * intended for internal use. However, on x86 half-precision floats + * get stuffed into a __m128i/__m256i, so it may be useful. */ + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) + +#define SIMDE_NANHF simde_uint16_as_float16(0x7E00) +#define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) + +/* Conversion -- convert between single-precision and half-precision + * floats. 
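+ *
+ * A short round-trip sketch (the bit patterns are the standard IEEE 754
+ * binary16 encodings):
+ *
+ *     simde_float16 h = simde_float16_from_float32(SIMDE_FLOAT32_C(1.5));
+ *     // simde_float16_as_uint16(h) == 0x3E00
+ *     simde_float32 f = simde_float16_to_float32(h);
+ *     // f == 1.5f again: the round trip is exact for any value that is
+ *     // representable in binary16; finite inputs too large for binary16
+ *     // (above about 65504) come back as +/-infinity, and NaNs stay NaN
+ *     // (the portable path quiets them to 0x7E00).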
*/ + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float16 +simde_float16_from_float32 (simde_float32 value) { + simde_float16 res; + + #if \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) + res = HEDLEY_STATIC_CAST(simde_float16, value); + #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) + res.value = HEDLEY_STATIC_CAST(__fp16, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint32_t f32u = simde_float32_as_uint32(value); + static const uint32_t f32u_infty = UINT32_C(255) << 23; + static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; + static const uint32_t denorm_magic = + ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; + uint16_t f16u; + + uint32_t sign = f32u & (UINT32_C(1) << 31); + f32u ^= sign; + + /* NOTE all the integer compares in this function cast the operands + * to signed values to help compilers vectorize to SSE2, which lacks + * unsigned comparison instructions. This is fine since all + * operands are below 0x80000000 (we clear the sign bit). */ + + if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ + f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ + } else { /* (De)normalized number or zero */ + if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ + /* use a magic value to align our 10 mantissa bits at the bottom of + * the float. as long as FP addition is round-to-nearest-even this + * just works. */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); + + /* and one integer subtract of the bias later, we have our final float! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); + } else { + uint32_t mant_odd = (f32u >> 13) & 1; + + /* update exponent, rounding bias part 1 */ + f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); + /* rounding bias part 2 */ + f32u += mant_odd; + /* take the bits! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); + } + } + + f16u |= sign >> 16; + res = simde_uint16_as_float16(f16u); + #endif + + return res; +} + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float32 +simde_float16_to_float32 (simde_float16 value) { + simde_float32 res; + + #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) + res = HEDLEY_STATIC_CAST(simde_float32, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint16_t half = simde_float16_as_uint16(value); + const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); + const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ + uint32_t f32u; + + f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ + uint32_t exp = shifted_exp & f32u; /* just the exponent */ + f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ + + /* handle exponent special cases */ + if (exp == shifted_exp) /* Inf/NaN? */ + f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ + else if (exp == 0) { /* Zero/Denormal? 
*/ + f32u += (1) << 23; /* extra exp adjust */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ + } + + f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ + res = simde_uint32_as_float32(f32u); + #endif + + return res; +} + +#ifdef SIMDE_FLOAT16_C + #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) +#else + #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_FLOAT16_H) */ +/* :: End simde/simde-f16.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_VECTOR_SUBSCRIPT) + #define SIMDE_ARM_SVE_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name SIMDE_VECTOR(Vector_Size) +#else + #define SIMDE_ARM_SVE_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name[(Vector_Size) / sizeof(Element_Type)] +#endif + +#if defined(SIMDE_ARM_SVE_NATIVE) + typedef svbool_t simde_svbool_t; + typedef svint8_t simde_svint8_t; + typedef svint16_t simde_svint16_t; + typedef svint32_t simde_svint32_t; + typedef svint64_t simde_svint64_t; + typedef svuint8_t simde_svuint8_t; + typedef svuint16_t simde_svuint16_t; + typedef svuint32_t simde_svuint32_t; + typedef svuint64_t simde_svuint64_t; + #if defined(__ARM_FEATURE_SVE_BF16) + typedef svbfloat16_t simde_svbfloat16_t; + #endif + typedef svfloat16_t simde_svfloat16_t; + typedef svfloat32_t simde_svfloat32_t; + typedef svfloat64_t simde_svfloat64_t; + typedef float32_t simde_float32_t; + typedef float64_t simde_float64_t; +#else + #if SIMDE_NATURAL_VECTOR_SIZE > 0 + #define SIMDE_ARM_SVE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #else + #define SIMDE_ARM_SVE_VECTOR_SIZE (128) + #endif + + typedef simde_float32 simde_float32_t; + typedef simde_float64 simde_float64_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int8_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint8_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int32_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + 
__m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint32_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int64_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint64_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint8_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint8_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint32_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } 
simde_svuint32_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint64_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x2_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint64_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + float16x8_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svfloat16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svbfloat16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(simde_float32, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512 m512; + #endif + #if defined(SIMDE_X86_AVX_NATIVE) + __m256 m256[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256)]; + #endif + #if defined(SIMDE_X86_SSE_NATIVE) + __m128 m128[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(float) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svfloat32_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(simde_float64, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512d m512d; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256d m256d[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256d)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128d m128d[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128d)]; + #endif + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t neon; + #endif + + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(double) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svfloat64_t; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + typedef struct { + __mmask64 value; + int type; + } simde_svbool_t; + + #if defined(__BMI2__) + static const uint64_t simde_arm_sve_mask_bp_lo_ = UINT64_C(0x5555555555555555); + static const uint64_t simde_arm_sve_mask_bp_hi_ = UINT64_C(0xaaaaaaaaaaaaaaaa); + + SIMDE_FUNCTION_ATTRIBUTES + __mmask64 + simde_arm_sve_mmask32_to_mmask64(__mmask32 m) { + return HEDLEY_STATIC_CAST(__mmask64, + _pdep_u64(HEDLEY_STATIC_CAST(uint64_t, m), simde_arm_sve_mask_bp_lo_) | + _pdep_u64(HEDLEY_STATIC_CAST(uint64_t, m), simde_arm_sve_mask_bp_hi_)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask16_to_mmask32(__mmask16 m) { + return HEDLEY_STATIC_CAST(__mmask32, + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask8_to_mmask16(__mmask8 m) { + return HEDLEY_STATIC_CAST(__mmask16, + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask4_to_mmask8(__mmask8 m) { + return HEDLEY_STATIC_CAST(__mmask8, + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask64_to_mmask32(__mmask64 m) { + return HEDLEY_STATIC_CAST(__mmask32, + _pext_u64(HEDLEY_STATIC_CAST(uint64_t, m), HEDLEY_STATIC_CAST(uint64_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u64(HEDLEY_STATIC_CAST(uint64_t, m), HEDLEY_STATIC_CAST(uint64_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask32_to_mmask16(__mmask32 m) { + return HEDLEY_STATIC_CAST(__mmask16, + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask16_to_mmask8(__mmask16 m) { + return HEDLEY_STATIC_CAST(__mmask8, + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask8_to_mmask4(__mmask8 m) { + return HEDLEY_STATIC_CAST(__mmask8, + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + #else + SIMDE_FUNCTION_ATTRIBUTES + __mmask64 + simde_arm_sve_mmask32_to_mmask64(__mmask32 m) { + uint64_t e = HEDLEY_STATIC_CAST(uint64_t, m); + uint64_t o = HEDLEY_STATIC_CAST(uint64_t, m); + + e = (e | (e << 16)) & UINT64_C(0x0000ffff0000ffff); + e = (e | (e << 8)) & 
UINT64_C(0x00ff00ff00ff00ff); + e = (e | (e << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + e = (e | (e << 2)) & UINT64_C(0x3333333333333333); + e = (e | (e << 1)) & UINT64_C(0x5555555555555555); + + o = (o | (o << 16)) & UINT64_C(0x0000ffff0000ffff); + o = (o | (o << 8)) & UINT64_C(0x00ff00ff00ff00ff); + o = (o | (o << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + o = (o | (o << 2)) & UINT64_C(0x3333333333333333); + o = (o | (o << 1)) & UINT64_C(0x5555555555555555); + + return HEDLEY_STATIC_CAST(__mmask64, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask16_to_mmask32(__mmask16 m) { + uint32_t e = HEDLEY_STATIC_CAST(uint32_t, m); + uint32_t o = HEDLEY_STATIC_CAST(uint32_t, m); + + e = (e | (e << 8)) & UINT32_C(0x00FF00FF); + e = (e | (e << 4)) & UINT32_C(0x0F0F0F0F); + e = (e | (e << 2)) & UINT32_C(0x33333333); + e = (e | (e << 1)) & UINT32_C(0x55555555); + + o = (o | (o << 8)) & UINT32_C(0x00FF00FF); + o = (o | (o << 4)) & UINT32_C(0x0F0F0F0F); + o = (o | (o << 2)) & UINT32_C(0x33333333); + o = (o | (o << 1)) & UINT32_C(0x55555555); + + return HEDLEY_STATIC_CAST(__mmask32, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask8_to_mmask16(__mmask8 m) { + uint16_t e = HEDLEY_STATIC_CAST(uint16_t, m); + uint16_t o = HEDLEY_STATIC_CAST(uint16_t, m); + + e = (e | (e << 4)) & UINT16_C(0x0f0f); + e = (e | (e << 2)) & UINT16_C(0x3333); + e = (e | (e << 1)) & UINT16_C(0x5555); + + o = (o | (o << 4)) & UINT16_C(0x0f0f); + o = (o | (o << 2)) & UINT16_C(0x3333); + o = (o | (o << 1)) & UINT16_C(0x5555); + + return HEDLEY_STATIC_CAST(uint16_t, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask4_to_mmask8(__mmask8 m) { + uint8_t e = HEDLEY_STATIC_CAST(uint8_t, m); + uint8_t o = HEDLEY_STATIC_CAST(uint8_t, m); + + e = (e | (e << 2)) & UINT8_C(0x33); + e = (e | (e << 1)) & UINT8_C(0x55); + + o = (o | (o << 2)) & UINT8_C(0x33); + o = (o | (o << 1)) & UINT8_C(0x55); + + return HEDLEY_STATIC_CAST(uint8_t, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask64_to_mmask32(__mmask64 m) { + uint64_t l = (HEDLEY_STATIC_CAST(uint64_t, m) ) & UINT64_C(0x5555555555555555); + l = (l | (l >> 1)) & UINT64_C(0x3333333333333333); + l = (l | (l >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + l = (l | (l >> 4)) & UINT64_C(0x00ff00ff00ff00ff); + l = (l | (l >> 8)) & UINT64_C(0x0000ffff0000ffff); + + uint64_t h = (HEDLEY_STATIC_CAST(uint64_t, m) >> 1) & UINT64_C(0x5555555555555555); + h = (h | (h >> 1)) & UINT64_C(0x3333333333333333); + h = (h | (h >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + h = (h | (h >> 4)) & UINT64_C(0x00ff00ff00ff00ff); + h = (h | (h >> 8)) & UINT64_C(0x0000ffff0000ffff); + + return HEDLEY_STATIC_CAST(uint32_t, l & h); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask32_to_mmask16(__mmask32 m) { + uint32_t l = (HEDLEY_STATIC_CAST(uint32_t, m) ) & UINT32_C(0x55555555); + l = (l | (l >> 1)) & UINT32_C(0x33333333); + l = (l | (l >> 2)) & UINT32_C(0x0f0f0f0f); + l = (l | (l >> 4)) & UINT32_C(0x00ff00ff); + l = (l | (l >> 8)) & UINT32_C(0x0000ffff); + + uint32_t h = (HEDLEY_STATIC_CAST(uint32_t, m) >> 1) & UINT32_C(0x55555555); + h = (h | (h >> 1)) & UINT32_C(0x33333333); + h = (h | (h >> 2)) & UINT32_C(0x0f0f0f0f); + h = (h | (h >> 4)) & UINT32_C(0x00ff00ff); + h = (h | (h >> 8)) & UINT32_C(0x0000ffff); + + return HEDLEY_STATIC_CAST(uint16_t, l & h); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask16_to_mmask8(__mmask16 m) { + uint16_t l = (HEDLEY_STATIC_CAST(uint16_t, m) ) 
& UINT16_C(0x5555); + l = (l | (l >> 1)) & UINT16_C(0x3333); + l = (l | (l >> 2)) & UINT16_C(0x0f0f); + l = (l | (l >> 4)) & UINT16_C(0x00ff); + + uint16_t h = (HEDLEY_STATIC_CAST(uint16_t, m) >> 1) & UINT16_C(0x5555); + h = (h | (h >> 1)) & UINT16_C(0x3333); + h = (h | (h >> 2)) & UINT16_C(0x0f0f); + h = (h | (h >> 4)) & UINT16_C(0x00ff); + + return HEDLEY_STATIC_CAST(uint8_t, l & h); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask8_to_mmask4(__mmask8 m) { + uint8_t l = (HEDLEY_STATIC_CAST(uint8_t, m) ) & UINT8_C(0x55); + l = (l | (l >> 1)) & UINT8_C(0x33); + l = (l | (l >> 2)) & UINT8_C(0x0f); + l = (l | (l >> 4)) & UINT8_C(0xff); + + uint8_t h = (HEDLEY_STATIC_CAST(uint8_t, m) >> 1) & UINT8_C(0x55); + h = (h | (h >> 1)) & UINT8_C(0x33); + h = (h | (h >> 2)) & UINT8_C(0x0f); + h = (h | (h >> 4)) & UINT8_C(0xff); + + return HEDLEY_STATIC_CAST(uint8_t, l & h); + } + #endif + + typedef enum { + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64, + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32, + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16, + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8, + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4, + #endif + } simde_svbool_mmask_type; + + HEDLEY_CONST HEDLEY_ALWAYS_INLINE + simde_svbool_t + simde_svbool_from_mmask64(__mmask64 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask32(__mmask32 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask16(__mmask16 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask8(__mmask8 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8; + + return b; + } + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask4(__mmask8 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask8 + simde_svbool_to_mmask4(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask8(HEDLEY_STATIC_CAST(__mmask16, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask4(HEDLEY_STATIC_CAST(__mmask8, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask8, tmp); + } + #endif + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask8 + simde_svbool_to_mmask8(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, 
simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask8(HEDLEY_STATIC_CAST(__mmask16, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + #endif + } + + return HEDLEY_STATIC_CAST(__mmask8, tmp); + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask16 + simde_svbool_to_mmask16(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + #endif + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask16, tmp); + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask32 + simde_svbool_to_mmask32(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + #endif + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask32(HEDLEY_STATIC_CAST(__mmask16, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask32, tmp); + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask64 + simde_svbool_to_mmask64(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + #endif + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask32(HEDLEY_STATIC_CAST(__mmask16, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, 
simde_arm_sve_mmask32_to_mmask64(HEDLEY_STATIC_CAST(__mmask32, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask64, tmp); + } + + /* TODO: we're going to need need svbool_to/from_svint* functions + * for when we can't implement a function using AVX-512. */ + #else + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR( int8_t, values_i8, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( int16_t, values_i16, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( int32_t, values_i32, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( int64_t, values_i64, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( uint8_t, values_u8, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values_u16, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR(uint32_t, values_u32, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR(uint64_t, values_u64, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon_i8; + int16x8_t neon_i16; + int32x4_t neon_i32; + int64x2_t neon_i64; + uint8x16_t neon_u8; + uint16x8_t neon_u16; + uint32x4_t neon_u32; + uint64x2_t neon_u64; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) altivec_b8; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) altivec_b16; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) altivec_b32; + #endif + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) altivec_b64; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svbool_t; + + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint8, simde_svint8_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_from_svint8, simde_svbool_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint16, simde_svint16_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint16, simde_svbool_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint32, simde_svint32_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint32, simde_svbool_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint64, simde_svint64_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint64, simde_svbool_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint8, simde_svuint8_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint8, simde_svbool_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint16, simde_svuint16_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint16, simde_svbool_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint32, simde_svuint32_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint32, simde_svbool_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint64, simde_svuint64_t, 
simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint64, simde_svbool_t, simde_svuint64_t) + #endif + + #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + typedef simde_svbool_t svbool_t; + typedef simde_svint8_t svint8_t; + typedef simde_svint16_t svint16_t; + typedef simde_svint32_t svint32_t; + typedef simde_svint64_t svint64_t; + typedef simde_svuint8_t svuint8_t; + typedef simde_svuint16_t svuint16_t; + typedef simde_svuint32_t svuint32_t; + typedef simde_svuint64_t svuint64_t; + typedef simde_svfloat16_t svfloat16_t; + typedef simde_svbfloat16_t svbfloat16_t; + typedef simde_svfloat32_t svfloat32_t; + typedef simde_svfloat64_t svfloat64_t; + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX) + #define SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX z +#endif +#define SIMDE_ARM_SVE_UNDEFINED_SYMBOL(name) HEDLEY_CONCAT3(name, _, SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX) + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +/* These are going to be used pretty much everywhere since they are + * used to create the loops SVE requires. Since we want to support + * only including the files you need instead of just using sve.h, + * it's helpful to pull these in here. While this file is called + * arm/sve/types.h, it might be better to think of it more as + * arm/sve/common.h. */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/cnt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_CNT_H) +#define SIMDE_ARM_SVE_CNT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_svcntb(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcntb(); + #else + return sizeof(simde_svint8_t) / sizeof(int8_t); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcntb + #define svcntb() simde_svcntb() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_svcnth(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcnth(); + #else + return sizeof(simde_svint16_t) / sizeof(int16_t); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcnth + #define svcnth() simde_svcnth() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_svcntw(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcntw(); + #else + return sizeof(simde_svint32_t) / sizeof(int32_t); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcntw + #define svcntw() simde_svcntw() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_svcntd(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcntd(); + #else + return sizeof(simde_svint64_t) / sizeof(int64_t); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcntd + #define svcntd() simde_svcntd() +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_CNT_H */ +/* :: End simde/arm/sve/cnt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/ld1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* Note: we don't have vector implementations for most of these because + * we can't just load everything and mask out the uninteresting bits; + * that might cause a fault, for example if the end of the buffer buts + * up against a protected page. + * + * One thing we might be able to do would be to check if the predicate + * is all ones and, if so, use an unpredicated load instruction. This + * would probably we worthwhile for smaller types, though perhaps not + * for larger types since it would mean branching for every load plus + * the overhead of checking whether all bits are 1. 
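+ *
+ * For instance, a minimal sketch of that fast path (assuming the AVX-512
+ * representation where pg is held as a full __mmask64, so "all bits set"
+ * means every byte lane is active; not how the loads below are written):
+ *
+ *     __mmask64 k = simde_svbool_to_mmask64(pg);
+ *     r.m512i = (k == ~UINT64_C(0))
+ *       ? _mm512_loadu_si512(base)            /* unpredicated load */
+ *       : _mm512_maskz_loadu_epi8(k, base);   /* predicated fallback */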
*/ + +#if !defined(SIMDE_ARM_SVE_LD1_H) +#define SIMDE_ARM_SVE_LD1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svld1_s8(simde_svbool_t pg, const int8_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_s8(pg, base); + #else + simde_svint8_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi8(simde_svbool_to_mmask64(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi8(simde_svbool_to_mmask32(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = pg.values_i8[i] ? base[i] : INT8_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_s8 + #define svld1_s8(pg, base) simde_svld1_s8((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svld1_s16(simde_svbool_t pg, const int16_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_s16(pg, base); + #else + simde_svint16_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi16(simde_svbool_to_mmask32(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi16(simde_svbool_to_mmask16(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = pg.values_i16[i] ? base[i] : INT16_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_s16 + #define svld1_s16(pg, base) simde_svld1_s16((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svld1_s32(simde_svbool_t pg, const int32_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_s32(pg, base); + #else + simde_svint32_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi32(simde_svbool_to_mmask16(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi32(simde_svbool_to_mmask8(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = pg.values_i32[i] ? 
base[i] : INT32_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_s32 + #define svld1_s32(pg, base) simde_svld1_s32((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svld1_s64(simde_svbool_t pg, const int64_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_s64(pg, base); + #else + simde_svint64_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi64(simde_svbool_to_mmask8(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi64(simde_svbool_to_mmask4(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + r.values[i] = pg.values_i64[i] ? base[i] : INT64_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_s64 + #define svld1_s64(pg, base) simde_svld1_s64((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svld1_u8(simde_svbool_t pg, const uint8_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_u8(pg, base); + #else + simde_svuint8_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi8(simde_svbool_to_mmask64(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi8(simde_svbool_to_mmask32(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = pg.values_i8[i] ? base[i] : UINT8_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_u8 + #define svld1_u8(pg, base) simde_svld1_u8((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svld1_u16(simde_svbool_t pg, const uint16_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_u16(pg, base); + #else + simde_svuint16_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi16(simde_svbool_to_mmask32(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi16(simde_svbool_to_mmask16(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = pg.values_i16[i] ? 
base[i] : UINT16_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_u16 + #define svld1_u16(pg, base) simde_svld1_u16((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svld1_u32(simde_svbool_t pg, const uint32_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_u32(pg, base); + #else + simde_svuint32_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi32(simde_svbool_to_mmask16(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi32(simde_svbool_to_mmask8(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = pg.values_i32[i] ? base[i] : UINT32_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_u32 + #define svld1_u32(pg, base) simde_svld1_u32((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svld1_u64(simde_svbool_t pg, const uint64_t * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_u64(pg, base); + #else + simde_svuint64_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_loadu_epi64(simde_svbool_to_mmask8(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_loadu_epi64(simde_svbool_to_mmask4(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + r.values[i] = pg.values_i64[i] ? base[i] : UINT64_C(0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_u64 + #define svld1_u64(pg, base) simde_svld1_u64((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svld1_f32(simde_svbool_t pg, const simde_float32 * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_f32(pg, base); + #else + simde_svfloat32_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512 = _mm512_maskz_loadu_ps(simde_svbool_to_mmask16(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256[0] = _mm256_maskz_loadu_ps(simde_svbool_to_mmask8(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = pg.values_i32[i] ? 
base[i] : SIMDE_FLOAT32_C(0.0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_f32 + #define svld1_f32(pg, base) simde_svld1_f32((pg), (base)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svld1_f64(simde_svbool_t pg, const simde_float64 * base) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svld1_f64(pg, base); + #else + simde_svfloat64_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512d = _mm512_maskz_loadu_pd(simde_svbool_to_mmask8(pg), base); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256d[0] = _mm256_maskz_loadu_pd(simde_svbool_to_mmask4(pg), base); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + r.values[i] = pg.values_i64[i] ? base[i] : SIMDE_FLOAT64_C(0.0); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svld1_f64 + #define svld1_f64(pg, base) simde_svld1_f64((pg), (base)) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svld1(simde_svbool_t pg, const int8_t * base) { return simde_svld1_s8 (pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svld1(simde_svbool_t pg, const int16_t * base) { return simde_svld1_s16(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svld1(simde_svbool_t pg, const int32_t * base) { return simde_svld1_s32(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svld1(simde_svbool_t pg, const int64_t * base) { return simde_svld1_s64(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svld1(simde_svbool_t pg, const uint8_t * base) { return simde_svld1_u8 (pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svld1(simde_svbool_t pg, const uint16_t * base) { return simde_svld1_u16(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svld1(simde_svbool_t pg, const uint32_t * base) { return simde_svld1_u32(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svld1(simde_svbool_t pg, const uint64_t * base) { return simde_svld1_u64(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svld1(simde_svbool_t pg, const simde_float32 * base) { return simde_svld1_f32(pg, base); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svld1(simde_svbool_t pg, const simde_float64 * base) { return simde_svld1_f64(pg, base); } +#elif defined(SIMDE_GENERIC_) + #define simde_svld1(pg, base) \ + (SIMDE_GENERIC_((base), \ + const int8_t *: simde_svld1_s8 , \ + const int16_t *: simde_svld1_s16, \ + const int32_t *: simde_svld1_s32, \ + const int64_t *: simde_svld1_s64, \ + const uint8_t *: simde_svld1_u8 , \ + const uint16_t *: simde_svld1_u16, \ + const uint32_t *: simde_svld1_u32, \ + const uint64_t *: simde_svld1_u64, \ + const simde_float32 *: simde_svld1_f32, \ + const simde_float64 *: simde_svld1_f64)(pg, base)) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svld1 + #define svld1(pg, base) simde_svld1((pg), (base)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_LD1_H */ +/* :: End simde/arm/sve/ld1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/ptest.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of 
charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_PTEST_H) +#define SIMDE_ARM_SVE_PTEST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_svptest_first(simde_svbool_t pg, simde_svbool_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svptest_first(pg, op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_LIKELY(pg.value & 1)) + return op.value & 1; + + if (pg.value == 0 || op.value == 0) + return 0; + + #if defined(_MSC_VER) + unsigned long r = 0; + _BitScanForward64(&r, HEDLEY_STATIC_CAST(uint64_t, pg.value)); + return (op.value >> r) & 1; + #else + return (op.value >> __builtin_ctzll(HEDLEY_STATIC_CAST(unsigned long long, pg.value))) & 1; + #endif + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + if (pg.values_i8[i]) { + return !!op.values_i8[i]; + } + } + + return 0; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svptest_first + #define svptest_first(pg, op) simde_svptest_first(pg, op) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_PTEST_H */ +/* :: End simde/arm/sve/ptest.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/ptrue.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_PTRUE_H) +#define SIMDE_ARM_SVE_PTRUE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svptrue_b8(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svptrue_b8(); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svbool_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r = simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, ~UINT64_C(0))); + #else + r = simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0))); + #endif + + return r; + #else + simde_svint8_t r; + + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = ~INT8_C(0); + } + + return simde_svbool_from_svint8(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svptrue_b8 + #define svptrue_b8() simde_svptrue_b8() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svptrue_b16(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svptrue_b16(); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svbool_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r = simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0))); + #else + r = simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0))); + #endif + + return r; + #else + simde_svint16_t r; + + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = ~INT16_C(0); + } + + return simde_svbool_from_svint16(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svptrue_b16 + #define svptrue_b16() simde_svptrue_b16() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svptrue_b32(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svptrue_b32(); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svbool_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r = simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0))); + #else + r = simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0))); + #endif + + return r; + #else + simde_svint32_t r; + + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = ~INT32_C(0); + } + + return simde_svbool_from_svint32(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svptrue_b32 + #define svptrue_b32() simde_svptrue_b32() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svptrue_b64(void) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svptrue_b64(); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svbool_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r = simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0))); + #else + r = simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0))); + #endif + + return r; + #else + simde_svint64_t r; + + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, 
simde_svcntd()) ; i++) { + r.values[i] = ~INT64_C(0); + } + + return simde_svbool_from_svint64(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svptrue_b64 + #define svptrue_b64() simde_svptrue_b64() +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_PTRUE_H */ +/* :: End simde/arm/sve/ptrue.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/st1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_ST1_H) +#define SIMDE_ARM_SVE_ST1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_s8(simde_svbool_t pg, int8_t * base, simde_svint8_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_s8(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi8(base, simde_svbool_to_mmask64(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi8(base, simde_svbool_to_mmask32(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + if (pg.values_i8[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_s8 + #define svst1_s8(pg, base, data) simde_svst1_s8((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_s16(simde_svbool_t pg, int16_t * base, simde_svint16_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_s16(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi16(base, simde_svbool_to_mmask32(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi16(base, simde_svbool_to_mmask16(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; 
i++) { + if (pg.values_i16[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_s16 + #define svst1_s16(pg, base, data) simde_svst1_s16((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_s32(simde_svbool_t pg, int32_t * base, simde_svint32_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_s32(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi32(base, simde_svbool_to_mmask16(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi32(base, simde_svbool_to_mmask8(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + if (pg.values_i32[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_s32 + #define svst1_s32(pg, base, data) simde_svst1_s32((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_s64(simde_svbool_t pg, int64_t * base, simde_svint64_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_s64(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi64(base, simde_svbool_to_mmask8(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi64(base, simde_svbool_to_mmask4(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + if (pg.values_i64[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_s64 + #define svst1_s64(pg, base, data) simde_svst1_s64((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_u8(simde_svbool_t pg, uint8_t * base, simde_svuint8_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_u8(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi8(base, simde_svbool_to_mmask64(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi8(base, simde_svbool_to_mmask32(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + if (pg.values_u8[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_u8 + #define svst1_u8(pg, base, data) simde_svst1_u8((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_u16(simde_svbool_t pg, uint16_t * base, simde_svuint16_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_u16(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi16(base, simde_svbool_to_mmask32(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi16(base, simde_svbool_to_mmask16(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + if (pg.values_u16[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_u16 + #define svst1_u16(pg, base, data) simde_svst1_u16((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_u32(simde_svbool_t pg, uint32_t * base, simde_svuint32_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_u32(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi32(base, simde_svbool_to_mmask16(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi32(base, simde_svbool_to_mmask8(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + if (pg.values_u32[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_u32 + #define svst1_u32(pg, base, data) simde_svst1_u32((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_u64(simde_svbool_t pg, uint64_t * base, simde_svuint64_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_u64(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_epi64(base, simde_svbool_to_mmask8(pg), data.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_epi64(base, simde_svbool_to_mmask4(pg), data.m256i[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + if (pg.values_u64[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_u64 + #define svst1_u64(pg, base, data) simde_svst1_u64((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_f32(simde_svbool_t pg, simde_float32 * base, simde_svfloat32_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_f32(pg, base, data); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_ps(base, simde_svbool_to_mmask16(pg), data.m512); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_ps(base, simde_svbool_to_mmask8(pg), data.m256[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + if (pg.values_i32[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_f32 + #define svst1_f32(pg, base, data) simde_svst1_f32((pg), (base), (data)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_svst1_f64(simde_svbool_t pg, simde_float64 * base, simde_svfloat64_t data) { + #if defined(SIMDE_ARM_SVE_NATIVE) + svst1_f64(pg, base, data); + 
#elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm512_mask_storeu_pd(base, simde_svbool_to_mmask8(pg), data.m512d); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + _mm256_mask_storeu_pd(base, simde_svbool_to_mmask4(pg), data.m256d[0]); + #else + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + if (pg.values_i64[i]) { + base[i] = data.values[i]; + } + } + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svst1_f64 + #define svst1_f64(pg, base, data) simde_svst1_f64((pg), (base), (data)) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int8_t * base, simde_svint8_t data) { simde_svst1_s8 (pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int16_t * base, simde_svint16_t data) { simde_svst1_s16(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int32_t * base, simde_svint32_t data) { simde_svst1_s32(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, int64_t * base, simde_svint64_t data) { simde_svst1_s64(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint8_t * base, simde_svuint8_t data) { simde_svst1_u8 (pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint16_t * base, simde_svuint16_t data) { simde_svst1_u16(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint32_t * base, simde_svuint32_t data) { simde_svst1_u32(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, uint64_t * base, simde_svuint64_t data) { simde_svst1_u64(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, simde_float32 * base, simde_svfloat32_t data) { simde_svst1_f32(pg, base, data); } + SIMDE_FUNCTION_ATTRIBUTES void simde_svst1(simde_svbool_t pg, simde_float64 * base, simde_svfloat64_t data) { simde_svst1_f64(pg, base, data); } +#elif defined(SIMDE_GENERIC_) + #define simde_svst1(pg, base, data) \ + (SIMDE_GENERIC_((data), \ + simde_svint8_t: simde_svst1_s8 , \ + simde_svint16_t: simde_svst1_s16, \ + simde_svint32_t: simde_svst1_s32, \ + simde_svint64_t: simde_svst1_s64, \ + simde_svuint8_t: simde_svst1_u8 , \ + simde_svuint16_t: simde_svst1_u16, \ + simde_svuint32_t: simde_svst1_u32, \ + simde_svuint64_t: simde_svst1_u64, \ + simde_svfloat32_t: simde_svst1_f32, \ + simde_svfloat64_t: simde_svst1_f64)((pg), (base), (data))) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svst1 + #define svst1(pg, base, data) simde_svst1((pg), (base), (data)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_ST1_H */ +/* :: End simde/arm/sve/st1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/whilelt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to 
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_WHILELT_H) +#define SIMDE_ARM_SVE_WHILELT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b8_s32(int32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b8_s32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); + if (HEDLEY_UNLIKELY(remaining < 64)) { + r >>= 64 - remaining; + } + + return simde_svbool_from_mmask64(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #else + simde_svint8_t r; + + int_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = (remaining-- > 0) ? 
~UINT8_C(0) : UINT8_C(0); + } + + return simde_svbool_from_svint8(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b8_s32 + #define svwhilelt_b8_s32(op1, op2) simde_svwhilelt_b8_s32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b16_s32(int32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b16_s32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #else + simde_svint16_t r; + + int_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~UINT16_C(0) : UINT16_C(0); + } + + return simde_svbool_from_svint16(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b16_s32 + #define svwhilelt_b16_s32(op1, op2) simde_svwhilelt_b16_s32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b32_s32(int32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b32_s32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #else + simde_svint32_t r; + + int_fast32_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~INT32_C(0) : INT32_C(0); + } + + return simde_svbool_from_svint32(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b32_s32 + #define svwhilelt_b32_s32(op1, op2) simde_svwhilelt_b32_s32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b64_s32(int32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b64_s32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); + + int_fast32_t remaining = (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); + if (HEDLEY_UNLIKELY(remaining < 4)) { + r >>= 4 - remaining; + } + + return simde_svbool_from_mmask4(r); + #else + simde_svint64_t r; + + int_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast32_t, op2) - HEDLEY_STATIC_CAST(int_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); + } + + return simde_svbool_from_svint64(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b64_s32 + #define svwhilelt_b64_s32(op1, op2) simde_svwhilelt_b64_s32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b8_s64(int64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b8_s64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); + if (HEDLEY_UNLIKELY(remaining < 64)) { + r >>= 64 - remaining; + } + + return simde_svbool_from_mmask64(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #else + simde_svint8_t r; + + int_fast64_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~UINT8_C(0) : UINT8_C(0); + } + + return simde_svbool_from_svint8(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b8_s64 + #define svwhilelt_b8_s64(op1, op2) simde_svwhilelt_b8_s64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b16_s64(int64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b16_s64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #else + simde_svint16_t r; + + int_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = (remaining-- > 0) ? 
~UINT16_C(0) : UINT16_C(0); + } + + return simde_svbool_from_svint16(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b16_s64 + #define svwhilelt_b16_s64(op1, op2) simde_svwhilelt_b16_s64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b32_s64(int64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b32_s64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #else + simde_svint64_t r; + + int_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); + } + + return simde_svbool_from_svint64(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b32_s64 + #define svwhilelt_b32_s64(op1, op2) simde_svwhilelt_b32_s64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b64_s64(int64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b64_s64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); + + int_fast64_t remaining = (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); + if (HEDLEY_UNLIKELY(remaining < 4)) { + r >>= 4 - remaining; + } + + return simde_svbool_from_mmask4(r); + #else + simde_svint64_t r; + + int_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(int_fast64_t, op2) - HEDLEY_STATIC_CAST(int_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + r.values[i] = (remaining-- > 0) ? 
~INT64_C(0) : INT64_C(0); + } + + return simde_svbool_from_svint64(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b64_s64 + #define svwhilelt_b64_s64(op1, op2) simde_svwhilelt_b64_s64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b8_u32(uint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b8_u32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); + if (HEDLEY_UNLIKELY(remaining < 64)) { + r >>= 64 - remaining; + } + + return simde_svbool_from_mmask64(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #else + simde_svint8_t r; + + uint_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~UINT8_C(0) : UINT8_C(0); + } + + return simde_svbool_from_svint8(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b8_u32 + #define svwhilelt_b8_u32(op1, op2) simde_svwhilelt_b8_u32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b16_u32(uint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b16_u32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #else + simde_svint16_t r; + + uint_fast32_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~UINT16_C(0) : UINT16_C(0); + } + + return simde_svbool_from_svint16(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b16_u32 + #define svwhilelt_b16_u32(op1, op2) simde_svwhilelt_b16_u32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b32_u32(uint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b32_u32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #else + simde_svuint32_t r; + + uint_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = (remaining-- > 0) ? 
~UINT32_C(0) : UINT32_C(0); + } + + return simde_svbool_from_svuint32(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b32_u32 + #define svwhilelt_b32_u32(op1, op2) simde_svwhilelt_b32_u32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b64_u32(uint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b64_u32(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); + + uint_fast32_t remaining = (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); + if (HEDLEY_UNLIKELY(remaining < 4)) { + r >>= 4 - remaining; + } + + return simde_svbool_from_mmask4(r); + #else + simde_svint64_t r; + + uint_fast32_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast32_t, op2) - HEDLEY_STATIC_CAST(uint_fast32_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0); + } + + return simde_svbool_from_svint64(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b64_u32 + #define svwhilelt_b64_u32(op1, op2) simde_svwhilelt_b64_u32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b8_u64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask64(HEDLEY_STATIC_CAST(__mmask64, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask64 r = ~HEDLEY_STATIC_CAST(__mmask64, 0); + if (HEDLEY_UNLIKELY(remaining < 64)) { + r >>= 64 - remaining; + } + + return simde_svbool_from_mmask64(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT64_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #else + simde_svint8_t r; + + uint_fast64_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntb()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~UINT8_C(0) : UINT8_C(0); + } + + return simde_svbool_from_svint8(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b8_u64 + #define svwhilelt_b8_u64(op1, op2) simde_svwhilelt_b8_u64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b16_u64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask32(HEDLEY_STATIC_CAST(__mmask32, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask32 r = HEDLEY_STATIC_CAST(__mmask32, ~UINT32_C(0)); + if (HEDLEY_UNLIKELY(remaining < 32)) { + r >>= 32 - remaining; + } + + return simde_svbool_from_mmask32(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #else + simde_svint16_t r; + + uint_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcnth()) ; i++) { + r.values[i] = (remaining-- > 0) ? 
~UINT16_C(0) : UINT16_C(0); + } + + return simde_svbool_from_svint16(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b16_u64 + #define svwhilelt_b16_u64(op1, op2) simde_svwhilelt_b16_u64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b32_u64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask16(HEDLEY_STATIC_CAST(__mmask16, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask16 r = HEDLEY_STATIC_CAST(__mmask16, ~UINT16_C(0)); + if (HEDLEY_UNLIKELY(remaining < 16)) { + r >>= 16 - remaining; + } + + return simde_svbool_from_mmask16(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #else + simde_svuint64_t r; + + uint_fast64_t remaining = (op1 >= op2) ? 0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntw()) ; i++) { + r.values[i] = (remaining-- > 0) ? ~UINT64_C(0) : UINT64_C(0); + } + + return simde_svbool_from_svuint64(r); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svwhilelt_b32_u64 + #define svwhilelt_b32_u64(op1, op2) simde_svwhilelt_b32_u64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svwhilelt_b64_u64(op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask8(HEDLEY_STATIC_CAST(__mmask8, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, ~UINT8_C(0)); + if (HEDLEY_UNLIKELY(remaining < 8)) { + r >>= 8 - remaining; + } + + return simde_svbool_from_mmask8(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + if (HEDLEY_UNLIKELY(op1 >= op2)) + return simde_svbool_from_mmask4(HEDLEY_STATIC_CAST(__mmask8, 0)); + + uint_fast64_t remaining = (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1)); + __mmask8 r = HEDLEY_STATIC_CAST(__mmask8, 0x0f); + if (HEDLEY_UNLIKELY(remaining < 4)) { + r >>= 4 - remaining; + } + + return simde_svbool_from_mmask4(r); + #else + simde_svint64_t r; + + uint_fast64_t remaining = (op1 >= op2) ? 
0 : (HEDLEY_STATIC_CAST(uint_fast64_t, op2) - HEDLEY_STATIC_CAST(uint_fast64_t, op1));
+
+ SIMDE_VECTORIZE
+ for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, simde_svcntd()) ; i++) {
+ r.values[i] = (remaining-- > 0) ? ~INT64_C(0) : INT64_C(0);
+ }
+
+ return simde_svbool_from_svint64(r);
+ #endif
+}
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+ #undef simde_svwhilelt_b64_u64
+ #define svwhilelt_b64_u64(op1, op2) simde_svwhilelt_b64_u64(op1, op2)
+#endif
+
+#if defined(__cplusplus)
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 ( int32_t op1, int32_t op2) { return simde_svwhilelt_b8_s32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 ( int64_t op1, int64_t op2) { return simde_svwhilelt_b8_s64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 (uint32_t op1, uint32_t op2) { return simde_svwhilelt_b8_u32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b8 (uint64_t op1, uint64_t op2) { return simde_svwhilelt_b8_u64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16( int32_t op1, int32_t op2) { return simde_svwhilelt_b16_s32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16( int64_t op1, int64_t op2) { return simde_svwhilelt_b16_s64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16(uint32_t op1, uint32_t op2) { return simde_svwhilelt_b16_u32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b16(uint64_t op1, uint64_t op2) { return simde_svwhilelt_b16_u64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32( int32_t op1, int32_t op2) { return simde_svwhilelt_b32_s32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32( int64_t op1, int64_t op2) { return simde_svwhilelt_b32_s64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32(uint32_t op1, uint32_t op2) { return simde_svwhilelt_b32_u32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b32(uint64_t op1, uint64_t op2) { return simde_svwhilelt_b32_u64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64( int32_t op1, int32_t op2) { return simde_svwhilelt_b64_s32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64( int64_t op1, int64_t op2) { return simde_svwhilelt_b64_s64(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64(uint32_t op1, uint32_t op2) { return simde_svwhilelt_b64_u32(op1, op2); }
+ SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svwhilelt_b64(uint64_t op1, uint64_t op2) { return simde_svwhilelt_b64_u64(op1, op2); }
+#elif defined(SIMDE_GENERIC_)
+ #define simde_svwhilelt_b8(op1, op2) \
+ (SIMDE_GENERIC_((op1), \
+ int32_t: simde_svwhilelt_b8_s32, \
+ uint32_t: simde_svwhilelt_b8_u32, \
+ int64_t: simde_svwhilelt_b8_s64, \
+ uint64_t: simde_svwhilelt_b8_u64)((op1), (op2)))
+ #define simde_svwhilelt_b16(op1, op2) \
+ (SIMDE_GENERIC_((op1), \
+ int32_t: simde_svwhilelt_b16_s32, \
+ uint32_t: simde_svwhilelt_b16_u32, \
+ int64_t: simde_svwhilelt_b16_s64, \
+ uint64_t: simde_svwhilelt_b16_u64)((op1), (op2)))
+ #define simde_svwhilelt_b32(op1, op2) \
+ (SIMDE_GENERIC_((op1), \
+ int32_t: simde_svwhilelt_b32_s32, \
+ uint32_t: simde_svwhilelt_b32_u32, \
+ int64_t: simde_svwhilelt_b32_s64, \
+ uint64_t: simde_svwhilelt_b32_u64)((op1), (op2)))
+ #define simde_svwhilelt_b64(op1, op2) \
+ (SIMDE_GENERIC_((op1), \
+ int32_t: simde_svwhilelt_b64_s32, \
+ uint32_t: simde_svwhilelt_b64_u32, \
+ int64_t: simde_svwhilelt_b64_s64, \
+ uint64_t: simde_svwhilelt_b64_u64)((op1), (op2)))
+#endif
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+ #undef svwhilelt_b8
+ #undef svwhilelt_b16
+ #undef svwhilelt_b32
+ #undef svwhilelt_b64
+ #define svwhilelt_b8(op1, op2) simde_svwhilelt_b8((op1), (op2))
+ #define svwhilelt_b16(op1, op2) simde_svwhilelt_b16((op1), (op2))
+ #define svwhilelt_b32(op1, op2) simde_svwhilelt_b32((op1), (op2))
+ #define svwhilelt_b64(op1, op2) simde_svwhilelt_b64((op1), (op2))
+#endif
+
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* SIMDE_ARM_SVE_WHILELT_H */
+/* :: End simde/arm/sve/whilelt.h :: */
+
+#endif /* SIMDE_ARM_SVE_TYPES_H */
+/* :: End simde/arm/sve/types.h :: */
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/arm/sve/add.h :: */
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ * 2021 Evan Nemerson
+ */
+
+#if !defined(SIMDE_ARM_SVE_ADD_H)
+#define SIMDE_ARM_SVE_ADD_H
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/arm/sve/sel.h :: */
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_SEL_H) +#define SIMDE_ARM_SVE_SEL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/reinterpret.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_REINTERPRET_H) +#define SIMDE_ARM_SVE_REINTERPRET_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_ARM_SVE_NATIVE) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_s16( simde_svint16_t op) { return svreinterpret_s8_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_s32( simde_svint32_t op) { return svreinterpret_s8_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_s64( simde_svint64_t op) { return svreinterpret_s8_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u8( simde_svuint8_t op) { return svreinterpret_s8_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u16( simde_svuint16_t op) { return svreinterpret_s8_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u32( simde_svuint32_t op) { return svreinterpret_s8_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_u64( simde_svuint64_t op) { return svreinterpret_s8_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_f16( simde_svfloat16_t op) { return svreinterpret_s8_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_f32( simde_svfloat32_t op) { return svreinterpret_s8_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8_f64( simde_svfloat64_t op) { return svreinterpret_s8_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_s8( simde_svint8_t op) { return svreinterpret_s16_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_s32( simde_svint32_t op) { return svreinterpret_s16_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_s64( simde_svint64_t op) { return svreinterpret_s16_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t 
simde_svreinterpret_s16_u8( simde_svuint8_t op) { return svreinterpret_s16_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_u16( simde_svuint16_t op) { return svreinterpret_s16_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_u32( simde_svuint32_t op) { return svreinterpret_s16_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_u64( simde_svuint64_t op) { return svreinterpret_s16_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_f16( simde_svfloat16_t op) { return svreinterpret_s16_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_f32( simde_svfloat32_t op) { return svreinterpret_s16_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16_f64( simde_svfloat64_t op) { return svreinterpret_s16_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_s8( simde_svint8_t op) { return svreinterpret_s32_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_s16( simde_svint16_t op) { return svreinterpret_s32_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_s64( simde_svint64_t op) { return svreinterpret_s32_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u8( simde_svuint8_t op) { return svreinterpret_s32_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u16( simde_svuint16_t op) { return svreinterpret_s32_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u32( simde_svuint32_t op) { return svreinterpret_s32_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_u64( simde_svuint64_t op) { return svreinterpret_s32_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_f16( simde_svfloat16_t op) { return svreinterpret_s32_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_f32( simde_svfloat32_t op) { return svreinterpret_s32_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32_f64( simde_svfloat64_t op) { return svreinterpret_s32_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_s8( simde_svint8_t op) { return svreinterpret_s64_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_s16( simde_svint16_t op) { return svreinterpret_s64_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_s32( simde_svint32_t op) { return svreinterpret_s64_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u8( simde_svuint8_t op) { return svreinterpret_s64_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u16( simde_svuint16_t op) { return svreinterpret_s64_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u32( simde_svuint32_t op) { return svreinterpret_s64_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_u64( simde_svuint64_t op) { return svreinterpret_s64_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_f16( simde_svfloat16_t op) { return svreinterpret_s64_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_f32( simde_svfloat32_t op) { return svreinterpret_s64_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64_f64( simde_svfloat64_t op) { return svreinterpret_s64_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t 
simde_svreinterpret_u8_s8( simde_svint8_t op) { return svreinterpret_u8_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_s16( simde_svint16_t op) { return svreinterpret_u8_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_s32( simde_svint32_t op) { return svreinterpret_u8_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_s64( simde_svint64_t op) { return svreinterpret_u8_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_u16( simde_svuint16_t op) { return svreinterpret_u8_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_u32( simde_svuint32_t op) { return svreinterpret_u8_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_u64( simde_svuint64_t op) { return svreinterpret_u8_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_f16( simde_svfloat16_t op) { return svreinterpret_u8_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_f32( simde_svfloat32_t op) { return svreinterpret_u8_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8_f64( simde_svfloat64_t op) { return svreinterpret_u8_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s8( simde_svint8_t op) { return svreinterpret_u16_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s16( simde_svint16_t op) { return svreinterpret_u16_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s32( simde_svint32_t op) { return svreinterpret_u16_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_s64( simde_svint64_t op) { return svreinterpret_u16_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_u8( simde_svuint8_t op) { return svreinterpret_u16_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_u32( simde_svuint32_t op) { return svreinterpret_u16_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_u64( simde_svuint64_t op) { return svreinterpret_u16_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_f16( simde_svfloat16_t op) { return svreinterpret_u16_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_f32( simde_svfloat32_t op) { return svreinterpret_u16_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16_f64( simde_svfloat64_t op) { return svreinterpret_u16_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s8( simde_svint8_t op) { return svreinterpret_u32_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s16( simde_svint16_t op) { return svreinterpret_u32_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s32( simde_svint32_t op) { return svreinterpret_u32_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_s64( simde_svint64_t op) { return svreinterpret_u32_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_u8( simde_svuint8_t op) { return svreinterpret_u32_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_u16( simde_svuint16_t op) { return svreinterpret_u32_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_u64( simde_svuint64_t op) { return svreinterpret_u32_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t 
simde_svreinterpret_u32_f16( simde_svfloat16_t op) { return svreinterpret_u32_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_f32( simde_svfloat32_t op) { return svreinterpret_u32_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32_f64( simde_svfloat64_t op) { return svreinterpret_u32_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s8( simde_svint8_t op) { return svreinterpret_u64_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s16( simde_svint16_t op) { return svreinterpret_u64_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s32( simde_svint32_t op) { return svreinterpret_u64_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_s64( simde_svint64_t op) { return svreinterpret_u64_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_u8( simde_svuint8_t op) { return svreinterpret_u64_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_u16( simde_svuint16_t op) { return svreinterpret_u64_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_u32( simde_svuint32_t op) { return svreinterpret_u64_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_f16( simde_svfloat16_t op) { return svreinterpret_u64_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_f32( simde_svfloat32_t op) { return svreinterpret_u64_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64_f64( simde_svfloat64_t op) { return svreinterpret_u64_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s8( simde_svint8_t op) { return svreinterpret_f16_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s16( simde_svint16_t op) { return svreinterpret_f16_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s32( simde_svint32_t op) { return svreinterpret_f16_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_s64( simde_svint64_t op) { return svreinterpret_f16_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u8( simde_svuint8_t op) { return svreinterpret_f16_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u16( simde_svuint16_t op) { return svreinterpret_f16_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u32( simde_svuint32_t op) { return svreinterpret_f16_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_u64( simde_svuint64_t op) { return svreinterpret_f16_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_f32( simde_svfloat32_t op) { return svreinterpret_f16_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16_f64( simde_svfloat64_t op) { return svreinterpret_f16_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s8( simde_svint8_t op) { return svreinterpret_f32_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s16( simde_svint16_t op) { return svreinterpret_f32_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s32( simde_svint32_t op) { return svreinterpret_f32_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_s64( simde_svint64_t op) { return svreinterpret_f32_s64(op); } + 
SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u8( simde_svuint8_t op) { return svreinterpret_f32_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u16( simde_svuint16_t op) { return svreinterpret_f32_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u32( simde_svuint32_t op) { return svreinterpret_f32_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_u64( simde_svuint64_t op) { return svreinterpret_f32_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_f16( simde_svfloat16_t op) { return svreinterpret_f32_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32_f64( simde_svfloat64_t op) { return svreinterpret_f32_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s8( simde_svint8_t op) { return svreinterpret_f64_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s16( simde_svint16_t op) { return svreinterpret_f64_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s32( simde_svint32_t op) { return svreinterpret_f64_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_s64( simde_svint64_t op) { return svreinterpret_f64_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u8( simde_svuint8_t op) { return svreinterpret_f64_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u16( simde_svuint16_t op) { return svreinterpret_f64_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u32( simde_svuint32_t op) { return svreinterpret_f64_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_u64( simde_svuint64_t op) { return svreinterpret_f64_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_f16( simde_svfloat16_t op) { return svreinterpret_f64_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64_f32( simde_svfloat32_t op) { return svreinterpret_f64_f32(op); } +#else + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_s16, simde_svint8_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_s32, simde_svint8_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_s64, simde_svint8_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u8, simde_svint8_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u16, simde_svint8_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u32, simde_svint8_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_u64, simde_svint8_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_f16, simde_svint8_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_f32, simde_svint8_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s8_f64, simde_svint8_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_s8, simde_svint16_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_s32, simde_svint16_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_s64, simde_svint16_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u8, simde_svint16_t, simde_svuint8_t) + 
SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u16, simde_svint16_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u32, simde_svint16_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_u64, simde_svint16_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_f16, simde_svint16_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_f32, simde_svint16_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s16_f64, simde_svint16_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_s8, simde_svint32_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_s16, simde_svint32_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_s64, simde_svint32_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u8, simde_svint32_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u16, simde_svint32_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u32, simde_svint32_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_u64, simde_svint32_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_f16, simde_svint32_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_f32, simde_svint32_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s32_f64, simde_svint32_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_s8, simde_svint64_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_s16, simde_svint64_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_s32, simde_svint64_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u8, simde_svint64_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u16, simde_svint64_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u32, simde_svint64_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_u64, simde_svint64_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_f16, simde_svint64_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_f32, simde_svint64_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_s64_f64, simde_svint64_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s8, simde_svuint8_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s16, simde_svuint8_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s32, simde_svuint8_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_s64, simde_svuint8_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_u16, simde_svuint8_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_u32, simde_svuint8_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_u64, simde_svuint8_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_f16, simde_svuint8_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u8_f32, simde_svuint8_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( 
simde_svreinterpret_u8_f64, simde_svuint8_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s8, simde_svuint16_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s16, simde_svuint16_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s32, simde_svuint16_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_s64, simde_svuint16_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_u8, simde_svuint16_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_u32, simde_svuint16_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_u64, simde_svuint16_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_f16, simde_svuint16_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_f32, simde_svuint16_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u16_f64, simde_svuint16_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s8, simde_svuint32_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s16, simde_svuint32_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s32, simde_svuint32_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_s64, simde_svuint32_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_u8, simde_svuint32_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_u16, simde_svuint32_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_u64, simde_svuint32_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_f16, simde_svuint32_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_f32, simde_svuint32_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u32_f64, simde_svuint32_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s8, simde_svuint64_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s16, simde_svuint64_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s32, simde_svuint64_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_s64, simde_svuint64_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_u8, simde_svuint64_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_u16, simde_svuint64_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_u32, simde_svuint64_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_f16, simde_svuint64_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_f32, simde_svuint64_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_u64_f64, simde_svuint64_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s8, simde_svfloat16_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s16, simde_svfloat16_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s32, simde_svfloat16_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_s64, simde_svfloat16_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( 
simde_svreinterpret_f16_u8, simde_svfloat16_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_u16, simde_svfloat16_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_u32, simde_svfloat16_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_u64, simde_svfloat16_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_f32, simde_svfloat16_t, simde_svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f16_f64, simde_svfloat16_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s8, simde_svfloat32_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s16, simde_svfloat32_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s32, simde_svfloat32_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_s64, simde_svfloat32_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u8, simde_svfloat32_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u16, simde_svfloat32_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u32, simde_svfloat32_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_u64, simde_svfloat32_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_f16, simde_svfloat32_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f32_f64, simde_svfloat32_t, simde_svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s8, simde_svfloat64_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s16, simde_svfloat64_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s32, simde_svfloat64_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_s64, simde_svfloat64_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u8, simde_svfloat64_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u16, simde_svfloat64_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u32, simde_svfloat64_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_u64, simde_svfloat64_t, simde_svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_f16, simde_svfloat64_t, simde_svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svreinterpret_f64_f32, simde_svfloat64_t, simde_svfloat32_t) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svint16_t op) { return simde_svreinterpret_s8_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svint32_t op) { return simde_svreinterpret_s8_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svint64_t op) { return simde_svreinterpret_s8_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint8_t op) { return simde_svreinterpret_s8_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint16_t op) { return simde_svreinterpret_s8_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint32_t op) { return simde_svreinterpret_s8_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svuint64_t op) { return 
simde_svreinterpret_s8_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svfloat16_t op) { return simde_svreinterpret_s8_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svfloat32_t op) { return simde_svreinterpret_s8_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svreinterpret_s8( simde_svfloat64_t op) { return simde_svreinterpret_s8_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svint8_t op) { return simde_svreinterpret_s16_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svint32_t op) { return simde_svreinterpret_s16_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svint64_t op) { return simde_svreinterpret_s16_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint8_t op) { return simde_svreinterpret_s16_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint16_t op) { return simde_svreinterpret_s16_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint32_t op) { return simde_svreinterpret_s16_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svuint64_t op) { return simde_svreinterpret_s16_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svfloat16_t op) { return simde_svreinterpret_s16_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svfloat32_t op) { return simde_svreinterpret_s16_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svreinterpret_s16( simde_svfloat64_t op) { return simde_svreinterpret_s16_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svint8_t op) { return simde_svreinterpret_s32_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svint16_t op) { return simde_svreinterpret_s32_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svint64_t op) { return simde_svreinterpret_s32_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint8_t op) { return simde_svreinterpret_s32_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint16_t op) { return simde_svreinterpret_s32_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint32_t op) { return simde_svreinterpret_s32_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svuint64_t op) { return simde_svreinterpret_s32_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svfloat16_t op) { return simde_svreinterpret_s32_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svfloat32_t op) { return simde_svreinterpret_s32_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svreinterpret_s32( simde_svfloat64_t op) { return simde_svreinterpret_s32_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svint8_t op) { return simde_svreinterpret_s64_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svint16_t op) { return simde_svreinterpret_s64_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svint32_t op) { return simde_svreinterpret_s64_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t 
simde_svreinterpret_s64( simde_svuint8_t op) { return simde_svreinterpret_s64_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svuint16_t op) { return simde_svreinterpret_s64_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svuint32_t op) { return simde_svreinterpret_s64_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svuint64_t op) { return simde_svreinterpret_s64_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svfloat16_t op) { return simde_svreinterpret_s64_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svfloat32_t op) { return simde_svreinterpret_s64_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svreinterpret_s64( simde_svfloat64_t op) { return simde_svreinterpret_s64_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint8_t op) { return simde_svreinterpret_u8_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint16_t op) { return simde_svreinterpret_u8_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint32_t op) { return simde_svreinterpret_u8_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svint64_t op) { return simde_svreinterpret_u8_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svuint16_t op) { return simde_svreinterpret_u8_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svuint32_t op) { return simde_svreinterpret_u8_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svuint64_t op) { return simde_svreinterpret_u8_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svfloat16_t op) { return simde_svreinterpret_u8_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svfloat32_t op) { return simde_svreinterpret_u8_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svreinterpret_u8( simde_svfloat64_t op) { return simde_svreinterpret_u8_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint8_t op) { return simde_svreinterpret_u16_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint16_t op) { return simde_svreinterpret_u16_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint32_t op) { return simde_svreinterpret_u16_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svint64_t op) { return simde_svreinterpret_u16_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svuint8_t op) { return simde_svreinterpret_u16_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svuint32_t op) { return simde_svreinterpret_u16_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svuint64_t op) { return simde_svreinterpret_u16_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svfloat16_t op) { return simde_svreinterpret_u16_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svfloat32_t op) { return simde_svreinterpret_u16_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svreinterpret_u16( simde_svfloat64_t op) { return simde_svreinterpret_u16_f64(op); 
} + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint8_t op) { return simde_svreinterpret_u32_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint16_t op) { return simde_svreinterpret_u32_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint32_t op) { return simde_svreinterpret_u32_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svint64_t op) { return simde_svreinterpret_u32_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svuint8_t op) { return simde_svreinterpret_u32_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svuint16_t op) { return simde_svreinterpret_u32_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svuint64_t op) { return simde_svreinterpret_u32_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svfloat16_t op) { return simde_svreinterpret_u32_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svfloat32_t op) { return simde_svreinterpret_u32_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svreinterpret_u32( simde_svfloat64_t op) { return simde_svreinterpret_u32_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint8_t op) { return simde_svreinterpret_u64_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint16_t op) { return simde_svreinterpret_u64_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint32_t op) { return simde_svreinterpret_u64_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svint64_t op) { return simde_svreinterpret_u64_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svuint8_t op) { return simde_svreinterpret_u64_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svuint16_t op) { return simde_svreinterpret_u64_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svuint32_t op) { return simde_svreinterpret_u64_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svfloat16_t op) { return simde_svreinterpret_u64_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svfloat32_t op) { return simde_svreinterpret_u64_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svreinterpret_u64( simde_svfloat64_t op) { return simde_svreinterpret_u64_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint8_t op) { return simde_svreinterpret_f16_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint16_t op) { return simde_svreinterpret_f16_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint32_t op) { return simde_svreinterpret_f16_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svint64_t op) { return simde_svreinterpret_f16_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svuint8_t op) { return simde_svreinterpret_f16_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svuint16_t op) { return simde_svreinterpret_f16_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t 
simde_svreinterpret_f16( simde_svuint32_t op) { return simde_svreinterpret_f16_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svuint64_t op) { return simde_svreinterpret_f16_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svfloat32_t op) { return simde_svreinterpret_f16_f32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat16_t simde_svreinterpret_f16( simde_svfloat64_t op) { return simde_svreinterpret_f16_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint8_t op) { return simde_svreinterpret_f32_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint16_t op) { return simde_svreinterpret_f32_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint32_t op) { return simde_svreinterpret_f32_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svint64_t op) { return simde_svreinterpret_f32_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint8_t op) { return simde_svreinterpret_f32_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint16_t op) { return simde_svreinterpret_f32_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint32_t op) { return simde_svreinterpret_f32_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svuint64_t op) { return simde_svreinterpret_f32_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svfloat16_t op) { return simde_svreinterpret_f32_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svreinterpret_f32( simde_svfloat64_t op) { return simde_svreinterpret_f32_f64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint8_t op) { return simde_svreinterpret_f64_s8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint16_t op) { return simde_svreinterpret_f64_s16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint32_t op) { return simde_svreinterpret_f64_s32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svint64_t op) { return simde_svreinterpret_f64_s64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint8_t op) { return simde_svreinterpret_f64_u8(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint16_t op) { return simde_svreinterpret_f64_u16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint32_t op) { return simde_svreinterpret_f64_u32(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svuint64_t op) { return simde_svreinterpret_f64_u64(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svfloat16_t op) { return simde_svreinterpret_f64_f16(op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svreinterpret_f64( simde_svfloat32_t op) { return simde_svreinterpret_f64_f32(op); } + + #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, 
svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s8, svint8_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s16, svint16_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s32, svint32_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_s64, svint64_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svfloat32_t) + 
SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u8, svuint8_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u16, svuint16_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u32, svuint32_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_u64, svuint64_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svfloat32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f16, svfloat16_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( 
svreinterpret_f32, svfloat32_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f32, svfloat32_t, svfloat64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svuint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svfloat16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( svreinterpret_f64, svfloat64_t, svfloat32_t) + #endif /* defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) */ +#elif defined(SIMDE_GENERIC_) + #define simde_svreinterpret_f64(op) \ + (_Generic((op), \ + simde_svint16_t: simde_svreinterpret_s8_s16, \ + simde_svint32_t: simde_svreinterpret_s8_s32, \ + simde_svint64_t: simde_svreinterpret_s8_s64, \ + simde_svuint8_t: simde_svreinterpret_s8_u8, \ + simde_svuint16_t: simde_svreinterpret_s8_u16, \ + simde_svuint32_t: simde_svreinterpret_s8_u32, \ + simde_svuint64_t: simde_svreinterpret_s8_u64, \ + simde_svfloat16_t: simde_svreinterpret_s8_f16, \ + simde_svfloat32_t: simde_svreinterpret_s8_f32, \ + simde_svfloat64_t: simde_svreinterpret_s8_f64)(op)) + #define simde_svreinterpret_s8(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_s16_s8, \ + simde_svint32_t: simde_svreinterpret_s16_s32, \ + simde_svint64_t: simde_svreinterpret_s16_s64, \ + simde_svuint8_t: simde_svreinterpret_s16_u8, \ + simde_svuint16_t: simde_svreinterpret_s16_u16, \ + simde_svuint32_t: simde_svreinterpret_s16_u32, \ + simde_svuint64_t: simde_svreinterpret_s16_u64, \ + simde_svfloat16_t: simde_svreinterpret_s16_f16, \ + simde_svfloat32_t: simde_svreinterpret_s16_f32, \ + simde_svfloat64_t: simde_svreinterpret_s16_f64)(op)) + #define simde_svreinterpret_s16(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_s32_s8, \ + simde_svint16_t: simde_svreinterpret_s32_s16, \ + simde_svint64_t: simde_svreinterpret_s32_s64, \ + simde_svuint8_t: simde_svreinterpret_s32_u8, \ + simde_svuint16_t: simde_svreinterpret_s32_u16, \ + simde_svuint32_t: simde_svreinterpret_s32_u32, \ + simde_svuint64_t: simde_svreinterpret_s32_u64, \ + simde_svfloat16_t: simde_svreinterpret_s32_f16, \ + simde_svfloat32_t: simde_svreinterpret_s32_f32, \ + simde_svfloat64_t: simde_svreinterpret_s32_f64)(op)) + #define simde_svreinterpret_s32(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_s64_s8, \ + simde_svint16_t: simde_svreinterpret_s64_s16, \ + simde_svint32_t: simde_svreinterpret_s64_s32, \ + simde_svuint8_t: simde_svreinterpret_s64_u8, \ + simde_svuint16_t: simde_svreinterpret_s64_u16, \ + simde_svuint32_t: simde_svreinterpret_s64_u32, \ + simde_svuint64_t: simde_svreinterpret_s64_u64, \ + 
simde_svfloat16_t: simde_svreinterpret_s64_f16, \ + simde_svfloat32_t: simde_svreinterpret_s64_f32, \ + simde_svfloat64_t: simde_svreinterpret_s64_f64)(op)) + #define simde_svreinterpret_s64(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_u8_s8, \ + simde_svint16_t: simde_svreinterpret_u8_s16, \ + simde_svint32_t: simde_svreinterpret_u8_s32, \ + simde_svint64_t: simde_svreinterpret_u8_s64, \ + simde_svuint16_t: simde_svreinterpret_u8_u16, \ + simde_svuint32_t: simde_svreinterpret_u8_u32, \ + simde_svuint64_t: simde_svreinterpret_u8_u64, \ + simde_svfloat16_t: simde_svreinterpret_u8_f16, \ + simde_svfloat32_t: simde_svreinterpret_u8_f32, \ + simde_svfloat64_t: simde_svreinterpret_u8_f64)(op)) + #define simde_svreinterpret_u8(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_u16_s8, \ + simde_svint16_t: simde_svreinterpret_u16_s16, \ + simde_svint32_t: simde_svreinterpret_u16_s32, \ + simde_svint64_t: simde_svreinterpret_u16_s64, \ + simde_svuint8_t: simde_svreinterpret_u16_u8, \ + simde_svuint32_t: simde_svreinterpret_u16_u32, \ + simde_svuint64_t: simde_svreinterpret_u16_u64, \ + simde_svfloat16_t: simde_svreinterpret_u16_f16, \ + simde_svfloat32_t: simde_svreinterpret_u16_f32, \ + simde_svfloat64_t: simde_svreinterpret_u16_f64)(op)) + #define simde_svreinterpret_u16(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_u32_s8, \ + simde_svint16_t: simde_svreinterpret_u32_s16, \ + simde_svint32_t: simde_svreinterpret_u32_s32, \ + simde_svint64_t: simde_svreinterpret_u32_s64, \ + simde_svuint8_t: simde_svreinterpret_u32_u8, \ + simde_svuint16_t: simde_svreinterpret_u32_u16, \ + simde_svuint64_t: simde_svreinterpret_u32_u64, \ + simde_svfloat16_t: simde_svreinterpret_u32_f16, \ + simde_svfloat32_t: simde_svreinterpret_u32_f32, \ + simde_svfloat64_t: simde_svreinterpret_u32_f64)(op)) + #define simde_svreinterpret_u32(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_u64_s8, \ + simde_svint16_t: simde_svreinterpret_u64_s16, \ + simde_svint32_t: simde_svreinterpret_u64_s32, \ + simde_svint64_t: simde_svreinterpret_u64_s64, \ + simde_svuint8_t: simde_svreinterpret_u64_u8, \ + simde_svuint16_t: simde_svreinterpret_u64_u16, \ + simde_svuint32_t: simde_svreinterpret_u64_u32, \ + simde_svfloat16_t: simde_svreinterpret_u64_f16, \ + simde_svfloat32_t: simde_svreinterpret_u64_f32, \ + simde_svfloat64_t: simde_svreinterpret_u64_f64)(op)) + #define simde_svreinterpret_u64(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_f16_s8, \ + simde_svint16_t: simde_svreinterpret_f16_s16, \ + simde_svint32_t: simde_svreinterpret_f16_s32, \ + simde_svint64_t: simde_svreinterpret_f16_s64, \ + simde_svuint8_t: simde_svreinterpret_f16_u8, \ + simde_svuint16_t: simde_svreinterpret_f16_u16, \ + simde_svuint32_t: simde_svreinterpret_f16_u32, \ + simde_svuint64_t: simde_svreinterpret_f16_u64, \ + simde_svfloat32_t: simde_svreinterpret_f16_f32, \ + simde_svfloat64_t: simde_svreinterpret_f16_f64)(op)) + #define simde_svreinterpret_f16(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_f32_s8, \ + simde_svint16_t: simde_svreinterpret_f32_s16, \ + simde_svint32_t: simde_svreinterpret_f32_s32, \ + simde_svint64_t: simde_svreinterpret_f32_s64, \ + simde_svuint8_t: simde_svreinterpret_f32_u8, \ + simde_svuint16_t: simde_svreinterpret_f32_u16, \ + simde_svuint32_t: simde_svreinterpret_f32_u32, \ + simde_svuint64_t: simde_svreinterpret_f32_u64, \ + simde_svfloat16_t: simde_svreinterpret_f32_f16, \ + simde_svfloat64_t: simde_svreinterpret_f32_f64)(op)) 
+ #define simde_svreinterpret_f32(op) \ + (_Generic((op), \ + simde_svint8_t: simde_svreinterpret_f64_s8, \ + simde_svint16_t: simde_svreinterpret_f64_s16, \ + simde_svint32_t: simde_svreinterpret_f64_s32, \ + simde_svint64_t: simde_svreinterpret_f64_s64, \ + simde_svuint8_t: simde_svreinterpret_f64_u8, \ + simde_svuint16_t: simde_svreinterpret_f64_u16, \ + simde_svuint32_t: simde_svreinterpret_f64_u32, \ + simde_svuint64_t: simde_svreinterpret_f64_u64, \ + simde_svfloat16_t: simde_svreinterpret_f64_f16, \ + simde_svfloat32_t: simde_svreinterpret_f64_f32)(op)) + #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #define svreinterpret_f64(op) \ + (_Generic((op), \ + svint16_t: svreinterpret_s8_s16, \ + svint32_t: svreinterpret_s8_s32, \ + svint64_t: svreinterpret_s8_s64, \ + svuint8_t: svreinterpret_s8_u8, \ + svuint16_t: svreinterpret_s8_u16, \ + svuint32_t: svreinterpret_s8_u32, \ + svuint64_t: svreinterpret_s8_u64, \ + svfloat16_t: svreinterpret_s8_f16, \ + svfloat32_t: svreinterpret_s8_f32, \ + svfloat64_t: svreinterpret_s8_f64)(op)) + #define svreinterpret_s8(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_s16_s8, \ + svint32_t: svreinterpret_s16_s32, \ + svint64_t: svreinterpret_s16_s64, \ + svuint8_t: svreinterpret_s16_u8, \ + svuint16_t: svreinterpret_s16_u16, \ + svuint32_t: svreinterpret_s16_u32, \ + svuint64_t: svreinterpret_s16_u64, \ + svfloat16_t: svreinterpret_s16_f16, \ + svfloat32_t: svreinterpret_s16_f32, \ + svfloat64_t: svreinterpret_s16_f64)(op)) + #define svreinterpret_s16(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_s32_s8, \ + svint16_t: svreinterpret_s32_s16, \ + svint64_t: svreinterpret_s32_s64, \ + svuint8_t: svreinterpret_s32_u8, \ + svuint16_t: svreinterpret_s32_u16, \ + svuint32_t: svreinterpret_s32_u32, \ + svuint64_t: svreinterpret_s32_u64, \ + svfloat16_t: svreinterpret_s32_f16, \ + svfloat32_t: svreinterpret_s32_f32, \ + svfloat64_t: svreinterpret_s32_f64)(op)) + #define svreinterpret_s32(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_s64_s8, \ + svint16_t: svreinterpret_s64_s16, \ + svint32_t: svreinterpret_s64_s32, \ + svuint8_t: svreinterpret_s64_u8, \ + svuint16_t: svreinterpret_s64_u16, \ + svuint32_t: svreinterpret_s64_u32, \ + svuint64_t: svreinterpret_s64_u64, \ + svfloat16_t: svreinterpret_s64_f16, \ + svfloat32_t: svreinterpret_s64_f32, \ + svfloat64_t: svreinterpret_s64_f64)(op)) + #define svreinterpret_s64(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_u8_s8, \ + svint16_t: svreinterpret_u8_s16, \ + svint32_t: svreinterpret_u8_s32, \ + svint64_t: svreinterpret_u8_s64, \ + svuint16_t: svreinterpret_u8_u16, \ + svuint32_t: svreinterpret_u8_u32, \ + svuint64_t: svreinterpret_u8_u64, \ + svfloat16_t: svreinterpret_u8_f16, \ + svfloat32_t: svreinterpret_u8_f32, \ + svfloat64_t: svreinterpret_u8_f64)(op)) + #define svreinterpret_u8(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_u16_s8, \ + svint16_t: svreinterpret_u16_s16, \ + svint32_t: svreinterpret_u16_s32, \ + svint64_t: svreinterpret_u16_s64, \ + svuint8_t: svreinterpret_u16_u8, \ + svuint32_t: svreinterpret_u16_u32, \ + svuint64_t: svreinterpret_u16_u64, \ + svfloat16_t: svreinterpret_u16_f16, \ + svfloat32_t: svreinterpret_u16_f32, \ + svfloat64_t: svreinterpret_u16_f64)(op)) + #define svreinterpret_u16(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_u32_s8, \ + svint16_t: svreinterpret_u32_s16, \ + svint32_t: svreinterpret_u32_s32, \ + svint64_t: svreinterpret_u32_s64, \ + svuint8_t: svreinterpret_u32_u8, \ + svuint16_t: svreinterpret_u32_u16, \ + svuint64_t: 
svreinterpret_u32_u64, \ + svfloat16_t: svreinterpret_u32_f16, \ + svfloat32_t: svreinterpret_u32_f32, \ + svfloat64_t: svreinterpret_u32_f64)(op)) + #define svreinterpret_u32(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_u64_s8, \ + svint16_t: svreinterpret_u64_s16, \ + svint32_t: svreinterpret_u64_s32, \ + svint64_t: svreinterpret_u64_s64, \ + svuint8_t: svreinterpret_u64_u8, \ + svuint16_t: svreinterpret_u64_u16, \ + svuint32_t: svreinterpret_u64_u32, \ + svfloat16_t: svreinterpret_u64_f16, \ + svfloat32_t: svreinterpret_u64_f32, \ + svfloat64_t: svreinterpret_u64_f64)(op)) + #define svreinterpret_u64(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_f16_s8, \ + svint16_t: svreinterpret_f16_s16, \ + svint32_t: svreinterpret_f16_s32, \ + svint64_t: svreinterpret_f16_s64, \ + svuint8_t: svreinterpret_f16_u8, \ + svuint16_t: svreinterpret_f16_u16, \ + svuint32_t: svreinterpret_f16_u32, \ + svuint64_t: svreinterpret_f16_u64, \ + svfloat32_t: svreinterpret_f16_f32, \ + svfloat64_t: svreinterpret_f16_f64)(op)) + #define svreinterpret_f16(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_f32_s8, \ + svint16_t: svreinterpret_f32_s16, \ + svint32_t: svreinterpret_f32_s32, \ + svint64_t: svreinterpret_f32_s64, \ + svuint8_t: svreinterpret_f32_u8, \ + svuint16_t: svreinterpret_f32_u16, \ + svuint32_t: svreinterpret_f32_u32, \ + svuint64_t: svreinterpret_f32_u64, \ + svfloat16_t: svreinterpret_f32_f16, \ + svfloat64_t: svreinterpret_f32_f64)(op)) + #define svreinterpret_f32(op) \ + (_Generic((op), \ + svint8_t: svreinterpret_f64_s8, \ + svint16_t: svreinterpret_f64_s16, \ + svint32_t: svreinterpret_f64_s32, \ + svint64_t: svreinterpret_f64_s64, \ + svuint8_t: svreinterpret_f64_u8, \ + svuint16_t: svreinterpret_f64_u16, \ + svuint32_t: svreinterpret_f64_u32, \ + svuint64_t: svreinterpret_f64_u64, \ + svfloat16_t: svreinterpret_f64_f16, \ + svfloat32_t: svreinterpret_f64_f32)(op)) + #endif /* defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) */ +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_REINTERPRET_H */ +/* :: End simde/arm/sve/reinterpret.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_x_svsel_s8_z(simde_svbool_t pg, simde_svint8_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s8_z(pg, op1, op1); + #else + simde_svint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s8(pg.neon_i8, op1.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_mov_epi8(simde_svbool_to_mmask64(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_mov_epi8(simde_svbool_to_mmask32(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_and(pg.altivec_b8, op1.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = pg.values_i8 & op1.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = 
wasm_v128_and(pg.v128, op1.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = pg.values_i8 & op1.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = pg.values_i8[i] & op1.values[i]; + } + #endif + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsel_s8(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_s8(pg, op1, op2); + #else + simde_svint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vbslq_s8(pg.neon_u8, op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_mask_mov_epi8(op2.m512i, simde_svbool_to_mmask64(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_mask_mov_epi8(op2.m256i[0], simde_svbool_to_mmask32(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); + } + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = (pg.values_i8 & op1.values) | (~pg.values_i8 & op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = (pg.values_i8[i] & op1.values[i]) | (~pg.values_i8[i] & op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_s8 + #define svsel_s8(pg, op1, op2) simde_svsel_s8(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_x_svsel_s16_z(simde_svbool_t pg, simde_svint16_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s16_z(pg, op1, op1); + #else + simde_svint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s16(pg.neon_i16, op1.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_mov_epi16(simde_svbool_to_mmask32(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_mov_epi16(simde_svbool_to_mmask16(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); + 
} + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_and(pg.altivec_b16, op1.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = pg.values_i16 & op1.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, op1.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = pg.values_i16 & op1.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = pg.values_i16[i] & op1.values[i]; + } + #endif + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsel_s16(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_s16(pg, op1, op2); + #else + simde_svint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vbslq_s16(pg.neon_u16, op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_mask_mov_epi16(op2.m512i, simde_svbool_to_mmask32(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_mask_mov_epi16(op2.m256i[0], simde_svbool_to_mmask16(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); + } + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = (pg.values_i16 & op1.values) | (~pg.values_i16 & op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = (pg.values_i16[i] & op1.values[i]) | (~pg.values_i16[i] & op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_s16 + #define svsel_s16(pg, op1, op2) simde_svsel_s16(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_x_svsel_s32_z(simde_svbool_t pg, simde_svint32_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s32_z(pg, op1, op1); + #else + simde_svint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s32(pg.neon_i32, op1.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = 
_mm512_maskz_mov_epi32(simde_svbool_to_mmask16(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_mov_epi32(simde_svbool_to_mmask8(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_and(pg.altivec_b32, op1.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = pg.values_i32 & op1.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, op1.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = pg.values_i32 & op1.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = pg.values_i32[i] & op1.values[i]; + } + #endif + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsel_s32(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_s32(pg, op1, op2); + #else + simde_svint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vbslq_s32(pg.neon_u32, op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_mask_mov_epi32(op2.m512i, simde_svbool_to_mmask16(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_mask_mov_epi32(op2.m256i[0], simde_svbool_to_mmask8(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); + } + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = (pg.values_i32 & op1.values) | (~pg.values_i32 & op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = (pg.values_i32[i] & op1.values[i]) | (~pg.values_i32[i] & op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_s32 + #define svsel_s32(pg, op1, op2) simde_svsel_s32(pg, op1, op2) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_x_svsel_s64_z(simde_svbool_t pg, simde_svint64_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s64_z(pg, op1, op1); + #else + simde_svint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s64(pg.neon_i64, op1.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_maskz_mov_epi64(simde_svbool_to_mmask8(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_maskz_mov_epi64(simde_svbool_to_mmask4(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(pg.m256i[i], op1.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], op1.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r.altivec = vec_and(pg.altivec_b64, op1.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = HEDLEY_REINTERPRET_CAST(__typeof__(op1.altivec), pg.values_i64) & op1.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, op1.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = pg.values_i64 & op1.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = pg.values_i64[i] & op1.values[i]; + } + #endif + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsel_s64(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_s64(pg, op1, op2); + #else + simde_svint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vbslq_s64(pg.neon_u64, op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m512i = _mm512_mask_mov_epi64(op2.m512i, simde_svbool_to_mmask8(pg), op1.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r.m256i[0] = _mm256_mask_mov_epi64(op2.m256i[0], simde_svbool_to_mmask4(pg), op1.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_blendv_epi8(op2.m256i[i], op1.m256i[i], pg.m256i[i]); + } + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_blendv_epi8(op2.m128i[i], op1.m128i[i], pg.m128i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_or_si128(_mm_and_si128(pg.m128i[i], op1.m128i[i]), _mm_andnot_si128(pg.m128i[i], op2.m128i[i])); + } + #elif (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) && !defined(SIMDE_BUG_CLANG_46770) + r.altivec = vec_sel(op2.altivec, op1.altivec, pg.altivec_b64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = 
wasm_v128_bitselect(op1.v128, op2.v128, pg.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = (pg.values_i64 & op1.values) | (~pg.values_i64 & op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = (pg.values_i64[i] & op1.values[i]) | (~pg.values_i64[i] & op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_s64 + #define svsel_s64(pg, op1, op2) simde_svsel_s64(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_x_svsel_u8_z(simde_svbool_t pg, simde_svuint8_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u8_z(pg, op1, op1); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svuint8_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r.m512i = _mm512_maskz_mov_epi8(simde_svbool_to_mmask64(pg), op1.m512i); + #else + r.m256i[0] = _mm256_maskz_mov_epi8(simde_svbool_to_mmask32(pg), op1.m256i[0]); + #endif + + return r; + #else + return simde_svreinterpret_u8_s8(simde_x_svsel_s8_z(pg, simde_svreinterpret_s8_u8(op1))); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsel_u8(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_u8(pg, op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svuint8_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r.m512i = _mm512_mask_mov_epi8(op2.m512i, simde_svbool_to_mmask64(pg), op1.m512i); + #else + r.m256i[0] = _mm256_mask_mov_epi8(op2.m256i[0], simde_svbool_to_mmask32(pg), op1.m256i[0]); + #endif + + return r; + #else + return simde_svreinterpret_u8_s8(simde_svsel_s8(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_u8 + #define svsel_u8(pg, op1, op2) simde_svsel_u8(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_x_svsel_u16_z(simde_svbool_t pg, simde_svuint16_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u16_z(pg, op1, op1); + #else + return simde_svreinterpret_u16_s16(simde_x_svsel_s16_z(pg, simde_svreinterpret_s16_u16(op1))); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsel_u16(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_u16(pg, op1, op2); + #else + return simde_svreinterpret_u16_s16(simde_svsel_s16(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_u16 + #define svsel_u16(pg, op1, op2) simde_svsel_u16(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_x_svsel_u32_z(simde_svbool_t pg, simde_svuint32_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u32_z(pg, op1, op1); + #else + return simde_svreinterpret_u32_s32(simde_x_svsel_s32_z(pg, simde_svreinterpret_s32_u32(op1))); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsel_u32(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_u32(pg, op1, op2); + #else + 
return simde_svreinterpret_u32_s32(simde_svsel_s32(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_u32 + #define svsel_u32(pg, op1, op2) simde_svsel_u32(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_x_svsel_u64_z(simde_svbool_t pg, simde_svuint64_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u64_z(pg, op1, op1); + #else + return simde_svreinterpret_u64_s64(simde_x_svsel_s64_z(pg, simde_svreinterpret_s64_u64(op1))); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsel_u64(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_u64(pg, op1, op2); + #else + return simde_svreinterpret_u64_s64(simde_svsel_s64(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_u64 + #define svsel_u64(pg, op1, op2) simde_svsel_u64(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_x_svsel_f32_z(simde_svbool_t pg, simde_svfloat32_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return simde_svreinterpret_f32_s32(svand_s32_z(pg, simde_svreinterpret_s32_f32(op1), simde_svreinterpret_s32_f32(op1))); + #else + return simde_svreinterpret_f32_s32(simde_x_svsel_s32_z(pg, simde_svreinterpret_s32_f32(op1))); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsel_f32(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_f32(pg, op1, op2); + #else + return simde_svreinterpret_f32_s32(simde_svsel_s32(pg, simde_svreinterpret_s32_f32(op1), simde_svreinterpret_s32_f32(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_f32 + #define svsel_f32(pg, op1, op2) simde_svsel_f32(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_x_svsel_f64_z(simde_svbool_t pg, simde_svfloat64_t op1) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return simde_svreinterpret_f64_s64(svand_s64_z(pg, simde_svreinterpret_s64_f64(op1), simde_svreinterpret_s64_f64(op1))); + #else + return simde_svreinterpret_f64_s64(simde_x_svsel_s64_z(pg, simde_svreinterpret_s64_f64(op1))); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsel_f64(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsel_f64(pg, op1, op2); + #else + return simde_svreinterpret_f64_s64(simde_svsel_s64(pg, simde_svreinterpret_s64_f64(op1), simde_svreinterpret_s64_f64(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsel_f64 + #define svsel_f64(pg, op1, op2) simde_svsel_f64(pg, op1, op2) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_x_svsel_z(simde_svbool_t pg, simde_svint8_t op1) { return simde_x_svsel_s8_z (pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_x_svsel_z(simde_svbool_t pg, simde_svint16_t op1) { return simde_x_svsel_s16_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_x_svsel_z(simde_svbool_t pg, simde_svint32_t op1) { return simde_x_svsel_s32_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_x_svsel_z(simde_svbool_t pg, simde_svint64_t op1) { return simde_x_svsel_s64_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint8_t op1) { return 
simde_x_svsel_u8_z (pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint16_t op1) { return simde_x_svsel_u16_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint32_t op1) { return simde_x_svsel_u32_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_x_svsel_z(simde_svbool_t pg, simde_svuint64_t op1) { return simde_x_svsel_u64_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_x_svsel_z(simde_svbool_t pg, simde_svfloat32_t op1) { return simde_x_svsel_f32_z(pg, op1); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_x_svsel_z(simde_svbool_t pg, simde_svfloat64_t op1) { return simde_x_svsel_f64_z(pg, op1); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsel(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsel_s8 (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsel(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsel_s16(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsel(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svsel_s32(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsel(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsel_s64(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsel(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsel_u8 (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsel(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsel_u16(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsel(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsel_u32(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsel(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsel_u64(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsel(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsel_f32(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsel(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsel_f64(pg, op1, op2); } +#elif defined(SIMDE_GENERIC_) + #define simde_x_svsel_z(pg, op1) \ + (SIMDE_GENERIC_((op1), \ + simde_svint8_t: simde_x_svsel_s8_z, \ + simde_svint16_t: simde_x_svsel_s16_z, \ + simde_svint32_t: simde_x_svsel_s32_z, \ + simde_svint64_t: simde_x_svsel_s64_z, \ + simde_svuint8_t: simde_x_svsel_u8_z, \ + simde_svuint16_t: simde_x_svsel_u16_z, \ + simde_svuint32_t: simde_x_svsel_u32_z, \ + simde_svuint64_t: simde_x_svsel_u64_z, \ + simde_svfloat32_t: simde_x_svsel_f32_z, \ + simde_svfloat64_t: simde_x_svsel_f64_z)((pg), (op1))) + + #define simde_svsel(pg, op1, op2) \ + (SIMDE_GENERIC_((op1), \ + simde_svint8_t: simde_svsel_s8, \ + simde_svint16_t: simde_svsel_s16, \ + simde_svint32_t: simde_svsel_s32, \ + simde_svint64_t: simde_svsel_s64, \ + simde_svuint8_t: simde_svsel_u8, \ + simde_svuint16_t: simde_svsel_u16, \ + simde_svuint32_t: simde_svsel_u32, \ + simde_svuint64_t: simde_svsel_u64, \ + simde_svfloat32_t: simde_svsel_f32, \ + simde_svfloat64_t: simde_svsel_f64)((pg), (op1), (op2))) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svsel + #define svsel(pg, op1) simde_svsel((pg), (op1)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_SEL_H */ 
+/* :: End simde/arm/sve/sel.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/dup.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_DUP_H) +#define SIMDE_ARM_SVE_DUP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svdup_n_s8(int8_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s8(op); + #else + simde_svint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_s8(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi8(op); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi8(op); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi8(op); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_splat(op); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s8 + #define svdup_n_s8(op) simde_svdup_n_s8((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svdup_s8(int8_t op) { + return simde_svdup_n_s8(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s8 + #define svdup_s8(op) simde_svdup_n_s8((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svdup_n_s8_z(simde_svbool_t pg, int8_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s8_z(pg, op); + #else + return simde_x_svsel_s8_z(pg, simde_svdup_n_s8(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s8_z + #define 
svdup_n_s8_z(pg, op) simde_svdup_n_s8_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svdup_s8_z(simde_svbool_t pg, int8_t op) { + return simde_svdup_n_s8_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s8_z + #define svdup_s8_z(pg, op) simde_svdup_n_s8_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svdup_n_s8_m(simde_svint8_t inactive, simde_svbool_t pg, int8_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s8_m(inactive, pg, op); + #else + return simde_svsel_s8(pg, simde_svdup_n_s8(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s8_m + #define svdup_n_s8_m(inactive, pg, op) simde_svdup_n_s8_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svdup_s8_m(simde_svint8_t inactive, simde_svbool_t pg, int8_t op) { + return simde_svdup_n_s8_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s8_m + #define svdup_s8_m(inactive, pg, op) simde_svdup_n_s8_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svdup_n_s16(int16_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s16(op); + #else + simde_svint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_s16(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi16(op); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi16(op); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi16(op); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_splat(op); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s16 + #define svdup_n_s16(op) simde_svdup_n_s16((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svdup_s16(int16_t op) { + return simde_svdup_n_s16(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s16 + #define svdup_s16(op) simde_svdup_n_s16((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svdup_n_s16_z(simde_svbool_t pg, int16_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s16_z(pg, op); + #else + return simde_x_svsel_s16_z(pg, simde_svdup_n_s16(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s16_z + #define svdup_n_s16_z(pg, op) simde_svdup_n_s16_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svdup_s16_z(simde_svbool_t pg, int8_t op) { + return simde_svdup_n_s16_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s16_z + #define svdup_s16_z(pg, op) simde_svdup_n_s16_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svdup_n_s16_m(simde_svint16_t inactive, simde_svbool_t pg, int16_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s16_m(inactive, pg, op); + #else + return simde_svsel_s16(pg, simde_svdup_n_s16(op), inactive); + #endif +} 
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s16_m + #define svdup_n_s16_m(inactive, pg, op) simde_svdup_n_s16_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svdup_s16_m(simde_svint16_t inactive, simde_svbool_t pg, int16_t op) { + return simde_svdup_n_s16_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s16_m + #define svdup_s16_m(inactive, pg, op) simde_svdup_n_s16_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svdup_n_s32(int32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s32(op); + #else + simde_svint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_s32(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi32(op); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi32(op); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi32(op); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i32x4_splat(op); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s32 + #define svdup_n_s32(op) simde_svdup_n_s32((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svdup_s32(int8_t op) { + return simde_svdup_n_s32(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s32 + #define svdup_s32(op) simde_svdup_n_s32((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svdup_n_s32_z(simde_svbool_t pg, int32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s32_z(pg, op); + #else + return simde_x_svsel_s32_z(pg, simde_svdup_n_s32(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s32_z + #define svdup_n_s32_z(pg, op) simde_svdup_n_s32_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svdup_s32_z(simde_svbool_t pg, int32_t op) { + return simde_svdup_n_s32_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s32_z + #define svdup_s32_z(pg, op) simde_svdup_n_s32_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svdup_n_s32_m(simde_svint32_t inactive, simde_svbool_t pg, int32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s32_m(inactive, pg, op); + #else + return simde_svsel_s32(pg, simde_svdup_n_s32(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s32_m + #define svdup_n_s32_m(inactive, pg, op) simde_svdup_n_s32_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svdup_s32_m(simde_svint32_t inactive, simde_svbool_t pg, int32_t op) { + return simde_svdup_n_s32_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s32_m + #define svdup_s32_m(inactive, pg, op) simde_svdup_n_s32_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svdup_n_s64(int64_t op) { + #if 
defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s64(op); + #else + simde_svint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_s64(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi64(op); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi64x(op); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi64x(op); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(HEDLEY_STATIC_CAST(signed long long int, op)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i64x2_splat(op); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s64 + #define svdup_n_s64(op) simde_svdup_n_s64((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svdup_s64(int64_t op) { + return simde_svdup_n_s64(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s64 + #define svdup_s64(op) simde_svdup_n_s64((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svdup_n_s64_z(simde_svbool_t pg, int64_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s64_z(pg, op); + #else + return simde_x_svsel_s64_z(pg, simde_svdup_n_s64(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s64_z + #define svdup_n_s64_z(pg, op) simde_svdup_n_s64_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svdup_s64_z(simde_svbool_t pg, int64_t op) { + return simde_svdup_n_s64_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s64_z + #define svdup_s64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svdup_n_s64_m(simde_svint64_t inactive, simde_svbool_t pg, int64_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_s64_m(inactive, pg, op); + #else + return simde_svsel_s64(pg, simde_svdup_n_s64(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_s64_m + #define svdup_n_s64_m(inactive, pg, op) simde_svdup_n_s64_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svdup_s64_m(simde_svint64_t inactive, simde_svbool_t pg, int64_t op) { + return simde_svdup_n_s64_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_s64_m + #define svdup_s64_m(inactive, pg, op) simde_svdup_n_s64_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svdup_n_u8(uint8_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u8(op); + #else + simde_svuint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_u8(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, op)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, op)); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < 
HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, op)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, op)); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u8 + #define svdup_n_u8(op) simde_svdup_n_u8((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svdup_u8(uint8_t op) { + return simde_svdup_n_u8(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u8 + #define svdup_u8(op) simde_svdup_n_u8((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svdup_n_u8_z(simde_svbool_t pg, uint8_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u8_z(pg, op); + #else + return simde_x_svsel_u8_z(pg, simde_svdup_n_u8(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u8_z + #define svdup_n_u8_z(pg, op) simde_svdup_n_u8_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svdup_u8_z(simde_svbool_t pg, uint8_t op) { + return simde_svdup_n_u8_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u8_z + #define svdup_u8_z(pg, op) simde_svdup_n_u8_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svdup_n_u8_m(simde_svuint8_t inactive, simde_svbool_t pg, uint8_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u8_m(inactive, pg, op); + #else + return simde_svsel_u8(pg, simde_svdup_n_u8(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u8_m + #define svdup_n_u8_m(inactive, pg, op) simde_svdup_n_u8_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svdup_u8_m(simde_svuint8_t inactive, simde_svbool_t pg, uint8_t op) { + return simde_svdup_n_u8_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u8_m + #define svdup_u8_m(inactive, pg, op) simde_svdup_n_u8_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svdup_n_u16(uint16_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u16(op); + #else + simde_svuint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_u16(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi16(HEDLEY_STATIC_CAST(int16_t, op)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi16(HEDLEY_STATIC_CAST(int16_t, op)); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, op)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, op)); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + 
return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u16 + #define svdup_n_u16(op) simde_svdup_n_u16((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svdup_u16(uint16_t op) { + return simde_svdup_n_u16(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u16 + #define svdup_u16(op) simde_svdup_n_u16((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svdup_n_u16_z(simde_svbool_t pg, uint16_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u16_z(pg, op); + #else + return simde_x_svsel_u16_z(pg, simde_svdup_n_u16(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u16_z + #define svdup_n_u16_z(pg, op) simde_svdup_n_u16_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svdup_u16_z(simde_svbool_t pg, uint8_t op) { + return simde_svdup_n_u16_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u16_z + #define svdup_u16_z(pg, op) simde_svdup_n_u16_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svdup_n_u16_m(simde_svuint16_t inactive, simde_svbool_t pg, uint16_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u16_m(inactive, pg, op); + #else + return simde_svsel_u16(pg, simde_svdup_n_u16(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u16_m + #define svdup_n_u16_m(inactive, pg, op) simde_svdup_n_u16_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svdup_u16_m(simde_svuint16_t inactive, simde_svbool_t pg, uint16_t op) { + return simde_svdup_n_u16_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u16_m + #define svdup_u16_m(inactive, pg, op) simde_svdup_n_u16_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svdup_n_u32(uint32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u32(op); + #else + simde_svuint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_u32(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi32(HEDLEY_STATIC_CAST(int32_t, op)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int32_t, op)); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, op)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, op)); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u32 + #define svdup_n_u32(op) simde_svdup_n_u32((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svdup_u32(uint8_t op) { + return simde_svdup_n_u32(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u32 + #define svdup_u32(op) simde_svdup_n_u32((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t 
+simde_svdup_n_u32_z(simde_svbool_t pg, uint32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u32_z(pg, op); + #else + return simde_x_svsel_u32_z(pg, simde_svdup_n_u32(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u32_z + #define svdup_n_u32_z(pg, op) simde_svdup_n_u32_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svdup_u32_z(simde_svbool_t pg, uint32_t op) { + return simde_svdup_n_u32_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u32_z + #define svdup_u32_z(pg, op) simde_svdup_n_u32_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svdup_n_u32_m(simde_svuint32_t inactive, simde_svbool_t pg, uint32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u32_m(inactive, pg, op); + #else + return simde_svsel_u32(pg, simde_svdup_n_u32(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u32_m + #define svdup_n_u32_m(inactive, pg, op) simde_svdup_n_u32_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svdup_u32_m(simde_svuint32_t inactive, simde_svbool_t pg, uint32_t op) { + return simde_svdup_n_u32_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u32_m + #define svdup_u32_m(inactive, pg, op) simde_svdup_n_u32_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svdup_n_u64(uint64_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u64(op); + #else + simde_svuint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_u64(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_set1_epi64(HEDLEY_STATIC_CAST(int64_t, op)); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, op)); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, op)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(HEDLEY_STATIC_CAST(unsigned long long int, op)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, op)); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u64 + #define svdup_n_u64(op) simde_svdup_n_u64((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svdup_u64(uint64_t op) { + return simde_svdup_n_u64(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u64 + #define svdup_u64(op) simde_svdup_n_u64((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svdup_n_u64_z(simde_svbool_t pg, uint64_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u64_z(pg, op); + #else + return simde_x_svsel_u64_z(pg, simde_svdup_n_u64(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u64_z + #define svdup_n_u64_z(pg, op) simde_svdup_n_u64_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svdup_u64_z(simde_svbool_t 
pg, uint64_t op) { + return simde_svdup_n_u64_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u64_z + #define svdup_u64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svdup_n_u64_m(simde_svuint64_t inactive, simde_svbool_t pg, uint64_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_u64_m(inactive, pg, op); + #else + return simde_svsel_u64(pg, simde_svdup_n_u64(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_u64_m + #define svdup_n_u64_m(inactive, pg, op) simde_svdup_n_u64_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svdup_u64_m(simde_svuint64_t inactive, simde_svbool_t pg, uint64_t op) { + return simde_svdup_n_u64_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_u64_m + #define svdup_u64_m(inactive, pg, op) simde_svdup_n_u64_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svdup_n_f32(simde_float32 op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_f32(op); + #else + simde_svfloat32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vdupq_n_f32(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512 = _mm512_set1_ps(op); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256) / sizeof(r.m256[0])) ; i++) { + r.m256[i] = _mm256_set1_ps(op); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128) / sizeof(r.m128[0])) ; i++) { + r.m128[i] = _mm_set1_ps(op); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_f32x4_splat(op); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_f32 + #define svdup_n_f32(op) simde_svdup_n_f32((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svdup_f32(int8_t op) { + return simde_svdup_n_f32(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_f32 + #define svdup_f32(op) simde_svdup_n_f32((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svdup_n_f32_z(simde_svbool_t pg, simde_float32 op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_f32_z(pg, op); + #else + return simde_x_svsel_f32_z(pg, simde_svdup_n_f32(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_f32_z + #define svdup_n_f32_z(pg, op) simde_svdup_n_f32_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svdup_f32_z(simde_svbool_t pg, simde_float32 op) { + return simde_svdup_n_f32_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_f32_z + #define svdup_f32_z(pg, op) simde_svdup_n_f32_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svdup_n_f32_m(simde_svfloat32_t inactive, simde_svbool_t pg, simde_float32_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_f32_m(inactive, pg, op); + #else + return simde_svsel_f32(pg, simde_svdup_n_f32(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_f32_m + 
#define svdup_n_f32_m(inactive, pg, op) simde_svdup_n_f32_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svdup_f32_m(simde_svfloat32_t inactive, simde_svbool_t pg, simde_float32_t op) { + return simde_svdup_n_f32_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_f32_m + #define svdup_f32_m(inactive, pg, op) simde_svdup_n_f32_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svdup_n_f64(simde_float64 op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_f64(op); + #else + simde_svfloat64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r.neon = vdupq_n_f64(op); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512d = _mm512_set1_pd(op); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256d) / sizeof(r.m256d[0])) ; i++) { + r.m256d[i] = _mm256_set1_pd(op); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128d) / sizeof(r.m128d[0])) ; i++) { + r.m128d[i] = _mm_set1_pd(op); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = vec_splats(op); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_f64x2_splat(op); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_f64 + #define svdup_n_f64(op) simde_svdup_n_f64((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svdup_f64(simde_float64 op) { + return simde_svdup_n_f64(op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_f64 + #define svdup_f64(op) simde_svdup_n_f64((op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svdup_n_f64_z(simde_svbool_t pg, simde_float64 op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_f64_z(pg, op); + #else + return simde_x_svsel_f64_z(pg, simde_svdup_n_f64(op)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_f64_z + #define svdup_n_f64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svdup_f64_z(simde_svbool_t pg, simde_float64 op) { + return simde_svdup_n_f64_z(pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_f64_z + #define svdup_f64_z(pg, op) simde_svdup_n_f64_z((pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svdup_n_f64_m(simde_svfloat64_t inactive, simde_svbool_t pg, simde_float64_t op) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svdup_n_f64_m(inactive, pg, op); + #else + return simde_svsel_f64(pg, simde_svdup_n_f64(op), inactive); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_n_f64_m + #define svdup_n_f64_m(inactive, pg, op) simde_svdup_n_f64_m((inactive), (pg), (op)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svdup_f64_m(simde_svfloat64_t inactive, simde_svbool_t pg, simde_float64_t op) { + return simde_svdup_n_f64_m(inactive, pg, op); +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svdup_f64_m + #define svdup_f64_m(inactive, pg, op) simde_svdup_n_f64_m((inactive), (pg), (op)) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup_n ( int8_t op) { return 
simde_svdup_n_s8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup ( int8_t op) { return simde_svdup_n_s8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup_n_z(simde_svbool_t pg, int8_t op) { return simde_svdup_n_s8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svdup_z (simde_svbool_t pg, int8_t op) { return simde_svdup_n_s8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup_n ( int16_t op) { return simde_svdup_n_s16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup ( int16_t op) { return simde_svdup_n_s16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup_n_z(simde_svbool_t pg, int16_t op) { return simde_svdup_n_s16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svdup_z (simde_svbool_t pg, int16_t op) { return simde_svdup_n_s16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup_n ( int32_t op) { return simde_svdup_n_s32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup ( int32_t op) { return simde_svdup_n_s32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup_n_z(simde_svbool_t pg, int32_t op) { return simde_svdup_n_s32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svdup_z (simde_svbool_t pg, int32_t op) { return simde_svdup_n_s32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup_n ( int64_t op) { return simde_svdup_n_s64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup ( int64_t op) { return simde_svdup_n_s64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup_n_z(simde_svbool_t pg, int64_t op) { return simde_svdup_n_s64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svdup_z (simde_svbool_t pg, int64_t op) { return simde_svdup_n_s64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup_n ( uint8_t op) { return simde_svdup_n_u8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup ( uint8_t op) { return simde_svdup_n_u8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup_n_z(simde_svbool_t pg, uint8_t op) { return simde_svdup_n_u8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svdup_z (simde_svbool_t pg, uint8_t op) { return simde_svdup_n_u8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup_n ( uint16_t op) { return simde_svdup_n_u16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup ( uint16_t op) { return simde_svdup_n_u16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup_n_z(simde_svbool_t pg, uint16_t op) { return simde_svdup_n_u16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svdup_z (simde_svbool_t pg, uint16_t op) { return simde_svdup_n_u16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup_n ( uint32_t op) { return simde_svdup_n_u32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup ( uint32_t op) { return simde_svdup_n_u32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup_n_z(simde_svbool_t pg, uint32_t op) { return simde_svdup_n_u32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svdup_z (simde_svbool_t pg, uint32_t op) { return simde_svdup_n_u32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup_n ( uint64_t op) { return simde_svdup_n_u64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup ( uint64_t op) { return simde_svdup_n_u64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup_n_z(simde_svbool_t 
pg, uint64_t op) { return simde_svdup_n_u64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svdup_z (simde_svbool_t pg, uint64_t op) { return simde_svdup_n_u64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup_n ( simde_float32 op) { return simde_svdup_n_f32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup ( simde_float32 op) { return simde_svdup_n_f32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup_n_z(simde_svbool_t pg, simde_float32 op) { return simde_svdup_n_f32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svdup_z (simde_svbool_t pg, simde_float32 op) { return simde_svdup_n_f32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup_n ( simde_float64 op) { return simde_svdup_n_f64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup ( simde_float64 op) { return simde_svdup_n_f64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup_n_z(simde_svbool_t pg, simde_float64 op) { return simde_svdup_n_f64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svdup_z (simde_svbool_t pg, simde_float64 op) { return simde_svdup_n_f64_z (pg, op); } + + #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup_n ( int8_t op) { return svdup_n_s8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup ( int8_t op) { return svdup_n_s8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup_n_z(svbool_t pg, int8_t op) { return svdup_n_s8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint8_t svdup_z (svbool_t pg, int8_t op) { return svdup_n_s8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup_n ( int16_t op) { return svdup_n_s16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup ( int16_t op) { return svdup_n_s16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup_n_z(svbool_t pg, int16_t op) { return svdup_n_s16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint16_t svdup_z (svbool_t pg, int16_t op) { return svdup_n_s16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup_n ( int32_t op) { return svdup_n_s32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup ( int32_t op) { return svdup_n_s32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup_n_z(svbool_t pg, int32_t op) { return svdup_n_s32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint32_t svdup_z (svbool_t pg, int32_t op) { return svdup_n_s32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup_n ( int64_t op) { return svdup_n_s64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup ( int64_t op) { return svdup_n_s64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup_n_z(svbool_t pg, int64_t op) { return svdup_n_s64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svint64_t svdup_z (svbool_t pg, int64_t op) { return svdup_n_s64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup_n ( uint8_t op) { return svdup_n_u8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup ( uint8_t op) { return svdup_n_u8 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup_n_z(svbool_t pg, uint8_t op) { return svdup_n_u8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint8_t svdup_z (svbool_t pg, uint8_t op) { return svdup_n_u8_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup_n ( uint16_t op) { return svdup_n_u16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup ( uint16_t op) { return svdup_n_u16 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup_n_z(svbool_t pg, uint16_t op) { return svdup_n_u16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint16_t svdup_z 
(svbool_t pg, uint16_t op) { return svdup_n_u16_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup_n ( uint32_t op) { return svdup_n_u32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup ( uint32_t op) { return svdup_n_u32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup_n_z(svbool_t pg, uint32_t op) { return svdup_n_u32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint32_t svdup_z (svbool_t pg, uint32_t op) { return svdup_n_u32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup_n ( uint64_t op) { return svdup_n_u64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup ( uint64_t op) { return svdup_n_u64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup_n_z(svbool_t pg, uint64_t op) { return svdup_n_u64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svuint64_t svdup_z (svbool_t pg, uint64_t op) { return svdup_n_u64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup_n ( simde_float32 op) { return svdup_n_f32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup ( simde_float32 op) { return svdup_n_f32 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup_n_z(svbool_t pg, simde_float32 op) { return svdup_n_f32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat32_t svdup_z (svbool_t pg, simde_float32 op) { return svdup_n_f32_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup_n ( simde_float64 op) { return svdup_n_f64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup ( simde_float64 op) { return svdup_n_f64 ( op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup_n_z(svbool_t pg, simde_float64 op) { return svdup_n_f64_z (pg, op); } + SIMDE_FUNCTION_ATTRIBUTES svfloat64_t svdup_z (svbool_t pg, simde_float64 op) { return svdup_n_f64_z (pg, op); } + #endif +#elif defined(SIMDE_GENERIC_) + #define simde_svdup_n(op) \ + (SIMDE_GENERIC_((op), \ + int8_t: simde_svdup_n_s8, \ + int16_t: simde_svdup_n_s16, \ + int32_t: simde_svdup_n_s32, \ + int64_t: simde_svdup_n_s64, \ + uint8_t: simde_svdup_n_u8, \ + uint16_t: simde_svdup_n_u16, \ + uint32_t: simde_svdup_n_u32, \ + uint64_t: simde_svdup_n_u64, \ + float32_t: simde_svdup_n_f32, \ + float64_t: simde_svdup_n_f64)((op))) + #define simde_svdup(op) simde_svdup_n((op)) + + #define simde_svdup_n_z(pg, op) \ + (SIMDE_GENERIC_((op), \ + int8_t: simde_svdup_n_s8_z, \ + int16_t: simde_svdup_n_s16_z, \ + int32_t: simde_svdup_n_s32_z, \ + int64_t: simde_svdup_n_s64_z, \ + uint8_t: simde_svdup_n_s8_z, \ + uint16_t: simde_svdup_n_u16_z, \ + uint32_t: simde_svdup_n_u32_z, \ + uint64_t: simde_svdup_n_u64_z, \ + float32_t: simde_svdup_n_u32_z, \ + float64_t: simde_svdup_n_f64_z)((pg), (op))) + #define simde_svdup_z(pg, op) simde_svdup_n_z((pg), (op)) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svdup + #undef svdup_z + #undef svdup_n + #undef svdup_n_z + #define svdup_n(op) simde_svdup_n((op)) + #define svdup_n_z(pg, op) simde_svdup_n_z((pg), (op)) + #define svdup(op) simde_svdup((op)) + #define svdup_z(pg, op) simde_svdup_z((pg), (op)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_DUP_H */ +/* :: End simde/arm/sve/dup.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svadd_s8_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s8_x(pg, op1, op2); + #else + simde_svint8_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_s8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && 
(SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi8(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi8(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi8(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi8(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s8_x + #define svadd_s8_x(pg, op1, op2) simde_svadd_s8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svadd_s8_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s8_z(pg, op1, op2); + #else + return simde_x_svsel_s8_z(pg, simde_svadd_s8_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s8_z + #define svadd_s8_z(pg, op1, op2) simde_svadd_s8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svadd_s8_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s8_m(pg, op1, op2); + #else + return simde_svsel_s8(pg, simde_svadd_s8_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s8_m + #define svadd_s8_m(pg, op1, op2) simde_svadd_s8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svadd_n_s8_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s8_x(pg, op1, op2); + #else + return simde_svadd_s8_x(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s8_x + #define svadd_n_s8_x(pg, op1, op2) simde_svadd_n_s8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svadd_n_s8_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s8_z(pg, op1, op2); + #else + return simde_svadd_s8_z(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s8_z + #define svadd_n_s8_z(pg, op1, op2) simde_svadd_n_s8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svadd_n_s8_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s8_m(pg, op1, op2); + #else + return simde_svadd_s8_m(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s8_m + #define svadd_n_s8_m(pg, op1, op2) simde_svadd_n_s8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svadd_s16_x(simde_svbool_t pg, 
simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s16_x(pg, op1, op2); + #else + simde_svint16_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_s16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi16(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi16(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi16(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi16(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s16_x + #define svadd_s16_x(pg, op1, op2) simde_svadd_s16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svadd_s16_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s16_z(pg, op1, op2); + #else + return simde_x_svsel_s16_z(pg, simde_svadd_s16_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s16_z + #define svadd_s16_z(pg, op1, op2) simde_svadd_s16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svadd_s16_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s16_m(pg, op1, op2); + #else + return simde_svsel_s16(pg, simde_svadd_s16_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s16_m + #define svadd_s16_m(pg, op1, op2) simde_svadd_s16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svadd_n_s16_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s16_x(pg, op1, op2); + #else + return simde_svadd_s16_x(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s16_x + #define svadd_n_s16_x(pg, op1, op2) simde_svadd_n_s16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svadd_n_s16_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s16_z(pg, op1, op2); + #else + return simde_svadd_s16_z(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s16_z + #define svadd_n_s16_z(pg, op1, op2) simde_svadd_n_s16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svadd_n_s16_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if 
defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s16_m(pg, op1, op2); + #else + return simde_svadd_s16_m(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s16_m + #define svadd_n_s16_m(pg, op1, op2) simde_svadd_n_s16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svadd_s32_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s32_x(pg, op1, op2); + #else + simde_svint32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_s32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi32(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi32(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi32(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi32(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i32x4_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s32_x + #define svadd_s32_x(pg, op1, op2) simde_svadd_s32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svadd_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s32_z(pg, op1, op2); + #else + return simde_x_svsel_s32_z(pg, simde_svadd_s32_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s32_z + #define svadd_s32_z(pg, op1, op2) simde_svadd_s32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svadd_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s32_m(pg, op1, op2); + #else + return simde_svsel_s32(pg, simde_svadd_s32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s32_m + #define svadd_s32_m(pg, op1, op2) simde_svadd_s32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svadd_n_s32_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s32_x(pg, op1, op2); + #else + return simde_svadd_s32_x(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s32_x + #define svadd_n_s32_x(pg, op1, op2) simde_svadd_n_s32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svadd_n_s32_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s32_z(pg, op1, op2); 
+ #else + return simde_svadd_s32_z(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s32_z + #define svadd_n_s32_z(pg, op1, op2) simde_svadd_n_s32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svadd_n_s32_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s32_m(pg, op1, op2); + #else + return simde_svadd_s32_m(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s32_m + #define svadd_n_s32_m(pg, op1, op2) simde_svadd_n_s32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svadd_s64_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s64_x(pg, op1, op2); + #else + simde_svint64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_s64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi64(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi64(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi64(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi64(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i64x2_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s64_x + #define svadd_s64_x(pg, op1, op2) simde_svadd_s64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svadd_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s64_z(pg, op1, op2); + #else + return simde_x_svsel_s64_z(pg, simde_svadd_s64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s64_z + #define svadd_s64_z(pg, op1, op2) simde_svadd_s64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svadd_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_s64_m(pg, op1, op2); + #else + return simde_svsel_s64(pg, simde_svadd_s64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_s64_m + #define svadd_s64_m(pg, op1, op2) simde_svadd_s64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svadd_n_s64_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s64_x(pg, op1, op2); + #else + return simde_svadd_s64_x(pg, op1, simde_svdup_n_s64(op2)); 
+ #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s64_x + #define svadd_n_s64_x(pg, op1, op2) simde_svadd_n_s64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svadd_n_s64_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s64_z(pg, op1, op2); + #else + return simde_svadd_s64_z(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s64_z + #define svadd_n_s64_z(pg, op1, op2) simde_svadd_n_s64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svadd_n_s64_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_s64_m(pg, op1, op2); + #else + return simde_svadd_s64_m(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_s64_m + #define svadd_n_s64_m(pg, op1, op2) simde_svadd_n_s64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svadd_u8_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u8_x(pg, op1, op2); + #else + simde_svuint8_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_u8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi8(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi8(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi8(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi8(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u8_x + #define svadd_u8_x(pg, op1, op2) simde_svadd_u8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svadd_u8_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u8_z(pg, op1, op2); + #else + return simde_x_svsel_u8_z(pg, simde_svadd_u8_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u8_z + #define svadd_u8_z(pg, op1, op2) simde_svadd_u8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svadd_u8_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u8_m(pg, op1, op2); + #else + return simde_svsel_u8(pg, simde_svadd_u8_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef 
simde_svadd_u8_m + #define svadd_u8_m(pg, op1, op2) simde_svadd_u8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svadd_n_u8_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u8_x(pg, op1, op2); + #else + return simde_svadd_u8_x(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u8_x + #define svadd_n_u8_x(pg, op1, op2) simde_svadd_n_u8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svadd_n_u8_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u8_z(pg, op1, op2); + #else + return simde_svadd_u8_z(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u8_z + #define svadd_n_u8_z(pg, op1, op2) simde_svadd_n_u8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svadd_n_u8_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u8_m(pg, op1, op2); + #else + return simde_svadd_u8_m(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u8_m + #define svadd_n_u8_m(pg, op1, op2) simde_svadd_n_u8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svadd_u16_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u16_x(pg, op1, op2); + #else + simde_svuint16_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_u16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi16(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi16(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi16(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi16(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u16_x + #define svadd_u16_x(pg, op1, op2) simde_svadd_u16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svadd_u16_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u16_z(pg, op1, op2); + #else + return simde_x_svsel_u16_z(pg, simde_svadd_u16_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u16_z + #define svadd_u16_z(pg, op1, op2) simde_svadd_u16_z(pg, op1, op2) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svadd_u16_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u16_m(pg, op1, op2); + #else + return simde_svsel_u16(pg, simde_svadd_u16_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u16_m + #define svadd_u16_m(pg, op1, op2) simde_svadd_u16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svadd_n_u16_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u16_x(pg, op1, op2); + #else + return simde_svadd_u16_x(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u16_x + #define svadd_n_u16_x(pg, op1, op2) simde_svadd_n_u16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svadd_n_u16_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u16_z(pg, op1, op2); + #else + return simde_svadd_u16_z(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u16_z + #define svadd_n_u16_z(pg, op1, op2) simde_svadd_n_u16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svadd_n_u16_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u16_m(pg, op1, op2); + #else + return simde_svadd_u16_m(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u16_m + #define svadd_n_u16_m(pg, op1, op2) simde_svadd_n_u16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svadd_u32_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u32_x(pg, op1, op2); + #else + simde_svuint32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_u32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi32(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi32(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi32(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi32(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i32x4_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u32_x + #define svadd_u32_x(pg, op1, op2) simde_svadd_u32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t 
+simde_svadd_u32_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u32_z(pg, op1, op2); + #else + return simde_x_svsel_u32_z(pg, simde_svadd_u32_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u32_z + #define svadd_u32_z(pg, op1, op2) simde_svadd_u32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svadd_u32_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u32_m(pg, op1, op2); + #else + return simde_svsel_u32(pg, simde_svadd_u32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u32_m + #define svadd_u32_m(pg, op1, op2) simde_svadd_u32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svadd_n_u32_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u32_x(pg, op1, op2); + #else + return simde_svadd_u32_x(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u32_x + #define svadd_n_u32_x(pg, op1, op2) simde_svadd_n_u32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svadd_n_u32_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u32_z(pg, op1, op2); + #else + return simde_svadd_u32_z(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u32_z + #define svadd_n_u32_z(pg, op1, op2) simde_svadd_n_u32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svadd_n_u32_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u32_m(pg, op1, op2); + #else + return simde_svadd_u32_m(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u32_m + #define svadd_n_u32_m(pg, op1, op2) simde_svadd_n_u32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svadd_u64_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u64_x(pg, op1, op2); + #else + simde_svuint64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_u64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_add_epi64(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_add_epi64(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_add_epi64(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_add_epi64(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i64x2_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + 
SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u64_x + #define svadd_u64_x(pg, op1, op2) simde_svadd_u64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svadd_u64_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u64_z(pg, op1, op2); + #else + return simde_x_svsel_u64_z(pg, simde_svadd_u64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u64_z + #define svadd_u64_z(pg, op1, op2) simde_svadd_u64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svadd_u64_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_u64_m(pg, op1, op2); + #else + return simde_svsel_u64(pg, simde_svadd_u64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_u64_m + #define svadd_u64_m(pg, op1, op2) simde_svadd_u64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svadd_n_u64_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u64_x(pg, op1, op2); + #else + return simde_svadd_u64_x(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u64_x + #define svadd_n_u64_x(pg, op1, op2) simde_svadd_n_u64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svadd_n_u64_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u64_z(pg, op1, op2); + #else + return simde_svadd_u64_z(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u64_z + #define svadd_n_u64_z(pg, op1, op2) simde_svadd_n_u64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svadd_n_u64_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_u64_m(pg, op1, op2); + #else + return simde_svadd_u64_m(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_u64_m + #define svadd_n_u64_m(pg, op1, op2) simde_svadd_n_u64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svadd_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_f32_x(pg, op1, op2); + #else + simde_svfloat32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vaddq_f32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512 = _mm512_add_ps(op1.m512, op2.m512); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256[0] = _mm256_add_ps(op1.m256[0], op2.m256[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256) / sizeof(r.m256[0])) ; i++) { + r.m256[i] = _mm256_add_ps(op1.m256[i], op2.m256[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128) / sizeof(r.m128[0])) ; i++) { + r.m128[i] = _mm_add_ps(op1.m128[i], op2.m128[i]); + } + 
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_f32x4_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_f32_x + #define svadd_f32_x(pg, op1, op2) simde_svadd_f32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svadd_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_f32_z(pg, op1, op2); + #else + return simde_x_svsel_f32_z(pg, simde_svadd_f32_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_f32_z + #define svadd_f32_z(pg, op1, op2) simde_svadd_f32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svadd_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_f32_m(pg, op1, op2); + #else + return simde_svsel_f32(pg, simde_svadd_f32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_f32_m + #define svadd_f32_m(pg, op1, op2) simde_svadd_f32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svadd_n_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_f32_x(pg, op1, op2); + #else + return simde_svadd_f32_x(pg, op1, simde_svdup_n_f32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_f32_x + #define svadd_n_f32_x(pg, op1, op2) simde_svadd_n_f32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svadd_n_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_f32_z(pg, op1, op2); + #else + return simde_svadd_f32_z(pg, op1, simde_svdup_n_f32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_f32_z + #define svadd_n_f32_z(pg, op1, op2) simde_svadd_n_f32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svadd_n_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_f32_m(pg, op1, op2); + #else + return simde_svadd_f32_m(pg, op1, simde_svdup_n_f32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_f32_m + #define svadd_n_f32_m(pg, op1, op2) simde_svadd_n_f32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svadd_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_f64_x(pg, op1, op2); + #else + simde_svfloat64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r.neon = vaddq_f64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512d = _mm512_add_pd(op1.m512d, op2.m512d); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256d[0] = 
_mm256_add_pd(op1.m256d[0], op2.m256d[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256d) / sizeof(r.m256d[0])) ; i++) { + r.m256d[i] = _mm256_add_pd(op1.m256d[i], op2.m256d[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128d) / sizeof(r.m128d[0])) ; i++) { + r.m128d[i] = _mm_add_pd(op1.m128d[i], op2.m128d[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r.altivec = vec_add(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec + op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_f64x2_add(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values + op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] + op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_f64_x + #define svadd_f64_x(pg, op1, op2) simde_svadd_f64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svadd_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_f64_z(pg, op1, op2); + #else + return simde_x_svsel_f64_z(pg, simde_svadd_f64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_f64_z + #define svadd_f64_z(pg, op1, op2) simde_svadd_f64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svadd_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_f64_m(pg, op1, op2); + #else + return simde_svsel_f64(pg, simde_svadd_f64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_f64_m + #define svadd_f64_m(pg, op1, op2) simde_svadd_f64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svadd_n_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_f64_x(pg, op1, op2); + #else + return simde_svadd_f64_x(pg, op1, simde_svdup_n_f64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_f64_x + #define svadd_n_f64_x(pg, op1, op2) simde_svadd_n_f64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svadd_n_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_f64_z(pg, op1, op2); + #else + return simde_svadd_f64_z(pg, op1, simde_svdup_n_f64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_f64_z + #define svadd_n_f64_z(pg, op1, op2) simde_svadd_n_f64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svadd_n_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svadd_n_f64_m(pg, op1, op2); + #else + return simde_svadd_f64_m(pg, op1, simde_svdup_n_f64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svadd_n_f64_m + #define svadd_n_f64_m(pg, op1, op2) simde_svadd_n_f64_m(pg, op1, op2) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svadd_s8_x (pg, 
op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svadd_s16_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svadd_s32_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svadd_s64_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svadd_u8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svadd_u16_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svadd_u32_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svadd_u64_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svadd_f32_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svadd_f64_x (pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svadd_s8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svadd_s16_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svadd_s32_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svadd_s64_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svadd_u8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svadd_u16_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svadd_u32_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svadd_u64_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svadd_f32_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svadd_f64_z (pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svadd_s8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svadd_s16_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t 
simde_svadd_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svadd_s32_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svadd_s64_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svadd_u8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svadd_u16_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svadd_u32_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svadd_u64_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svadd_f32_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svadd_f64_m (pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svadd_n_s8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svadd_n_s16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svadd_n_s32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svadd_n_s64_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svadd_n_u8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svadd_n_u16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svadd_n_u32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svadd_n_u64_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svadd_n_f32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svadd_n_f64_x(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svadd_n_s8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svadd_n_s16_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svadd_n_s32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svadd_n_s64_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES 
simde_svuint8_t simde_svadd_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svadd_n_u8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svadd_n_u16_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svadd_n_u32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svadd_n_u64_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svadd_n_f32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svadd_n_f64_z(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svadd_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svadd_n_s8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svadd_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svadd_n_s16_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svadd_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svadd_n_s32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svadd_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svadd_n_s64_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svadd_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svadd_n_u8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svadd_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svadd_n_u16_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svadd_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svadd_n_u32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svadd_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svadd_n_u64_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svadd_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svadd_n_f32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svadd_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svadd_n_f64_m(pg, op1, op2); } +#elif defined(SIMDE_GENERIC_) + #define simde_svadd_x(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svadd_s8_x, \ + simde_svint16_t: simde_svadd_s16_x, \ + simde_svint32_t: simde_svadd_s32_x, \ + simde_svint64_t: simde_svadd_s64_x, \ + simde_svuint8_t: simde_svadd_u8_x, \ + simde_svuint16_t: simde_svadd_u16_x, \ + simde_svuint32_t: simde_svadd_u32_x, \ + simde_svuint64_t: simde_svadd_u64_x, \ + simde_svfloat32_t: simde_svadd_f32_x, \ + simde_svfloat64_t: simde_svadd_f64_x, \ + int8_t: simde_svadd_n_s8_x, \ + int16_t: simde_svadd_n_s16_x, \ + int32_t: simde_svadd_n_s32_x, \ + int64_t: simde_svadd_n_s64_x, \ + uint8_t: simde_svadd_n_u8_x, \ + uint16_t: simde_svadd_n_u16_x, \ + uint32_t: simde_svadd_n_u32_x, \ + uint64_t: simde_svadd_n_u64_x, \ + simde_float32: simde_svadd_n_f32_x, \ + simde_float64: simde_svadd_n_f64_x)((pg), (op1), (op2))) + + #define simde_svadd_z(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svadd_s8_z, \ + simde_svint16_t: 
simde_svadd_s16_z, \ + simde_svint32_t: simde_svadd_s32_z, \ + simde_svint64_t: simde_svadd_s64_z, \ + simde_svuint8_t: simde_svadd_u8_z, \ + simde_svuint16_t: simde_svadd_u16_z, \ + simde_svuint32_t: simde_svadd_u32_z, \ + simde_svuint64_t: simde_svadd_u64_z, \ + simde_svfloat32_t: simde_svadd_f32_z, \ + simde_svfloat64_t: simde_svadd_f64_z, \ + int8_t: simde_svadd_n_s8_z, \ + int16_t: simde_svadd_n_s16_z, \ + int32_t: simde_svadd_n_s32_z, \ + int64_t: simde_svadd_n_s64_z, \ + uint8_t: simde_svadd_n_u8_z, \ + uint16_t: simde_svadd_n_u16_z, \ + uint32_t: simde_svadd_n_u32_z, \ + uint64_t: simde_svadd_n_u64_z, \ + simde_float32: simde_svadd_n_f32_z, \ + simde_float64: simde_svadd_n_f64_z)((pg), (op1), (op2))) + + #define simde_svadd_m(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svadd_s8_m, \ + simde_svint16_t: simde_svadd_s16_m, \ + simde_svint32_t: simde_svadd_s32_m, \ + simde_svint64_t: simde_svadd_s64_m, \ + simde_svuint8_t: simde_svadd_u8_m, \ + simde_svuint16_t: simde_svadd_u16_m, \ + simde_svuint32_t: simde_svadd_u32_m, \ + simde_svuint64_t: simde_svadd_u64_m, \ + simde_svfloat32_t: simde_svadd_f32_m, \ + simde_svfloat64_t: simde_svadd_f64_m, \ + int8_t: simde_svadd_n_s8_m, \ + int16_t: simde_svadd_n_s16_m, \ + int32_t: simde_svadd_n_s32_m, \ + int64_t: simde_svadd_n_s64_m, \ + uint8_t: simde_svadd_n_u8_m, \ + uint16_t: simde_svadd_n_u16_m, \ + uint32_t: simde_svadd_n_u32_m, \ + uint64_t: simde_svadd_n_u64_m, \ + simde_float32: simde_svadd_n_f32_m, \ + simde_float64: simde_svadd_n_f64_m)((pg), (op1), (op2))) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svadd_x + #undef svadd_z + #undef svadd_m + #undef svadd_n_x + #undef svadd_n_z + #undef svadd_n_m + #define svadd_x(pg, op1, op2) simde_svadd_x((pg), (op1), (op2)) + #define svadd_z(pg, op1, op2) simde_svadd_z((pg), (op1), (op2)) + #define svadd_m(pg, op1, op2) simde_svadd_m((pg), (op1), (op2)) + #define svadd_n_x(pg, op1, op2) simde_svadd_n_x((pg), (op1), (op2)) + #define svadd_n_z(pg, op1, op2) simde_svadd_n_z((pg), (op1), (op2)) + #define svadd_n_m(pg, op1, op2) simde_svadd_n_m((pg), (op1), (op2)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_ADD_H */ +/* :: End simde/arm/sve/add.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/and.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_AND_H) +#define SIMDE_ARM_SVE_AND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svand_s8_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s8_x(pg, op1, op2); + #else + simde_svint8_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_and(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec & op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values & op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] & op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s8_x + #define svand_s8_x(pg, op1, op2) simde_svand_s8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svand_s8_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s8_z(pg, op1, op2); + #else + return simde_x_svsel_s8_z(pg, simde_svand_s8_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s8_z + #define svand_s8_z(pg, op1, op2) simde_svand_s8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svand_s8_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s8_m(pg, op1, op2); + #else + return simde_svsel_s8(pg, simde_svand_s8_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s8_m + #define svand_s8_m(pg, op1, op2) simde_svand_s8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svand_n_s8_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s8_z(pg, op1, op2); + #else + return simde_svand_s8_z(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s8_z + #define svand_n_s8_z(pg, op1, op2) simde_svand_n_s8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svand_n_s8_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return 
svand_n_s8_m(pg, op1, op2); + #else + return simde_svand_s8_m(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s8_m + #define svand_n_s8_m(pg, op1, op2) simde_svand_n_s8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svand_n_s8_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s8_x(pg, op1, op2); + #else + return simde_svand_s8_x(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s8_x + #define svand_n_s8_x(pg, op1, op2) simde_svand_n_s8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svand_s16_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s16_x(pg, op1, op2); + #else + simde_svint16_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_and(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec & op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values & op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] & op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s16_x + #define svand_s16_x(pg, op1, op2) simde_svand_s16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svand_s16_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s16_z(pg, op1, op2); + #else + return simde_x_svsel_s16_z(pg, simde_svand_s16_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s16_z + #define svand_s16_z(pg, op1, op2) simde_svand_s16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svand_s16_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s16_m(pg, op1, op2); + #else + return simde_svsel_s16(pg, simde_svand_s16_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s16_m + #define svand_s16_m(pg, op1, op2) simde_svand_s16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svand_n_s16_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s16_z(pg, op1, op2); + #else + return simde_svand_s16_z(pg, op1, 
simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s16_z + #define svand_n_s16_z(pg, op1, op2) simde_svand_n_s16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svand_n_s16_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s16_m(pg, op1, op2); + #else + return simde_svand_s16_m(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s16_m + #define svand_n_s16_m(pg, op1, op2) simde_svand_n_s16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svand_n_s16_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s16_x(pg, op1, op2); + #else + return simde_svand_s16_x(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s16_x + #define svand_n_s16_x(pg, op1, op2) simde_svand_n_s16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svand_s32_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s32_x(pg, op1, op2); + #else + simde_svint32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_and(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec & op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values & op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] & op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s32_x + #define svand_s32_x(pg, op1, op2) simde_svand_s32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svand_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s32_z(pg, op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svint32_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r.m512i = _mm512_maskz_and_epi32(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i); + #else + r.m256i[0] = _mm256_maskz_and_epi32(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0]); + #endif + + return r; + #else + return simde_x_svsel_s32_z(pg, simde_svand_s32_x(pg, op1, op2)); + 
#endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s32_z + #define svand_s32_z(pg, op1, op2) simde_svand_s32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svand_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s32_m(pg, op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svint32_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r.m512i = _mm512_mask_and_epi32(op1.m512i, simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i); + #else + r.m256i[0] = _mm256_mask_and_epi32(op1.m256i[0], simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0]); + #endif + + return r; + #else + return simde_svsel_s32(pg, simde_svand_s32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s32_m + #define svand_s32_m(pg, op1, op2) simde_svand_s32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svand_n_s32_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s32_z(pg, op1, op2); + #else + return simde_svand_s32_z(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s32_z + #define svand_n_s32_z(pg, op1, op2) simde_svand_n_s32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svand_n_s32_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s32_m(pg, op1, op2); + #else + return simde_svand_s32_m(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s32_m + #define svand_n_s32_m(pg, op1, op2) simde_svand_n_s32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svand_n_s32_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s32_x(pg, op1, op2); + #else + return simde_svand_s32_x(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s32_x + #define svand_n_s32_x(pg, op1, op2) simde_svand_n_s32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svand_s64_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s64_x(pg, op1, op2); + #else + simde_svint64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vandq_s64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_and_si512(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_and_si256(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_and_si256(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r.altivec = vec_and(op1.altivec, op2.altivec); + #elif 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec & op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values & op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] & op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s64_x + #define svand_s64_x(pg, op1, op2) simde_svand_s64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svand_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s64_z(pg, op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svint64_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r.m512i = _mm512_maskz_and_epi64(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i); + #else + r.m256i[0] = _mm256_maskz_and_epi64(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0]); + #endif + + return r; + #else + return simde_x_svsel_s64_z(pg, simde_svand_s64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s64_z + #define svand_s64_z(pg, op1, op2) simde_svand_s64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svand_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_s64_m(pg, op1, op2); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && ((SIMDE_ARM_SVE_VECTOR_SIZE >= 512) || defined(SIMDE_X86_AVX512VL_NATIVE)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + simde_svint64_t r; + + #if SIMDE_ARM_SVE_VECTOR_SIZE >= 512 + r.m512i = _mm512_mask_and_epi64(op1.m512i, simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i); + #else + r.m256i[0] = _mm256_mask_and_epi64(op1.m256i[0], simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0]); + #endif + + return r; + #else + return simde_svsel_s64(pg, simde_svand_s64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_s64_m + #define svand_s64_m(pg, op1, op2) simde_svand_s64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svand_n_s64_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s64_z(pg, op1, op2); + #else + return simde_svand_s64_z(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s64_z + #define svand_n_s64_z(pg, op1, op2) simde_svand_n_s64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svand_n_s64_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s64_m(pg, op1, op2); + #else + return simde_svand_s64_m(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s64_m + #define svand_n_s64_m(pg, op1, op2) simde_svand_n_s64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svand_n_s64_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_s64_x(pg, op1, op2); + #else + return 
simde_svand_s64_x(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_s64_x + #define svand_n_s64_x(pg, op1, op2) simde_svand_n_s64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svand_u8_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u8_z(pg, op1, op2); + #else + return simde_svreinterpret_u8_s8(simde_svand_s8_z(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u8_z + #define svand_u8_z(pg, op1, op2) simde_svand_u8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svand_u8_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u8_m(pg, op1, op2); + #else + return simde_svreinterpret_u8_s8(simde_svand_s8_m(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u8_m + #define svand_u8_m(pg, op1, op2) simde_svand_u8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svand_u8_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u8_x(pg, op1, op2); + #else + return simde_svreinterpret_u8_s8(simde_svand_s8_x(pg, simde_svreinterpret_s8_u8(op1), simde_svreinterpret_s8_u8(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u8_x + #define svand_u8_x(pg, op1, op2) simde_svand_u8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svand_n_u8_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u8_z(pg, op1, op2); + #else + return simde_svand_u8_z(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u8_z + #define svand_n_u8_z(pg, op1, op2) simde_svand_n_u8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svand_n_u8_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u8_m(pg, op1, op2); + #else + return simde_svand_u8_m(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u8_m + #define svand_n_u8_m(pg, op1, op2) simde_svand_n_u8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svand_n_u8_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u8_x(pg, op1, op2); + #else + return simde_svand_u8_x(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u8_x + #define svand_n_u8_x(pg, op1, op2) simde_svand_n_u8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svand_u16_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u16_z(pg, op1, op2); + #else + return simde_svreinterpret_u16_s16(simde_svand_s16_z(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u16_z + #define svand_u16_z(pg, op1, op2) simde_svand_u16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t 
+simde_svand_u16_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u16_m(pg, op1, op2); + #else + return simde_svreinterpret_u16_s16(simde_svand_s16_m(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u16_m + #define svand_u16_m(pg, op1, op2) simde_svand_u16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svand_u16_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u16_x(pg, op1, op2); + #else + return simde_svreinterpret_u16_s16(simde_svand_s16_x(pg, simde_svreinterpret_s16_u16(op1), simde_svreinterpret_s16_u16(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u16_x + #define svand_u16_x(pg, op1, op2) simde_svand_u16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svand_n_u16_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u16_z(pg, op1, op2); + #else + return simde_svand_u16_z(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u16_z + #define svand_n_u16_z(pg, op1, op2) simde_svand_n_u16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svand_n_u16_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u16_m(pg, op1, op2); + #else + return simde_svand_u16_m(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u16_m + #define svand_n_u16_m(pg, op1, op2) simde_svand_n_u16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svand_n_u16_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u16_x(pg, op1, op2); + #else + return simde_svand_u16_x(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u16_x + #define svand_n_u16_x(pg, op1, op2) simde_svand_n_u16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svand_u32_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u32_z(pg, op1, op2); + #else + return simde_svreinterpret_u32_s32(simde_svand_s32_z(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u32_z + #define svand_u32_z(pg, op1, op2) simde_svand_u32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svand_u32_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u32_m(pg, op1, op2); + #else + return simde_svreinterpret_u32_s32(simde_svand_s32_m(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u32_m + #define svand_u32_m(pg, op1, op2) simde_svand_u32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svand_u32_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u32_x(pg, op1, op2); + #else + return 
simde_svreinterpret_u32_s32(simde_svand_s32_x(pg, simde_svreinterpret_s32_u32(op1), simde_svreinterpret_s32_u32(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u32_x + #define svand_u32_x(pg, op1, op2) simde_svand_u32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svand_n_u32_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u32_z(pg, op1, op2); + #else + return simde_svand_u32_z(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u32_z + #define svand_n_u32_z(pg, op1, op2) simde_svand_n_u32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svand_n_u32_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u32_m(pg, op1, op2); + #else + return simde_svand_u32_m(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u32_m + #define svand_n_u32_m(pg, op1, op2) simde_svand_n_u32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svand_n_u32_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u32_x(pg, op1, op2); + #else + return simde_svand_u32_x(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u32_x + #define svand_n_u32_x(pg, op1, op2) simde_svand_n_u32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svand_u64_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u64_z(pg, op1, op2); + #else + return simde_svreinterpret_u64_s64(simde_svand_s64_z(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u64_z + #define svand_u64_z(pg, op1, op2) simde_svand_u64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svand_u64_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u64_m(pg, op1, op2); + #else + return simde_svreinterpret_u64_s64(simde_svand_s64_m(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u64_m + #define svand_u64_m(pg, op1, op2) simde_svand_u64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svand_u64_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_u64_x(pg, op1, op2); + #else + return simde_svreinterpret_u64_s64(simde_svand_s64_x(pg, simde_svreinterpret_s64_u64(op1), simde_svreinterpret_s64_u64(op2))); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_u64_x + #define svand_u64_x(pg, op1, op2) simde_svand_u64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svand_n_u64_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svand_n_u64_z(pg, op1, op2); + #else + return simde_svand_u64_z(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svand_n_u64_z + #define svand_n_u64_x(pg, op1, op2) simde_svand_n_u64_x(pg, op1, op2) 
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_svuint64_t
+simde_svand_n_u64_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) {
+  #if defined(SIMDE_ARM_SVE_NATIVE)
+    return svand_n_u64_m(pg, op1, op2);
+  #else
+    return simde_svand_u64_m(pg, op1, simde_svdup_n_u64(op2));
+  #endif
+}
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+  #undef simde_svand_n_u64_m
+  #define svand_n_u64_m(pg, op1, op2) simde_svand_n_u64_m(pg, op1, op2)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_svuint64_t
+simde_svand_n_u64_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) {
+  #if defined(SIMDE_ARM_SVE_NATIVE)
+    return svand_n_u64_x(pg, op1, op2);
+  #else
+    return simde_svand_u64_x(pg, op1, simde_svdup_n_u64(op2));
+  #endif
+}
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+  #undef simde_svand_n_u64_x
+  #define svand_n_u64_x(pg, op1, op2) simde_svand_n_u64_x(pg, op1, op2)
+#endif
+
+#if defined(__cplusplus)
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svand_s8_z (pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svand_s16_z(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svand_s32_z(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svand_s64_z(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svand_u8_z (pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svand_u16_z(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svand_u32_z(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svand_u64_z(pg, op1, op2); }
+
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svand_s8_m (pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svand_s16_m(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svand_s32_m(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svand_s64_m(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svand_u8_m (pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svand_u16_m(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svand_u32_m(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svand_u64_m(pg, op1, op2); }
+
+  
SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svand_s8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svand_s16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svand_s32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svand_s64_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svand_u8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svand_u16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svand_u32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svand_u64_x(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svand_n_s8_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svand_n_s16_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svand_n_s32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svand_n_s64_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svand_n_u8_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svand_n_u16_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svand_n_u32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svand_n_u64_z(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svand_n_s8_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svand_n_s16_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svand_n_s32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svand_n_s64_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svand_n_u8_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svand_n_u16_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES 
simde_svuint32_t simde_svand_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svand_n_u32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svand_n_u64_m(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svand_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svand_n_s8_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svand_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svand_n_s16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svand_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svand_n_s32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svand_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svand_n_s64_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svand_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svand_n_u8_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svand_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svand_n_u16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svand_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svand_n_u32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svand_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svand_n_u64_x(pg, op1, op2); } +#elif defined(SIMDE_GENERIC_) + #define simde_svand_z(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svand_s8_z, \ + simde_svint16_t: simde_svand_s16_z, \ + simde_svint32_t: simde_svand_s32_z, \ + simde_svint64_t: simde_svand_s64_z, \ + simde_svuint8_t: simde_svand_u8_z, \ + simde_svuint16_t: simde_svand_u16_z, \ + simde_svuint32_t: simde_svand_u32_z, \ + simde_svuint64_t: simde_svand_u64_z, \ + int8_t: simde_svand_n_s8_z, \ + int16_t: simde_svand_n_s16_z, \ + int32_t: simde_svand_n_s32_z, \ + int64_t: simde_svand_n_s64_z, \ + uint8_t: simde_svand_n_u8_z, \ + uint16_t: simde_svand_n_u16_z, \ + uint32_t: simde_svand_n_u32_z, \ + uint64_t: simde_svand_n_u64_z)((pg), (op1), (op2))) + + #define simde_svand_m(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svand_s8_m, \ + simde_svint16_t: simde_svand_s16_m, \ + simde_svint32_t: simde_svand_s32_m, \ + simde_svint64_t: simde_svand_s64_m, \ + simde_svuint8_t: simde_svand_u8_m, \ + simde_svuint16_t: simde_svand_u16_m, \ + simde_svuint32_t: simde_svand_u32_m, \ + simde_svuint64_t: simde_svand_u64_m, \ + int8_t: simde_svand_n_s8_m, \ + int16_t: simde_svand_n_s16_m, \ + int32_t: simde_svand_n_s32_m, \ + int64_t: simde_svand_n_s64_m, \ + uint8_t: simde_svand_n_u8_m, \ + uint16_t: simde_svand_n_u16_m, \ + uint32_t: simde_svand_n_u32_m, \ + uint64_t: simde_svand_n_u64_m)((pg), (op1), (op2))) + + #define simde_svand_x(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svand_s8_x, \ + simde_svint16_t: simde_svand_s16_x, \ + simde_svint32_t: simde_svand_s32_x, \ + simde_svint64_t: simde_svand_s64_x, \ + simde_svuint8_t: simde_svand_u8_x, \ + simde_svuint16_t: simde_svand_u16_x, \ + simde_svuint32_t: simde_svand_u32_x, \ + simde_svuint64_t: simde_svand_u64_x, \ + int8_t: simde_svand_n_s8_x, \ + int16_t: simde_svand_n_s16_x, \ + int32_t: simde_svand_n_s32_x, \ + int64_t: simde_svand_n_s64_x, \ + uint8_t: simde_svand_n_u8_x, \ + uint16_t: simde_svand_n_u16_x, \ + uint32_t: 
simde_svand_n_u32_x, \ + uint64_t: simde_svand_n_u64_x)((pg), (op1), (op2))) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svand_x + #undef svand_z + #undef svand_m + #define svand_x(pg, op1, op2) simde_svand_x((pg), (op1), (op2)) + #define svand_z(pg, op1, op2) simde_svand_z((pg), (op1), (op2)) + #define svand_m(pg, op1, op2) simde_svand_m((pg), (op1), (op2)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_AND_H */ +/* :: End simde/arm/sve/and.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/arm/sve/cmplt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_CMPLT_H) +#define SIMDE_ARM_SVE_CMPLT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_s8(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_s8(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask64(_mm512_mask_cmplt_epi8_mask(simde_svbool_to_mmask64(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask32(_mm256_mask_cmplt_epi8_mask(simde_svbool_to_mmask32(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_i8 = vandq_s8(pg.neon_i8, vreinterpretq_s8_u8(vcltq_s8(op1.neon, op2.neon))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], _mm_cmplt_epi8(op1.m128i[i], op2.m128i[i])); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b8 = vec_and(pg.altivec_b8, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b8 = pg.altivec_b8 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_i8x16_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_i8 = pg.values_i8 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i8), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i8) / sizeof(r.values_i8[0])) ; i++) { + r.values_i8[i] = pg.values_i8[i] & ((op1.values[i] < op2.values[i]) ? 
~INT8_C(0) : INT8_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_s8 + #define svcmplt_s8(pg, op1, op2) simde_svcmplt_s8(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_s16(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_s16(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask32(_mm512_mask_cmplt_epi16_mask(simde_svbool_to_mmask32(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask16(_mm256_mask_cmplt_epi16_mask(simde_svbool_to_mmask16(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_i16 = vandq_s16(pg.neon_i16, vreinterpretq_s16_u16(vcltq_s16(op1.neon, op2.neon))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], _mm_cmplt_epi16(op1.m128i[i], op2.m128i[i])); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b16 = vec_and(pg.altivec_b16, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b16 = pg.altivec_b16 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_i16x8_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_i16 = pg.values_i16 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i16), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i16) / sizeof(r.values_i16[0])) ; i++) { + r.values_i16[i] = pg.values_i16[i] & ((op1.values[i] < op2.values[i]) ? 
~INT16_C(0) : INT16_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_s16 + #define svcmplt_s16(pg, op1, op2) simde_svcmplt_s16(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_s32(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_s32(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask16(_mm512_mask_cmplt_epi32_mask(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask8(_mm256_mask_cmplt_epi32_mask(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_i32 = vandq_s32(pg.neon_i32, vreinterpretq_s32_u32(vcltq_s32(op1.neon, op2.neon))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_and_si128(pg.m128i[i], _mm_cmplt_epi32(op1.m128i[i], op2.m128i[i])); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b32 = vec_and(pg.altivec_b32, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b32 = pg.altivec_b32 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_i32x4_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_i32 = pg.values_i32 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i32), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i32) / sizeof(r.values_i32[0])) ; i++) { + r.values_i32[i] = pg.values_i32[i] & ((op1.values[i] < op2.values[i]) ? 
~INT32_C(0) : INT32_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_s32 + #define svcmplt_s32(pg, op1, op2) simde_svcmplt_s32(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_s64(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_s64(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask8(_mm512_mask_cmplt_epi64_mask(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask4(_mm256_mask_cmplt_epi64_mask(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r.neon_i64 = vandq_s64(pg.neon_i64, vreinterpretq_s64_u64(vcltq_s64(op1.neon, op2.neon))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r.altivec_b64 = vec_and(pg.altivec_b64, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b64 = pg.altivec_b64 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_WASM_TODO) + r.v128 = wasm_v128_and(pg.v128, wasm_i64x2_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_i64 = pg.values_i64 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i64), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i64) / sizeof(r.values_i64[0])) ; i++) { + r.values_i64[i] = pg.values_i64[i] & ((op1.values[i] < op2.values[i]) ? 
~INT64_C(0) : INT64_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_s64 + #define svcmplt_s64(pg, op1, op2) simde_svcmplt_s64(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_u8(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_u8(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask64(_mm512_mask_cmplt_epu8_mask(simde_svbool_to_mmask64(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask32(_mm256_mask_cmplt_epu8_mask(simde_svbool_to_mmask32(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_u8 = vandq_u8(pg.neon_u8, vcltq_u8(op1.neon, op2.neon)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b8 = vec_and(pg.altivec_b8, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b8 = pg.altivec_b8 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_u8x16_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_u8 = pg.values_u8 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u8), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u8) / sizeof(r.values_u8[0])) ; i++) { + r.values_u8[i] = pg.values_u8[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT8_C(0) : UINT8_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_u8 + #define svcmplt_u8(pg, op1, op2) simde_svcmplt_u8(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_u16(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_u16(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask32(_mm512_mask_cmplt_epu16_mask(simde_svbool_to_mmask32(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask16(_mm256_mask_cmplt_epu16_mask(simde_svbool_to_mmask16(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_u16 = vandq_u16(pg.neon_u16, vcltq_u16(op1.neon, op2.neon)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b16 = vec_and(pg.altivec_b16, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b16 = pg.altivec_b16 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_u16x8_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_u16 = pg.values_u16 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u16), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u16) / sizeof(r.values_u16[0])) ; i++) { + r.values_u16[i] = pg.values_u16[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT16_C(0) : UINT16_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_u16 + #define svcmplt_u16(pg, op1, op2) simde_svcmplt_u16(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_u32(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_u32(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask16(_mm512_mask_cmplt_epu32_mask(simde_svbool_to_mmask16(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask8(_mm256_mask_cmplt_epu32_mask(simde_svbool_to_mmask8(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_u32 = vandq_u32(pg.neon_u32, vcltq_u32(op1.neon, op2.neon)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b32 = vec_and(pg.altivec_b32, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b32 = pg.altivec_b32 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_u32x4_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_u32 = pg.values_u32 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u32), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u32) / sizeof(r.values_u32[0])) ; i++) { + r.values_u32[i] = pg.values_u32[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT32_C(0) : UINT32_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_u32 + #define svcmplt_u32(pg, op1, op2) simde_svcmplt_u32(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_u64(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_u64(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask8(_mm512_mask_cmplt_epu64_mask(simde_svbool_to_mmask8(pg), op1.m512i, op2.m512i)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask4(_mm256_mask_cmplt_epu64_mask(simde_svbool_to_mmask4(pg), op1.m256i[0], op2.m256i[0])); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r.neon_u64 = vandq_u64(pg.neon_u64, vcltq_u64(op1.neon, op2.neon)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r.altivec_b64 = vec_and(pg.altivec_b64, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b64 = pg.altivec_b64 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_WASM_TODO) + r.v128 = wasm_v128_and(pg.v128, wasm_u64x2_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_u64 = pg.values_u64 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_u64), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_u64) / sizeof(r.values_u64[0])) ; i++) { + r.values_u64[i] = pg.values_u64[i] & ((op1.values[i] < op2.values[i]) ? 
~UINT64_C(0) : UINT64_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_u64 + #define svcmplt_u64(pg, op1, op2) simde_svcmplt_u64(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_f32(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_f32(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask16(_mm512_mask_cmp_ps_mask(simde_svbool_to_mmask16(pg), op1.m512, op2.m512, _CMP_LT_OQ)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask8(_mm256_mask_cmp_ps_mask(simde_svbool_to_mmask8(pg), op1.m256[0], op2.m256[0], _CMP_LT_OQ)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon_u32 = vandq_u32(pg.neon_u32, vcltq_f32(op1.neon, op2.neon)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(pg.m128i[i]), _mm_cmplt_ps(op1.m128[i], op2.m128[i]))); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec_b32 = vec_and(pg.altivec_b32, vec_cmplt(op1.altivec, op2.altivec)); + #elif defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r.altivec_b32 = pg.altivec_b32 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_v128_and(pg.v128, wasm_f32x4_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_i32 = pg.values_i32 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i32), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i32) / sizeof(r.values_i32[0])) ; i++) { + r.values_i32[i] = pg.values_i32[i] & ((op1.values[i] < op2.values[i]) ? 
~INT32_C(0) : INT32_C(0)); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svcmplt_f32 + #define svcmplt_f32(pg, op1, op2) simde_svcmplt_f32(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svbool_t +simde_svcmplt_f64(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svcmplt_f64(pg, op1, op2); + #else + simde_svbool_t r; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask8(_mm512_mask_cmp_pd_mask(simde_svbool_to_mmask8(pg), op1.m512d, op2.m512d, _CMP_LT_OQ)); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + r = simde_svbool_from_mmask4(_mm256_mask_cmp_pd_mask(simde_svbool_to_mmask4(pg), op1.m256d[0], op2.m256d[0], _CMP_LT_OQ)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r.neon_u64 = vandq_u64(pg.neon_u64, vcltq_f64(op1.neon, op2.neon)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_castpd_si128(_mm_and_pd(_mm_castsi128_pd(pg.m128i[i]), _mm_cmplt_pd(op1.m128d[i], op2.m128d[i]))); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec_b64 = pg.altivec_b64 & vec_cmplt(op1.altivec, op2.altivec); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_WASM_TODO) + r.v128 = wasm_v128_and(pg.v128, wasm_f64x2_lt(op1.v128, op2.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values_i64 = pg.values_i64 & HEDLEY_REINTERPRET_CAST(__typeof__(r.values_i64), op1.values < op2.values); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values_i64) / sizeof(r.values_i64[0])) ; i++) { + r.values_i64[i] = pg.values_i64[i] & ((op1.values[i] < op2.values[i]) ? 
~INT64_C(0) : INT64_C(0));
+      }
+    #endif
+
+    return r;
+  #endif
+}
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+  #undef simde_svcmplt_f64
+  #define svcmplt_f64(pg, op1, op2) simde_svcmplt_f64(pg, op1, op2)
+#endif
+
+#if defined(__cplusplus)
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svcmplt_s8(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svcmplt_s16(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svcmplt_s32(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svcmplt_s64(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svcmplt_u8(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svcmplt_u16(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svcmplt_u32(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svcmplt_u64(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svcmplt_f32(pg, op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svbool_t simde_svcmplt(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svcmplt_f64(pg, op1, op2); }
+
+  #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint8_t op1, svint8_t op2) { return svcmplt_s8(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint16_t op1, svint16_t op2) { return svcmplt_s16(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint32_t op1, svint32_t op2) { return svcmplt_s32(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svint64_t op1, svint64_t op2) { return svcmplt_s64(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint8_t op1, svuint8_t op2) { return svcmplt_u8(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint16_t op1, svuint16_t op2) { return svcmplt_u16(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint32_t op1, svuint32_t op2) { return svcmplt_u32(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svuint64_t op1, svuint64_t op2) { return svcmplt_u64(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svfloat32_t op1, svfloat32_t op2) { return svcmplt_f32(pg, op1, op2); }
+    SIMDE_FUNCTION_ATTRIBUTES svbool_t svcmplt(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { return svcmplt_f64(pg, op1, op2); }
+  #endif
+#elif defined(SIMDE_GENERIC_)
+  #define simde_svcmplt(pg, op1, op2) \
+    (SIMDE_GENERIC_((op1), \
+      simde_svint8_t: simde_svcmplt_s8, \
+      simde_svint16_t: simde_svcmplt_s16, \
+      simde_svint32_t: simde_svcmplt_s32, \
+      simde_svint64_t: simde_svcmplt_s64, \
+      simde_svuint8_t: simde_svcmplt_u8, \
+      simde_svuint16_t: simde_svcmplt_u16, \
+      simde_svuint32_t: simde_svcmplt_u32, \
+      simde_svuint64_t: simde_svcmplt_u64, \
+      simde_svfloat32_t: simde_svcmplt_f32, \
+      simde_svfloat64_t: simde_svcmplt_f64)((pg), (op1), (op2)))
+
+  #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+    #define svcmplt(pg, op1, op2) \
+      (SIMDE_GENERIC_((op1), \
+        svint8_t: svcmplt_s8, \
+        svint16_t: svcmplt_s16, \
+        svint32_t: svcmplt_s32, \
+        svint64_t: svcmplt_s64, \
+        svuint8_t: svcmplt_u8, \
+        svuint16_t: svcmplt_u16, \
+        svuint32_t: svcmplt_u32, \
+        svuint64_t: svcmplt_u64, \
+        svfloat32_t: svcmplt_f32, \
+        svfloat64_t: svcmplt_f64)((pg), (op1), (op2)))
+  #endif
+#endif
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+  #undef svcmplt
+  #define svcmplt(pg, op1, op2) simde_svcmplt((pg), (op1), (op2))
+#endif
+
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* SIMDE_ARM_SVE_CMPLT_H */
+/* :: End simde/arm/sve/cmplt.h :: */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/arm/sve/qadd.h :: */
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_QADD_H) +#define SIMDE_ARM_SVE_QADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svqadd_s8(simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_s8(op1, op2); + #else + simde_svint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_s8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_adds_epi8(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_adds_epi8(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_adds_epi8(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_adds(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = + vec_packs( + vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), + vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_add_sat(op1.v128, op2.v128); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_i8(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_s8 + #define svqadd_s8(op1, op2) simde_svqadd_s8(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svqadd_n_s8(simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_s8(op1, op2); + #else + return simde_svqadd_s8(op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_s8 + #define svqadd_n_s8(op1, op2) simde_svqadd_n_s8(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svqadd_s16(simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_s16(op1, op2); + #else + simde_svint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_s16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_adds_epi16(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_adds_epi16(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_adds_epi16(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_adds(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = + vec_packs( + vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), + vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) + ); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_add_sat(op1.v128, op2.v128); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_i16(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_s16 + #define svqadd_s16(op1, op2) simde_svqadd_s16(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svqadd_n_s16(simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_s16(op1, op2); + #else + return simde_svqadd_s16(op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_s16 + #define svqadd_n_s16(op1, op2) simde_svqadd_n_s16(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svqadd_s32(simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_s32(op1, op2); + #else + simde_svint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_s32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm512_cvtsepi64_epi32(_mm512_add_epi64(_mm512_cvtepi32_epi64(op1.m256i[i]), _mm512_cvtepi32_epi64(op2.m256i[i]))); + } + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm256_cvtsepi64_epi32(_mm256_add_epi64(_mm256_cvtepi32_epi64(op1.m128i[i]), _mm256_cvtepi32_epi64(op2.m128i[i]))); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_adds(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = + vec_packs( + vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), + vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) + ); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_i32(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_s32 + #define svqadd_s32(op1, op2) simde_svqadd_s32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svqadd_n_s32(simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_s32(op1, op2); + #else + return simde_svqadd_s32(op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_s32 + #define svqadd_n_s32(op1, op2) simde_svqadd_n_s32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svqadd_s64(simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_s64(op1, op2); + #else + simde_svint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_s64(op1.neon, op2.neon); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_i64(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_s64 + #define svqadd_s64(op1, op2) simde_svqadd_s64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svqadd_n_s64(simde_svint64_t op1, int64_t op2) { + #if 
defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_s64(op1, op2); + #else + return simde_svqadd_s64(op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_s64 + #define svqadd_n_s64(op1, op2) simde_svqadd_n_s64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svqadd_u8(simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_u8(op1, op2); + #else + simde_svuint8_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_u8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_adds_epu8(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_adds_epu8(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_adds_epu8(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_adds(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = + vec_packs( + vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), + vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_u8x16_add_sat(op1.v128, op2.v128); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_u8(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_u8 + #define svqadd_u8(op1, op2) simde_svqadd_u8(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svqadd_n_u8(simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_u8(op1, op2); + #else + return simde_svqadd_u8(op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_u8 + #define svqadd_n_u8(op1, op2) simde_svqadd_n_u8(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svqadd_u16(simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_u16(op1, op2); + #else + simde_svuint16_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_u16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_adds_epu16(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_adds_epu16(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_adds_epu16(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_adds(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = + vec_packs( + vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), + vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) + ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_u16x8_add_sat(op1.v128, op2.v128); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; 
i++) { + r.values[i] = simde_math_adds_u16(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_u16 + #define svqadd_u16(op1, op2) simde_svqadd_u16(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svqadd_n_u16(simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_u16(op1, op2); + #else + return simde_svqadd_u16(op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_u16 + #define svqadd_n_u16(op1, op2) simde_svqadd_n_u16(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svqadd_u32(simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_u32(op1, op2); + #else + simde_svuint32_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_u32(op1.neon, op2.neon); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_adds(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = + vec_packs( + vec_unpackh(op1.altivec) + vec_unpackh(op2.altivec), + vec_unpackl(op1.altivec) + vec_unpackl(op2.altivec) + ); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_u32(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_u32 + #define svqadd_u32(op1, op2) simde_svqadd_u32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svqadd_n_u32(simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_u32(op1, op2); + #else + return simde_svqadd_u32(op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_u32 + #define svqadd_n_u32(op1, op2) simde_svqadd_n_u32(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svqadd_u64(simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_u64(op1, op2); + #else + simde_svuint64_t r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vqaddq_u64(op1.neon, op2.neon); + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = simde_math_adds_u64(op1.values[i], op2.values[i]); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_u64 + #define svqadd_u64(op1, op2) simde_svqadd_u64(op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svqadd_n_u64(simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svqadd_n_u64(op1, op2); + #else + return simde_svqadd_u64(op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svqadd_n_u64 + #define svqadd_n_u64(op1, op2) simde_svqadd_n_u64(op1, op2) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svqadd( simde_svint8_t op1, simde_svint8_t op2) { return simde_svqadd_s8 (op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svqadd( simde_svint16_t op1, simde_svint16_t op2) { return simde_svqadd_s16 (op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svqadd( simde_svint32_t op1, simde_svint32_t op2) { return simde_svqadd_s32 (op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t 
simde_svqadd( simde_svint64_t op1, simde_svint64_t op2) { return simde_svqadd_s64 (op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svqadd( simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svqadd_u8 (op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svqadd( simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svqadd_u16 (op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svqadd( simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svqadd_u32 (op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svqadd( simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svqadd_u64 (op1, op2); }
+
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svqadd( simde_svint8_t op1, int8_t op2) { return simde_svqadd_n_s8 (op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svqadd( simde_svint16_t op1, int16_t op2) { return simde_svqadd_n_s16(op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svqadd( simde_svint32_t op1, int32_t op2) { return simde_svqadd_n_s32(op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svqadd( simde_svint64_t op1, int64_t op2) { return simde_svqadd_n_s64(op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svqadd( simde_svuint8_t op1, uint8_t op2) { return simde_svqadd_n_u8 (op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svqadd( simde_svuint16_t op1, uint16_t op2) { return simde_svqadd_n_u16(op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svqadd( simde_svuint32_t op1, uint32_t op2) { return simde_svqadd_n_u32(op1, op2); }
+  SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svqadd( simde_svuint64_t op1, uint64_t op2) { return simde_svqadd_n_u64(op1, op2); }
+#elif defined(SIMDE_GENERIC_)
+  #define simde_svqadd(op1, op2) \
+    (SIMDE_GENERIC_((op2), \
+      simde_svint8_t: simde_svqadd_s8, \
+      simde_svint16_t: simde_svqadd_s16, \
+      simde_svint32_t: simde_svqadd_s32, \
+      simde_svint64_t: simde_svqadd_s64, \
+      simde_svuint8_t: simde_svqadd_u8, \
+      simde_svuint16_t: simde_svqadd_u16, \
+      simde_svuint32_t: simde_svqadd_u32, \
+      simde_svuint64_t: simde_svqadd_u64, \
+      int8_t: simde_svqadd_n_s8, \
+      int16_t: simde_svqadd_n_s16, \
+      int32_t: simde_svqadd_n_s32, \
+      int64_t: simde_svqadd_n_s64, \
+      uint8_t: simde_svqadd_n_u8, \
+      uint16_t: simde_svqadd_n_u16, \
+      uint32_t: simde_svqadd_n_u32, \
+      uint64_t: simde_svqadd_n_u64)((op1), (op2)))
+#endif
+#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES)
+  #undef svqadd
+  #define svqadd(op1, op2) simde_svqadd((op1), (op2))
+#endif
+
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* SIMDE_ARM_SVE_QADD_H */
+/* :: End simde/arm/sve/qadd.h :: */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/arm/sve/sub.h :: */
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_ARM_SVE_SUB_H) +#define SIMDE_ARM_SVE_SUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsub_s8_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s8_x(pg, op1, op2); + #else + simde_svint8_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_s8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi8(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi8(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi8(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi8(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s8_x + #define svsub_s8_x(pg, op1, op2) simde_svsub_s8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsub_s8_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s8_z(pg, op1, op2); + #else + return simde_x_svsel_s8_z(pg, simde_svsub_s8_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s8_z + #define svsub_s8_z(pg, op1, op2) simde_svsub_s8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsub_s8_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s8_m(pg, op1, op2); + #else + return simde_svsel_s8(pg, 
simde_svsub_s8_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s8_m + #define svsub_s8_m(pg, op1, op2) simde_svsub_s8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsub_n_s8_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s8_x(pg, op1, op2); + #else + return simde_svsub_s8_x(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s8_x + #define svsub_n_s8_x(pg, op1, op2) simde_svsub_n_s8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsub_n_s8_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s8_z(pg, op1, op2); + #else + return simde_svsub_s8_z(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s8_z + #define svsub_n_s8_z(pg, op1, op2) simde_svsub_n_s8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint8_t +simde_svsub_n_s8_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s8_m(pg, op1, op2); + #else + return simde_svsub_s8_m(pg, op1, simde_svdup_n_s8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s8_m + #define svsub_n_s8_m(pg, op1, op2) simde_svsub_n_s8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsub_s16_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s16_x(pg, op1, op2); + #else + simde_svint16_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_s16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi16(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi16(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi16(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi16(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s16_x + #define svsub_s16_x(pg, op1, op2) simde_svsub_s16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsub_s16_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s16_z(pg, op1, op2); + #else + return simde_x_svsel_s16_z(pg, simde_svsub_s16_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + 
#undef simde_svsub_s16_z + #define svsub_s16_z(pg, op1, op2) simde_svsub_s16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsub_s16_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s16_m(pg, op1, op2); + #else + return simde_svsel_s16(pg, simde_svsub_s16_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s16_m + #define svsub_s16_m(pg, op1, op2) simde_svsub_s16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsub_n_s16_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s16_x(pg, op1, op2); + #else + return simde_svsub_s16_x(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s16_x + #define svsub_n_s16_x(pg, op1, op2) simde_svsub_n_s16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsub_n_s16_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s16_z(pg, op1, op2); + #else + return simde_svsub_s16_z(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s16_z + #define svsub_n_s16_z(pg, op1, op2) simde_svsub_n_s16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint16_t +simde_svsub_n_s16_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s16_m(pg, op1, op2); + #else + return simde_svsub_s16_m(pg, op1, simde_svdup_n_s16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s16_m + #define svsub_n_s16_m(pg, op1, op2) simde_svsub_n_s16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsub_s32_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s32_x(pg, op1, op2); + #else + simde_svint32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_s32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi32(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi32(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi32(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi32(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i32x4_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s32_x + #define svsub_s32_x(pg, op1, op2) 
simde_svsub_s32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsub_s32_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s32_z(pg, op1, op2); + #else + return simde_x_svsel_s32_z(pg, simde_svsub_s32_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s32_z + #define svsub_s32_z(pg, op1, op2) simde_svsub_s32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsub_s32_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s32_m(pg, op1, op2); + #else + return simde_svsel_s32(pg, simde_svsub_s32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s32_m + #define svsub_s32_m(pg, op1, op2) simde_svsub_s32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsub_n_s32_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s32_x(pg, op1, op2); + #else + return simde_svsub_s32_x(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s32_x + #define svsub_n_s32_x(pg, op1, op2) simde_svsub_n_s32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsub_n_s32_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s32_z(pg, op1, op2); + #else + return simde_svsub_s32_z(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s32_z + #define svsub_n_s32_z(pg, op1, op2) simde_svsub_n_s32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint32_t +simde_svsub_n_s32_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s32_m(pg, op1, op2); + #else + return simde_svsub_s32_m(pg, op1, simde_svdup_n_s32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s32_m + #define svsub_n_s32_m(pg, op1, op2) simde_svsub_n_s32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsub_s64_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s64_x(pg, op1, op2); + #else + simde_svint64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_s64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi64(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi64(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi64(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi64(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i64x2_sub(op1.v128, op2.v128); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s64_x + #define svsub_s64_x(pg, op1, op2) simde_svsub_s64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsub_s64_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s64_z(pg, op1, op2); + #else + return simde_x_svsel_s64_z(pg, simde_svsub_s64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s64_z + #define svsub_s64_z(pg, op1, op2) simde_svsub_s64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsub_s64_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_s64_m(pg, op1, op2); + #else + return simde_svsel_s64(pg, simde_svsub_s64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_s64_m + #define svsub_s64_m(pg, op1, op2) simde_svsub_s64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsub_n_s64_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s64_x(pg, op1, op2); + #else + return simde_svsub_s64_x(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s64_x + #define svsub_n_s64_x(pg, op1, op2) simde_svsub_n_s64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsub_n_s64_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s64_z(pg, op1, op2); + #else + return simde_svsub_s64_z(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s64_z + #define svsub_n_s64_z(pg, op1, op2) simde_svsub_n_s64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svint64_t +simde_svsub_n_s64_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_s64_m(pg, op1, op2); + #else + return simde_svsub_s64_m(pg, op1, simde_svdup_n_s64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_s64_m + #define svsub_n_s64_m(pg, op1, op2) simde_svsub_n_s64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsub_u8_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u8_x(pg, op1, op2); + #else + simde_svuint8_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_u8(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi8(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi8(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi8(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / 
sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi8(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i8x16_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u8_x + #define svsub_u8_x(pg, op1, op2) simde_svsub_u8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsub_u8_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u8_z(pg, op1, op2); + #else + return simde_x_svsel_u8_z(pg, simde_svsub_u8_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u8_z + #define svsub_u8_z(pg, op1, op2) simde_svsub_u8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsub_u8_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u8_m(pg, op1, op2); + #else + return simde_svsel_u8(pg, simde_svsub_u8_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u8_m + #define svsub_u8_m(pg, op1, op2) simde_svsub_u8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsub_n_u8_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u8_x(pg, op1, op2); + #else + return simde_svsub_u8_x(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u8_x + #define svsub_n_u8_x(pg, op1, op2) simde_svsub_n_u8_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsub_n_u8_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u8_z(pg, op1, op2); + #else + return simde_svsub_u8_z(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u8_z + #define svsub_n_u8_z(pg, op1, op2) simde_svsub_n_u8_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint8_t +simde_svsub_n_u8_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u8_m(pg, op1, op2); + #else + return simde_svsub_u8_m(pg, op1, simde_svdup_n_u8(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u8_m + #define svsub_n_u8_m(pg, op1, op2) simde_svsub_n_u8_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsub_u16_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u16_x(pg, op1, op2); + #else + simde_svuint16_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_u16(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi16(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = 
_mm256_sub_epi16(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi16(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi16(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i16x8_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u16_x + #define svsub_u16_x(pg, op1, op2) simde_svsub_u16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsub_u16_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u16_z(pg, op1, op2); + #else + return simde_x_svsel_u16_z(pg, simde_svsub_u16_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u16_z + #define svsub_u16_z(pg, op1, op2) simde_svsub_u16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsub_u16_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u16_m(pg, op1, op2); + #else + return simde_svsel_u16(pg, simde_svsub_u16_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u16_m + #define svsub_u16_m(pg, op1, op2) simde_svsub_u16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsub_n_u16_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u16_x(pg, op1, op2); + #else + return simde_svsub_u16_x(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u16_x + #define svsub_n_u16_x(pg, op1, op2) simde_svsub_n_u16_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsub_n_u16_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u16_z(pg, op1, op2); + #else + return simde_svsub_u16_z(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u16_z + #define svsub_n_u16_z(pg, op1, op2) simde_svsub_n_u16_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint16_t +simde_svsub_n_u16_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u16_m(pg, op1, op2); + #else + return simde_svsub_u16_m(pg, op1, simde_svdup_n_u16(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u16_m + #define svsub_n_u16_m(pg, op1, op2) simde_svsub_n_u16_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsub_u32_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u32_x(pg, 
op1, op2); + #else + simde_svuint32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_u32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi32(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi32(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi32(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi32(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i32x4_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u32_x + #define svsub_u32_x(pg, op1, op2) simde_svsub_u32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsub_u32_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u32_z(pg, op1, op2); + #else + return simde_x_svsel_u32_z(pg, simde_svsub_u32_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u32_z + #define svsub_u32_z(pg, op1, op2) simde_svsub_u32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsub_u32_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u32_m(pg, op1, op2); + #else + return simde_svsel_u32(pg, simde_svsub_u32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u32_m + #define svsub_u32_m(pg, op1, op2) simde_svsub_u32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsub_n_u32_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u32_x(pg, op1, op2); + #else + return simde_svsub_u32_x(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u32_x + #define svsub_n_u32_x(pg, op1, op2) simde_svsub_n_u32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsub_n_u32_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u32_z(pg, op1, op2); + #else + return simde_svsub_u32_z(pg, op1, simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u32_z + #define svsub_n_u32_z(pg, op1, op2) simde_svsub_n_u32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint32_t +simde_svsub_n_u32_m(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u32_m(pg, op1, op2); + #else + return simde_svsub_u32_m(pg, op1, 
simde_svdup_n_u32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u32_m + #define svsub_n_u32_m(pg, op1, op2) simde_svsub_n_u32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsub_u64_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u64_x(pg, op1, op2); + #else + simde_svuint64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_u64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512i = _mm512_sub_epi64(op1.m512i, op2.m512i); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256i[0] = _mm256_sub_epi64(op1.m256i[0], op2.m256i[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256i) / sizeof(r.m256i[0])) ; i++) { + r.m256i[i] = _mm256_sub_epi64(op1.m256i[i], op2.m256i[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128i) / sizeof(r.m128i[0])) ; i++) { + r.m128i[i] = _mm_sub_epi64(op1.m128i[i], op2.m128i[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_i64x2_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u64_x + #define svsub_u64_x(pg, op1, op2) simde_svsub_u64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsub_u64_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u64_z(pg, op1, op2); + #else + return simde_x_svsel_u64_z(pg, simde_svsub_u64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u64_z + #define svsub_u64_z(pg, op1, op2) simde_svsub_u64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsub_u64_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_u64_m(pg, op1, op2); + #else + return simde_svsel_u64(pg, simde_svsub_u64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_u64_m + #define svsub_u64_m(pg, op1, op2) simde_svsub_u64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsub_n_u64_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u64_x(pg, op1, op2); + #else + return simde_svsub_u64_x(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u64_x + #define svsub_n_u64_x(pg, op1, op2) simde_svsub_n_u64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsub_n_u64_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u64_z(pg, op1, op2); + #else + return simde_svsub_u64_z(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if 
defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u64_z + #define svsub_n_u64_z(pg, op1, op2) simde_svsub_n_u64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svuint64_t +simde_svsub_n_u64_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_u64_m(pg, op1, op2); + #else + return simde_svsub_u64_m(pg, op1, simde_svdup_n_u64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_u64_m + #define svsub_n_u64_m(pg, op1, op2) simde_svsub_n_u64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsub_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_f32_x(pg, op1, op2); + #else + simde_svfloat32_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r.neon = vsubq_f32(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512 = _mm512_sub_ps(op1.m512, op2.m512); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256[0] = _mm256_sub_ps(op1.m256[0], op2.m256[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256) / sizeof(r.m256[0])) ; i++) { + r.m256[i] = _mm256_sub_ps(op1.m256[i], op2.m256[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128) / sizeof(r.m128[0])) ; i++) { + r.m128[i] = _mm_sub_ps(op1.m128[i], op2.m128[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_f32x4_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_f32_x + #define svsub_f32_x(pg, op1, op2) simde_svsub_f32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsub_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_f32_z(pg, op1, op2); + #else + return simde_x_svsel_f32_z(pg, simde_svsub_f32_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_f32_z + #define svsub_f32_z(pg, op1, op2) simde_svsub_f32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsub_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_f32_m(pg, op1, op2); + #else + return simde_svsel_f32(pg, simde_svsub_f32_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_f32_m + #define svsub_f32_m(pg, op1, op2) simde_svsub_f32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsub_n_f32_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_f32_x(pg, op1, op2); + #else + return simde_svsub_f32_x(pg, op1, simde_svdup_n_f32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef 
simde_svsub_n_f32_x + #define svsub_n_f32_x(pg, op1, op2) simde_svsub_n_f32_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsub_n_f32_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_f32_z(pg, op1, op2); + #else + return simde_svsub_f32_z(pg, op1, simde_svdup_n_f32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_f32_z + #define svsub_n_f32_z(pg, op1, op2) simde_svsub_n_f32_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat32_t +simde_svsub_n_f32_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_f32_m(pg, op1, op2); + #else + return simde_svsub_f32_m(pg, op1, simde_svdup_n_f32(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_f32_m + #define svsub_n_f32_m(pg, op1, op2) simde_svsub_n_f32_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsub_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_f64_x(pg, op1, op2); + #else + simde_svfloat64_t r; + HEDLEY_STATIC_CAST(void, pg); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r.neon = vsubq_f64(op1.neon, op2.neon); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + r.m512d = _mm512_sub_pd(op1.m512d, op2.m512d); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r.m256d[0] = _mm256_sub_pd(op1.m256d[0], op2.m256d[0]); + #elif defined(SIMDE_X86_AVX2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m256d) / sizeof(r.m256d[0])) ; i++) { + r.m256d[i] = _mm256_sub_pd(op1.m256d[i], op2.m256d[i]); + } + #elif defined(SIMDE_X86_SSE2_NATIVE) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.m128d) / sizeof(r.m128d[0])) ; i++) { + r.m128d[i] = _mm_sub_pd(op1.m128d[i], op2.m128d[i]); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r.altivec = vec_sub(op1.altivec, op2.altivec); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r.altivec = op1.altivec - op2.altivec; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r.v128 = wasm_f64x2_sub(op1.v128, op2.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r.values = op1.values - op2.values; + #else + SIMDE_VECTORIZE + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, sizeof(r.values) / sizeof(r.values[0])) ; i++) { + r.values[i] = op1.values[i] - op2.values[i]; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_f64_x + #define svsub_f64_x(pg, op1, op2) simde_svsub_f64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsub_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_f64_z(pg, op1, op2); + #else + return simde_x_svsel_f64_z(pg, simde_svsub_f64_x(pg, op1, op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_f64_z + #define svsub_f64_z(pg, op1, op2) simde_svsub_f64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsub_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_f64_m(pg, op1, op2); + #else + return simde_svsel_f64(pg, simde_svsub_f64_x(pg, op1, op2), op1); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_f64_m + #define 
svsub_f64_m(pg, op1, op2) simde_svsub_f64_m(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsub_n_f64_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_f64_x(pg, op1, op2); + #else + return simde_svsub_f64_x(pg, op1, simde_svdup_n_f64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_f64_x + #define svsub_n_f64_x(pg, op1, op2) simde_svsub_n_f64_x(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsub_n_f64_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_f64_z(pg, op1, op2); + #else + return simde_svsub_f64_z(pg, op1, simde_svdup_n_f64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_f64_z + #define svsub_n_f64_z(pg, op1, op2) simde_svsub_n_f64_z(pg, op1, op2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_svfloat64_t +simde_svsub_n_f64_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { + #if defined(SIMDE_ARM_SVE_NATIVE) + return svsub_n_f64_m(pg, op1, op2); + #else + return simde_svsub_f64_m(pg, op1, simde_svdup_n_f64(op2)); + #endif +} +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef simde_svsub_n_f64_m + #define svsub_n_f64_m(pg, op1, op2) simde_svsub_n_f64_m(pg, op1, op2) +#endif + +#if defined(__cplusplus) + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_x(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsub_s8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_x(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsub_s16_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_x(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svsub_s32_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_x(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsub_s64_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_x(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsub_u8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_x(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsub_u16_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_x(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsub_u32_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_x(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsub_u64_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsub_f32_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsub_f64_x (pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_z(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsub_s8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_z(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsub_s16_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_z(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return 
simde_svsub_s32_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_z(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsub_s64_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_z(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsub_u8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_z(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsub_u16_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_z(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsub_u32_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_z(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsub_u64_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsub_f32_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsub_f64_z (pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_m(simde_svbool_t pg, simde_svint8_t op1, simde_svint8_t op2) { return simde_svsub_s8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_m(simde_svbool_t pg, simde_svint16_t op1, simde_svint16_t op2) { return simde_svsub_s16_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_m(simde_svbool_t pg, simde_svint32_t op1, simde_svint32_t op2) { return simde_svsub_s32_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_m(simde_svbool_t pg, simde_svint64_t op1, simde_svint64_t op2) { return simde_svsub_s64_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_m(simde_svbool_t pg, simde_svuint8_t op1, simde_svuint8_t op2) { return simde_svsub_u8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_m(simde_svbool_t pg, simde_svuint16_t op1, simde_svuint16_t op2) { return simde_svsub_u16_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_m(simde_svbool_t pg, simde_svuint32_t op1, simde_svuint32_t op2) { return simde_svsub_u32_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_m(simde_svbool_t pg, simde_svuint64_t op1, simde_svuint64_t op2) { return simde_svsub_u64_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_svfloat32_t op2) { return simde_svsub_f32_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_svfloat64_t op2) { return simde_svsub_f64_m (pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_x(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svsub_n_s8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_x(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svsub_n_s16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_x(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svsub_n_s32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_x(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svsub_n_s64_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t 
simde_svsub_x(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svsub_n_u8_x (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_x(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svsub_n_u16_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_x(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svsub_n_u32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_x(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svsub_n_u64_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_x(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svsub_n_f32_x(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_x(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svsub_n_f64_x(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_z(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svsub_n_s8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_z(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svsub_n_s16_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_z(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svsub_n_s32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_z(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svsub_n_s64_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_z(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svsub_n_u8_z (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_z(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svsub_n_u16_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_z(simde_svbool_t pg, simde_svuint32_t op1, uint32_t op2) { return simde_svsub_n_u32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_z(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svsub_n_u64_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_z(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svsub_n_f32_z(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_z(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svsub_n_f64_z(pg, op1, op2); } + + SIMDE_FUNCTION_ATTRIBUTES simde_svint8_t simde_svsub_m(simde_svbool_t pg, simde_svint8_t op1, int8_t op2) { return simde_svsub_n_s8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint16_t simde_svsub_m(simde_svbool_t pg, simde_svint16_t op1, int16_t op2) { return simde_svsub_n_s16_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint32_t simde_svsub_m(simde_svbool_t pg, simde_svint32_t op1, int32_t op2) { return simde_svsub_n_s32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svint64_t simde_svsub_m(simde_svbool_t pg, simde_svint64_t op1, int64_t op2) { return simde_svsub_n_s64_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint8_t simde_svsub_m(simde_svbool_t pg, simde_svuint8_t op1, uint8_t op2) { return simde_svsub_n_u8_m (pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint16_t simde_svsub_m(simde_svbool_t pg, simde_svuint16_t op1, uint16_t op2) { return simde_svsub_n_u16_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint32_t simde_svsub_m(simde_svbool_t pg, 
simde_svuint32_t op1, uint32_t op2) { return simde_svsub_n_u32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svuint64_t simde_svsub_m(simde_svbool_t pg, simde_svuint64_t op1, uint64_t op2) { return simde_svsub_n_u64_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat32_t simde_svsub_m(simde_svbool_t pg, simde_svfloat32_t op1, simde_float32 op2) { return simde_svsub_n_f32_m(pg, op1, op2); } + SIMDE_FUNCTION_ATTRIBUTES simde_svfloat64_t simde_svsub_m(simde_svbool_t pg, simde_svfloat64_t op1, simde_float64 op2) { return simde_svsub_n_f64_m(pg, op1, op2); } +#elif defined(SIMDE_GENERIC_) + #define simde_svsub_x(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svsub_s8_x, \ + simde_svint16_t: simde_svsub_s16_x, \ + simde_svint32_t: simde_svsub_s32_x, \ + simde_svint64_t: simde_svsub_s64_x, \ + simde_svuint8_t: simde_svsub_u8_x, \ + simde_svuint16_t: simde_svsub_u16_x, \ + simde_svuint32_t: simde_svsub_u32_x, \ + simde_svuint64_t: simde_svsub_u64_x, \ + simde_svfloat32_t: simde_svsub_f32_x, \ + simde_svfloat64_t: simde_svsub_f64_x, \ + int8_t: simde_svsub_n_s8_x, \ + int16_t: simde_svsub_n_s16_x, \ + int32_t: simde_svsub_n_s32_x, \ + int64_t: simde_svsub_n_s64_x, \ + uint8_t: simde_svsub_n_u8_x, \ + uint16_t: simde_svsub_n_u16_x, \ + uint32_t: simde_svsub_n_u32_x, \ + uint64_t: simde_svsub_n_u64_x, \ + simde_float32: simde_svsub_n_f32_x, \ + simde_float64: simde_svsub_n_f64_x)((pg), (op1), (op2))) + + #define simde_svsub_z(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svsub_s8_z, \ + simde_svint16_t: simde_svsub_s16_z, \ + simde_svint32_t: simde_svsub_s32_z, \ + simde_svint64_t: simde_svsub_s64_z, \ + simde_svuint8_t: simde_svsub_u8_z, \ + simde_svuint16_t: simde_svsub_u16_z, \ + simde_svuint32_t: simde_svsub_u32_z, \ + simde_svuint64_t: simde_svsub_u64_z, \ + simde_svfloat32_t: simde_svsub_f32_z, \ + simde_svfloat64_t: simde_svsub_f64_z, \ + int8_t: simde_svsub_n_s8_z, \ + int16_t: simde_svsub_n_s16_z, \ + int32_t: simde_svsub_n_s32_z, \ + int64_t: simde_svsub_n_s64_z, \ + uint8_t: simde_svsub_n_u8_z, \ + uint16_t: simde_svsub_n_u16_z, \ + uint32_t: simde_svsub_n_u32_z, \ + uint64_t: simde_svsub_n_u64_z, \ + simde_float32: simde_svsub_n_f32_z, \ + simde_float64: simde_svsub_n_f64_z)((pg), (op1), (op2))) + + #define simde_svsub_m(pg, op1, op2) \ + (SIMDE_GENERIC_((op2), \ + simde_svint8_t: simde_svsub_s8_m, \ + simde_svint16_t: simde_svsub_s16_m, \ + simde_svint32_t: simde_svsub_s32_m, \ + simde_svint64_t: simde_svsub_s64_m, \ + simde_svuint8_t: simde_svsub_u8_m, \ + simde_svuint16_t: simde_svsub_u16_m, \ + simde_svuint32_t: simde_svsub_u32_m, \ + simde_svuint64_t: simde_svsub_u64_m, \ + simde_svfloat32_t: simde_svsub_f32_m, \ + simde_svfloat64_t: simde_svsub_f64_m, \ + int8_t: simde_svsub_n_s8_m, \ + int16_t: simde_svsub_n_s16_m, \ + int32_t: simde_svsub_n_s32_m, \ + int64_t: simde_svsub_n_s64_m, \ + uint8_t: simde_svsub_n_u8_m, \ + uint16_t: simde_svsub_n_u16_m, \ + uint32_t: simde_svsub_n_u32_m, \ + uint64_t: simde_svsub_n_u64_m, \ + simde_float32: simde_svsub_n_f32_m, \ + simde_float64: simde_svsub_n_f64_m)((pg), (op1), (op2))) +#endif +#if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + #undef svsub_x + #undef svsub_z + #undef svsub_m + #undef svsub_n_x + #undef svsub_n_z + #undef svsub_n_m + #define svsub_x(pg, op1, op2) simde_svsub_x((pg), (op1), (op2)) + #define svsub_z(pg, op1, op2) simde_svsub_z((pg), (op1), (op2)) + #define svsub_m(pg, op1, op2) simde_svsub_m((pg), (op1), (op2)) + #define svsub_n_x(pg, op1, op2) 
simde_svsub_n_x((pg), (op1), (op2)) + #define svsub_n_z(pg, op1, op2) simde_svsub_n_z((pg), (op1), (op2)) + #define svsub_n_m(pg, op1, op2) simde_svsub_n_m((pg), (op1), (op2)) +#endif + +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_SVE_SUB_H */ +/* :: End simde/arm/sve/sub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#endif /* SIMDE_ARM_SVE_H */ +/* :: End simde/arm/sve.h :: */ diff --git a/include/simde/mips/msa.h b/include/simde/mips/msa.h new file mode 100644 index 00000000..b5b23e44 --- /dev/null +++ b/include/simde/mips/msa.h @@ -0,0 +1,10738 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_H) +#define SIMDE_MIPS_MSA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_TYPES_H) +#define SIMDE_MIPS_MSA_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
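+ *
+ * For example (editor's illustration): a build that only wants the
+ * NaN-related shortcut could be compiled with
+ *
+ *   cc -O2 -DSIMDE_FAST_NANS -c foo.c
+ *
+ * or place `#define SIMDE_FAST_NANS` before including any SIMDe header,
+ * while -DSIMDE_NO_FAST_MATH keeps the strict defaults even when
+ * -ffast-math is in effect.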
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
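+ *
+ * For example (editor's note): when an operation has to be emulated
+ * lane by lane, a lane the native instruction would have left untouched
+ * can still raise FE_INVALID or FE_INEXACT; reproducing the exact
+ * exception behaviour costs extra work that defining
+ * SIMDE_FAST_EXCEPTIONS allows SIMDe to skip.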
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
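+ *
+ * A typical use (editor's sketch) pairs it with one of the
+ * SIMDE_DIAGNOSTIC_DISABLE_* macros, e.g. the float-equal one defined
+ * further down in this header:
+ *
+ *   r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);
+ *
+ * so that `a == b` is evaluated with just that one warning suppressed.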
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
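+ *
+ * Whichever branch wins simply defines SIMDE_ENDIAN_ORDER; consumers
+ * then compare it against the constants above, e.g. (editor's note)
+ *
+ *   #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
+ *
+ * which is exactly how the simde_endian_bswap64_le/_be helpers a bit
+ * further down choose between a byte swap and a no-op.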
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
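+
+   In the meantime the types can be overridden from the build system;
+   for example (editor's illustration) a toolchain whose preferred
+   32-bit floating point type is _Float32 could pass
+
+     -DSIMDE_FLOAT32_TYPE=_Float32 -DSIMDE_FLOAT64_TYPE=_Float64
+
+   and the SIMDE_FLOAT32_C()/SIMDE_FLOAT64_C() macros below then cast
+   their arguments to those types instead of relying on literal suffixes.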
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
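+
+   A freestanding build can also pre-define the wrappers itself, e.g.
+   (editor's sketch, my_memcpy being whatever the platform provides):
+
+     #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+
+   before including any SIMDe header; the fallbacks below are all
+   guarded by !defined(simde_memcpy) and friends, so they step aside.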
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include(<string.h>) + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include <string.h> + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + const char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUNCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1); + const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2); + for (size_t i = 0 ; i < n ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include(<fenv.h>) + #include <fenv.h> + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include <fenv.h> + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include(<stdlib.h>) + #include <stdlib.h> + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 +
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
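+ *
+ * For instance (editor's illustration): on an LP64 Linux target
+ * INT64_MAX == LONG_MAX, so SIMDE_BUILTIN_SUFFIX_64_ ends up as `l` and
+ * SIMDE_BUILTIN_64_(popcount) expands to __builtin_popcountl, while
+ * 64-bit Windows (LLP64) maps int64_t to long long and the same
+ * expression expands to __builtin_popcountll.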
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
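+ *
+ * Wrapping an affected expression, e.g. (editor's sketch)
+ *
+ *   r = SIMDE_BUG_IGNORE_SIGN_CONVERSION(a + b);
+ *
+ * evaluates it with -Wsign-conversion temporarily suppressed on the
+ * compilers matched below, and is a plain pass-through everywhere else.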
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_VECTOR_SUBSCRIPT) + #define SIMDE_MIPS_MSA_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name SIMDE_VECTOR(Vector_Size) +#else + #define SIMDE_MIPS_MSA_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name[(Vector_Size) / sizeof(Element_Type)] +#endif + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(int8_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v16i8_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(int16_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v8i16 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v8i16_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(int32_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v4i32 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v4i32_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(int64_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v2i64 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v2i64_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(uint8_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v16u8 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v16u8_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(uint16_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v8u16 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v8u16_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(uint32_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v4u32 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v4u32_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(uint64_t, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v2u64 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x2_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v2u64_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(simde_float32, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v4f32 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128 m128; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v4f32_private; + +typedef union { + SIMDE_MIPS_MSA_DECLARE_VECTOR(simde_float64, values, 16); + + #if defined(SIMDE_MIPS_MSA_NATIVE) + v2f64 msa; + #endif + + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128d m128d; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float64x2_t neon; + #endif + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif +} simde_v2f64_private; + +#if defined(SIMDE_MIPS_MSA_NATIVE) + typedef v16i8 simde_v16i8; + typedef v8i16 simde_v8i16; + typedef v4i32 simde_v4i32; + typedef v2i64 simde_v2i64; + typedef v16u8 simde_v16u8; + typedef v8u16 simde_v8u16; + typedef v4u32 simde_v4u32; + typedef v2u64 simde_v2u64; + typedef v4f32 simde_v4f32; + typedef v2f64 simde_v2f64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int8x16_t simde_v16i8; + typedef int16x8_t simde_v8i16; + typedef int32x4_t simde_v4i32; + typedef int64x2_t simde_v2i64; + typedef uint8x16_t simde_v16u8; + typedef uint16x8_t simde_v8u16; + typedef uint32x4_t simde_v4u32; + typedef uint64x2_t simde_v2u64; + typedef float32x4_t simde_v4f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde_v2f64; + #elif defined(SIMDE_VECTOR) + typedef double simde_v2f64 __attribute__((__vector_size__(16))); + #else + typedef simde_v2f64_private simde_v2f64; + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed char) simde_v16i8; + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed short) simde_v8i16; + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_v4i32; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) simde_v16u8; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) simde_v8u16; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) simde_v4u32; + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde_v4f32; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed long long) simde_v2i64; + typedef SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) simde_v2u64; + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde_v2f64; + #elif defined(SIMDE_VECTOR) + typedef int32_t simde_v2i64 __attribute__((__vector_size__(16))); + typedef int64_t simde_v2u64 __attribute__((__vector_size__(16))); + typedef double simde_v2f64 
__attribute__((__vector_size__(16))); + #else + typedef simde_v2i64_private simde_v2i64; + typedef simde_v2u64_private simde_v2u64; + typedef simde_v2f64_private simde_v2f64; + #endif +#elif defined(SIMDE_VECTOR) + typedef int8_t simde_v16i8 __attribute__((__vector_size__(16))); + typedef int16_t simde_v8i16 __attribute__((__vector_size__(16))); + typedef int32_t simde_v4i32 __attribute__((__vector_size__(16))); + typedef int64_t simde_v2i64 __attribute__((__vector_size__(16))); + typedef uint8_t simde_v16u8 __attribute__((__vector_size__(16))); + typedef uint16_t simde_v8u16 __attribute__((__vector_size__(16))); + typedef uint32_t simde_v4u32 __attribute__((__vector_size__(16))); + typedef uint64_t simde_v2u64 __attribute__((__vector_size__(16))); + typedef simde_float32 simde_v4f32 __attribute__((__vector_size__(16))); + typedef simde_float64 simde_v2f64 __attribute__((__vector_size__(16))); +#else + /* At this point, MSA support is unlikely to work well. The MSA + * API appears to rely on the ability to cast MSA types, and there is + * no function to cast them (like vreinterpret_* on NEON), so you are + * supposed to use C casts. The API isn't really usable without them; + * For example, there is no function to load floating point or + * unsigned integer values. + * + * For APIs like SSE and WASM, we typedef multiple MSA types to the + * same underlying type. This means casting will work as expected, + * but you won't be able to overload functions based on the MSA type. + * + * Otherwise, all we can really do is typedef to the private types. + * In C++ we could overload casts, but in C our options are more + * limited and I think we would need to rely on conversion functions + * as an extension. */ + #if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde_v16i8; + typedef __m128i simde_v8i16; + typedef __m128i simde_v4i32; + typedef __m128i simde_v2i64; + typedef __m128i simde_v16u8; + typedef __m128i simde_v8u16; + typedef __m128i simde_v4u32; + typedef __m128i simde_v2u64; + typedef __m128 simde_v4f32; + typedef __m128d simde_v2f64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde_v16i8; + typedef v128_t simde_v8i16; + typedef v128_t simde_v4i32; + typedef v128_t simde_v2i64; + typedef v128_t simde_v16u8; + typedef v128_t simde_v8u16; + typedef v128_t simde_v4u32; + typedef v128_t simde_v2u64; + typedef v128_t simde_v4f32; + typedef v128_t simde_v2f64; + #else + typedef simde_v16i8_private simde_v16i8; + typedef simde_v8i16_private simde_v8i16; + typedef simde_v4i32_private simde_v4i32; + typedef simde_v2i64_private simde_v2i64; + typedef simde_v16i8_private simde_v16u8; + typedef simde_v8u16_private simde_v8u16; + typedef simde_v4u32_private simde_v4u32; + typedef simde_v2u64_private simde_v2u64; + typedef simde_v4f32_private simde_v4f32; + typedef simde_v2f64_private simde_v2f64; + #endif +#endif + +#define SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_##T##_to_private, simde_##T##_private, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_##T##_from_private, simde_##T, simde_##T##_private) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v16i8, simde_v16i8, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v8i16, simde_v8i16, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v4i32, simde_v4i32, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v2i64, simde_v2i64, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v16u8, simde_v16u8, simde_##T) \ + 
SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v8u16, simde_v8u16, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v4u32, simde_v4u32, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v2u64, simde_v2u64, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v4f32, simde_v4f32, simde_##T) \ + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_x_##T##_to_v2f64, simde_v2f64, simde_##T) + +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v16i8) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v8i16) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v4i32) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v2i64) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v16u8) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v8u16) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v4u32) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v2u64) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v4f32) +SIMDE_MIPS_MSA_TYPE_DEFINE_CONVERSIONS_(v2f64) + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_MIPS_MSA_TYPES_H */ +/* :: End simde/mips/msa/types.h :: */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/add_a.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ADD_A_H) +#define SIMDE_MIPS_MSA_ADD_A_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_add_a_b(simde_v16i8 a, simde_v16i8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_add_a_b(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s8(vabsq_s8(a), vabsq_s8(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(vec_abs(a), vec_abs(b)); + #else + simde_v16i8_private + a_ = simde_v16i8_to_private(a), + b_ = simde_v16i8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_add_epi8(_mm_abs_epi8(a_.m128i), _mm_abs_epi8(b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add(wasm_i8x16_abs(a_.v128), wasm_i8x16_abs(b_.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + const __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + r_.values = + ((-a_.values & amask) | (a_.values & ~amask)) + + ((-b_.values & bmask) | (b_.values & ~bmask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]) + + ((b_.values[i] < 0) ? -b_.values[i] : b_.values[i]); + } + #endif + + return simde_v16i8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_add_a_b + #define __msa_add_a_b(a, b) simde_msa_add_a_b((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_add_a_h(simde_v8i16 a, simde_v8i16 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_add_a_h(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s16(vabsq_s16(a), vabsq_s16(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(vec_abs(a), vec_abs(b)); + #else + simde_v8i16_private + a_ = simde_v8i16_to_private(a), + b_ = simde_v8i16_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_add_epi16(_mm_abs_epi16(a_.m128i), _mm_abs_epi16(b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add(wasm_i16x8_abs(a_.v128), wasm_i16x8_abs(b_.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + const __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + r_.values = + ((-a_.values & amask) | (a_.values & ~amask)) + + ((-b_.values & bmask) | (b_.values & ~bmask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]) + + ((b_.values[i] < 0) ? 
-b_.values[i] : b_.values[i]); + } + #endif + + return simde_v8i16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_add_a_h + #define __msa_add_a_h(a, b) simde_msa_add_a_h((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_add_a_w(simde_v4i32 a, simde_v4i32 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_add_a_w(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s32(vabsq_s32(a), vabsq_s32(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(vec_abs(a), vec_abs(b)); + #else + simde_v4i32_private + a_ = simde_v4i32_to_private(a), + b_ = simde_v4i32_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_add_epi32(_mm_abs_epi32(a_.m128i), _mm_abs_epi32(b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_add(wasm_i32x4_abs(a_.v128), wasm_i32x4_abs(b_.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + const __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + r_.values = + ((-a_.values & amask) | (a_.values & ~amask)) + + ((-b_.values & bmask) | (b_.values & ~bmask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]) + + ((b_.values[i] < 0) ? -b_.values[i] : b_.values[i]); + } + #endif + + return simde_v4i32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_add_a_w + #define __msa_add_a_w(a, b) simde_msa_add_a_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_add_a_d(simde_v2i64 a, simde_v2i64 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_add_a_d(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddq_s64(vabsq_s64(a), vabsq_s64(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_add(vec_abs(a), vec_abs(b)); + #else + simde_v2i64_private + a_ = simde_v2i64_to_private(a), + b_ = simde_v2i64_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_add_epi64(_mm_abs_epi64(a_.m128i), _mm_abs_epi64(b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_add(wasm_i64x2_abs(a_.v128), wasm_i64x2_abs(b_.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + const __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + r_.values = + ((-a_.values & amask) | (a_.values & ~amask)) + + ((-b_.values & bmask) | (b_.values & ~bmask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]) + + ((b_.values[i] < 0) ? 
-b_.values[i] : b_.values[i]); + } + #endif + + return simde_v2i64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_add_a_d + #define __msa_add_a_d(a, b) simde_msa_add_a_d((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ADD_A_H) */ +/* :: End simde/mips/msa/add_a.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/adds.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ADDS_H) +#define SIMDE_MIPS_MSA_ADDS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_adds_s_b(simde_v16i8 a, simde_v16i8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_s_b(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_v16i8_private + a_ = simde_v16i8_to_private(a), + b_ = simde_v16i8_to_private(b), + r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SCALAR) + uint8_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint8_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint8_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 7) + INT8_MAX; + + uint8_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_i8(a_.values[i], b_.values[i]); + } + #endif + + return simde_v16i8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_s_b + #define __msa_adds_s_b(a, b) simde_msa_adds_s_b((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_adds_s_h(simde_v8i16 a, simde_v8i16 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return 
__msa_adds_s_h(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_v8i16_private + a_ = simde_v8i16_to_private(a), + b_ = simde_v8i16_to_private(b), + r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SCALAR) + uint16_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint16_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint16_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 15) + INT16_MAX; + + uint16_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_i16(a_.values[i], b_.values[i]); + } + #endif + + return simde_v8i16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_s_h + #define __msa_adds_s_h(a, b) simde_msa_adds_s_h((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_adds_s_w(simde_v4i32 a, simde_v4i32 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_s_w(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_v4i32_private + a_ = simde_v4i32_to_private(a), + b_ = simde_v4i32_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/56544654/501126 */ + const __m128i int_max = _mm_set1_epi32(INT32_MAX); + + /* normal result (possibly wraps around) */ + const __m128i sum = _mm_add_epi32(a_.m128i, b_.m128i); + + /* If result saturates, it has the same sign as both a and b */ + const __m128i sign_bit = _mm_srli_epi32(a_.m128i, 31); /* shift sign to lowest bit */ + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i overflow = _mm_ternarylogic_epi32(a_.m128i, b_.m128i, sum, 0x42); + #else + const __m128i sign_xor = _mm_xor_si128(a_.m128i, b_.m128i); + const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.m128i, sum)); + #endif + + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r_.m128i = _mm_mask_add_epi32(sum, _mm_movepi32_mask(overflow), int_max, sign_bit); + #else + const __m128i saturated = _mm_add_epi32(int_max, sign_bit); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(sum), + _mm_castsi128_ps(saturated), + _mm_castsi128_ps(overflow) + ) + ); + #else + const __m128i overflow_mask = _mm_srai_epi32(overflow, 31); + r_.m128i = + _mm_or_si128( + _mm_and_si128(overflow_mask, saturated), + _mm_andnot_si128(overflow_mask, sum) + ); + #endif + #endif + #elif defined(SIMDE_VECTOR_SCALAR) + uint32_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint32_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint32_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), 
(au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_i32(a_.values[i], b_.values[i]); + } + #endif + + return simde_v4i32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_s_w + #define __msa_adds_s_w(a, b) simde_msa_adds_s_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_adds_s_d(simde_v2i64 a, simde_v2i64 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_s_d(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s64(a, b); + #else + simde_v2i64_private + a_ = simde_v2i64_to_private(a), + b_ = simde_v2i64_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + /* https://stackoverflow.com/a/56544654/501126 */ + const __m128i int_max = _mm_set1_epi64x(INT64_MAX); + + /* normal result (possibly wraps around) */ + const __m128i sum = _mm_add_epi64(a_.m128i, b_.m128i); + + /* If result saturates, it has the same sign as both a and b */ + const __m128i sign_bit = _mm_srli_epi64(a_.m128i, 63); /* shift sign to lowest bit */ + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i overflow = _mm_ternarylogic_epi64(a_.m128i, b_.m128i, sum, 0x42); + #else + const __m128i sign_xor = _mm_xor_si128(a_.m128i, b_.m128i); + const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.m128i, sum)); + #endif + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.m128i = _mm_mask_add_epi64(sum, _mm_movepi64_mask(overflow), int_max, sign_bit); + #else + const __m128i saturated = _mm_add_epi64(int_max, sign_bit); + + r_.m128i = + _mm_castpd_si128( + _mm_blendv_pd( + _mm_castsi128_pd(sum), + _mm_castsi128_pd(saturated), + _mm_castsi128_pd(overflow) + ) + ); + #endif + #elif defined(SIMDE_VECTOR_SCALAR) + uint64_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.values); + uint64_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.values); + uint64_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 63) + INT64_MAX; + + uint64_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.values = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_i64(a_.values[i], b_.values[i]); + } + #endif + + return simde_v2i64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_s_d + #define __msa_adds_s_d(a, b) simde_msa_adds_s_d((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16u8 +simde_msa_adds_u_b(simde_v16u8 a, simde_v16u8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_u_b(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_v16u8_private + a_ = simde_v16u8_to_private(a), + b_ = simde_v16u8_to_private(b), + r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u8x16_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epu8(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_u8(a_.values[i], b_.values[i]); + } + #endif + + return simde_v16u8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_u_b + #define __msa_adds_u_b(a, b) simde_msa_adds_u_b((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8u16 +simde_msa_adds_u_h(simde_v8u16 a, simde_v8u16 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_u_h(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_v8u16_private + a_ = simde_v8u16_to_private(a), + b_ = simde_v8u16_to_private(b), + r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_u16x8_add_sat(a_.v128, b_.v128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_adds_epu16(a_.m128i, b_.m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_u16(a_.values[i], b_.values[i]); + } + #endif + + return simde_v8u16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_u_h + #define __msa_adds_u_h(a, b) simde_msa_adds_u_h((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4u32 +simde_msa_adds_u_w(simde_v4u32 a, simde_v4u32 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_u_w(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + return vec_adds(a, b); + #else + simde_v4u32_private + a_ = simde_v4u32_to_private(a), + b_ = simde_v4u32_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__AVX512VL__) + __m128i notb = _mm_ternarylogic_epi32(b, b, b, 0x0f); + #else + __m128i notb = _mm_xor_si128(b_.m128i, _mm_set1_epi32(~INT32_C(0))); + #endif + r_.m128i = + _mm_add_epi32( + b_.m128i, + _mm_min_epu32( + a_.m128i, + notb + ) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i sum = _mm_add_epi32(a_.m128i, b_.m128i); + const __m128i i32min = _mm_set1_epi32(INT32_MIN); + a_.m128i = _mm_xor_si128(a_.m128i, i32min); + r_.m128i = _mm_or_si128(_mm_cmpgt_epi32(a_.m128i, _mm_xor_si128(i32min, sum)), sum); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_adds_u32(a_.values[i], b_.values[i]); + } + #endif + + return simde_v4u32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_u_w + #define __msa_adds_u_w(a, b) simde_msa_adds_u_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2u64 +simde_msa_adds_u_d(simde_v2u64 a, simde_v2u64 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_u_d(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_u64(a, b); + #else + simde_v2u64_private + a_ = simde_v2u64_to_private(a), + b_ = simde_v2u64_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + b_.values; + r_.values |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), r_.values < a_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; 
i++) { + r_.values[i] = simde_math_adds_u64(a_.values[i], b_.values[i]); + } + #endif + + return simde_v2u64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_u_d + #define __msa_adds_u_d(a, b) simde_msa_adds_u_d((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ADDS_H) */ +/* :: End simde/mips/msa/adds.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/adds_a.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ADDS_A_H) +#define SIMDE_MIPS_MSA_ADDS_A_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_adds_a_b(simde_v16i8 a, simde_v16i8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_a_b(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s8(vabsq_s8(a), vabsq_s8(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_adds(vec_abs(a), vec_abs(b)); + #else + simde_v16i8_private + a_ = simde_v16i8_to_private(a), + b_ = simde_v16i8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_adds_epi8(_mm_abs_epi8(a_.m128i), _mm_abs_epi8(b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add_sat(wasm_i8x16_abs(a_.v128), wasm_i8x16_abs(b_.v128)); + #elif defined(SIMDE_VECTOR_SCALAR) + __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + __typeof__(a_.values) aabs = (-a_.values & amask) | (a_.values & ~amask); + __typeof__(b_.values) babs = (-b_.values & bmask) | (b_.values & ~bmask); + __typeof__(r_.values) sum = aabs + babs; + __typeof__(r_.values) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + __typeof__(r_.values) smask = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), aabs > (max - babs)); + r_.values = (max & smask) | (sum & ~smask); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
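+ /* Scalar fallback: for each int8_t lane, add the absolute values of the two operands with signed saturation (simde_math_adds_i8 is the saturating-add helper used by these fallbacks). */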
r_.values[i] = + simde_math_adds_i8( + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]), + ((b_.values[i] < 0) ? -b_.values[i] : b_.values[i]) + ); + } + #endif + + return simde_v16i8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_a_b + #define __msa_adds_a_b(a, b) simde_msa_adds_a_b((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_adds_a_h(simde_v8i16 a, simde_v8i16 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_a_h(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s16(vabsq_s16(a), vabsq_s16(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_adds(vec_abs(a), vec_abs(b)); + #else + simde_v8i16_private + a_ = simde_v8i16_to_private(a), + b_ = simde_v8i16_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_adds_epi16(_mm_abs_epi16(a_.m128i), _mm_abs_epi16(b_.m128i)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add_sat(wasm_i16x8_abs(a_.v128), wasm_i16x8_abs(b_.v128)); + #elif defined(SIMDE_VECTOR_SCALAR) + __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + __typeof__(a_.values) aabs = (-a_.values & amask) | (a_.values & ~amask); + __typeof__(b_.values) babs = (-b_.values & bmask) | (b_.values & ~bmask); + __typeof__(r_.values) sum = aabs + babs; + __typeof__(r_.values) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + __typeof__(r_.values) smask = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), aabs > (max - babs)); + r_.values = (max & smask) | (sum & ~smask); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + simde_math_adds_i16( + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]), + ((b_.values[i] < 0) ? 
-b_.values[i] : b_.values[i]) + ); + } + #endif + + return simde_v8i16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_a_h + #define __msa_adds_a_h(a, b) simde_msa_adds_a_h((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_adds_a_w(simde_v4i32 a, simde_v4i32 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_a_w(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqaddq_s32(vabsq_s32(a), vabsq_s32(b)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_adds(vec_abs(a), vec_abs(b)); + #else + simde_v4i32_private + a_ = simde_v4i32_to_private(a), + b_ = simde_v4i32_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + __m128i aabs = _mm_abs_epi32(a_.m128i); + __m128i babs = _mm_abs_epi32(b_.m128i); + __m128i sum = _mm_add_epi32(aabs, babs); + __m128i max = _mm_set1_epi32(INT32_MAX); + __m128i smask = + _mm_cmplt_epi32( + _mm_sub_epi32(max, babs), + aabs + ); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.m128i = _mm_blendv_epi8(sum, max, smask); + #else + r_.m128i = + _mm_or_si128( + _mm_and_si128(smask, max), + _mm_andnot_si128(smask, sum) + ); + #endif + #elif defined(SIMDE_VECTOR_SCALAR) + __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + __typeof__(a_.values) aabs = (-a_.values & amask) | (a_.values & ~amask); + __typeof__(b_.values) babs = (-b_.values & bmask) | (b_.values & ~bmask); + __typeof__(r_.values) sum = aabs + babs; + __typeof__(r_.values) max = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX }; + __typeof__(r_.values) smask = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), aabs > (max - babs)); + r_.values = (max & smask) | (sum & ~smask); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + simde_math_adds_i32( + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]), + ((b_.values[i] < 0) ? -b_.values[i] : b_.values[i]) + ); + } + #endif + + return simde_v4i32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_a_w + #define __msa_adds_a_w(a, b) simde_msa_adds_a_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_adds_a_d(simde_v2i64 a, simde_v2i64 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_adds_a_d(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddq_s64(vabsq_s64(a), vabsq_s64(b)); + #else + simde_v2i64_private + a_ = simde_v2i64_to_private(a), + b_ = simde_v2i64_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SCALAR) + __typeof__(a_.values) amask = HEDLEY_REINTERPRET_CAST(__typeof__(a_.values), a_.values < 0); + __typeof__(b_.values) bmask = HEDLEY_REINTERPRET_CAST(__typeof__(b_.values), b_.values < 0); + __typeof__(a_.values) aabs = (-a_.values & amask) | (a_.values & ~amask); + __typeof__(b_.values) babs = (-b_.values & bmask) | (b_.values & ~bmask); + __typeof__(r_.values) sum = aabs + babs; + __typeof__(r_.values) max = { INT64_MAX, INT64_MAX }; + __typeof__(r_.values) smask = HEDLEY_REINTERPRET_CAST(__typeof__(r_.values), aabs > (max - babs)); + r_.values = (max & smask) | (sum & ~smask); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = + simde_math_adds_i64( + ((a_.values[i] < 0) ? -a_.values[i] : a_.values[i]), + ((b_.values[i] < 0) ? 
-b_.values[i] : b_.values[i]) + ); + } + #endif + + return simde_v2i64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_adds_a_d + #define __msa_adds_a_d(a, b) simde_msa_adds_a_d((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ADDS_A_H) */ +/* :: End simde/mips/msa/adds_a.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/addv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ADDV_H) +#define SIMDE_MIPS_MSA_ADDV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_addv_b(simde_v16i8 a, simde_v16i8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_addv_b(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_v16i8_private + a_ = simde_v16i8_to_private(a), + b_ = simde_v16i8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_v16i8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addv_b + #define __msa_addv_b(a, b) simde_msa_addv_b((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_addv_h(simde_v8i16 a, simde_v8i16 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_addv_h(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_v8i16_private + a_ = simde_v8i16_to_private(a), + b_ = simde_v8i16_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add(a_.v128, b_.v128); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_v8i16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addv_h + #define __msa_addv_h(a, b) simde_msa_addv_h((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_addv_w(simde_v4i32 a, simde_v4i32 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_addv_w(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, b); + #else + simde_v4i32_private + a_ = simde_v4i32_to_private(a), + b_ = simde_v4i32_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_v4i32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addv_w + #define __msa_addv_w(a, b) simde_msa_addv_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_addv_d(simde_v2i64 a, simde_v2i64 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_addv_d(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_add(a, b); + #else + simde_v2i64_private + a_ = simde_v2i64_to_private(a), + b_ = simde_v2i64_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_add(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + b_.values[i]; + } + #endif + + return simde_v2i64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addv_d + #define __msa_addv_d(a, b) simde_msa_addv_d((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ADDV_H) */ +/* :: End simde/mips/msa/addv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/addvi.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ADDVI_H) +#define SIMDE_MIPS_MSA_ADDVI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_addvi_b(simde_v16i8 a, const int imm0_31) + SIMDE_REQUIRE_CONSTANT_RANGE(imm0_31, 0, 31) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s8(a, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, imm0_31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, vec_splats(HEDLEY_STATIC_CAST(signed char, imm0_31))); + #else + simde_v16i8_private + a_ = simde_v16i8_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi8(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, imm0_31))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_add(a_.v128, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, imm0_31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values + HEDLEY_STATIC_CAST(int8_t, imm0_31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + HEDLEY_STATIC_CAST(int8_t, imm0_31); + } + #endif + + return simde_v16i8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_addvi_b(a, imm0_31) __msa_addvi_b((a), (imm0_31)) +#endif +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addvi_b + #define __msa_addvi_b(a, imm0_31) simde_msa_addvi_b((a), (imm0_31)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_addvi_h(simde_v8i16 a, const int imm0_31) + SIMDE_REQUIRE_CONSTANT_RANGE(imm0_31, 0, 31) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s16(a, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, imm0_31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, vec_splats(HEDLEY_STATIC_CAST(signed short, imm0_31))); + #else + simde_v8i16_private + a_ = simde_v8i16_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi16(a_.m128i, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, imm0_31))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_add(a_.v128, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, imm0_31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values + HEDLEY_STATIC_CAST(int16_t, imm0_31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + HEDLEY_STATIC_CAST(int16_t, imm0_31); + } + #endif + + return simde_v8i16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_addvi_h(a, imm0_31) __msa_addvi_h((a), (imm0_31)) +#endif +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addvi_h + #define __msa_addvi_h(a, imm0_31) simde_msa_addvi_h((a), (imm0_31)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_addvi_w(simde_v4i32 a, const int 
imm0_31) + SIMDE_REQUIRE_CONSTANT_RANGE(imm0_31, 0, 31) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s32(a, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, imm0_31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_add(a, vec_splats(HEDLEY_STATIC_CAST(signed int, imm0_31))); + #else + simde_v4i32_private + a_ = simde_v4i32_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi32(a_.m128i, _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, imm0_31))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_add(a_.v128, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, imm0_31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values + HEDLEY_STATIC_CAST(int32_t, imm0_31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + HEDLEY_STATIC_CAST(int32_t, imm0_31); + } + #endif + + return simde_v4i32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_addvi_w(a, imm0_31) __msa_addvi_w((a), (imm0_31)) +#endif +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addvi_w + #define __msa_addvi_w(a, imm0_31) simde_msa_addvi_w((a), (imm0_31)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_addvi_d(simde_v2i64 a, const int imm0_31) + SIMDE_REQUIRE_CONSTANT_RANGE(imm0_31, 0, 31) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vaddq_s64(a, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, imm0_31))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_add(a, vec_splats(HEDLEY_STATIC_CAST(signed long long, imm0_31))); + #else + simde_v2i64_private + a_ = simde_v2i64_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_add_epi64(a_.m128i, _mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, imm0_31))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_add(a_.v128, wasm_i64x2_splat(HEDLEY_STATIC_CAST(int64_t, imm0_31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values + HEDLEY_STATIC_CAST(int64_t, HEDLEY_STATIC_CAST(int64_t, imm0_31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] + imm0_31; + } + #endif + + return simde_v2i64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_addvi_d(a, imm0_31) __msa_addvi_d((a), (imm0_31)) +#endif +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_addvi_d + #define __msa_addvi_d(a, imm0_31) simde_msa_addvi_d((a), (imm0_31)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ADDVI_H) */ +/* :: End simde/mips/msa/addvi.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/and.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_AND_H) +#define SIMDE_MIPS_MSA_AND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16u8 +simde_msa_and_v(simde_v16u8 a, simde_v16u8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_and_v(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_u8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, b); + #else + simde_v16u8_private + a_ = simde_v16u8_to_private(a), + b_ = simde_v16u8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values & b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & b_.values[i]; + } + #endif + + return simde_v16u8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_and_v + #define __msa_and_v(a, b) simde_msa_and_v((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_AND_H) */ +/* :: End simde/mips/msa/and.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/andi.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ANDI_H) +#define SIMDE_MIPS_MSA_ANDI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16u8 +simde_msa_andi_b(simde_v16u8 a, const int imm0_255) + SIMDE_REQUIRE_CONSTANT_RANGE(imm0_255, 0, 255) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vandq_u8(a, vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, imm0_255))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_and(a, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm0_255))); + #else + simde_v16u8_private + a_ = simde_v16u8_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_and_si128(a_.m128i, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, imm0_255))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_v128_and(a_.v128, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, imm0_255))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.values = a_.values & HEDLEY_STATIC_CAST(uint8_t, imm0_255); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] & HEDLEY_STATIC_CAST(int8_t, imm0_255); + } + #endif + + return simde_v16u8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_NATIVE) + #define simde_msa_andi_b(a, imm0_255) __msa_andi_b((a), (imm0_255)) +#endif +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_andi_b + #define __msa_andi_b(a, imm0_255) simde_msa_andi_b((a), (imm0_255)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ANDI_H) */ +/* :: End simde/mips/msa/andi.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/ld.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_LD_H) +#define SIMDE_MIPS_MSA_LD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_ld_b(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_ld_b(rs, s10); + #else + simde_v16i8 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_ld_b + #define __msa_ld_b(rs, s10) simde_msa_ld_b((rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_ld_h(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int16_t)) == 0, "`s10' must be a multiple of sizeof(int16_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_ld_h(rs, s10); + #else + simde_v8i16 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_ld_h + #define __msa_ld_h(rs, s10) simde_msa_ld_h((rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_ld_w(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_ld_w(rs, s10); + #else + simde_v4i32 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_ld_w + #define __msa_ld_w(rs, s10) simde_msa_ld_w((rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_ld_d(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_ld_d(rs, s10); + #else + simde_v2i64 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_ld_d + #define __msa_ld_d(rs, s10) simde_msa_ld_d((rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16u8 +simde_x_msa_ld_u_b(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_v16u8, __msa_ld_b(rs, s10)); + #else + simde_v16u8 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8u16 +simde_x_msa_ld_u_h(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int16_t)) == 0, "`s10' must be a multiple of sizeof(int16_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_v8u16, __msa_ld_b(rs, s10)); + #else + simde_v8u16 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4u32 +simde_x_msa_ld_u_w(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, 
"`s10' must be a multiple of sizeof(int32_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_v4u32, __msa_ld_b(rs, s10)); + #else + simde_v4u32 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2u64 +simde_x_msa_ld_u_d(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_v2u64, __msa_ld_b(rs, s10)); + #else + simde_v2u64 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4f32 +simde_x_msa_fld_w(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_v4f32, __msa_ld_b(rs, s10)); + #else + simde_v4f32 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2f64 +simde_x_msa_fld_d(const void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return HEDLEY_REINTERPRET_CAST(simde_v2f64, __msa_ld_b(rs, s10)); + #else + simde_v2f64 r; + + simde_memcpy(&r, &(HEDLEY_REINTERPRET_CAST(const int8_t*, rs)[s10]), sizeof(r)); + + return r; + #endif +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_LD_H) */ +/* :: End simde/mips/msa/ld.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/madd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TOa THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_MADD_H) +#define SIMDE_MIPS_MSA_MADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4f32 +simde_msa_fmadd_w(simde_v4f32 a, simde_v4f32 b, simde_v4f32 c) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_fmadd_w(a, b, c); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + return vfmaq_f32(a, c, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vmlaq_f32(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return vec_madd(c, b, a); + #else + simde_v4f32_private + a_ = simde_v4f32_to_private(a), + b_ = simde_v4f32_to_private(b), + c_ = simde_v4f32_to_private(c), + r_; + + #if defined(SIMDE_X86_FMA_NATIVE) + r_.m128 = _mm_fmadd_ps(c_.m128, b_.m128, a_.m128); + #elif defined(SIMDE_X86_SSE_NATIVE) + r_.m128 = _mm_add_ps(a_.m128, _mm_mul_ps(b_.m128, c_.m128)); + #elif defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_f32x4_fma(a_.v128, b_.v128, c_.v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f32x4_add(a_.v128, wasm_f32x4_mul(b_.v128, c_.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.values = a_.values + (b_.values * c_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_fmaf(c_.values[i], b_.values[i], a_.values[i]); + } + #endif + + return simde_v4f32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_fmadd_w + #define __msa_fmadd_w(a, b) simde_msa_fmadd_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2f64 +simde_msa_fmadd_d(simde_v2f64 a, simde_v2f64 b, simde_v2f64 c) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_fmadd_d(a, b, c); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + return vec_madd(c, b, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vfmaq_f64(a, c, b); + #else + simde_v2f64_private + a_ = simde_v2f64_to_private(a), + b_ = simde_v2f64_to_private(b), + c_ = simde_v2f64_to_private(c), + r_; + + #if defined(SIMDE_X86_FMA_NATIVE) + r_.m128d = _mm_fmadd_pd(c_.m128d, b_.m128d, a_.m128d); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128d = _mm_add_pd(a_.m128d, _mm_mul_pd(b_.m128d, c_.m128d)); + #elif defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + r_.v128 = wasm_f64x2_fma(a_.v128, b_.v128, c_.v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_f64x2_add(a_.v128, wasm_f64x2_mul(b_.v128, c_.v128)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values + (b_.values * c_.values); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_math_fma(c_.values[i], b_.values[i], a_.values[i]); + } + #endif + + return simde_v2f64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_fmadd_d + #define __msa_fmadd_d(a, b) simde_msa_fmadd_d((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_MADD_H) */ +/* :: End simde/mips/msa/madd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/st.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this 
software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_ST_H) +#define SIMDE_MIPS_MSA_ST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_msa_st_b(simde_v16i8 a, void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_st_b(a, rs, s10); + #else + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_st_b + #define __msa_st_b(a, rs, s10) simde_msa_st_b((a), (rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_msa_st_h(simde_v8i16 a, void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int16_t)) == 0, "`s10' must be a multiple of sizeof(int16_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_st_h(a, rs, s10); + #else + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_st_h + #define __msa_st_h(a, rs, s10) simde_msa_st_h((a), (rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_msa_st_w(simde_v4i32 a, void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int32_t)) == 0, "`s10' must be a multiple of sizeof(int32_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_st_w(a, rs, s10); + #else + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_st_w + #define __msa_st_w(a, rs, s10) simde_msa_st_w((a), (rs), (s10)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_msa_st_d(simde_v2i64 a, void * rs, const int s10) + SIMDE_REQUIRE_CONSTANT_RANGE(s10, 0, 1023) + HEDLEY_REQUIRE_MSG((s10 % sizeof(int64_t)) == 0, "`s10' must be a multiple of sizeof(int64_t)") { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_st_d(a, rs, s10); + #else + simde_memcpy(&(HEDLEY_REINTERPRET_CAST(int8_t*, rs)[s10]), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_st_d + #define __msa_st_d(a, rs, s10) simde_msa_st_d((a), (rs), (s10)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_ST_H) */ +/* :: End simde/mips/msa/st.h :: */ +/* AUTOMATICALLY GENERATED 
FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/mips/msa/subv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_MIPS_MSA_SUBV_H) +#define SIMDE_MIPS_MSA_SUBV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v16i8 +simde_msa_subv_b(simde_v16i8 a, simde_v16i8 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_subv_b(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s8(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_v16i8_private + a_ = simde_v16i8_to_private(a), + b_ = simde_v16i8_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi8(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i8x16_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_v16i8_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_subv_b + #define __msa_subv_b(a, b) simde_msa_subv_b((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v8i16 +simde_msa_subv_h(simde_v8i16 a, simde_v8i16 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_subv_h(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s16(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_v8i16_private + a_ = simde_v8i16_to_private(a), + b_ = simde_v8i16_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi16(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i16x8_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_v8i16_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_subv_h + #define __msa_subv_h(a, b) 
simde_msa_subv_h((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v4i32 +simde_msa_subv_w(simde_v4i32 a, simde_v4i32 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_subv_w(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_sub(a, b); + #else + simde_v4i32_private + a_ = simde_v4i32_to_private(a), + b_ = simde_v4i32_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi32(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i32x4_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_v4i32_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_subv_w + #define __msa_subv_w(a, b) simde_msa_subv_w((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v2i64 +simde_msa_subv_d(simde_v2i64 a, simde_v2i64 b) { + #if defined(SIMDE_MIPS_MSA_NATIVE) + return __msa_subv_d(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsubq_s64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + return vec_sub(a, b); + #else + simde_v2i64_private + a_ = simde_v2i64_to_private(a), + b_ = simde_v2i64_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i = _mm_sub_epi64(a_.m128i, b_.m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.v128 = wasm_i64x2_sub(a_.v128, b_.v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.values = a_.values - b_.values; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = a_.values[i] - b_.values[i]; + } + #endif + + return simde_v2i64_from_private(r_); + #endif +} +#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) + #undef __msa_subv_d + #define __msa_subv_d(a, b) simde_msa_subv_d((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MIPS_MSA_SUBV_H) */ +/* :: End simde/mips/msa/subv.h :: */ + +#endif /* SIMDE_MIPS_MSA_H */ +/* :: End simde/mips/msa.h :: */ diff --git a/include/simde/wasm/relaxed-simd.h b/include/simde/wasm/relaxed-simd.h new file mode 100644 index 00000000..dd6a286d --- /dev/null +++ b/include/simde/wasm/relaxed-simd.h @@ -0,0 +1,17553 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/wasm/relaxed-simd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_WASM_RELAXED_SIMD_H) +#define SIMDE_WASM_RELAXED_SIMD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/wasm/simd128.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_WASM_SIMD128_H) +#define SIMDE_WASM_SIMD128_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define 
HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define 
HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef 
HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) +# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION +#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define 
HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP 
__pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define 
HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION 
_Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) 
|| \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) +#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if 
defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + 
HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */
+/* Detect Clang Version
+ * Created by Evan Nemerson
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to
+ * the public domain worldwide. This software is distributed without
+ * any warranty.
+ *
+ * For details, see .
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+/* This file was originally part of SIMDe
+ * (). You're free to do with it as
+ * you please, but I do have a few small requests:
+ *
+ * * If you make improvements, please submit them back to SIMDe
+ * (at ) so others can
+ * benefit from them.
+ * * Please keep a link to SIMDe intact so people know where to submit
+ * improvements.
+ * * If you expose it publicly, please change the SIMDE_ prefix to
+ * something specific to your project.
+ *
+ * The version numbers clang exposes (in the ___clang_major__,
+ * __clang_minor__, and __clang_patchlevel__ macros) are unreliable.
+ * Vendors such as Apple will define these values to their version
+ * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but
+ * __clang_major__ and __clang_minor__ are defined to 4 and 0
+ * respectively, instead of 3 and 1.
+ *
+ * The solution is *usually* to use clang's feature detection macros
+ * ()
+ * to determine if the feature you're interested in is available. This
+ * generally works well, and it should probably be the first thing you
+ * try. Unfortunately, it's not possible to check for everything. In
+ * particular, compiler bugs.
+ *
+ * This file just uses the feature checking macros to detect features
+ * added in specific versions of clang to identify which version of
+ * clang the compiler is based on.
+ *
+ * Right now it only goes back to 3.6, but I'm happy to accept patches
+ * to go back further. And, of course, newer versions are welcome if
+ * they're not already present, and if you find a way to detect a point
+ * release that would be great, too!
+ */
+
+#if !defined(SIMDE_DETECT_CLANG_H)
+#define SIMDE_DETECT_CLANG_H 1
+
+/* Attempt to detect the upstream clang version number. I usually only
+ * worry about major version numbers (at least for 4.0+), but if you
+ * need more resolution I'm happy to accept patches that are able to
+ * detect minor versions as well. That said, you'll probably have a
+ * hard time with detection since AFAIK most minor releases don't add
+ * anything we can detect. Updated based on
+ * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73
+ * - would welcome patches/updates there as well.
+ */
+
+#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION)
+# if __has_attribute(nouwtable) // no new warnings in 16.0
+# define SIMDE_DETECT_CLANG_VERSION 160000
+# elif __has_warning("-Warray-parameter")
+# define SIMDE_DETECT_CLANG_VERSION 150000
+# elif __has_warning("-Wbitwise-instead-of-logical")
+# define SIMDE_DETECT_CLANG_VERSION 140000
+# elif __has_warning("-Wwaix-compat")
+# define SIMDE_DETECT_CLANG_VERSION 130000
+# elif __has_warning("-Wformat-insufficient-args")
+# define SIMDE_DETECT_CLANG_VERSION 120000
+# elif __has_warning("-Wimplicit-const-int-float-conversion")
+# define SIMDE_DETECT_CLANG_VERSION 110000
+# elif __has_warning("-Wmisleading-indentation")
+# define SIMDE_DETECT_CLANG_VERSION 100000
+# elif defined(__FILE_NAME__)
+# define SIMDE_DETECT_CLANG_VERSION 90000
+# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32)
+# define SIMDE_DETECT_CLANG_VERSION 80000
+// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently
+// based on Clang 7, but does not support the warning we test.
+// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and
+// https://trac.macports.org/wiki/XcodeVersionInfo.
+# elif __has_warning("-Wc++98-compat-extra-semi") || \
+ (defined(__apple_build_version__) && __apple_build_version__ >= 10010000)
+# define SIMDE_DETECT_CLANG_VERSION 70000
+# elif __has_warning("-Wpragma-pack")
+# define SIMDE_DETECT_CLANG_VERSION 60000
+# elif __has_warning("-Wbitfield-enum-conversion")
+# define SIMDE_DETECT_CLANG_VERSION 50000
+# elif __has_attribute(diagnose_if)
+# define SIMDE_DETECT_CLANG_VERSION 40000
+# elif __has_warning("-Wcomma")
+# define SIMDE_DETECT_CLANG_VERSION 39000
+# elif __has_warning("-Wdouble-promotion")
+# define SIMDE_DETECT_CLANG_VERSION 38000
+# elif __has_warning("-Wshift-negative-value")
+# define SIMDE_DETECT_CLANG_VERSION 37000
+# elif __has_warning("-Wambiguous-ellipsis")
+# define SIMDE_DETECT_CLANG_VERSION 36000
+# else
+# define SIMDE_DETECT_CLANG_VERSION 1
+# endif
+#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */
+
+/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty
+ * straightforward; it returns true if the compiler is a derivative
+ * of clang >= the specified version.
+ *
+ * Since this file is often (primarily?) useful for working around bugs
+ * it is also helpful to have a macro which returns true if only if the
+ * compiler is a version of clang *older* than the specified version to
+ * make it a bit easier to ifdef regions to add code for older versions,
+ * such as pragmas to disable a specific warning. */
+
+#if defined(SIMDE_DETECT_CLANG_VERSION)
+# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision)))
+# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision)))
+#else
+# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0)
+# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0)
+#endif
+
+#endif /* !defined(SIMDE_DETECT_CLANG_H) */
+/* :: End simde/simde-detect-clang.h :: */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/simde-arch.h :: */
+/* Architecture detection
+ * Created by Evan Nemerson
+ *
+ * To the extent possible under law, the authors have waived all
+ * copyright and related or neighboring rights to this code.
For
+ * details, see the Creative Commons Zero 1.0 Universal license at
+ *
+ *
+ * SPDX-License-Identifier: CC0-1.0
+ *
+ * Different compilers define different preprocessor macros for the
+ * same architecture. This is an attempt to provide a single
+ * interface which is usable on any compiler.
+ *
+ * In general, a macro named SIMDE_ARCH_* is defined for each
+ * architecture the CPU supports. When there are multiple possible
+ * versions, we try to define the macro to the target version. For
+ * example, if you want to check for i586+, you could do something
+ * like:
+ *
+ * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
+ * ...
+ * #endif
+ *
+ * You could also just check that SIMDE_ARCH_X86 >= 5 without checking
+ * if it's defined first, but some compilers may emit a warning about
+ * an undefined macro being used (e.g., GCC with -Wundef).
+ *
+ * This was originally created for SIMDe
+ * (hence the prefix), but this
+ * header has no dependencies and may be used anywhere. It is
+ * originally based on information from
+ * , though it
+ * has been enhanced with additional information.
+ *
+ * If you improve this file, or find a bug, please file the issue at
+ * . If you copy this into
+ * your project, even if you change the prefix, please keep the links
+ * to SIMDe intact so others know where to report issues, submit
+ * enhancements, and find the latest version. */
+
+#if !defined(SIMDE_ARCH_H)
+#define SIMDE_ARCH_H
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+
+/* Alpha
+ */
+#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
+# if defined(__alpha_ev6__)
+# define SIMDE_ARCH_ALPHA 6
+# elif defined(__alpha_ev5__)
+# define SIMDE_ARCH_ALPHA 5
+# elif defined(__alpha_ev4__)
+# define SIMDE_ARCH_ALPHA 4
+# else
+# define SIMDE_ARCH_ALPHA 1
+# endif
+#endif
+#if defined(SIMDE_ARCH_ALPHA)
+# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA)
+#else
+# define SIMDE_ARCH_ALPHA_CHECK(version) (0)
+#endif
+
+/* Atmel AVR
+ */
+#if defined(__AVR_ARCH__)
+# define SIMDE_ARCH_AVR __AVR_ARCH__
+#endif
+
+/* AMD64 / x86_64
+ */
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+# if !defined(_M_ARM64EC)
+# define SIMDE_ARCH_AMD64 1000
+# endif
+#endif
+
+/* ARM
+ */
+#if defined(__ARM_ARCH)
+# if __ARM_ARCH > 100
+# define SIMDE_ARCH_ARM (__ARM_ARCH)
+# else
+# define SIMDE_ARCH_ARM (__ARM_ARCH * 100)
+# endif
+#elif defined(_M_ARM)
+# if _M_ARM > 100
+# define SIMDE_ARCH_ARM (_M_ARM)
+# else
+# define SIMDE_ARCH_ARM (_M_ARM * 100)
+# endif
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+# define SIMDE_ARCH_ARM 800
+#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM)
+# define SIMDE_ARCH_ARM 1
+#endif
+#if defined(SIMDE_ARCH_ARM)
+# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM)
+#else
+# define SIMDE_ARCH_ARM_CHECK(major, minor) (0)
+#endif
+
+/* AArch64
+ */
+#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+# define SIMDE_ARCH_AARCH64 1000
+#endif
+#if defined(SIMDE_ARCH_AARCH64)
+# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64)
+#else
+# define SIMDE_ARCH_AARCH64_CHECK(version) (0)
+#endif
+
+/* ARM SIMD ISA extensions */
+#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64)
+# if defined(SIMDE_ARCH_AARCH64)
+# define SIMDE_ARCH_ARM_NEON
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
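 *
 * A quick illustration of the "_TO" / "_LIKE" split described above
 * (an editorial sketch, not part of the upstream header; the struct
 * and member names are invented):
 *
 *   struct fixed_buf { SIMDE_ALIGN_TO(16) unsigned char data[64]; };
 *   struct typed_buf { SIMDE_ALIGN_LIKE(double) unsigned char data[64]; };
 *
 * The first hardcodes a 16-byte boundary; the second borrows whatever
 * alignment `double` has on the target. On MSVC the numbered variants
 * (SIMDE_ALIGN_TO_16, SIMDE_ALIGN_LIKE_16, ...) are the safe choice,
 * as the per-macro notes below explain.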
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
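 *
 * (Editorial illustration, not upstream text: `SIMDE_ALIGN_TO(32)` is
 * accepted everywhere, while `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(double))`
 * only compiles where SIMDE_ALIGN_TO maps to an attribute or to
 * _Alignas/alignas, since MSVC's __declspec(align()) insists on a
 * literal number.)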
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
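 *
 * A minimal usage sketch (editorial, not upstream text; `ptr` is a
 * hypothetical `float *` the caller knows is 16-byte aligned):
 *
 *   float *p = SIMDE_ALIGN_ASSUME_TO(ptr, 16);
 *
 * Without SIMDE_ALIGN_DEBUG this only feeds the promise to the
 * optimizer; with it, a mismatched pointer is reported at run time.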
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
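 *
 * For instance (an editorial sketch, not upstream text; `cc` and
 * `foo.c` are placeholders):
 *
 *   cc -O2 foo.c                                   faithful emulation
 *   cc -O2 -DSIMDE_FAST_NANS foo.c                 only NaN handling relaxed
 *   cc -O2 -ffast-math -DSIMDE_NO_FAST_NANS foo.c  everything relaxed but NaNs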
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
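 *
 * (Editorial note, not upstream text: for example, a fallback that
 * emulates a vector operation lane by lane in scalar code can raise
 * exceptions on lanes the native instruction would ignore, or miss
 * ones it would set; SIMDE_FAST_EXCEPTIONS lets SIMDe skip the extra
 * work needed to hide such differences.)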
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
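 *
 * Hypothetical usage (editorial sketch, not upstream text; it relies
 * on the SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL macro defined further
 * down in this header):
 *
 *   same = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(
 *     SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);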
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
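 *
 * (Editorial aside, not upstream text: whichever branch wins, the
 * result is consumed with ordinary preprocessor tests such as
 *
 *   #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
 *   ...
 *   #endif
 *
 * which is exactly how the simde_endian_bswap64_* helpers below pick
 * their implementation.)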
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
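
   (Editorial aside, not upstream text: the !defined() guards just
   below already let a user override the guess, e.g. by building with
   -DSIMDE_FLOAT64_TYPE="long double", in which case
   SIMDE_FLOAT64_C(1.5) expands to ((SIMDE_FLOAT64_TYPE) 1.5).)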
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
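
   (Editorial sketch, not upstream text: a freestanding build can also
   bypass the detection below entirely by pre-defining the macros, e.g.

     #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))

   before including any SIMDe header, where my_memcpy is a hypothetical
   user-supplied routine.)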
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include <stdlib.h> + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cstdint>) +# include <cstdint> +# elif __has_include(<stdint.h>) +# include <stdint.h> +# endif +# if __has_include(<inttypes.h>) +# include <inttypes.h> +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include <stdint.h> +# include <inttypes.h> +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include <stdint.h> + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include(<stdio.h>) +# include <stdio.h> +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include <stdio.h> +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTED__ == 1 +# include <stdio.h> +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code.
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 sse_m128; + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i sse_m128i; + SIMDE_ALIGN_TO_16 __m128d sse_m128d; + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 
float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde_v128_private; + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde_v128_t; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int32x4_t simde_v128_t; +#elif defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde_v128_t; +#elif defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde_v128_t; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_v128_t; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde_v128_t SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde_v128_private simde_v128_t; +#endif + +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + typedef simde_v128_t v128_t; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde_v128_t), "simde_v128_t size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde_v128_private), "simde_v128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde_v128_t) == 16, "simde_v128_t is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde_v128_private) == 16, "simde_v128_private is not 16-byte aligned"); +#endif + +#define SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(Other_Type, SIMDe_Type, To_Name, From_Name) \ + SIMDE_FUNCTION_ATTRIBUTES \ + Other_Type To_Name(SIMDe_Type v) { \ + Other_Type r; \ + simde_memcpy(&r, &v, sizeof(r)); \ + return r; \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + SIMDe_Type From_Name(Other_Type v) { \ + SIMDe_Type r; \ + simde_memcpy(&r, &v, sizeof(r)); \ + return r; \ + } + +SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(simde_v128_private, simde_v128_t, simde_v128_to_private, simde_v128_from_private) + +#define SIMDE_WASM_SIMD128_FMIN(x, y) \ + (simde_math_isnan(x) ? SIMDE_MATH_NAN \ + : simde_math_isnan(y) ? SIMDE_MATH_NAN \ + : (((x) == 0) && ((y) == 0)) ? (simde_math_signbit(x) ? (x) : (y)) \ + : ((x) < (y) ? (x) : (y))) + +#define SIMDE_WASM_SIMD128_FMAX(x, y) \ + (simde_math_isnan(x) ? SIMDE_MATH_NAN \ + : simde_math_isnan(y) ? SIMDE_MATH_NAN \ + : (((x) == 0) && ((y) == 0)) ? (simde_math_signbit(x) ? (y) : (x)) \ + : ((x) > (y) ? 
(x) : (y))) + +#if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128 , simde_v128_t, simde_v128_to_m128 , simde_v128_from_m128 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128i, simde_v128_t, simde_v128_to_m128i, simde_v128_from_m128i) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128d, simde_v128_t, simde_v128_to_m128d, simde_v128_from_m128d) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int8x16_t, simde_v128_t, simde_v128_to_neon_i8 , simde_v128_from_neon_i8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int16x8_t, simde_v128_t, simde_v128_to_neon_i16, simde_v128_from_neon_i16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int32x4_t, simde_v128_t, simde_v128_to_neon_i32, simde_v128_from_neon_i32) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int64x2_t, simde_v128_t, simde_v128_to_neon_i64, simde_v128_from_neon_i64) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint8x16_t, simde_v128_t, simde_v128_to_neon_u8 , simde_v128_from_neon_u8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint16x8_t, simde_v128_t, simde_v128_to_neon_u16, simde_v128_from_neon_u16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint32x4_t, simde_v128_t, simde_v128_to_neon_u32, simde_v128_from_neon_u32) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint64x2_t, simde_v128_t, simde_v128_to_neon_u64, simde_v128_from_neon_u64) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(float32x4_t, simde_v128_t, simde_v128_to_neon_f32, simde_v128_from_neon_f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(float64x2_t, simde_v128_t, simde_v128_to_neon_f64, simde_v128_from_neon_f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed char), simde_v128_t, simde_v128_to_altivec_i8 , simde_v128_from_altivec_i8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed short), simde_v128_t, simde_v128_to_altivec_i16, simde_v128_from_altivec_i16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed int), simde_v128_t, simde_v128_to_altivec_i32, simde_v128_from_altivec_i32) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), simde_v128_t, simde_v128_to_altivec_u8 , simde_v128_from_altivec_u8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), simde_v128_t, simde_v128_to_altivec_u16, simde_v128_from_altivec_u16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), simde_v128_t, simde_v128_to_altivec_u32, simde_v128_from_altivec_u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed long long), simde_v128_t, simde_v128_to_altivec_i64, simde_v128_from_altivec_i64) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), simde_v128_t, simde_v128_to_altivec_u64, simde_v128_from_altivec_u64) + #endif + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde_v128_to_altivec_f32(simde_v128_t value) { + simde_v128_private r_ = simde_v128_to_private(value); + return 
r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_v128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde_v128_private r_; + r_.altivec_f32 = value; + return simde_v128_from_private(r_); + } + #else + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(float), simde_v128_t, simde_v128_to_altivec_f32, simde_v128_from_altivec_f32) + #endif +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +/* + * Begin function implementations + */ + +/* load */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load(mem); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_REINTERPRET_CAST(const __m128i*, mem)); + #else + simde_v128_t r; + simde_memcpy(&r, mem, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load(mem) simde_wasm_v128_load((mem)) +#endif + +/* store */ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store (void * mem, simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem, a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_REINTERPRET_CAST(__m128i*, mem), a); + #else + simde_memcpy(mem, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store(mem, a) simde_wasm_v128_store((mem), (a)) +#endif + +/* make */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_make ( + int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, + int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return + wasm_i8x16_make( + c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return + _mm_setr_epi8( + c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15); + #else + simde_v128_private r_; + + r_.i8[ 0] = c0; + r_.i8[ 1] = c1; + r_.i8[ 2] = c2; + r_.i8[ 3] = c3; + r_.i8[ 4] = c4; + r_.i8[ 5] = c5; + r_.i8[ 6] = c6; + r_.i8[ 7] = c7; + r_.i8[ 8] = c8; + r_.i8[ 9] = c9; + r_.i8[10] = c10; + r_.i8[11] = c11; + r_.i8[12] = c12; + r_.i8[13] = c13; + r_.i8[14] = c14; + r_.i8[15] = c15; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i8x16_make( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + simde_wasm_i8x16_make( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_make ( + int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(c0, c1, c2, c3, c4, c5, c6, c7); + #else + simde_v128_private r_; + + r_.i16[0] = c0; + r_.i16[1] = c1; + r_.i16[2] = c2; + r_.i16[3] = c3; + r_.i16[4] = c4; + r_.i16[5] = c5; + r_.i16[6] = c6; + r_.i16[7] = c7; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7) \ + simde_wasm_i16x8_make((c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t 
+simde_wasm_i32x4_make (int32_t c0, int32_t c1, int32_t c2, int32_t c3) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_make(c0, c1, c2, c3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(c0, c1, c2, c3); + #else + simde_v128_private r_; + + r_.i32[0] = c0; + r_.i32[1] = c1; + r_.i32[2] = c2; + r_.i32[3] = c3; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_make(c0, c1, c2, c3) simde_wasm_i32x4_make((c0), (c1), (c2), (c3)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_make (int64_t c0, int64_t c1) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_make(c0, c1); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi64x(c1, c0); + #else + simde_v128_private r_; + + r_.i64[ 0] = c0; + r_.i64[ 1] = c1; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_make(c0, c1) simde_wasm_i64x2_make((c0), (c1)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_make (simde_float32 c0, simde_float32 c1, simde_float32 c2, simde_float32 c3) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_make(c0, c1, c2, c3); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_setr_ps(c0, c1, c2, c3); + #else + r_.f32[0] = c0; + r_.f32[1] = c1; + r_.f32[2] = c2; + r_.f32[3] = c3; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_make(c0, c1, c2, c3) simde_wasm_f32x4_make((c0), (c1), (c2), (c3)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_make (simde_float64 c0, simde_float64 c1) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_make(c0, c1); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_set_pd(c1, c0); + #else + r_.f64[ 0] = c0; + r_.f64[ 1] = c1; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_make(c0, c1) simde_wasm_f64x2_make((c0), (c1)) +#endif + +/* const */ + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i8x16_const( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + wasm_i8x16_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i8x16_const( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + SIMDE_ASSERT_CONSTANT_(c4); \ + SIMDE_ASSERT_CONSTANT_(c5); \ + SIMDE_ASSERT_CONSTANT_(c6); \ + SIMDE_ASSERT_CONSTANT_(c7); \ + SIMDE_ASSERT_CONSTANT_(c8); \ + SIMDE_ASSERT_CONSTANT_(c9); \ + SIMDE_ASSERT_CONSTANT_(c10); \ + SIMDE_ASSERT_CONSTANT_(c11); \ + SIMDE_ASSERT_CONSTANT_(c12); \ + SIMDE_ASSERT_CONSTANT_(c13); \ + SIMDE_ASSERT_CONSTANT_(c14); \ + SIMDE_ASSERT_CONSTANT_(c15); \ + \ + simde_wasm_i8x16_make( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i8x16_const ( + int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6,
int8_t c7, + int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + return simde_wasm_i8x16_make( + c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i8x16_const( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + simde_wasm_i8x16_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i16x8_const( \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + wasm_i16x8_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i16x8_const( \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + SIMDE_ASSERT_CONSTANT_(c4); \ + SIMDE_ASSERT_CONSTANT_(c5); \ + SIMDE_ASSERT_CONSTANT_(c6); \ + SIMDE_ASSERT_CONSTANT_(c7); \ + \ + simde_wasm_i16x8_make( \ + c0, c1, c2, c3, c4, c5, c6, c7); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i16x8_const ( + int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) { + return simde_wasm_i16x8_make( + c0, c1, c2, c3, c4, c5, c6, c7); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i16x8_const( \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + simde_wasm_i16x8_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i32x4_const( \ + c0, c1, c2, c3) \ + wasm_i32x4_const( \ + (c0), (c1), (c2), (c3)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i32x4_const( \ + c0, c1, c2, c3) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + \ + simde_wasm_i32x4_make( \ + c0, c1, c2, c3); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i32x4_const ( + int32_t c0, int32_t c1, int32_t c2, int32_t c3) { + return simde_wasm_i32x4_make( + c0, c1, c2, c3); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i32x4_const( \ + c0, c1, c2, c3) \ + simde_wasm_i32x4_const( \ + (c0), (c1), (c2), (c3)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i64x2_const( \ + c0, c1) \ + wasm_i64x2_const( \ + (c0), (c1)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i64x2_const( \ + c0, c1) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + \ + simde_wasm_i64x2_make( \ + c0, c1); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i64x2_const ( + int64_t c0, int64_t c1) { + return simde_wasm_i64x2_make( + c0, c1); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i64x2_const( \ + c0, c1) \ + simde_wasm_i64x2_const( \ + (c0), (c1)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_f32x4_const( \ + c0, c1, c2, c3) \ + wasm_f32x4_const( \ + (c0), (c1), (c2), (c3)) +#elif 
defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_f32x4_const( \ + c0, c1, c2, c3) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + \ + simde_wasm_f32x4_make( \ + c0, c1, c2, c3); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_f32x4_const ( + simde_float32 c0, simde_float32 c1, simde_float32 c2, simde_float32 c3) { + return simde_wasm_f32x4_make( + c0, c1, c2, c3); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_f32x4_const( \ + c0, c1, c2, c3) \ + simde_wasm_f32x4_const( \ + (c0), (c1), (c2), (c3)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_f64x2_const( \ + c0, c1) \ + wasm_f64x2_const( \ + (c0), (c1)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_f64x2_const( \ + c0, c1) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + \ + simde_wasm_f64x2_make( \ + c0, c1); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_f64x2_const ( + simde_float64 c0, simde_float64 c1) { + return simde_wasm_f64x2_make( + c0, c1); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_f64x2_const( \ + c0, c1) \ + simde_wasm_f64x2_const( \ + (c0), (c1)) +#endif + +/* splat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_splat (int8_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_set1_epi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_splat(a) simde_wasm_i8x16_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_splat (int16_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_set1_epi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_splat(a) simde_wasm_i16x8_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_splat (int32_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_set1_epi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_splat(a) simde_wasm_i32x4_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_splat (int64_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + r_.sse_m128i = _mm_set1_epi64x(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_splat(a) simde_wasm_i64x2_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_splat (simde_float32 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_set1_ps(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_splat(a) simde_wasm_f32x4_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_splat (simde_float64 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_set1_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_splat(a) simde_wasm_f64x2_splat((a)) +#endif + +/* load_splat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load8_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load8_splat(mem); + #else + int8_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i8x16_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load8_splat(mem) simde_wasm_v128_load8_splat((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load16_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load16_splat(mem); + #else + int16_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i16x8_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load16_splat(mem) simde_wasm_v128_load16_splat((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t 
+simde_wasm_v128_load32_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load32_splat(mem); + #else + int32_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i32x4_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load32_splat(mem) simde_wasm_v128_load32_splat((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load64_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load64_splat(mem); + #else + int64_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i64x2_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load64_splat(mem) simde_wasm_v128_load64_splat((mem)) +#endif + +/* extract_lane + * + * Note that, unlike normal WASM SIMD128 we return intN_t instead of + * int for sizeof(X) <= sizeof(int). This is done for portability; + * the regular API doesn't have to worry about things like int being + * 16 bits (like on AVR). + * + * This does mean that code which works in SIMDe may not work without + * changes on WASM, but luckily the necessary changes (i.e., casting + * the return values to smaller type when assigning to the smaller + * type) mean the code will work in *both* SIMDe and a native + * implementation. If you use the simde_* prefixed functions it will + * always work. */ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_wasm_i8x16_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i8[lane & 15]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(int8_t, wasm_i8x16_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_wasm_i8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(simde_v128_to_m128i(a), (lane) & 15)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_wasm_i8x16_extract_lane(a, lane) vgetq_lane_s8(simde_v128_to_neon_i8(a), (lane) & 15) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_extract_lane(a, lane) simde_wasm_i8x16_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_wasm_i16x8_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i16[lane & 7]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE2_NATIVE) + #define simde_wasm_i16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(int16_t, _mm_extract_epi16((a), (lane) & 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i16x8_extract_lane(a, lane) vgetq_lane_s16(simde_v128_to_neon_i16(a), (lane) & 7) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extract_lane(a, lane) simde_wasm_i16x8_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_wasm_i32x4_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i32[lane & 3]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i32x4_extract_lane(a, lane) HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_wasm_i32x4_extract_lane(a, lane) HEDLEY_STATIC_CAST(int32_t, _mm_extract_epi32((a), 
(lane) & 3)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i32x4_extract_lane(a, lane) vgetq_lane_s32(simde_v128_to_neon_i32(a), (lane) & 3) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extract_lane(a, lane) simde_wasm_i32x4_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_wasm_i64x2_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i64[lane & 1]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i64x2_extract_lane(a, lane) HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) + #define simde_wasm_i64x2_extract_lane(a, lane) HEDLEY_STATIC_CAST(int64_t, _mm_extract_epi64((a), (lane) & 1)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i64x2_extract_lane(a, lane) vgetq_lane_s64(simde_v128_to_neon_i64(a), (lane) & 1) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extract_lane(a, lane) simde_wasm_i64x2_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_wasm_u8x16_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.u8[lane & 15]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_u8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(uint8_t, wasm_u8x16_extract_lane((a), (lane))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_wasm_u8x16_extract_lane(a, lane) vgetq_lane_u8(simde_v128_to_neon_u8(a), (lane) & 15) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_extract_lane(a, lane) simde_wasm_u8x16_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_wasm_u16x8_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.u16[lane & 7]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_u16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(uint16_t, wasm_u16x8_extract_lane((a), (lane))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_u16x8_extract_lane(a, lane) vgetq_lane_u16(simde_v128_to_neon_u16(a), (lane) & 7) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extract_lane(a, lane) simde_wasm_u16x8_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_wasm_f32x4_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.f32[lane & 3]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f32x4_extract_lane(a, lane) wasm_f32x4_extract_lane((a), (lane)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_wasm_f32x4(a, lane) _mm_extract_ps(simde_v128_to_m128(a), (lane) & 3) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f32x4_extract_lane(a, lane) vgetq_lane_f32(simde_v128_to_neon_f32(a), (lane) & 3) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_extract_lane(a, lane) simde_wasm_f32x4_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_wasm_f64x2_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = 
simde_v128_to_private(a); + return a_.f64[lane & 1]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f64x2_extract_lane(a, lane) wasm_f64x2_extract_lane((a), (lane)) +#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f64x2_extract_lane(a, lane) vgetq_lane_f64(simde_v128_to_neon_f64(a), (lane) & 1) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_extract_lane(a, lane) simde_wasm_f64x2_extract_lane((a), (lane)) +#endif + +/* replace_lane */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_replace_lane (simde_v128_t a, const int lane, int8_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i8[lane & 15] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i8x16_replace_lane(a, lane, value) wasm_i8x16_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) + #define simde_wasm_i8x16_replace_lane(a, lane, value) HEDLEY_REINTERPRET_CAST(simde_v128_t, _mm_insert_epi8((a), (value), (lane) & 15)) + #else + #define simde_wasm_i8x16_replace_lane(a, lane, value) _mm_insert_epi8((a), (value), (lane) & 15) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_wasm_i8x16_replace_lane(a, lane, value) simde_v128_from_neon_i8(vsetq_lane_s8((value), simde_v128_to_neon_i8(a), (lane) & 15)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_replace_lane(a, lane, value) simde_wasm_i8x16_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_replace_lane (simde_v128_t a, const int lane, int16_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i16[lane & 7] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i16x8_replace_lane(a, lane, value) wasm_i16x8_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE2_NATIVE) + #define simde_wasm_i16x8_replace_lane(a, lane, value) _mm_insert_epi16((a), (value), (lane) & 7) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i16x8_replace_lane(a, lane, value) simde_v128_from_neon_i16(vsetq_lane_s16((value), simde_v128_to_neon_i16(a), (lane) & 7)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_replace_lane(a, lane, value) simde_wasm_i16x8_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_replace_lane (simde_v128_t a, const int lane, int32_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i32[lane & 3] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i32x4_replace_lane(a, lane, value) wasm_i32x4_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) + #define simde_wasm_i32x4_replace_lane(a, lane, value) HEDLEY_REINTERPRET_CAST(simde_v128_t, _mm_insert_epi32((a), (value), (lane) & 3)) + #else + #define simde_wasm_i32x4_replace_lane(a, lane, value) _mm_insert_epi32((a), (value), (lane) & 3) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i32x4_replace_lane(a, lane, value) 
simde_v128_from_neon_i32(vsetq_lane_s32((value), simde_v128_to_neon_i32(a), (lane) & 3)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_replace_lane(a, lane, value) simde_wasm_i32x4_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_replace_lane (simde_v128_t a, const int lane, int64_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i64[lane & 1] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i64x2_replace_lane(a, lane, value) wasm_i64x2_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) + #define simde_wasm_i64x2_replace_lane(a, lane, value) _mm_insert_epi64((a), (value), (lane) & 1) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i64x2_replace_lane(a, lane, value) simde_v128_from_neon_i64(vsetq_lane_s64((value), simde_v128_to_neon_i64(a), (lane) & 1)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_replace_lane(a, lane, value) simde_wasm_i64x2_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_replace_lane (simde_v128_t a, const int lane, simde_float32 value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.f32[lane & 3] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f32x4_replace_lane(a, lane, value) wasm_f32x4_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f32x4_replace_lane(a, lane, value) simde_v128_from_neon_f32(vsetq_lane_f32((value), simde_v128_to_neon_f32(a), (lane) & 3)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_replace_lane(a, lane, value) simde_wasm_f32x4_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_replace_lane (simde_v128_t a, const int lane, simde_float64 value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.f64[lane & 1] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f64x2_replace_lane(a, lane, value) wasm_f64x2_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f64x2_replace_lane(a, lane, value) simde_v128_from_neon_f64(vsetq_lane_f64((value), simde_v128_to_neon_f64(a), (lane) & 1)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_replace_lane(a, lane, value) simde_wasm_f64x2_replace_lane((a), (lane), (value)) +#endif + +/* eq */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 == b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] 
== b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_eq(a, b) simde_wasm_i8x16_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_eq(a, b) simde_wasm_i16x8_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_eq(a, b) simde_wasm_i32x4_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_eq(a, b) simde_wasm_i64x2_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpeq_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_eq(a, b) simde_wasm_f32x4_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpeq_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 == b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_eq(a, b) simde_wasm_f64x2_eq((a), (b)) +#endif + +/* ne */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmvnq_u8(vceqq_s8(a_.neon_i8, b_.neon_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 != b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_ne(a, b) simde_wasm_i8x16_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmvnq_u16(vceqq_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 != b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_ne(a, b) simde_wasm_i16x8_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 != b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_ne(a, b) simde_wasm_i32x4_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(a_.neon_i64, b_.neon_i64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 != b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_ne(a, b) simde_wasm_i64x2_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpneq_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 != b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_ne(a, b) simde_wasm_f32x4_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpneq_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 != b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_ne(a, b) simde_wasm_f64x2_ne((a), (b)) +#endif + +/* lt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmplt_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 < b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_lt(a, b) simde_wasm_i8x16_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmplt_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 < b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_lt(a, b) simde_wasm_i16x8_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmplt_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_lt(a, b) simde_wasm_i32x4_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t tmp = vorrq_s32( + vandq_s32( + vreinterpretq_s32_u32(vceqq_s32(b_.neon_i32, a_.neon_i32)), + vsubq_s32(a_.neon_i32, b_.neon_i32) + ), + vreinterpretq_s32_u32(vcgtq_s32(b_.neon_i32, a_.neon_i32)) + ); + int32x4x2_t trn = vtrnq_s32(tmp, tmp); + r_.neon_i32 = trn.val[1]; + #elif defined(SIMDE_X86_SSE4_2_NATIVE) + r_.sse_m128i = _mm_cmpgt_epi64(b_.sse_m128i, a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746 */ + r_.sse_m128i = + _mm_shuffle_epi32( + _mm_or_si128( + _mm_and_si128( + _mm_cmpeq_epi32(b_.sse_m128i, a_.sse_m128i), + _mm_sub_epi64(a_.sse_m128i, b_.sse_m128i) + ), + _mm_cmpgt_epi32( + b_.sse_m128i, + a_.sse_m128i + ) + ), + _MM_SHUFFLE(3, 3, 1, 1) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) tmp = + vec_or( + vec_and( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(b_.altivec_i32, a_.altivec_i32)), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_sub( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.altivec_i32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b_.altivec_i32) + )) + ), + vec_cmpgt(b_.altivec_i32, a_.altivec_i32) + ); + r_.altivec_i32 = vec_mergeo(tmp, tmp); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_lt(a, b) simde_wasm_i64x2_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a_.altivec_u8, b_.altivec_u8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_subs_epu8(b_.sse_m128i, a_.sse_m128i); + r_.sse_m128i = _mm_adds_epu8(tmp, _mm_sub_epi8(_mm_setzero_si128(), tmp)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 < b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? ~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_lt(a, b) simde_wasm_u8x16_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a_.altivec_u16, b_.altivec_u16)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_subs_epu16(b_.sse_m128i, a_.sse_m128i); + r_.sse_m128i = _mm_adds_epu16(tmp, _mm_sub_epi16(_mm_setzero_si128(), tmp)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 < b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_lt(a, b) simde_wasm_u16x8_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_xor_si128( + _mm_cmpgt_epi32(b_.sse_m128i, a_.sse_m128i), + _mm_srai_epi32(_mm_xor_si128(b_.sse_m128i, a_.sse_m128i), 31) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a_.altivec_u32, b_.altivec_u32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 < b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_lt(a, b) simde_wasm_u32x4_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmplt_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 < b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_lt(a, b) simde_wasm_f32x4_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmplt_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 < b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_lt(a, b) simde_wasm_f64x2_lt((a), (b)) +#endif + +/* gt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_gt(a, b); + #else + return simde_wasm_i8x16_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_gt(a, b) simde_wasm_i8x16_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_gt(a, b); + #else + return simde_wasm_i16x8_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_gt(a, b) simde_wasm_i16x8_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_gt(a, b); + #else + return simde_wasm_i32x4_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_gt(a, b) simde_wasm_i32x4_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_gt(a, b); + #else + return simde_wasm_i64x2_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_gt(a, b) simde_wasm_i64x2_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_gt(a, b); + #else + return simde_wasm_u8x16_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_gt(a, b) simde_wasm_u8x16_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_gt(a, b); + #else + return simde_wasm_u16x8_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_gt(a, b) simde_wasm_u16x8_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_gt(a, b); + #else + return simde_wasm_u32x4_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_gt(a, b) simde_wasm_u32x4_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_gt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpgt_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 > b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_gt(a, b) simde_wasm_f32x4_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_gt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpgt_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 > b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_gt(a, b) simde_wasm_f64x2_gt((a), (b)) +#endif + +/* le */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(a_.sse_m128i, _mm_min_epi8(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_le(a, b) simde_wasm_i8x16_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(a_.sse_m128i, _mm_min_epi16(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_le(a, b) simde_wasm_i16x8_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(a_.sse_m128i, _mm_min_epi32(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_le(a, b) simde_wasm_i32x4_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi64(a_.sse_m128i, _mm_min_epi64(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_le(a, b) simde_wasm_i64x2_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_le(a, b) simde_wasm_u8x16_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_le(a, b) simde_wasm_u16x8_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_le(a, b) simde_wasm_u32x4_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmple_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 <= b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_le(a, b) simde_wasm_f32x4_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmple_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 <= b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_le(a, b) simde_wasm_f64x2_le((a), (b)) +#endif + +/* ge */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(_mm_min_epi8(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_ge(a, b) simde_wasm_i8x16_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(_mm_min_epi16(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_ge(a, b) simde_wasm_i16x8_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(_mm_min_epi32(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_ge(a, b) simde_wasm_i32x4_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi64(_mm_min_epi64(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_ge(a, b) simde_wasm_i64x2_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(_mm_min_epu8(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? ~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_ge(a, b) simde_wasm_u8x16_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(_mm_min_epu16(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_ge(a, b) simde_wasm_u16x8_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(_mm_min_epu32(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_ge(a, b) simde_wasm_u32x4_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpge_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 >= b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_ge(a, b) simde_wasm_f32x4_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpge_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 >= b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_ge(a, b) simde_wasm_f64x2_ge((a), (b)) +#endif + +/* not */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_not (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_not(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(a_.sse_m128i, _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_not(a) simde_wasm_v128_not((a)) +#endif + +/* and */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_and (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_and_si128(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_and(a, b) simde_wasm_v128_and((a), (b)) +#endif + +/* or */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_or (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_or_si128(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_or(a, b) simde_wasm_v128_or((a), (b)) +#endif + +/* xor */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_xor (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_xor(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if 
defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_xor(a, b) simde_wasm_v128_xor((a), (b)) +#endif + +/* andnot */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_andnot (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_andnot(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_andnot_si128(b_.sse_m128i, a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f & ~b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & ~b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_andnot(a, b) simde_wasm_v128_andnot((a), (b)) +#endif + +/* bitselect */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_bitselect (simde_v128_t a, simde_v128_t b, simde_v128_t mask) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_bitselect(a, b, mask); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + mask_ = simde_v128_to_private(mask), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_ternarylogic_epi32(mask_.sse_m128i, a_.sse_m128i, b_.sse_m128i, 0xca); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128 (mask_.sse_m128i, a_.sse_m128i), + _mm_andnot_si128(mask_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(b_.altivec_i32, a_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = (a_.i32f & mask_.i32f) | (b_.i32f & ~mask_.i32f); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = (a_.i32f[i] & mask_.i32f[i]) | (b_.i32f[i] & ~mask_.i32f[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_bitselect(a, b, c) simde_wasm_v128_bitselect((a), (b), (c)) +#endif + +/* bitmask */ + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i8x16_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_epi8(a_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i8[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_bitmask(a) simde_wasm_i8x16_bitmask((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i16x8_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_epi8(_mm_packs_epi16(a_.sse_m128i, _mm_setzero_si128()))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint16_t md[8] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + uint16x8_t extended = vreinterpretq_u16_s16(vshrq_n_s16(a_.neon_i16, 15)); + uint16x8_t masked = vandq_u16(vld1q_u16(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(masked); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(masked)); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, 
vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i16[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_bitmask(a) simde_wasm_i16x8_bitmask((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i32x4_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_ps(a_.sse_m128)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_bitmask(a) simde_wasm_i32x4_bitmask((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i64x2_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_pd(a_.sse_m128d)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i64[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_bitmask(a) simde_wasm_i64x2_bitmask((a)) +#endif + +/* abs */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_abs_epi8(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_VECTOR_SCALAR) + __typeof__(r_.i8) mask = HEDLEY_REINTERPRET_CAST(__typeof__(mask), a_.i8 < 0); + r_.i8 = (-a_.i8 & mask) | (a_.i8 & ~mask); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT8_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_abs(a) simde_wasm_i8x16_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_abs_epi16(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT8_C(0)) ? 
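  /* One semantic note on this abs family: like the underlying wasm
     instructions, it wraps rather than saturates, so for example
     wasm_i8x16_abs of a lane holding INT8_MIN leaves it at INT8_MIN,
     which is exactly what the two's-complement negation in these scalar
     fallbacks (and vabsq_* / _mm_abs_*) produces. */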
-a_.i16[i] : a_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_abs(a) simde_wasm_i16x8_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_abs_epi32(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32) z = { 0, }; + __typeof__(r_.i32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < z); + r_.i32 = (-a_.i32 & m) | (a_.i32 & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_abs(a) simde_wasm_i32x4_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_abs_epi64(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vabsq_s64(a_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_abs(a_.altivec_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i64) z = { 0, }; + __typeof__(r_.i64) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < z); + r_.i64 = (-a_.i64 & m) | (a_.i64 & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_abs(a) simde_wasm_i64x2_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_andnot_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_signbit(a_.f32[i]) ? 
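  /* The SSE2 branch of wasm_f32x4_abs above just clears the IEEE sign bit,
     which is why this scalar fallback keys on signbit() instead of "< 0":
     that way -0.0f, and NaNs with the sign bit set, come out positive too.
     As a bit-level sketch, 0xBF800000 (-1.0f) with bit 31 cleared is
     0x3F800000 (1.0f). */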
-a_.f32[i] : a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_abs(a) simde_wasm_f32x4_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_andnot_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_signbit(a_.f64[i]) ? -a_.f64[i] : a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_abs(a) simde_wasm_f64x2_abs((a)) +#endif + +/* neg */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi8(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vnegq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_i8 = vec_neg(a_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = -a_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = -a_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_neg(a) simde_wasm_i8x16_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi16(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vnegq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i16 = vec_neg(a_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = -a_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = -a_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_neg(a) simde_wasm_i16x8_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi32(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vnegq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i32 = vec_neg(a_.altivec_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = -a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / 
sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = -a_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_neg(a) simde_wasm_i32x4_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi64(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vnegq_s64(a_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_neg(a_.altivec_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = -a_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = -a_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_neg(a) simde_wasm_i64x2_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_neg(a) simde_wasm_f32x4_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_neg(a) simde_wasm_f64x2_neg((a)) +#endif + +/* any_true */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_v128_any_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_any_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + simde_bool r = 0; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r = !_mm_test_all_zeros(a_.sse_m128i, _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r = _mm_movemask_epi8(_mm_cmpeq_epi8(a_.sse_m128i, _mm_setzero_si128())) != 0xffff; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = !!vmaxvq_u32(a_.neon_u32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
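/* v128.any_true simply asks whether any bit anywhere in the vector is set;
   every branch here reduces the vector and tests against zero. As a purely
   illustrative identity (not how the code works):
     wasm_v128_any_true(v) == !wasm_i8x16_all_true(wasm_i8x16_eq(v, wasm_i8x16_splat(0))). */ +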
uint32x2_t tmp = vpmax_u32(vget_low_u32(a_.u32), vget_high_u32(a_.u32)); + r = vget_lane_u32(tmp, 0); + r |= vget_lane_u32(tmp, 1); + r = !!r; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = HEDLEY_STATIC_CAST(simde_bool, vec_any_ne(a_.altivec_i32, vec_splats(0))); + #else + int_fast32_t ri = 0; + SIMDE_VECTORIZE_REDUCTION(|:ri) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + ri |= (a_.i32f[i]); + } + r = !!ri; + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_any_true(a) simde_wasm_v128_any_true((a)) +#endif + +/* all_true */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i8x16_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi8(a_.sse_m128i, _mm_set1_epi8(INT8_C(0))), _mm_set1_epi8(~INT8_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_epi8(_mm_cmpeq_epi8(a_.sse_m128i, _mm_setzero_si128())) == 0; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vmaxvq_u8(vceqzq_u8(a_.neon_u8)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t zeroes = vdupq_n_u8(0); + uint8x16_t false_set = vceqq_u8(a_.neon_u8, vdupq_n_u8(0)); + uint32x4_t d_all_true = vceqq_u32(vreinterpretq_u32_u8(false_set), vreinterpretq_u32_u8(zeroes)); + uint32x2_t q_all_true = vpmin_u32(vget_low_u32(d_all_true), vget_high_u32(d_all_true)); + + return !!( + vget_lane_u32(q_all_true, 0) & + vget_lane_u32(q_all_true, 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(signed char, 0)))); + #else + int8_t r = !INT8_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r &= !!(a_.i8[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_all_true(a) simde_wasm_i8x16_all_true((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i16x8_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi16(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi16(~INT16_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_epi8(_mm_cmpeq_epi16(a_.sse_m128i, _mm_setzero_si128())) == 0; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vmaxvq_u16(vceqzq_u16(a_.neon_u16)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t zeroes = vdupq_n_u16(0); + uint16x8_t false_set = vceqq_u16(a_.neon_u16, vdupq_n_u16(0)); + uint32x4_t d_all_true = vceqq_u32(vreinterpretq_u32_u16(false_set), vreinterpretq_u32_u16(zeroes)); + uint32x2_t q_all_true = vpmin_u32(vget_low_u32(d_all_true), vget_high_u32(d_all_true)); + + return !!( + vget_lane_u32(q_all_true, 0) & + vget_lane_u32(q_all_true, 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(signed short, 0)))); + #else + int16_t r = !INT16_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r &= !!(a_.i16[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + 
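/* For the all_true family, "true" means the lane is nonzero, not that all
   of its bits are set. The AArch64 path above leans on that: vceqzq_u8
   yields 0xFF for zero lanes, so vmaxvq_u8 of the result is 0 exactly when
   no lane was zero. As a small sketch, wasm_i8x16_all_true applied to
   wasm_i8x16_splat(1) is 1 even though only one bit per lane is set. */ +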
#define wasm_i16x8_all_true(a) simde_wasm_i16x8_all_true((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i32x4_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi32(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a_.sse_m128i, _mm_setzero_si128()))) == 0; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vmaxvq_u32(vceqzq_u32(a_.neon_u32)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t d_all_true = vmvnq_u32(vceqq_u32(a_.neon_u32, vdupq_n_u32(0))); + uint32x2_t q_all_true = vpmin_u32(vget_low_u32(d_all_true), vget_high_u32(d_all_true)); + + return !!( + vget_lane_u32(q_all_true, 0) & + vget_lane_u32(q_all_true, 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(signed int, 0)))); + #else + int32_t r = !INT32_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r &= !!(a_.i32[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_all_true(a) simde_wasm_i32x4_all_true((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i64x2_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) && defined(__wasm_unimplemented_simd128__) + return wasm_i64x2_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi64(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(_mm_cmpeq_pd(a_.sse_m128d, _mm_setzero_pd())) == 0; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i64, HEDLEY_REINTERPRET_CAST(__typeof__(a_.altivec_i64), vec_splats(0)))); + #else + int64_t r = !INT32_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r &= !!(a_.i64[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__wasm_unimplemented_simd128__)) + #define wasm_i64x2_all_true(a) simde_wasm_i64x2_all_true((a)) +#endif + +/* shl */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vshlq_s8(a_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, count & 7))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_sl(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i8 = a_.i8 << (count & 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] << (count & 7)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_shl(a, count) 
simde_wasm_i8x16_shl((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a_.sse_m128i, _mm_cvtsi32_si128(count & 15)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count & 15))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_sl(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i16 = a_.i16 << (count & 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (count & 15)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_shl(a, count) simde_wasm_i16x8_shl((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a_.sse_m128i, _mm_cvtsi32_si128(count & 31)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count & 31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_sl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i32 = a_.i32 << (count & 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (count & 31)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_shl(a, count) simde_wasm_i32x4_shl((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_BUG_CLANG_60655) + count = count & 63; + #endif + return wasm_i64x2_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a_.sse_m128i, _mm_cvtsi32_si128(count & 63)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshlq_s64(a_.neon_i64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, count & 63))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_sl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i64 = a_.i64 << (count & 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (count & 63)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_shl(a, count) simde_wasm_i64x2_shl((a), (count)) +#endif + +/* shr */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_shr (simde_v128_t a, 
uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vshlq_s8(a_.neon_i8, vdupq_n_s8(-HEDLEY_STATIC_CAST(int8_t, count & 7))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_sra(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i8 = a_.i8 >> (count & 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> (count & 7)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_shr(a, count) simde_wasm_i8x16_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a_.sse_m128i, _mm_cvtsi32_si128(count & 15)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(-HEDLEY_STATIC_CAST(int16_t, count & 15))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_sra(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i16 = a_.i16 >> (count & 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> (count & 15)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_shr(a, count) simde_wasm_i16x8_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_sra_epi32(a_.sse_m128i, _mm_cvtsi32_si128(count & 31)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-HEDLEY_STATIC_CAST(int32_t, count & 31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_sra(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i32 = a_.i32 >> (count & 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> (count & 31)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_shr(a, count) simde_wasm_i32x4_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_BUG_CLANG_60655) + count = count & 63; + #endif + return wasm_i64x2_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_sra_epi64(a_.sse_m128i, _mm_cvtsi32_si128(count 
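  /* Shift counts are taken modulo the lane width throughout this section
     (count & 7, & 15, & 31, & 63), matching the wasm shift semantics, so
     e.g. wasm_i32x4_shl(v, 33) behaves like wasm_i32x4_shl(v, 1). The
     i*_shr functions are arithmetic (sign-propagating) shifts; the u*_shr
     ones further down are logical. */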
& 63)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshlq_s64(a_.neon_i64, vdupq_n_s64(-HEDLEY_STATIC_CAST(int64_t, count & 63))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_sra(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i64 = a_.i64 >> (count & 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> (count & 63)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_shr(a, count) simde_wasm_i64x2_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vshlq_u8(a_.neon_u8, vdupq_n_s8(-HEDLEY_STATIC_CAST(int8_t, count & 7))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_sr(a_.altivec_u8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u8 = a_.u8 >> (count & 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> (count & 7)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_shr(a, count) simde_wasm_u8x16_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a_.sse_m128i, _mm_cvtsi32_si128(count & 15)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(-HEDLEY_STATIC_CAST(int16_t, count & 15))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_sra(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u16 = a_.u16 >> (count & 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> (count & 15)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_shr(a, count) simde_wasm_u16x8_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_srl_epi32(a_.sse_m128i, _mm_cvtsi32_si128(count & 31)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(-HEDLEY_STATIC_CAST(int32_t, count & 31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_sra(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31))); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u32 = a_.u32 >> (count & 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> (count & 31)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_shr(a, count) simde_wasm_u32x4_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_BUG_CLANG_60655) + count = count & 63; + #endif + return wasm_u64x2_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_srl_epi64(a_.sse_m128i, _mm_cvtsi32_si128(count & 63)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-HEDLEY_STATIC_CAST(int64_t, count & 63))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_sra(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u64 = a_.u64 >> (count & 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> (count & 63)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_shr(a, count) simde_wasm_u64x2_shr((a), (count)) +#endif + +/* add */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_add(a, b) simde_wasm_i8x16_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_add(a, b) simde_wasm_i16x8_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi32(a_.sse_m128i, 
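  /* These lane-wise adds (and the subs that follow) wrap modulo 2^N just
     like the wasm instructions they model, so e.g. adding 1 to an i8 lane
     holding 127 yields -128; the saturating i8x16.add_sat_s / add_sat_u
     forms are separate wasm operations, handled elsewhere. */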
b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_add(a, b) simde_wasm_i32x4_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_add(a, b) simde_wasm_i64x2_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_add_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_add(a, b) simde_wasm_f32x4_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_add_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_add(a, b) simde_wasm_f64x2_add((a), (b)) +#endif + +/* sub */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_sub(a, b) simde_wasm_i8x16_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_sub (simde_v128_t a, 
simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_sub(a, b) simde_wasm_i16x8_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_sub(a, b) simde_wasm_i32x4_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_sub(a, b) simde_wasm_i64x2_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_sub_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_sub(a, b) simde_wasm_f32x4_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_sub_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + 
#endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_sub(a, b) simde_wasm_f64x2_sub((a), (b)) +#endif + +/* mul */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_pack( + vec_mule(a_.altivec_i16, b_.altivec_i16), + vec_mulo(a_.altivec_i16, b_.altivec_i16) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = a_.i16 * b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] * b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_mul(a, b) simde_wasm_i16x8_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_mullo_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = a_.i32 * b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] * b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_mul(a, b) simde_wasm_i32x4_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.sse_m128i = _mm_mullo_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_mul(a, b) simde_wasm_i64x2_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_mul_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_mul(a, b) 
simde_wasm_f32x4_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_mul_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_mul(a, b) simde_wasm_f64x2_mul((a), (b)) +#endif + +/* q15mulr_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_q15mulr_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_q15mulr_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + /* https://github.com/WebAssembly/simd/pull/365 */ + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqrdmulhq_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + tmp += UINT32_C(0x4000); + tmp >>= 15; + r_.i16[i] = (tmp < INT16_MIN) ? INT16_MIN : ((tmp > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, tmp)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_q15mulr_sat(a, b) simde_wasm_i16x8_q15mulr_sat((a), (b)) +#endif + +/* min */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmplt_epi8(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(m, a_.sse_m128i), + _mm_andnot_si128(m, b_.sse_m128i) + ); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
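  /* Back to wasm_i16x8_q15mulr_sat above: it is the standard Q15 rounding
     multiply, (a * b + 0x4000) >> 15 with saturation. Two illustrative
     lanes: 0x4000 * 0x4000 (0.5 * 0.5) rounds to 0x2000 (0.25), and
     INT16_MIN * INT16_MIN lands on 32768 before clamping, which saturates
     to INT16_MAX, the same corner vqrdmulhq_s16 saturates. */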
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_min(a, b) simde_wasm_i8x16_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_min_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_min(a, b) simde_wasm_i16x8_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmplt_epi32(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(m, a_.sse_m128i), + _mm_andnot_si128(m, b_.sse_m128i) + ); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_min(a, b) simde_wasm_i32x4_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_min_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_min(a, b) simde_wasm_u8x16_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.sse_m128i = _mm_sub_epi16(a, _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_min(a, b) simde_wasm_u16x8_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epu32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i i32_min = _mm_set1_epi32(INT32_MIN); + const __m128i difference = _mm_sub_epi32(a_.sse_m128i, b_.sse_m128i); + __m128i m = + _mm_cmpeq_epi32( + /* _mm_subs_epu32(a_.sse_m128i, b_.sse_m128i) */ + _mm_and_si128( + difference, + _mm_xor_si128( + _mm_cmpgt_epi32( + _mm_xor_si128(difference, i32_min), + _mm_xor_si128(a_.sse_m128i, i32_min) + ), + _mm_set1_epi32(~INT32_C(0)) + ) + ), + _mm_setzero_si128() + ); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(m, a_.sse_m128i), + _mm_andnot_si128(m, b_.sse_m128i) + ); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
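  /* The SSE2 fallback of wasm_u16x8_min above uses the identity
     min(a, b) == a - sat_sub(a, b) for unsigned lanes, since the saturating
     subtract is max(a - b, 0). Scalar check: a=3, b=7 gives 3 - 0 = 3;
     a=7, b=3 gives 7 - 4 = 3. SSE2 has no unsigned saturating subtract for
     32-bit lanes, hence the sign-bias (xor with INT32_MIN) comparison dance
     in the u32x4 branch just above. */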
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_min(a, b) simde_wasm_u32x4_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L202 + simde_v128_private scratch; + scratch.sse_m128 = a_.sse_m128; + scratch.sse_m128 = _mm_min_ps(scratch.sse_m128, b_.sse_m128); + r_.sse_m128 = b_.sse_m128; + r_.sse_m128 = _mm_min_ps(r_.sse_m128, a_.sse_m128); + scratch.sse_m128 = _mm_or_ps(scratch.sse_m128, r_.sse_m128); + r_.sse_m128 = _mm_cmpunord_ps(r_.sse_m128, scratch.sse_m128); + scratch.sse_m128 = _mm_or_ps(scratch.sse_m128, r_.sse_m128); + r_.sse_m128i = _mm_srli_epi32(r_.sse_m128i, 10); + r_.sse_m128 = _mm_andnot_ps(r_.sse_m128, scratch.sse_m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_WASM_SIMD128_FMIN(a_.f32[i], b_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_min(a, b) simde_wasm_f32x4_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L263 + simde_v128_private scratch; + scratch.sse_m128d = a_.sse_m128d; + scratch.sse_m128d = _mm_min_pd(scratch.sse_m128d, b_.sse_m128d); + r_.sse_m128d = b_.sse_m128d; + r_.sse_m128d = _mm_min_pd(r_.sse_m128d, a_.sse_m128d); + scratch.sse_m128d = _mm_or_pd(scratch.sse_m128d, r_.sse_m128d); + r_.sse_m128d = _mm_cmpunord_pd(r_.sse_m128d, scratch.sse_m128d); + scratch.sse_m128d = _mm_or_pd(scratch.sse_m128d, r_.sse_m128d); + r_.sse_m128i = _mm_srli_epi64(r_.sse_m128i, 13); + r_.sse_m128d = _mm_andnot_pd(r_.sse_m128d, scratch.sse_m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = SIMDE_WASM_SIMD128_FMIN(a_.f64[i], b_.f64[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_min(a, b) simde_wasm_f64x2_min((a), (b)) +#endif + +/* max */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = 
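  /* On the f32x4/f64x2 min just above: wasm min propagates NaN from either
     operand and prefers -0.0 over +0.0; plain _mm_min_ps guarantees neither
     (its result is order-dependent in those cases). Taking the min in both
     operand orders, OR-ing the results, and patching unordered lanes via
     cmpunord is the V8-inspired workaround; the scalar fallback gets there
     through SIMDE_WASM_SIMD128_FMIN instead. */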
vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i8) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + r_.i8 = (m & a_.i8) | (~m & b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_max(a, b) simde_wasm_i8x16_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_max_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i16) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16); + r_.i16 = (m & a_.i16) | (~m & b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_max(a, b) simde_wasm_i16x8_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + r_.i32 = (m & a_.i32) | (~m & b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_max(a, b) simde_wasm_i32x4_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_max_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.u8) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 > b_.u8); + r_.u8 = (m & a_.u8) | (~m & b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_max(a, b) simde_wasm_u8x16_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.sse_m128i = _mm_add_epi16(b, _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.u16) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 > b_.u16); + r_.u16 = (m & a_.u16) | (~m & b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_max(a, b) simde_wasm_u16x8_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epu32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-886057227 */ + __m128i m = + _mm_xor_si128( + _mm_cmpgt_epi32(a_.sse_m128i, b_.sse_m128i), + _mm_srai_epi32(_mm_xor_si128(a_.sse_m128i, b_.sse_m128i), 31) + ); + r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.u32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 > b_.u32); + r_.u32 = (m & a_.u32) | (~m & b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_max(a, b) simde_wasm_u32x4_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L231 + simde_v128_private scratch; + scratch.sse_m128 = a_.sse_m128; + scratch.sse_m128 = _mm_max_ps(scratch.sse_m128, b_.sse_m128); + r_.sse_m128 = b_.sse_m128; + r_.sse_m128 = _mm_max_ps(r_.sse_m128, a_.sse_m128); + r_.sse_m128 = _mm_xor_ps(r_.sse_m128, scratch.sse_m128); + scratch.sse_m128 = _mm_or_ps(scratch.sse_m128, r_.sse_m128); + scratch.sse_m128 = _mm_sub_ps(scratch.sse_m128, r_.sse_m128); + r_.sse_m128 = _mm_cmpunord_ps(r_.sse_m128, scratch.sse_m128); + r_.sse_m128i = _mm_srli_epi32(r_.sse_m128i, 10); + r_.sse_m128 = _mm_andnot_ps(r_.sse_m128, scratch.sse_m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_WASM_SIMD128_FMAX(a_.f32[i], b_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_max(a, b) simde_wasm_f32x4_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by 
https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L301 + simde_v128_private scratch; + scratch.sse_m128d = a_.sse_m128d; + scratch.sse_m128d = _mm_max_pd(scratch.sse_m128d, b_.sse_m128d); + r_.sse_m128d = b_.sse_m128d; + r_.sse_m128d = _mm_max_pd(r_.sse_m128d, a_.sse_m128d); + r_.sse_m128d = _mm_xor_pd(r_.sse_m128d, scratch.sse_m128d); + scratch.sse_m128d = _mm_or_pd(scratch.sse_m128d, r_.sse_m128d); + scratch.sse_m128d = _mm_sub_pd(scratch.sse_m128d, r_.sse_m128d); + r_.sse_m128d = _mm_cmpunord_pd(r_.sse_m128d, scratch.sse_m128d); + r_.sse_m128i = _mm_srli_epi64(r_.sse_m128i, 13); + r_.sse_m128d = _mm_andnot_pd(r_.sse_m128d, scratch.sse_m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = SIMDE_WASM_SIMD128_FMAX(a_.f64[i], b_.f64[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_max(a, b) simde_wasm_f64x2_max((a), (b)) +#endif + +/* add_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(a_.u8) r1, r2, m; + r1 = a_.u8 + b_.u8; + r2 = (a_.u8 >> 7) + INT8_MAX; + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (r2 ^ b_.u8) | ~(b_.u8 ^ r1)) < 0); + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (r1 & m) | (r2 & ~m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_add_sat(a, b) simde_wasm_i8x16_add_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(a_.u16) r1, r2, m; + r1 = a_.u16 + b_.u16; + r2 = (a_.u16 >> 15) + INT16_MAX; + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (r2 ^ b_.u16) | ~(b_.u16 ^ r1)) < 0); + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (r1 & m) | (r2 & ~m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if 
defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_add_sat(a, b) simde_wasm_i16x8_add_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 + b_.u8; + r_.u8 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), r_.u8 < a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_add_sat(a, b) simde_wasm_u8x16_add_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 + b_.u16; + r_.u16 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), r_.u16 < a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_add_sat(a, b) simde_wasm_u16x8_add_sat((a), (b)) +#endif + +/* avgr */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_avgr (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_avgr(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_avg_epu8(a_.sse_m128i, b_.sse_m128i); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_avgr(a, b) simde_wasm_u8x16_avgr((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_avgr (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_avgr(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_avg_epu16(a_.sse_m128i, b_.sse_m128i); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return 
simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_avgr(a, b) simde_wasm_u16x8_avgr((a), (b)) +#endif + +/* sub_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_subs(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.i8) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (b_.i8 > a_.i8) ^ INT8_MAX); + const __typeof__(r_.i8) diff = a_.i8 - b_.i8; + const __typeof__(r_.i8) saturate = diff_sat ^ diff; + const __typeof__(r_.i8) m = saturate >> 7; + r_.i8 = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_sub_sat(a, b) simde_wasm_i8x16_sub_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_subs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.i16) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (b_.i16 > a_.i16) ^ INT16_MAX); + const __typeof__(r_.i16) diff = a_.i16 - b_.i16; + const __typeof__(r_.i16) saturate = diff_sat ^ diff; + const __typeof__(r_.i16) m = saturate >> 15; + r_.i16 = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_sub_sat(a, b) simde_wasm_i16x8_sub_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u8 = a_.u8 - b_.u8; + r_.u8 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), r_.u8 <= a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / 
sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_sub_sat(a, b) simde_wasm_u8x16_sub_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 - b_.u16; + r_.u16 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), r_.u16 <= a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_sub_sat(a, b) simde_wasm_u16x8_sub_sat((a), (b)) +#endif + +/* pmin */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_pmin (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_pmin(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_min_ps(b_.sse_m128, a_.sse_m128); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = + vbslq_f32( + vcltq_f32(b_.neon_f32, a_.neon_f32), + b_.neon_f32, + a_.neon_f32 + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = + vec_sel( + a_.altivec_f32, + b_.altivec_f32, + vec_cmpgt(a_.altivec_f32, b_.altivec_f32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (b_.f32[i] < a_.f32[i]) ? 
b_.f32[i] : a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_pmin(a, b) simde_wasm_f32x4_pmin((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_pmin (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_pmin(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_min_pd(b_.sse_m128d, a_.sse_m128d); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = + vbslq_f64( + vcltq_f64(b_.neon_f64, a_.neon_f64), + b_.neon_f64, + a_.neon_f64 + ); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = + vec_sel( + a_.altivec_f32, + b_.altivec_f32, + vec_cmpgt(a_.altivec_f32, b_.altivec_f32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (b_.f64[i] < a_.f64[i]) ? b_.f64[i] : a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_pmin(a, b) simde_wasm_f64x2_pmin((a), (b)) +#endif + +/* pmax */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_pmax (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_pmax(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_max_ps(b_.sse_m128, a_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcltq_f32(a_.neon_f32, b_.neon_f32), b_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + int32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( + ( m & HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32)) | + (~m & HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32)) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
b_.f32[i] : a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_pmax(a, b) simde_wasm_f32x4_pmax((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_pmax (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_pmax(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_max_pd(b_.sse_m128d, a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vbslq_f64(vcltq_f64(a_.neon_f64, b_.neon_f64), b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sel(a_.altivec_f64, b_.altivec_f64, vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + int64_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f64 < b_.f64); + r_.f64 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f64), + ( + ( m & HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f64)) | + (~m & HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f64)) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? b_.f64[i] : a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_pmax(a, b) simde_wasm_f64x2_pmax((a), (b)) +#endif + +/* div */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_div (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_div(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_div_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_div(a, b) simde_wasm_f32x4_div((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_div (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_div(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_div_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_div(a, b) simde_wasm_f64x2_div((a), (b)) +#endif + +/* shuffle */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1, const int c2, const int c3, const int c4, const int c5, const int c6, const int c7, + const int c8, const int c9, const int c10, const int c11, const int c12, const int c13, const int c14, const int c15) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = 
simde_v128_to_private(b), + r_; + + r_.i8[ 0] = ( c0 < 16) ? a_.i8[ c0] : b_.i8[ c0 & 15]; + r_.i8[ 1] = ( c1 < 16) ? a_.i8[ c1] : b_.i8[ c1 & 15]; + r_.i8[ 2] = ( c2 < 16) ? a_.i8[ c2] : b_.i8[ c2 & 15]; + r_.i8[ 3] = ( c3 < 16) ? a_.i8[ c3] : b_.i8[ c3 & 15]; + r_.i8[ 4] = ( c4 < 16) ? a_.i8[ c4] : b_.i8[ c4 & 15]; + r_.i8[ 5] = ( c5 < 16) ? a_.i8[ c5] : b_.i8[ c5 & 15]; + r_.i8[ 6] = ( c6 < 16) ? a_.i8[ c6] : b_.i8[ c6 & 15]; + r_.i8[ 7] = ( c7 < 16) ? a_.i8[ c7] : b_.i8[ c7 & 15]; + r_.i8[ 8] = ( c8 < 16) ? a_.i8[ c8] : b_.i8[ c8 & 15]; + r_.i8[ 9] = ( c9 < 16) ? a_.i8[ c9] : b_.i8[ c9 & 15]; + r_.i8[10] = (c10 < 16) ? a_.i8[c10] : b_.i8[c10 & 15]; + r_.i8[11] = (c11 < 16) ? a_.i8[c11] : b_.i8[c11 & 15]; + r_.i8[12] = (c12 < 16) ? a_.i8[c12] : b_.i8[c12 & 15]; + r_.i8[13] = (c13 < 16) ? a_.i8[c13] : b_.i8[c13 & 15]; + r_.i8[14] = (c14 < 16) ? a_.i8[c14] : b_.i8[c14 & 15]; + r_.i8[15] = (c15 < 16) ? a_.i8[c15] : b_.i8[c15 & 15]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i8x16_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + wasm_i8x16_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i8x16_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(8, 16, \ + HEDLEY_REINTERPRET_CAST(int8_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int8_t SIMDE_VECTOR(16), b), \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i8x16_shuffle(a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + simde_wasm_i8x16_shuffle((a), (b), \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1, const int c2, const int c3, const int c4, const int c5, const int c6, const int c7) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + r_.i16[ 0] = (c0 < 8) ? a_.i16[ c0] : b_.i16[ c0 & 7]; + r_.i16[ 1] = (c1 < 8) ? a_.i16[ c1] : b_.i16[ c1 & 7]; + r_.i16[ 2] = (c2 < 8) ? a_.i16[ c2] : b_.i16[ c2 & 7]; + r_.i16[ 3] = (c3 < 8) ? a_.i16[ c3] : b_.i16[ c3 & 7]; + r_.i16[ 4] = (c4 < 8) ? a_.i16[ c4] : b_.i16[ c4 & 7]; + r_.i16[ 5] = (c5 < 8) ? a_.i16[ c5] : b_.i16[ c5 & 7]; + r_.i16[ 6] = (c6 < 8) ? a_.i16[ c6] : b_.i16[ c6 & 7]; + r_.i16[ 7] = (c7 < 8) ? 
a_.i16[ c7] : b_.i16[ c7 & 7]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i16x8_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + wasm_i16x8_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i16x8_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(16, 16, \ + HEDLEY_REINTERPRET_CAST(int16_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int16_t SIMDE_VECTOR(16), b), \ + c0, c1, c2, c3, c4, c5, c6, c7)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i16x8_shuffle(a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + simde_wasm_i16x8_shuffle((a), (b), \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1, const int c2, const int c3) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + r_.i32[ 0] = (c0 < 4) ? a_.i32[ c0] : b_.i32[ c0 & 3]; + r_.i32[ 1] = (c1 < 4) ? a_.i32[ c1] : b_.i32[ c1 & 3]; + r_.i32[ 2] = (c2 < 4) ? a_.i32[ c2] : b_.i32[ c2 & 3]; + r_.i32[ 3] = (c3 < 4) ? a_.i32[ c3] : b_.i32[ c3 & 3]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i32x4_shuffle( \ + a, b, \ + c0, c1, c2, c3) \ + wasm_i32x4_shuffle( \ + a, b, \ + c0, c1, c2, c3) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i32x4_shuffle( \ + a, b, \ + c0, c1, c2, c3) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(32, 16, \ + HEDLEY_REINTERPRET_CAST(int32_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int32_t SIMDE_VECTOR(16), b), \ + c0, c1, c2, c3)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i32x4_shuffle(a, b, \ + c0, c1, c2, c3) \ + simde_wasm_i32x4_shuffle((a), (b), \ + (c0), (c1), (c2), (c3)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + r_.i64[ 0] = (c0 < 2) ? a_.i64[ c0] : b_.i64[ c0 & 1]; + r_.i64[ 1] = (c1 < 2) ? 
a_.i64[ c1] : b_.i64[ c1 & 1]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i64x2_shuffle( \ + a, b, \ + c0, c1) \ + wasm_i64x2_shuffle( \ + a, b, \ + c0, c1) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i64x2_shuffle( \ + a, b, \ + c0, c1) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(64, 16, \ + HEDLEY_REINTERPRET_CAST(int64_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int64_t SIMDE_VECTOR(16), b), \ + c0, c1)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i64x2_shuffle(a, b, \ + c0, c1) \ + simde_wasm_i64x2_shuffle((a), (b), \ + (c0), (c1)) +#endif + +/* swizzle */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_swizzle (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_swizzle(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t tmp = { { vget_low_s8(a_.neon_i8), vget_high_s8(a_.neon_i8) } }; + r_.neon_i8 = vcombine_s8( + vtbl2_s8(tmp, vget_low_s8(b_.neon_i8)), + vtbl2_s8(tmp, vget_high_s8(b_.neon_i8)) + ); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + /* https://github.com/WebAssembly/simd/issues/68#issuecomment-470825324 */ + r_.sse_m128i = + _mm_shuffle_epi8( + a_.sse_m128i, + _mm_adds_epu8( + _mm_set1_epi8(0x70), + b_.sse_m128i)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_perm( + a_.altivec_i8, + a_.altivec_i8, + b_.altivec_u8 + ); + r_.altivec_i8 = vec_and(r_.altivec_i8, vec_cmple(b_.altivec_u8, vec_splat_u8(15))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.u8[i] > 15) ? 
INT8_C(0) : a_.i8[b_.u8[i]]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_swizzle(a, b) simde_wasm_i8x16_swizzle((a), (b)) +#endif + +/* narrow */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_narrow_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_narrow_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_narrow_i16x8(a, b) simde_wasm_i8x16_narrow_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_narrow_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_narrow_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_narrow_i32x4(a, b) simde_wasm_i16x8_narrow_i32x4((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_narrow_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_narrow_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packus_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_narrow_i16x8(a, b) simde_wasm_u8x16_narrow_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_narrow_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_narrow_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_packus_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a_.sse_m128i, 31), a_.sse_m128i); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b_.sse_m128i, 31), b_.sse_m128i); + r_.sse_m128i = + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_narrow_i32x4(a, b) simde_wasm_u16x8_narrow_i32x4((a), (b)) +#endif + +/* demote */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_demote_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_demote_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cvtpd_ps(a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_floate(a_.altivec_f64); + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_pack( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_f32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0)) + ) + ); + #else + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0x00, 0x01, 0x02, 0x03, /* 0 */ + 0x08, 0x09, 0x0a, 0x0b, /* 2 */ + 0x10, 0x11, 0x12, 0x13, /* 4 */ + 0x18, 0x19, 0x1a, 0x1b /* 6 */ + }; + r_.altivec_f32 = vec_perm(r_.altivec_f32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_splat_s32(0)), perm); + #endif + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = __builtin_shufflevector(__builtin_convertvector(a_.f64, __typeof__(z)), z, 0, 1, 2, 3); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_demote_f64x2_zero(a) simde_wasm_f32x4_demote_f64x2_zero((a)) +#endif + +/* extend_low */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extend_low_i8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extend_low_i8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmovl_s8(vget_low_s8(a_.neon_i8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi8_epi16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_sra( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(short), vec_mergeh(a_.altivec_i8, a_.altivec_i8)), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 8) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int8_t v SIMDE_VECTOR(8) = { + a_.i8[0], 
a_.i8[1], a_.i8[2], a_.i8[3], + a_.i8[4], a_.i8[5], a_.i8[6], a_.i8[7] + }; + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extend_low_i8x16(a) simde_wasm_i16x8_extend_low_i8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extend_low_i16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extend_low_i16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi16_epi32(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi32(_mm_unpacklo_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int), vec_mergeh(a_.altivec_i16, a_.altivec_i16)), + vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int16_t v SIMDE_VECTOR(8) = { a_.i16[0], a_.i16[1], a_.i16[2], a_.i16[3] }; + + SIMDE_CONVERT_VECTOR_(r_.i32, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extend_low_i16x8(a) simde_wasm_i32x4_extend_low_i16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extend_low_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extend_low_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi32_epi64(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_unpacklo_epi32(a_.sse_m128i, _mm_cmpgt_epi32(_mm_setzero_si128(), a_.sse_m128i)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_i64 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_mergeh(a_.altivec_i32, a_.altivec_i32)), + vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 32)) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_mergeh( + a_.altivec_i32, + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(int), + vec_cmpgt(vec_splat_s32(0), a_.altivec_i32) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int32_t v SIMDE_VECTOR(8) = { a_.i32[0], a_.i32[1] }; + + SIMDE_CONVERT_VECTOR_(r_.i64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extend_low_i32x4(a) simde_wasm_i64x2_extend_low_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extend_low_u8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return 
wasm_u16x8_extend_low_u8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmovl_u8(vget_low_u8(a_.neon_u8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu8_epi16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_mergeh(a_.altivec_i8, vec_splat_s8(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint8_t v SIMDE_VECTOR(8) = { + a_.u8[0], a_.u8[1], a_.u8[2], a_.u8[3], + a_.u8[4], a_.u8[5], a_.u8[6], a_.u8[7] + }; + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extend_low_u8x16(a) simde_wasm_u16x8_extend_low_u8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extend_low_u16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extend_low_u16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu16_epi32(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi32(_mm_unpacklo_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_mergeh(a_.altivec_i16, vec_splat_s16(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint16_t v SIMDE_VECTOR(8) = { a_.u16[0], a_.u16[1], a_.u16[2], a_.u16[3] }; + + SIMDE_CONVERT_VECTOR_(r_.i32, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extend_low_u16x8(a) simde_wasm_u32x4_extend_low_u16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extend_low_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extend_low_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu32_epi64(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i =_mm_unpacklo_epi32(a_.sse_m128i, _mm_setzero_si128()); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_mergeh(a_.altivec_i32, vec_splat_s32(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint32_t v SIMDE_VECTOR(8) = { a_.u32[0], a_.u32[1] }; + + SIMDE_CONVERT_VECTOR_(r_.u64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(int64_t, a_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extend_low_u32x4(a) simde_wasm_u64x2_extend_low_u32x4((a)) +#endif + +/* promote */ 
+ +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_promote_low_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_promote_low_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cvtps_pd(a_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_unpackh(a_.altivec_f32); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.f32, a_.f32, 0, 1), __typeof__(r_.f64)); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, a_.f32[0]); + r_.f64[1] = HEDLEY_STATIC_CAST(simde_float64, a_.f32[1]); + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_promote_low_f32x4(a) simde_wasm_f64x2_promote_low_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extend_high_i8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extend_high_i8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmovl_s8(vget_high_s8(a_.neon_i8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi8_epi16(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_sra( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(short), vec_mergel(a_.altivec_i8, a_.altivec_i8)), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 8) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int8_t v SIMDE_VECTOR(8) = { + a_.i8[ 8], a_.i8[ 9], a_.i8[10], a_.i8[11], + a_.i8[12], a_.i8[13], a_.i8[14], a_.i8[15] + }; + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extend_high_i8x16(a) simde_wasm_i16x8_extend_high_i8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extend_high_i16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extend_high_i16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_high_s16(a_.neon_i16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi16_epi32(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi32(_mm_unpackhi_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int), vec_mergel(a_.altivec_i16, a_.altivec_i16)), + vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int16_t v SIMDE_VECTOR(8) = { a_.i16[4], a_.i16[5], a_.i16[6], a_.i16[7] }; + + SIMDE_CONVERT_VECTOR_(r_.i32, 
v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extend_high_i16x8(a) simde_wasm_i32x4_extend_high_i16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extend_high_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extend_high_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_high_s32(a_.neon_i32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi32_epi64(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_unpackhi_epi32(a_.sse_m128i, _mm_cmpgt_epi32(_mm_setzero_si128(), a_.sse_m128i)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_i64 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_mergel(a_.altivec_i32, a_.altivec_i32)), + vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 32)) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_mergel( + a_.altivec_i32, + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(int), + vec_cmpgt(vec_splat_s32(0), a_.altivec_i32) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int32_t v SIMDE_VECTOR(8) = { a_.i32[2], a_.i32[3] }; + + SIMDE_CONVERT_VECTOR_(r_.i64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extend_high_i32x4(a) simde_wasm_i64x2_extend_high_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extend_high_u8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extend_high_u8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmovl_u8(vget_high_u8(a_.neon_u8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu8_epi16(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_mergel(a_.altivec_i8, vec_splat_s8(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint8_t v SIMDE_VECTOR(8) = { + a_.u8[ 8], a_.u8[ 9], a_.u8[10], a_.u8[11], + a_.u8[12], a_.u8[13], a_.u8[14], a_.u8[15] + }; + + SIMDE_CONVERT_VECTOR_(r_.u16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extend_high_u8x16(a) simde_wasm_u16x8_extend_high_u8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extend_high_u16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extend_high_u16x8(a); + #else + simde_v128_private + a_ = 
simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_high_u16(a_.neon_u16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu16_epi32(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi32(_mm_unpackhi_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_mergel(a_.altivec_i16, vec_splat_s16(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint16_t v SIMDE_VECTOR(8) = { a_.u16[4], a_.u16[5], a_.u16[6], a_.u16[7] }; + + SIMDE_CONVERT_VECTOR_(r_.u32, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extend_high_u16x8(a) simde_wasm_u32x4_extend_high_u16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extend_high_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extend_high_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_high_u32(a_.neon_u32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu32_epi64(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i =_mm_unpackhi_epi32(a_.sse_m128i, _mm_setzero_si128()); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_mergel(a_.altivec_i32, vec_splat_s32(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint32_t v SIMDE_VECTOR(8) = { a_.u32[2], a_.u32[3] }; + + SIMDE_CONVERT_VECTOR_(r_.u64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extend_high_u32x4(a) simde_wasm_u64x2_extend_high_u32x4((a)) +#endif + +/* extmul_low */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extmul_low_i8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extmul_low_i8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmull_s8(vget_low_s8(a_.neon_i8), vget_low_s8(b_.neon_i8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_i8, a_.altivec_i8); + bshuf = vec_mergeh(b_.altivec_i8, b_.altivec_i8); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + }; + ashuf = vec_perm(a_.altivec_i8, a_.altivec_i8, perm); + bshuf = vec_perm(b_.altivec_i8, b_.altivec_i8, perm); + #endif + + r_.altivec_i16 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mullo_epi16( + _mm_srai_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8), + _mm_srai_epi16(_mm_unpacklo_epi8(b_.sse_m128i, b_.sse_m128i), 8) + ); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i16 = + __builtin_convertvector( + __builtin_shufflevector(a_.i8, a_.i8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.i16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i8, b_.i8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.i16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extmul_low_i8x16(a, b) simde_wasm_i16x8_extmul_low_i8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extmul_low_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extmul_low_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed short) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_i16, a_.altivec_i16); + bshuf = vec_mergeh(b_.altivec_i16, b_.altivec_i16); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 4, 5, 4, 5, + 6, 7, 6, 7 + }; + ashuf = vec_perm(a_.altivec_i16, a_.altivec_i16, perm); + bshuf = vec_perm(b_.altivec_i16, b_.altivec_i16, perm); + #endif + + r_.altivec_i32 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpacklo_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i32 = + __builtin_convertvector( + __builtin_shufflevector(a_.i16, a_.i16, 0, 1, 2, 3), + __typeof__(r_.i32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i16, b_.i16, 0, 1, 2, 3), + __typeof__(r_.i32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extmul_low_i16x8(a, b) simde_wasm_i32x4_extmul_low_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extmul_low_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extmul_low_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmull_s32(vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed int) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_i32, a_.altivec_i32); + bshuf = vec_mergeh(b_.altivec_i32, b_.altivec_i32); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 0, 1, 2, 3, + 4, 5, 6, 7, 4, 5, 6, 7 + }; + ashuf = vec_perm(a_.altivec_i32, a_.altivec_i32, 
perm); + bshuf = vec_perm(b_.altivec_i32, b_.altivec_i32, perm); + #endif + + r_.altivec_i64 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = + _mm_mul_epi32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i64 = + __builtin_convertvector( + __builtin_shufflevector(a_.i32, a_.i32, 0, 1), + __typeof__(r_.i64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i32, b_.i32, 0, 1), + __typeof__(r_.i64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extmul_low_i32x4(a, b) simde_wasm_i64x2_extmul_low_i32x4((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extmul_low_u8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extmul_low_u8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmull_u8(vget_low_u8(a_.neon_u8), vget_low_u8(b_.neon_u8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_u8, a_.altivec_u8); + bshuf = vec_mergeh(b_.altivec_u8, b_.altivec_u8); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + }; + ashuf = vec_perm(a_.altivec_u8, a_.altivec_u8, perm); + bshuf = vec_perm(b_.altivec_u8, b_.altivec_u8, perm); + #endif + + r_.altivec_u16 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u16 = + __builtin_convertvector( + __builtin_shufflevector(a_.u8, a_.u8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.u16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u8, b_.u8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.u16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extmul_low_u8x16(a, b) simde_wasm_u16x8_extmul_low_u8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extmul_low_u16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extmul_low_u16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmull_u16(vget_low_u16(a_.neon_u16), vget_low_u16(b_.neon_u16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_u16, a_.altivec_u16); + bshuf = vec_mergeh(b_.altivec_u16, b_.altivec_u16); + #else + 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 4, 5, 4, 5, + 6, 7, 6, 7 + }; + ashuf = vec_perm(a_.altivec_u16, a_.altivec_u16, perm); + bshuf = vec_perm(b_.altivec_u16, b_.altivec_u16, perm); + #endif + + r_.altivec_u32 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpacklo_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epu16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u32 = + __builtin_convertvector( + __builtin_shufflevector(a_.u16, a_.u16, 0, 1, 2, 3), + __typeof__(r_.u32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u16, b_.u16, 0, 1, 2, 3), + __typeof__(r_.u32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extmul_low_u16x8(a, b) simde_wasm_u32x4_extmul_low_u16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extmul_low_u32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extmul_low_u32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmull_u32(vget_low_u32(a_.neon_u32), vget_low_u32(b_.neon_u32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_u32, a_.altivec_u32); + bshuf = vec_mergeh(b_.altivec_u32, b_.altivec_u32); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 0, 1, 2, 3, + 4, 5, 6, 7, 4, 5, 6, 7 + }; + ashuf = vec_perm(a_.altivec_u32, a_.altivec_u32, perm); + bshuf = vec_perm(b_.altivec_u32, b_.altivec_u32, perm); + #endif + + r_.altivec_u64 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mul_epu32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u64 = + __builtin_convertvector( + __builtin_shufflevector(a_.u32, a_.u32, 0, 1), + __typeof__(r_.u64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u32, b_.u32, 0, 1), + __typeof__(r_.u64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extmul_low_u32x4(a, b) simde_wasm_u64x2_extmul_low_u32x4((a), (b)) +#endif + +/* extmul_high */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extmul_high_i8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extmul_high_i8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vmull_high_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmull_s8(vget_high_s8(a_.neon_i8), vget_high_s8(b_.neon_i8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_mule( + vec_mergel(a_.altivec_i8, a_.altivec_i8), + vec_mergel(b_.altivec_i8, b_.altivec_i8) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mullo_epi16( + _mm_srai_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8), + _mm_srai_epi16(_mm_unpackhi_epi8(b_.sse_m128i, b_.sse_m128i), 8) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i16 = + __builtin_convertvector( + __builtin_shufflevector(a_.i8, a_.i8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.i16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i8, b_.i8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.i16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i + 8]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extmul_high_i8x16(a, b) simde_wasm_i16x8_extmul_high_i8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extmul_high_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extmul_high_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_mule( + vec_mergel(a_.altivec_i16, a_.altivec_i16), + vec_mergel(b_.altivec_i16, b_.altivec_i16) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpackhi_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i32 = + __builtin_convertvector( + __builtin_shufflevector(a_.i16, a_.i16, 4, 5, 6, 7), + __typeof__(r_.i32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i16, b_.i16, 4, 5, 6, 7), + __typeof__(r_.i32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 4]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extmul_high_i16x8(a, b) simde_wasm_i32x4_extmul_high_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extmul_high_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extmul_high_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vmull_high_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmull_s32(vget_high_s32(a_.neon_i32), vget_high_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed 
int) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergel(a_.altivec_i32, a_.altivec_i32); + bshuf = vec_mergel(b_.altivec_i32, b_.altivec_i32); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 8, 9, 10, 11, 8, 9, 10, 11, + 12, 13, 14, 15, 12, 13, 14, 15 + }; + ashuf = vec_perm(a_.altivec_i32, a_.altivec_i32, perm); + bshuf = vec_perm(b_.altivec_i32, b_.altivec_i32, perm); + #endif + + r_.altivec_i64 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = + _mm_mul_epi32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i64 = + __builtin_convertvector( + __builtin_shufflevector(a_.i32, a_.i32, 2, 3), + __typeof__(r_.i64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i32, b_.i32, 2, 3), + __typeof__(r_.i64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i + 2]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extmul_high_i32x4(a, b) simde_wasm_i64x2_extmul_high_i32x4((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extmul_high_u8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extmul_high_u8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u16 = vmull_high_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmull_u8(vget_high_u8(a_.neon_u8), vget_high_u8(b_.neon_u8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = + vec_mule( + vec_mergel(a_.altivec_u8, a_.altivec_u8), + vec_mergel(b_.altivec_u8, b_.altivec_u8) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u16 = + __builtin_convertvector( + __builtin_shufflevector(a_.u8, a_.u8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.u16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u8, b_.u8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.u16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i + 8]) * HEDLEY_STATIC_CAST(uint16_t, b_.u8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extmul_high_u8x16(a, b) simde_wasm_u16x8_extmul_high_u8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extmul_high_u16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extmul_high_u16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmull_u16(vget_high_u16(a_.neon_u16), vget_high_u16(b_.neon_u16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32 = + vec_mule( + vec_mergel(a_.altivec_u16, a_.altivec_u16), + vec_mergel(b_.altivec_u16, 
b_.altivec_u16) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpackhi_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epu16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u32 = + __builtin_convertvector( + __builtin_shufflevector(a_.u16, a_.u16, 4, 5, 6, 7), + __typeof__(r_.u32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u16, b_.u16, 4, 5, 6, 7), + __typeof__(r_.u32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[i + 4]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extmul_high_u16x8(a, b) simde_wasm_u32x4_extmul_high_u16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extmul_high_u32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extmul_high_u32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vmull_high_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmull_u32(vget_high_u32(a_.neon_u32), vget_high_u32(b_.neon_u32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u64 = + vec_mule( + vec_mergel(a_.altivec_u32, a_.altivec_u32), + vec_mergel(b_.altivec_u32, b_.altivec_u32) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mul_epu32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u64 = + __builtin_convertvector( + __builtin_shufflevector(a_.u32, a_.u32, 2, 3), + __typeof__(r_.u64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u32, b_.u32, 2, 3), + __typeof__(r_.u64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i + 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extmul_high_u32x4(a, b) simde_wasm_u64x2_extmul_high_u32x4((a), (b)) +#endif + +/* extadd_pairwise */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extadd_pairwise_i8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extadd_pairwise_i8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vpaddlq_s8(a_.neon_i8); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddw_epi8(a_.sse_m128i); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.sse_m128i = _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a_.sse_m128i); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1); + r_.altivec_i16 = + vec_add( + vec_mule(a_.altivec_i8, one), + vec_mulo(a_.altivec_i8, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = + ((a_.i16 << 8) >> 8) + + ((a_.i16 >> 8) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + 
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2)]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extadd_pairwise_i8x16(a) simde_wasm_i16x8_extadd_pairwise_i8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extadd_pairwise_i16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extadd_pairwise_i16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vpaddlq_s16(a_.neon_i16); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddd_epi16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_madd_epi16(a_.sse_m128i, _mm_set1_epi16(INT8_C(1))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1); + r_.altivec_i32 = + vec_add( + vec_mule(a_.altivec_i16, one), + vec_mulo(a_.altivec_i16, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = + ((a_.i32 << 16) >> 16) + + ((a_.i32 >> 16) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2)]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extadd_pairwise_i16x8(a) simde_wasm_i32x4_extadd_pairwise_i16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extadd_pairwise_u8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extadd_pairwise_u8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vpaddlq_u8(a_.neon_u8); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddw_epu8(a_.sse_m128i); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.sse_m128i = _mm_maddubs_epi16(a_.sse_m128i, _mm_set1_epi8(INT8_C(1))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1); + r_.altivec_u16 = + vec_add( + vec_mule(a_.altivec_u8, one), + vec_mulo(a_.altivec_u8, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = + ((a_.u16 << 8) >> 8) + + ((a_.u16 >> 8) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2)]) + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extadd_pairwise_u8x16(a) simde_wasm_u16x8_extadd_pairwise_u8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extadd_pairwise_u16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extadd_pairwise_u16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vpaddlq_u16(a_.neon_u16); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddd_epu16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_add_epi32( + _mm_srli_epi32(a_.sse_m128i, 16), + _mm_and_si128(a_.sse_m128i, _mm_set1_epi32(INT32_C(0x0000ffff))) + ); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1); + r_.altivec_u32 = + vec_add( + vec_mule(a_.altivec_u16, one), + vec_mulo(a_.altivec_u16, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = + ((a_.u32 << 16) >> 16) + + ((a_.u32 >> 16) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2)]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extadd_pairwise_u16x8(a) simde_wasm_u32x4_extadd_pairwise_u16x8((a)) +#endif + +/* X_load_Y */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_load8x8 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_load8x8(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + int8_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_ALIGN_TO_16 int8_t v[8]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_load8x8(mem) simde_wasm_i16x8_load8x8((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_load16x4 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_load16x4(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + int16_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.i32, v); + #else + SIMDE_ALIGN_TO_16 int16_t v[4]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_load16x4(mem) simde_wasm_i32x4_load16x4((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_load32x2 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_load32x2(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) && !defined(SIMDE_BUG_CLANG_50893) + int32_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.i64, v); + #else + SIMDE_ALIGN_TO_16 int32_t v[2]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_load32x2(mem) simde_wasm_i64x2_load32x2((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_load8x8 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_load8x8(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + uint8_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.u16, v); + #else + SIMDE_ALIGN_TO_16 
uint8_t v[8]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_load8x8(mem) simde_wasm_u16x8_load8x8((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_load16x4 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_load16x4(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + uint16_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.u32, v); + #else + SIMDE_ALIGN_TO_16 uint16_t v[4]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_load16x4(mem) simde_wasm_u32x4_load16x4((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_load32x2 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_load32x2(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + uint32_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.u64, v); + #else + SIMDE_ALIGN_TO_16 uint32_t v[2]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_load32x2(mem) simde_wasm_u64x2_load32x2((mem)) +#endif + +/* load*_zero */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load32_zero (const void * a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load32_zero(a); + #else + simde_v128_private r_; + + int32_t a_; + simde_memcpy(&a_, a, sizeof(a_)); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cvtsi32_si128(a_); + #else + r_.i32[0] = a_; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load32_zero(a) simde_wasm_v128_load32_zero((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load64_zero (const void * a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load64_zero(a); + #else + simde_v128_private r_; + + int64_t a_; + simde_memcpy(&a_, a, sizeof(a_)); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + r_.sse_m128i = _mm_cvtsi64_si128(a_); + #else + r_.i64[0] = a_; + r_.i64[1] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load64_zero(a) simde_wasm_v128_load64_zero((a)) +#endif + +/* load*_lane */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load8_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + #if defined(SIMDE_BUG_CLANG_50901) + simde_v128_private r_ = simde_v128_to_private(vec); + r_.altivec_i8 = 
vec_insert(*HEDLEY_REINTERPRET_CAST(const signed char *, a), a_.altivec_i8, lane); + return simde_v128_from_private(r_); + #else + a_.i8[lane] = *HEDLEY_REINTERPRET_CAST(const int8_t *, a); + return simde_v128_from_private(a_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load8_lane(a, vec, lane) wasm_v128_load8_lane(HEDLEY_CONST_CAST(int8_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load8_lane(a, vec, lane) simde_wasm_v128_load8_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load16_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + int16_t tmp = 0; + simde_memcpy(&tmp, a, sizeof(int16_t)); + a_.i16[lane] = tmp; + + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load16_lane(a, vec, lane) wasm_v128_load16_lane(HEDLEY_CONST_CAST(int16_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load16_lane(a, vec, lane) simde_wasm_v128_load16_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load32_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + int32_t tmp = 0; + simde_memcpy(&tmp, a, sizeof(int32_t)); + a_.i32[lane] = tmp; + + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load32_lane(a, vec, lane) wasm_v128_load32_lane(HEDLEY_CONST_CAST(int32_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load32_lane(a, vec, lane) simde_wasm_v128_load32_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load64_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + int64_t tmp = 0; + simde_memcpy(&tmp, a, sizeof(int64_t)); + a_.i64[lane] = tmp; + + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load64_lane(a, vec, lane) wasm_v128_load64_lane(HEDLEY_CONST_CAST(int64_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load64_lane(a, vec, lane) simde_wasm_v128_load64_lane((a), (vec), (lane)) +#endif + +/* store*_lane */ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store8_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int8_t tmp = vec_.i8[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_store8_lane(a, vec, lane) wasm_v128_store8_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store8_lane(a, vec, lane) simde_wasm_v128_store8_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store16_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int16_t tmp = vec_.i16[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define 
simde_wasm_v128_store16_lane(a, vec, lane) wasm_v128_store16_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store16_lane(a, vec, lane) simde_wasm_v128_store16_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store32_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int32_t tmp = vec_.i32[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_store32_lane(a, vec, lane) wasm_v128_store32_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store32_lane(a, vec, lane) simde_wasm_v128_store32_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store64_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int64_t tmp = vec_.i64[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_store64_lane(a, vec, lane) wasm_v128_store64_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store64_lane(a, vec, lane) simde_wasm_v128_store64_lane((a), (vec), (lane)) +#endif + +/* convert */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_convert_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_convert_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cvtepi32_ps(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_convert_i32x4(a) simde_wasm_f32x4_convert_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_convert_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_convert_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_convert_u32x4(a) simde_wasm_f32x4_convert_u32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_convert_low_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_convert_low_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && 
HEDLEY_HAS_BUILTIN(__builtin_convertvector) + r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.i32, a_.i32, 0, 1), __typeof__(r_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_convert_low_i32x4(a) simde_wasm_f64x2_convert_low_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_convert_low_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_convert_low_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.u32, a_.u32, 0, 1), __typeof__(r_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_convert_low_u32x4(a) simde_wasm_f64x2_convert_low_u32x4((a)) +#endif + +/* trunc_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_trunc_sat_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_trunc_sat_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i i32_max_mask = _mm_castps_si128(_mm_cmpgt_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(2147483520.0)))); + const __m128 clamped = _mm_max_ps(a_.sse_m128, _mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))); + r_.sse_m128i = _mm_cvttps_epi32(clamped); + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(r_.sse_m128i), + _mm_castsi128_ps(_mm_set1_epi32(INT32_MAX)), + _mm_castsi128_ps(i32_max_mask) + ) + ); + #else + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(i32_max_mask, _mm_set1_epi32(INT32_MAX)), + _mm_andnot_si128(i32_max_mask, r_.sse_m128i) + ); + #endif + r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpord_ps(a_.sse_m128, a_.sse_m128))); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + const __typeof__(a_.f32) max_representable = { SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0) }; + __typeof__(r_.i32) max_mask = HEDLEY_REINTERPRET_CAST(__typeof__(max_mask), a_.f32 > max_representable); + __typeof__(r_.i32) max_i32 = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX }; + r_.i32 = (max_i32 & max_mask) | (r_.i32 & ~max_mask); + + const __typeof__(a_.f32) min_representable = { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) }; + __typeof__(r_.i32) min_mask = HEDLEY_REINTERPRET_CAST(__typeof__(min_mask), a_.f32 < min_representable); + __typeof__(r_.i32) min_i32 = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN 
}; + r_.i32 = (min_i32 & min_mask) | (r_.i32 & ~min_mask); + + r_.i32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == a_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + if (simde_math_isnanf(a_.f32[i])) { + r_.i32[i] = INT32_C(0); + } else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) { + r_.i32[i] = INT32_MIN; + } else if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)) { + r_.i32[i] = INT32_MAX; + } else { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f32[i]); + } + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_trunc_sat_f32x4(a) simde_wasm_i32x4_trunc_sat_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_trunc_sat_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_trunc_sat_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcvtq_u32_f32(a_.neon_f32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cvttps_epu32(a_.sse_m128); + #else + __m128 first_oob_high = _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0)); + __m128 neg_zero_if_too_high = + _mm_castsi128_ps( + _mm_slli_epi32( + _mm_castps_si128(_mm_cmple_ps(first_oob_high, a_.sse_m128)), + 31 + ) + ); + r_.sse_m128i = + _mm_xor_si128( + _mm_cvttps_epi32( + _mm_sub_ps(a_.sse_m128, _mm_and_ps(neg_zero_if_too_high, first_oob_high)) + ), + _mm_castps_si128(neg_zero_if_too_high) + ); + #endif + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpgt_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(0.0))))); + r_.sse_m128i = _mm_or_si128 (r_.sse_m128i, _mm_castps_si128(_mm_cmpge_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0))))); + #endif + + #if !defined(SIMDE_FAST_NANS) + r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpord_ps(a_.sse_m128, a_.sse_m128))); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.u32, a_.f32); + + const __typeof__(a_.f32) max_representable = { SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0) }; + r_.u32 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 > max_representable); + + const __typeof__(a_.f32) min_representable = { SIMDE_FLOAT32_C(0.0), }; + r_.u32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 > min_representable); + + r_.u32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 == a_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + if (simde_math_isnan(a_.f32[i]) || + a_.f32[i] < SIMDE_FLOAT32_C(0.0)) { + r_.u32[i] = UINT32_C(0); + } else if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)) { + r_.u32[i] = UINT32_MAX; + } else { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f32[i]); + } + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_trunc_sat_f32x4(a) simde_wasm_u32x4_trunc_sat_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_trunc_sat_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_trunc_sat_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vcombine_s32(vqmovn_s64(vcvtq_s64_f64(a_.neon_f64)), vdup_n_s32(INT32_C(0))); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(double) in_not_nan = + vec_and(a_.altivec_f64, vec_cmpeq(a_.altivec_f64, a_.altivec_f64)); + r_.altivec_i32 = vec_signede(in_not_nan); + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i32 = + vec_pack( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_i32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0)) + ); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 4, 5, 6, 7, + 16, 17, 18, 19, 20, 21, 22, 23 + }; + r_.altivec_i32 = + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(signed int), + vec_perm( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), r_.altivec_i32), + vec_splat_s8(0), + perm + ) + ); + #endif + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (simde_math_isnan(a_.f64[i])) { + r_.i32[i] = INT32_C(0); + } else if (a_.f64[i] < HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) { + r_.i32[i] = INT32_MIN; + } else if (a_.f64[i] > HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)) { + r_.i32[i] = INT32_MAX; + } else { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f64[i]); + } + } + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_trunc_sat_f64x2_zero(a) simde_wasm_i32x4_trunc_sat_f64x2_zero((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_trunc_sat_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_trunc_sat_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vcombine_u32(vqmovn_u64(vcvtq_u64_f64(a_.neon_f64)), vdup_n_u32(UINT32_C(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (simde_math_isnanf(a_.f64[i]) || + a_.f64[i] < SIMDE_FLOAT64_C(0.0)) { + r_.u32[i] = UINT32_C(0); + } else if (a_.f64[i] > HEDLEY_STATIC_CAST(simde_float64, UINT32_MAX)) { + r_.u32[i] = UINT32_MAX; + } else { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f64[i]); + } + } + r_.u32[2] = 0; + r_.u32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_trunc_sat_f64x2_zero(a) simde_wasm_u32x4_trunc_sat_f64x2_zero((a)) +#endif + +/* popcnt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_popcnt (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_popcnt(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcntq_s8(a_.neon_i8); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BITALG_NATIVE) + r_.sse_m128i = _mm_popcnt_epi8(a_.sse_m128i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m128i tmp0 = _mm_set1_epi8(0x0f); + __m128i tmp1 = _mm_andnot_si128(tmp0, a_.sse_m128i); + __m128i y = _mm_and_si128(tmp0, a_.sse_m128i); + tmp0 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp1 = _mm_srli_epi16(tmp1, 4); + y = _mm_shuffle_epi8(tmp0, y); + tmp1 = _mm_shuffle_epi8(tmp0, tmp1); + return _mm_add_epi8(y, tmp1); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i tmp0 = 
_mm_set1_epi8(0x0f); + __m128i tmp1 = _mm_and_si128(a_.sse_m128i, tmp0); + tmp0 = _mm_andnot_si128(tmp0, a_.sse_m128i); + __m128i y = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp0 = _mm_srli_epi16(tmp0, 4); + y = _mm_shuffle_epi8(y, tmp1); + tmp1 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp1 = _mm_shuffle_epi8(tmp1, tmp0); + return _mm_add_epi8(y, tmp1); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp0 = _mm_and_si128(_mm_srli_epi16(a_.sse_m128i, 1), _mm_set1_epi8(0x55)); + __m128i tmp1 = _mm_sub_epi8(a_.sse_m128i, tmp0); + tmp0 = tmp1; + tmp1 = _mm_and_si128(tmp1, _mm_set1_epi8(0x33)); + tmp0 = _mm_and_si128(_mm_srli_epi16(tmp0, 2), _mm_set1_epi8(0x33)); + tmp1 = _mm_add_epi8(tmp1, tmp0); + tmp0 = _mm_srli_epi16(tmp1, 4); + tmp1 = _mm_add_epi8(tmp1, tmp0); + r_.sse_m128i = _mm_and_si128(tmp1, _mm_set1_epi8(0x0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a_.altivec_i8))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); + v = v - ((v >> 1) & (85)); + v = (v & (51)) + ((v >> (2)) & (51)); + v = (v + (v >> (4))) & (15); + r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_popcnt(a) simde_wasm_i8x16_popcnt((a)) +#endif + +/* dot */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_dot_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_dot_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_madd_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define 
wasm_i32x4_dot_i16x8(a, b) simde_wasm_i32x4_dot_i16x8((a), (b)) +#endif + +/* ceil */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_ceil (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_ceil(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128 = _mm_round_ps(a_.sse_m128, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/WebAssembly/simd/pull/232 */ + const __m128i input_as_i32 = _mm_cvttps_epi32(a_.sse_m128); + const __m128i i32_min = _mm_set1_epi32(INT32_MIN); + const __m128i input_is_out_of_range = _mm_or_si128(_mm_cmpeq_epi32(input_as_i32, i32_min), i32_min); + const __m128 truncated = + _mm_or_ps( + _mm_andnot_ps( + _mm_castsi128_ps(input_is_out_of_range), + _mm_cvtepi32_ps(input_as_i32) + ), + _mm_castsi128_ps( + _mm_castps_si128( + _mm_and_ps( + _mm_castsi128_ps(input_is_out_of_range), + a_.sse_m128 + ) + ) + ) + ); + + const __m128 trunc_is_ge_input = + _mm_or_ps( + _mm_cmple_ps(a_.sse_m128, truncated), + _mm_castsi128_ps(i32_min) + ); + r_.sse_m128 = + _mm_or_ps( + _mm_andnot_ps( + trunc_is_ge_input, + _mm_add_ps(truncated, _mm_set1_ps(SIMDE_FLOAT32_C(1.0))) + ), + _mm_and_ps(trunc_is_ge_input, truncated) + ); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ceil(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_ceilf(a_.f32[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_ceil(a) simde_wasm_f32x4_ceil((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_ceil (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_ceil(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128d = _mm_round_pd(a_.sse_m128d, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_ceil(a_.altivec_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_ceil(a_.f64[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_ceil(a) simde_wasm_f64x2_ceil((a)) +#endif + +/* floor */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_floor (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_floor(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128 = _mm_floor_ps(a_.sse_m128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i vint_min = _mm_set1_epi32(INT_MIN); + const __m128i input_as_int = _mm_cvttps_epi32(a_.sse_m128); + const __m128 input_truncated = _mm_cvtepi32_ps(input_as_int); + const __m128i oor_all_or_neg = _mm_or_si128(_mm_cmpeq_epi32(input_as_int, vint_min), vint_min); + const __m128 tmp = + _mm_castsi128_ps( + _mm_or_si128( + _mm_andnot_si128( + oor_all_or_neg, + _mm_castps_si128(input_truncated) + ), + _mm_and_si128( + oor_all_or_neg, + _mm_castps_si128(a_.sse_m128) + ) + 
) + ); + r_.sse_m128 = + _mm_sub_ps( + tmp, + _mm_and_ps( + _mm_cmplt_ps( + a_.sse_m128, + tmp + ), + _mm_set1_ps(SIMDE_FLOAT32_C(1.0)) + ) + ); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t input_as_int = vcvtq_s32_f32(a_.f32); + const float32x4_t input_truncated = vcvtq_f32_s32(input_as_int); + const float32x4_t tmp = + vbslq_f32( + vbicq_u32( + vcagtq_f32( + vreinterpretq_f32_u32(vdupq_n_u32(UINT32_C(0x4B000000))), + a_.f32 + ), + vdupq_n_u32(UINT32_C(0x80000000)) + ), + input_truncated, + a_.f32); + r_.neon_f32 = + vsubq_f32( + tmp, + vreinterpretq_f32_u32( + vandq_u32( + vcgtq_f32( + tmp, + a_.f32 + ), + vdupq_n_u32(UINT32_C(0x3F800000)) + ) + ) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_floor(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_floorf(a_.f32[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_floor(a) simde_wasm_f32x4_floor((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_floor (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_floor(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_floor(a_.f64[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_floor(a) simde_wasm_f64x2_floor((a)) +#endif + +/* trunc */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_trunc (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_trunc(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_truncf(a_.f32[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_trunc(a) simde_wasm_f32x4_trunc((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_trunc (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_trunc(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_trunc(a_.f64[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_trunc(a) simde_wasm_f64x2_trunc((a)) +#endif + +/* nearest */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_nearest (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_nearest(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_nearbyintf(a_.f32[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_nearest(a) simde_wasm_f32x4_nearest((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_v128_t +simde_wasm_f64x2_nearest (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_nearest(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_nearbyint(a_.f64[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_nearest(a) simde_wasm_f64x2_nearest((a)) +#endif + +/* sqrt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_sqrt (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_sqrt(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.sse_m128 = _mm_sqrt_ps(a_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_sqrtf(a_.f32[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_sqrt(a) simde_wasm_f32x4_sqrt((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_sqrt (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_sqrt(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.sse_m128d = _mm_sqrt_pd(a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_sqrt(a_.f64[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_sqrt(a) simde_wasm_f64x2_sqrt((a)) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_WASM_SIMD128_H) */ +/* :: End simde/wasm/simd128.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* swizzle */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_swizzle_relaxed (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_swizzle(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t tmp = { { vget_low_s8(a_.neon_i8), vget_high_s8(a_.neon_i8) } }; + r_.neon_i8 = vcombine_s8( + vtbl2_s8(tmp, vget_low_s8(b_.neon_i8)), + vtbl2_s8(tmp, vget_high_s8(b_.neon_i8)) + ); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.sse_m128i = _mm_shuffle_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_perm( + a_.altivec_i8, + a_.altivec_i8, + b_.altivec_u8 + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.u8[i] & 15]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_swizzle_relaxed(a, b) simde_wasm_i8x16_swizzle_relaxed((a), (b)) +#endif + +/* 
Conversions */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_trunc_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_trunc_sat_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cvtps_epi32(a_.sse_m128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || (defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_BUG_GCC_101614)) + r_.altivec_i32 = vec_signed(a_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_trunc_f32x4(a) simde_wasm_i32x4_trunc_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_trunc_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_trunc_sat_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcvtq_u32_f32(a_.neon_f32); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cvttps_epu32(a_.sse_m128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i input_to_signed_i32 = _mm_cvttps_epi32(a_.sse_m128); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128( + _mm_cvttps_epi32( + /* 2147483648.0f is the last representable float less than INT32_MAX */ + _mm_add_ps(a_.sse_m128, _mm_set1_ps(-SIMDE_FLOAT32_C(2147483648.0))) + ), + _mm_srai_epi32(input_to_signed_i32, 31) + ), + input_to_signed_i32 + ); + // #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + // r_.altivec_u32 = vec_unsignede(a_.altivec_f32); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.u32, a_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_trunc_f32x4(a) simde_wasm_u32x4_trunc_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_trunc_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_trunc_sat_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cvttpd_epi32(a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vcombine_s32(vmovn_s64(vcvtq_s64_f64(a_.neon_f64)), vdup_n_s32(INT32_C(0))); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_i32 = vec_signede(a_.altivec_f64); + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i32 = + vec_pack( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_i32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0)) + ); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 4, 5, 6, 7, + 16, 17, 18, 19, 20, 21, 22, 23 + }; + r_.altivec_i32 = + HEDLEY_REINTERPRET_CAST( + 
SIMDE_POWER_ALTIVEC_VECTOR(signed int), + vec_perm( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), r_.altivec_i32), + vec_splat_s8(0), + perm + ) + ); + #endif + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(8) z = { 0, 0 }; + __typeof__(z) c = __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)); + r_.i32 = __builtin_shufflevector(c, z, 0, 1, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f64[i]); + } + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_trunc_f64x2_zero(a) simde_wasm_i32x4_trunc_f64x2_zero((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_trunc_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_trunc_sat_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + const __m128i input_to_signed_i32 = _mm_cvttpd_epi32(a_.sse_m128d); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128( + _mm_cvttpd_epi32( + /* 2147483648.0f is the last representable float less than INT32_MAX */ + _mm_add_pd(a_.sse_m128d, _mm_set1_pd(-SIMDE_FLOAT64_C(2147483648.0))) + ), + _mm_srai_epi32(input_to_signed_i32, 31) + ), + input_to_signed_i32 + ); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vcombine_u32(vmovn_u64(vcvtq_u64_f64(a_.neon_f64)), vdup_n_u32(UINT32_C(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + uint32_t SIMDE_VECTOR(8) z = { 0, 0 }; + __typeof__(z) c = __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)); + r_.u32 = __builtin_shufflevector(c, z, 0, 1, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f64[i]); + } + r_.u32[2] = 0; + r_.u32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_trunc_f64x2_zero(a) simde_wasm_u32x4_trunc_f64x2_zero((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_blend(simde_v128_t a, simde_v128_t b, simde_v128_t mask) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_i8x16_blend(a, b, mask); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + mask_ = simde_v128_to_private(mask), + r_; + + r_.sse_m128i = _mm_blendv_epi8(b_.sse_m128i, a_.sse_m128i, mask_.sse_m128i); + + return simde_v128_from_private(r_); + #else + return simde_wasm_v128_bitselect(a, b, mask); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_blend(a, b, c) simde_wasm_i8x16_blend((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_blend(simde_v128_t a, simde_v128_t b, simde_v128_t mask) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_i16x8_blend(a, b, mask); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + mask_ = simde_v128_to_private(mask), + r_; + + r_.sse_m128i = _mm_blendv_epi8(b_.sse_m128i, a_.sse_m128i, _mm_srai_epi16(mask_.sse_m128i, 15)); + + return 
simde_v128_from_private(r_); + #else + return simde_wasm_v128_bitselect(a, b, mask); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_blend(a, b, c) simde_wasm_i16x8_blend((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_blend(simde_v128_t a, simde_v128_t b, simde_v128_t mask) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_i32x4_blend(a, b, mask); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + mask_ = simde_v128_to_private(mask), + r_; + + r_.sse_m128 = _mm_blendv_ps(b_.sse_m128, a_.sse_m128, mask_.sse_m128); + + return simde_v128_from_private(r_); + #else + return simde_wasm_v128_bitselect(a, b, mask); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_blend(a, b, c) simde_wasm_i32x4_blend((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_blend(simde_v128_t a, simde_v128_t b, simde_v128_t mask) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_i64x2_blend(a, b, mask); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + mask_ = simde_v128_to_private(mask), + r_; + + r_.sse_m128d = _mm_blendv_pd(b_.sse_m128d, a_.sse_m128d, mask_.sse_m128d); + + return simde_v128_from_private(r_); + #else + return simde_wasm_v128_bitselect(a, b, mask); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_blend(a, b, c) simde_wasm_i64x2_blend((a), (b), (c)) +#endif + +/* fma */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_fma (simde_v128_t a, simde_v128_t b, simde_v128_t c) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_f32x4_fma(a, b, c); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_add(a, wasm_f32x4_mul(b, c)); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + c_ = simde_v128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_madd(c_.altivec_f32, b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmaq_f32(a_.neon_f32, c_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlaq_f32(a_.neon_f32, b_.neon_f32, c_.neon_f32); + #elif defined(SIMDE_X86_FMA_NATIVE) + r_.sse_m128 = _mm_fmadd_ps(c_.sse_m128, b_.sse_m128, a_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 + (b_.f32 * c_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fmaf(c_.f32[i], b_.f32[i], a_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_fma(a, b) simde_wasm_f32x4_fma((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_fma (simde_v128_t a, simde_v128_t b, simde_v128_t c) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_f64x2_fma(a, b, c); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_add(a, wasm_f64x2_mul(b, c)); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + c_ = simde_v128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_madd(c_.altivec_f64, 
b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmaq_f64(a_.neon_f64, c_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_X86_FMA_NATIVE) + r_.sse_m128d = _mm_fmadd_pd(c_.sse_m128d, b_.sse_m128d, a_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 + (b_.f64 * c_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fma(c_.f64[i], b_.f64[i], a_.f64[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_fma(a, b) simde_wasm_f64x2_fma((a), (b)) +#endif + +/* fms */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_fms (simde_v128_t a, simde_v128_t b, simde_v128_t c) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_f32x4_fms(a, b, c); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_sub(a, wasm_f32x4_mul(b, c)); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + c_ = simde_v128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_nmsub(c_.altivec_f32, b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmsq_f32(a_.neon_f32, c_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlsq_f32(a_.neon_f32, b_.neon_f32, c_.neon_f32); + #elif defined(SIMDE_X86_FMA_NATIVE) + r_.sse_m128 = _mm_fnmadd_ps(c_.sse_m128, b_.sse_m128, a_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 - (b_.f32 * c_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - (b_.f32[i] * c_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_fms(a, b) simde_wasm_f32x4_fms((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_fms (simde_v128_t a, simde_v128_t b, simde_v128_t c) { + #if defined(SIMDE_WASM_RELAXED_SIMD_NATIVE) + return wasm_f64x2_fms(a, b, c); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_sub(a, wasm_f64x2_mul(b, c)); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + c_ = simde_v128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nmsub(c_.altivec_f64, b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmsq_f64(a_.neon_f64, c_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_X86_FMA_NATIVE) + r_.sse_m128d = _mm_fnmadd_pd(c_.sse_m128d, b_.sse_m128d, a_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 - (b_.f64 * c_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - (b_.f64[i] * c_.f64[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_RELAXED_SIMD_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_fms(a, b) simde_wasm_f64x2_fms((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_WASM_RELAXED_SIMD_H) */ +/* :: End simde/wasm/relaxed-simd.h :: */ diff --git a/include/simde/wasm/simd128.h b/include/simde/wasm/simd128.h new file mode 100644 index 00000000..3f16eb5a --- /dev/null +++ b/include/simde/wasm/simd128.h @@ -0,0 +1,17043 @@ 
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/wasm/simd128.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_WASM_SIMD128_H) +#define SIMDE_WASM_SIMD128_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ?
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/
+#if !defined(simde_memcpy)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
+    #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
+  #endif
+#endif
+#if !defined(simde_memset)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memset)
+    #define simde_memset(s, c, n) __builtin_memset(s, c, n)
+  #endif
+#endif
+#if !defined(simde_memcmp)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
+    #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
+  #endif
+#endif
+
+#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
+  #if !defined(SIMDE_NO_STRING_H)
+    #if defined(__has_include)
+      #if !__has_include(<string.h>)
+        #define SIMDE_NO_STRING_H
+      #endif
+    #elif (SIMDE_STDC_HOSTED == 0)
+      #define SIMDE_NO_STRING_H
+    #endif
+  #endif
+
+  #if !defined(SIMDE_NO_STRING_H)
+    #include <string.h>
+    #if !defined(simde_memcpy)
+      #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
+    #endif
+    #if !defined(simde_memset)
+      #define simde_memset(s, c, n) memset(s, c, n)
+    #endif
+    #if !defined(simde_memcmp)
+      #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
+    #endif
+  #else
+    /* These are meant to be portable, not fast. If you're hitting them you
+     * should think about providing your own (by defining the simde_memcpy
+     * macro prior to including any SIMDe files) or submitting a patch to
+     * SIMDe so we can detect your system-provided memcpy/memset, like by
+     * adding your compiler to the checks for __builtin_memcpy and/or
+     * __builtin_memset. */
+    #if !defined(simde_memcpy)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memcpy_(void* dest, const void* src, size_t len) {
+        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
+        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
+        for (size_t i = 0 ; i < len ; i++) {
+          dest_[i] = src_[i];
+        }
+      }
+      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
+    #endif
+
+    #if !defined(simde_memset)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memset_(void* s, int c, size_t len) {
+        char* s_ = HEDLEY_STATIC_CAST(char*, s);
+        char c_ = HEDLEY_STATIC_CAST(char, c);
+        for (size_t i = 0 ; i < len ; i++) {
+          s_[i] = c_;
+        }
+      }
+      #define simde_memset(s, c, n) simde_memset_(s, c, n)
+    #endif
+
+    #if !defined(simde_memcmp)
+      SIMDE_FUNCTION_ATTRIBUTES
+      int
+      simde_memcmp_(const void *s1, const void *s2, size_t n) {
+        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
+        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
+        for (size_t i = 0 ; i < n ; i++) {
+          if (s1_[i] != s2_[i]) {
+            return (int) (s1_[i] - s2_[i]);
+          }
+        }
+        return 0;
+      }
+      #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
+    #endif
+  #endif
+#endif
+
+/*** Functions that quiet a signaling NaN ***/
+
+static HEDLEY_INLINE
+double
+simde_math_quiet(double x) {
+  uint64_t tmp, mask;
+  if (!simde_math_isnan(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 8);
+  mask = 0x7ff80000;
+  mask <<= 32;
+  tmp |= mask;
+  simde_memcpy(&x, &tmp, 8);
+  return x;
+}
+
+static HEDLEY_INLINE
+float
+simde_math_quietf(float x) {
+  uint32_t tmp;
+  if (!simde_math_isnanf(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 4);
+  tmp |= 0x7fc00000lu;
+  simde_memcpy(&x, &tmp, 4);
+  return x;
+}
+
+#if defined(FE_ALL_EXCEPT)
+  #define SIMDE_HAVE_FENV_H
+#elif defined(__has_include)
+  #if __has_include(<fenv.h>)
+    #include <fenv.h>
+    #define SIMDE_HAVE_FENV_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+  #include <fenv.h>
+  #define SIMDE_HAVE_FENV_H
+#endif
+
+#if defined(EXIT_FAILURE)
+  #define SIMDE_HAVE_STDLIB_H
+#elif defined(__has_include)
+  #if __has_include(<stdlib.h>)
+    #include <stdlib.h>
+    #define SIMDE_HAVE_STDLIB_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
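+
+/* A usage sketch, not taken from the upstream sources (the helper function
+ * name below is hypothetical): the simde_assert_* macros in this header are
+ * intended for SIMDe's own test code, and compose like this in a translation
+ * unit that includes check.h:
+ *
+ *   static void
+ *   example_roundtrip_check(void) {
+ *     uint8_t expected[4] = { 1, 2, 3, 4 };
+ *     uint8_t actual[4];
+ *     simde_memcpy(actual, expected, sizeof(actual));
+ *     simde_assert_uint8(actual[2], ==, expected[2]);
+ *     simde_assert_memory_equal(sizeof(actual), actual, expected);
+ *   }
+ *
+ * When SIMDE_NDEBUG is defined the assertion macros above collapse to (at
+ * most) an optimizer hint, so such checks disappear from release builds. */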
+#define simde_assert_double(a, op, b) \
+  simde_assert_type(double, "g", a, op, b)
+#define simde_assert_ptr(a, op, b) \
+  simde_assert_type(const void*, "p", a, op, b)
+
+#define simde_assert_int8(a, op, b) \
+  simde_assert_type(int8_t, PRIi8, a, op, b)
+#define simde_assert_uint8(a, op, b) \
+  simde_assert_type(uint8_t, PRIu8, a, op, b)
+#define simde_assert_int16(a, op, b) \
+  simde_assert_type(int16_t, PRIi16, a, op, b)
+#define simde_assert_uint16(a, op, b) \
+  simde_assert_type(uint16_t, PRIu16, a, op, b)
+#define simde_assert_int32(a, op, b) \
+  simde_assert_type(int32_t, PRIi32, a, op, b)
+#define simde_assert_uint32(a, op, b) \
+  simde_assert_type(uint32_t, PRIu32, a, op, b)
+#define simde_assert_int64(a, op, b) \
+  simde_assert_type(int64_t, PRIi64, a, op, b)
+#define simde_assert_uint64(a, op, b) \
+  simde_assert_type(uint64_t, PRIu64, a, op, b)
+
+#define simde_assert_ptr_equal(a, b) \
+  simde_assert_ptr(a, ==, b)
+#define simde_assert_ptr_not_equal(a, b) \
+  simde_assert_ptr(a, !=, b)
+#define simde_assert_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+#define simde_assert_ptr_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_ptr_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+
+#endif /* !defined(SIMDE_CHECK_H) */
+/* :: End simde/check.h :: */
+
+/* GCC/clang have a bunch of functionality in builtins which we would
+ * like to access, but the suffixes indicate whether they operate on
+ * int, long, or long long, not fixed width types (e.g., int32_t).
+ * We use these macros to attempt to map from fixed-width to the
+ * names GCC uses.  Note that you should still cast the input(s) and
+ * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
+ * types are the same size they may not be compatible according to the
+ * compiler.  For example, on x86 long and long long are generally
+ * both 64 bits, but platforms vary on whether an int64_t is mapped
+ * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 sse_m128; + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i sse_m128i; + SIMDE_ALIGN_TO_16 __m128d sse_m128d; + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 
float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde_v128_private; + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde_v128_t; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int32x4_t simde_v128_t; +#elif defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde_v128_t; +#elif defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde_v128_t; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(signed int) simde_v128_t; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde_v128_t SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde_v128_private simde_v128_t; +#endif + +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + typedef simde_v128_t v128_t; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde_v128_t), "simde_v128_t size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde_v128_private), "simde_v128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde_v128_t) == 16, "simde_v128_t is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde_v128_private) == 16, "simde_v128_private is not 16-byte aligned"); +#endif + +#define SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(Other_Type, SIMDe_Type, To_Name, From_Name) \ + SIMDE_FUNCTION_ATTRIBUTES \ + Other_Type To_Name(SIMDe_Type v) { \ + Other_Type r; \ + simde_memcpy(&r, &v, sizeof(r)); \ + return r; \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + SIMDe_Type From_Name(Other_Type v) { \ + SIMDe_Type r; \ + simde_memcpy(&r, &v, sizeof(r)); \ + return r; \ + } + +SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(simde_v128_private, simde_v128_t, simde_v128_to_private, simde_v128_from_private) + +#define SIMDE_WASM_SIMD128_FMIN(x, y) \ + (simde_math_isnan(x) ? SIMDE_MATH_NAN \ + : simde_math_isnan(y) ? SIMDE_MATH_NAN \ + : (((x) == 0) && ((y) == 0)) ? (simde_math_signbit(x) ? (x) : (y)) \ + : ((x) < (y) ? (x) : (y))) + +#define SIMDE_WASM_SIMD128_FMAX(x, y) \ + (simde_math_isnan(x) ? SIMDE_MATH_NAN \ + : simde_math_isnan(y) ? SIMDE_MATH_NAN \ + : (((x) == 0) && ((y) == 0)) ? (simde_math_signbit(x) ? (y) : (x)) \ + : ((x) > (y) ? 
(x) : (y))) + +#if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128 , simde_v128_t, simde_v128_to_m128 , simde_v128_from_m128 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128i, simde_v128_t, simde_v128_to_m128i, simde_v128_from_m128i) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(__m128d, simde_v128_t, simde_v128_to_m128d, simde_v128_from_m128d) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int8x16_t, simde_v128_t, simde_v128_to_neon_i8 , simde_v128_from_neon_i8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int16x8_t, simde_v128_t, simde_v128_to_neon_i16, simde_v128_from_neon_i16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int32x4_t, simde_v128_t, simde_v128_to_neon_i32, simde_v128_from_neon_i32) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( int64x2_t, simde_v128_t, simde_v128_to_neon_i64, simde_v128_from_neon_i64) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint8x16_t, simde_v128_t, simde_v128_to_neon_u8 , simde_v128_from_neon_u8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint16x8_t, simde_v128_t, simde_v128_to_neon_u16, simde_v128_from_neon_u16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint32x4_t, simde_v128_t, simde_v128_to_neon_u32, simde_v128_from_neon_u32) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS( uint64x2_t, simde_v128_t, simde_v128_to_neon_u64, simde_v128_from_neon_u64) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(float32x4_t, simde_v128_t, simde_v128_to_neon_f32, simde_v128_from_neon_f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(float64x2_t, simde_v128_t, simde_v128_to_neon_f64, simde_v128_from_neon_f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed char), simde_v128_t, simde_v128_to_altivec_i8 , simde_v128_from_altivec_i8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed short), simde_v128_t, simde_v128_to_altivec_i16, simde_v128_from_altivec_i16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed int), simde_v128_t, simde_v128_to_altivec_i32, simde_v128_from_altivec_i32) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), simde_v128_t, simde_v128_to_altivec_u8 , simde_v128_from_altivec_u8 ) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), simde_v128_t, simde_v128_to_altivec_u16, simde_v128_from_altivec_u16) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), simde_v128_t, simde_v128_to_altivec_u32, simde_v128_from_altivec_u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR( signed long long), simde_v128_t, simde_v128_to_altivec_i64, simde_v128_from_altivec_i64) + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), simde_v128_t, simde_v128_to_altivec_u64, simde_v128_from_altivec_u64) + #endif + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde_v128_to_altivec_f32(simde_v128_t value) { + simde_v128_private r_ = simde_v128_to_private(value); + return 
r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_v128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde_v128_private r_; + r_.altivec_f32 = value; + return simde_v128_from_private(r_); + } + #else + SIMDE_WASM_SIMD128_GENERATE_CONVERSION_FUNCTIONS(SIMDE_POWER_ALTIVEC_VECTOR(float), simde_v128_t, simde_v128_to_altivec_f32, simde_v128_from_altivec_f32) + #endif +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +/* + * Begin function implementations + */ + +/* load */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load(mem); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_REINTERPRET_CAST(const __m128i*, mem)); + #else + simde_v128_t r; + simde_memcpy(&r, mem, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load(mem) simde_wasm_v128_load((mem)) +#endif + +/* store */ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store (void * mem, simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem, a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_REINTERPRET_CAST(__m128i*, mem), a); + #else + simde_memcpy(mem, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store(mem, a) simde_wasm_v128_store((mem), (a)) +#endif + +/* make */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_make ( + int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, + int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return + wasm_i8x16_make( + c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return + _mm_setr_epi8( + c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15); + #else + simde_v128_private r_; + + r_.i8[ 0] = c0; + r_.i8[ 1] = c1; + r_.i8[ 2] = c2; + r_.i8[ 3] = c3; + r_.i8[ 4] = c4; + r_.i8[ 5] = c5; + r_.i8[ 6] = c6; + r_.i8[ 7] = c7; + r_.i8[ 8] = c8; + r_.i8[ 9] = c9; + r_.i8[10] = c10; + r_.i8[11] = c11; + r_.i8[12] = c12; + r_.i8[13] = c13; + r_.i8[14] = c14; + r_.i8[15] = c15; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i8x16_make( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + simde_wasm_i8x16_make( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_make ( + int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(c0, c1, c2, c3, c4, c5, c6, c7); + #else + simde_v128_private r_; + + r_.i16[0] = c0; + r_.i16[1] = c1; + r_.i16[2] = c2; + r_.i16[3] = c3; + r_.i16[4] = c4; + r_.i16[5] = c5; + r_.i16[6] = c6; + r_.i16[7] = c7; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7) \ + simde_wasm_i16x8_make((c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t 
+simde_wasm_i32x4_make (int32_t c0, int32_t c1, int32_t c2, int32_t c3) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_make(c0, c1, c2, c3); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(c0, c1, c2, c3); + #else + simde_v128_private r_; + + r_.i32[0] = c0; + r_.i32[1] = c1; + r_.i32[2] = c2; + r_.i32[3] = c3; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_make(c0, c1, c2, c3) simde_wasm_i32x4_make((c0), (c1), (c2), (c3)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_make (int64_t c0, int64_t c1) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_make(c0, c1); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi64x(c1, c0); + #else + simde_v128_private r_; + + r_.i64[ 0] = c0; + r_.i64[ 1] = c1; + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_make(c0, c1) simde_wasm_i64x2_make((c0), (c1)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_make (simde_float32 c0, simde_float32 c1, simde_float32 c2, simde_float32 c3) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_make(c0, c1, c2, c3); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_setr_ps(c0, c1, c2, c3); + #else + r_.f32[0] = c0; + r_.f32[1] = c1; + r_.f32[2] = c2; + r_.f32[3] = c3; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_make(c0, c1, c2, c3) simde_wasm_f32x4_make((c0), (c1), (c2), (c3)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_make (simde_float64 c0, simde_float64 c1) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_make(c0, c1); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_set_pd(c1, c0); + #else + r_.f64[ 0] = c0; + r_.f64[ 1] = c1; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_make(c0, c1) simde_wasm_f64x2_make((c0), (c1)) +#endif + +/* const */ + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i8x16_const( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + wasm_i8x16_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i8x16_const( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + SIMDE_ASSERT_CONSTANT_(c4); \ + SIMDE_ASSERT_CONSTANT_(c5); \ + SIMDE_ASSERT_CONSTANT_(c6); \ + SIMDE_ASSERT_CONSTANT_(c7); \ + SIMDE_ASSERT_CONSTANT_(c8); \ + SIMDE_ASSERT_CONSTANT_(c9); \ + SIMDE_ASSERT_CONSTANT_(c10); \ + SIMDE_ASSERT_CONSTANT_(c11); \ + SIMDE_ASSERT_CONSTANT_(c12); \ + SIMDE_ASSERT_CONSTANT_(c13); \ + SIMDE_ASSERT_CONSTANT_(c13); \ + SIMDE_ASSERT_CONSTANT_(c15); \ + \ + simde_wasm_i8x16_make( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i8x16_const ( + int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, 
int8_t c7, + int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) { + return simde_wasm_i8x16_make( + c0, c1, c2, c3, c4, c5, c6, c7, + c8, c9, c10, c11, c12, c13, c14, c15); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i8x16_const( \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + simde_wasm_i8x16_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i16x8_const( \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + wasm_i16x8_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i16x8_const( \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + SIMDE_ASSERT_CONSTANT_(c4); \ + SIMDE_ASSERT_CONSTANT_(c5); \ + SIMDE_ASSERT_CONSTANT_(c6); \ + SIMDE_ASSERT_CONSTANT_(c7); \ + \ + simde_wasm_i16x8_make( \ + c0, c1, c2, c3, c4, c5, c6, c7); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i16x8_const ( + int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) { + return simde_wasm_i16x8_make( + c0, c1, c2, c3, c4, c5, c6, c7); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i16x8_const( \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + simde_wasm_i16x8_const( \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i32x4_const( \ + c0, c1, c2, c3) \ + wasm_i32x4_const( \ + (c0), (c1), (c2), (c3)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i32x4_const( \ + c0, c1, c2, c3) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + \ + simde_wasm_i32x4_make( \ + c0, c1, c2, c3); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i32x4_const ( + int32_t c0, int32_t c1, int32_t c2, int32_t c3) { + return simde_wasm_i32x4_make( + c0, c1, c2, c3); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i32x4_const( \ + c0, c1, c2, c3) \ + simde_wasm_i32x4_const( \ + (c0), (c1), (c2), (c3)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i64x2_const( \ + c0, c1) \ + wasm_i64x2_const( \ + (c0), (c1)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_i64x2_const( \ + c0, c1) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + \ + simde_wasm_i64x2_make( \ + c0, c1); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_i64x2_const ( + int64_t c0, int64_t c1) { + return simde_wasm_i64x2_make( + c0, c1); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i64x2_const( \ + c0, c1) \ + simde_wasm_i64x2_const( \ + (c0), (c1)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_f32x4_const( \ + c0, c1, c2, c3) \ + wasm_f32x4_const( \ + (c0), (c1), (c2), (c3)) +#elif 
defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_f32x4_const( \ + c0, c1, c2, c3) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + SIMDE_ASSERT_CONSTANT_(c2); \ + SIMDE_ASSERT_CONSTANT_(c3); \ + \ + simde_wasm_f32x4_make( \ + c0, c1, c2, c3); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_f32x4_const ( + simde_float32 c0, simde_float32 c1, simde_float32 c2, simde_float32 c3) { + return simde_wasm_f32x4_make( + c0, c1, c2, c3); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_f32x4_const( \ + c0, c1, c2, c3) \ + simde_wasm_f32x4_const( \ + (c0), (c1), (c2), (c3)) +#endif + +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_f64x2_const( \ + c0, c1) \ + wasm_f64x2_const( \ + (c0), (c1)) +#elif defined(SIMDE_STATEMENT_EXPR_) && defined(SIMDE_ASSERT_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define \ + simde_wasm_f64x2_const( \ + c0, c1) \ + SIMDE_STATEMENT_EXPR_(({ \ + SIMDE_ASSERT_CONSTANT_(c0); \ + SIMDE_ASSERT_CONSTANT_(c1); \ + \ + simde_wasm_f64x2_make( \ + c0, c1); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde_v128_t + simde_wasm_f64x2_const ( + simde_float64 c0, simde_float64 c1) { + return simde_wasm_f64x2_make( + c0, c1); + } +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_f64x2_const( \ + c0, c1) \ + simde_wasm_f64x2_const( \ + (c0), (c1)) +#endif + +/* splat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_splat (int8_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_set1_epi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_splat(a) simde_wasm_i8x16_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_splat (int16_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_set1_epi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_splat(a) simde_wasm_i16x8_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_splat (int32_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_set1_epi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_splat(a) simde_wasm_i32x4_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_splat (int64_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + r_.sse_m128i = _mm_set1_epi64x(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_splat(a) simde_wasm_i64x2_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_splat (simde_float32 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_set1_ps(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_splat(a) simde_wasm_f32x4_splat((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_splat (simde_float64 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_splat(a); + #else + simde_v128_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_set1_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_splat(a) simde_wasm_f64x2_splat((a)) +#endif + +/* load_splat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load8_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load8_splat(mem); + #else + int8_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i8x16_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load8_splat(mem) simde_wasm_v128_load8_splat((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load16_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load16_splat(mem); + #else + int16_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i16x8_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load16_splat(mem) simde_wasm_v128_load16_splat((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t 
+simde_wasm_v128_load32_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load32_splat(mem); + #else + int32_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i32x4_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load32_splat(mem) simde_wasm_v128_load32_splat((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load64_splat (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load64_splat(mem); + #else + int64_t v; + simde_memcpy(&v, mem, sizeof(v)); + return simde_wasm_i64x2_splat(v); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load64_splat(mem) simde_wasm_v128_load64_splat((mem)) +#endif + +/* extract_lane + * + * Note that, unlike normal WASM SIMD128 we return intN_t instead of + * int for sizeof(X) <= sizeof(int). This is done for portability; + * the regular API doesn't have to worry about things like int being + * 16 bits (like on AVR). + * + * This does mean that code which works in SIMDe may not work without + * changes on WASM, but luckily the necessary changes (i.e., casting + * the return values to smaller type when assigning to the smaller + * type) mean the code will work in *both* SIMDe and a native + * implementation. If you use the simde_* prefixed functions it will + * always work. */ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_wasm_i8x16_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i8[lane & 15]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(int8_t, wasm_i8x16_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_wasm_i8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(simde_v128_to_m128i(a), (lane) & 15)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_wasm_i8x16_extract_lane(a, lane) vgetq_lane_s8(simde_v128_to_neon_i8(a), (lane) & 15) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_extract_lane(a, lane) simde_wasm_i8x16_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_wasm_i16x8_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i16[lane & 7]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE2_NATIVE) + #define simde_wasm_i16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(int16_t, _mm_extract_epi16((a), (lane) & 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i16x8_extract_lane(a, lane) vgetq_lane_s16(simde_v128_to_neon_i16(a), (lane) & 7) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extract_lane(a, lane) simde_wasm_i16x8_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_wasm_i32x4_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i32[lane & 3]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i32x4_extract_lane(a, lane) HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_wasm_i32x4_extract_lane(a, lane) HEDLEY_STATIC_CAST(int32_t, _mm_extract_epi32((a), 
(lane) & 3)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i32x4_extract_lane(a, lane) vgetq_lane_s32(simde_v128_to_neon_i32(a), (lane) & 3) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extract_lane(a, lane) simde_wasm_i32x4_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_wasm_i64x2_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.i64[lane & 1]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i64x2_extract_lane(a, lane) HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane((a), (lane))) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) + #define simde_wasm_i64x2_extract_lane(a, lane) HEDLEY_STATIC_CAST(int64_t, _mm_extract_epi64((a), (lane) & 1)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i64x2_extract_lane(a, lane) vgetq_lane_s64(simde_v128_to_neon_i64(a), (lane) & 1) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extract_lane(a, lane) simde_wasm_i64x2_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_wasm_u8x16_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.u8[lane & 15]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_u8x16_extract_lane(a, lane) HEDLEY_STATIC_CAST(uint8_t, wasm_u8x16_extract_lane((a), (lane))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_wasm_u8x16_extract_lane(a, lane) vgetq_lane_u8(simde_v128_to_neon_u8(a), (lane) & 15) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_extract_lane(a, lane) simde_wasm_u8x16_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_wasm_u16x8_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.u16[lane & 7]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_u16x8_extract_lane(a, lane) HEDLEY_STATIC_CAST(uint16_t, wasm_u16x8_extract_lane((a), (lane))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_u16x8_extract_lane(a, lane) vgetq_lane_u16(simde_v128_to_neon_u16(a), (lane) & 7) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extract_lane(a, lane) simde_wasm_u16x8_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_wasm_f32x4_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = simde_v128_to_private(a); + return a_.f32[lane & 3]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f32x4_extract_lane(a, lane) wasm_f32x4_extract_lane((a), (lane)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_wasm_f32x4(a, lane) _mm_extract_ps(simde_v128_to_m128(a), (lane) & 3) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f32x4_extract_lane(a, lane) vgetq_lane_f32(simde_v128_to_neon_f32(a), (lane) & 3) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_extract_lane(a, lane) simde_wasm_f32x4_extract_lane((a), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_wasm_f64x2_extract_lane (simde_v128_t a, const int lane) { + simde_v128_private a_ = 
simde_v128_to_private(a); + return a_.f64[lane & 1]; +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f64x2_extract_lane(a, lane) wasm_f64x2_extract_lane((a), (lane)) +#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f64x2_extract_lane(a, lane) vgetq_lane_f64(simde_v128_to_neon_f64(a), (lane) & 1) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_extract_lane(a, lane) simde_wasm_f64x2_extract_lane((a), (lane)) +#endif + +/* replace_lane */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_replace_lane (simde_v128_t a, const int lane, int8_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i8[lane & 15] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i8x16_replace_lane(a, lane, value) wasm_i8x16_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) + #define simde_wasm_i8x16_replace_lane(a, lane, value) HEDLEY_REINTERPRET_CAST(simde_v128_t, _mm_insert_epi8((a), (value), (lane) & 15)) + #else + #define simde_wasm_i8x16_replace_lane(a, lane, value) _mm_insert_epi8((a), (value), (lane) & 15) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_wasm_i8x16_replace_lane(a, lane, value) simde_v128_from_neon_i8(vsetq_lane_s8((value), simde_v128_to_neon_i8(a), (lane) & 15)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_replace_lane(a, lane, value) simde_wasm_i8x16_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_replace_lane (simde_v128_t a, const int lane, int16_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i16[lane & 7] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i16x8_replace_lane(a, lane, value) wasm_i16x8_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE2_NATIVE) + #define simde_wasm_i16x8_replace_lane(a, lane, value) _mm_insert_epi16((a), (value), (lane) & 7) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i16x8_replace_lane(a, lane, value) simde_v128_from_neon_i16(vsetq_lane_s16((value), simde_v128_to_neon_i16(a), (lane) & 7)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_replace_lane(a, lane, value) simde_wasm_i16x8_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_replace_lane (simde_v128_t a, const int lane, int32_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i32[lane & 3] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i32x4_replace_lane(a, lane, value) wasm_i32x4_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) + #define simde_wasm_i32x4_replace_lane(a, lane, value) HEDLEY_REINTERPRET_CAST(simde_v128_t, _mm_insert_epi32((a), (value), (lane) & 3)) + #else + #define simde_wasm_i32x4_replace_lane(a, lane, value) _mm_insert_epi32((a), (value), (lane) & 3) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i32x4_replace_lane(a, lane, value) 
simde_v128_from_neon_i32(vsetq_lane_s32((value), simde_v128_to_neon_i32(a), (lane) & 3)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_replace_lane(a, lane, value) simde_wasm_i32x4_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_replace_lane (simde_v128_t a, const int lane, int64_t value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.i64[lane & 1] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_i64x2_replace_lane(a, lane, value) wasm_i64x2_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) + #define simde_wasm_i64x2_replace_lane(a, lane, value) _mm_insert_epi64((a), (value), (lane) & 1) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_i64x2_replace_lane(a, lane, value) simde_v128_from_neon_i64(vsetq_lane_s64((value), simde_v128_to_neon_i64(a), (lane) & 1)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_replace_lane(a, lane, value) simde_wasm_i64x2_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_replace_lane (simde_v128_t a, const int lane, simde_float32 value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.f32[lane & 3] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f32x4_replace_lane(a, lane, value) wasm_f32x4_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f32x4_replace_lane(a, lane, value) simde_v128_from_neon_f32(vsetq_lane_f32((value), simde_v128_to_neon_f32(a), (lane) & 3)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_replace_lane(a, lane, value) simde_wasm_f32x4_replace_lane((a), (lane), (value)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_replace_lane (simde_v128_t a, const int lane, simde_float64 value) { + simde_v128_private a_ = simde_v128_to_private(a); + a_.f64[lane & 1] = value; + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_f64x2_replace_lane(a, lane, value) wasm_f64x2_replace_lane((a), (lane), (value)) +#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES) + #define simde_wasm_f64x2_replace_lane(a, lane, value) simde_v128_from_neon_f64(vsetq_lane_f64((value), simde_v128_to_neon_f64(a), (lane) & 1)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_replace_lane(a, lane, value) simde_wasm_f64x2_replace_lane((a), (lane), (value)) +#endif + +/* eq */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 == b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] 
== b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_eq(a, b) simde_wasm_i8x16_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_eq(a, b) simde_wasm_i16x8_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_eq(a, b) simde_wasm_i32x4_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_eq(a, b) simde_wasm_i64x2_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpeq_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_eq(a, b) simde_wasm_f32x4_eq((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_eq (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_eq(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpeq_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 == b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_eq(a, b) simde_wasm_f64x2_eq((a), (b)) +#endif + +/* ne */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmvnq_u8(vceqq_s8(a_.neon_i8, b_.neon_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 != b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_ne(a, b) simde_wasm_i8x16_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmvnq_u16(vceqq_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 != b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_ne(a, b) simde_wasm_i16x8_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 != b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_ne(a, b) simde_wasm_i32x4_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(a_.neon_i64, b_.neon_i64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 != b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_ne(a, b) simde_wasm_i64x2_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpneq_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 != b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_ne(a, b) simde_wasm_f32x4_ne((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_ne (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_ne(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpneq_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 != b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_ne(a, b) simde_wasm_f64x2_ne((a), (b)) +#endif + +/* lt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmplt_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 < b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_lt(a, b) simde_wasm_i8x16_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmplt_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 < b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_lt(a, b) simde_wasm_i16x8_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cmplt_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_lt(a, b) simde_wasm_i32x4_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t tmp = vorrq_s32( + vandq_s32( + vreinterpretq_s32_u32(vceqq_s32(b_.neon_i32, a_.neon_i32)), + vsubq_s32(a_.neon_i32, b_.neon_i32) + ), + vreinterpretq_s32_u32(vcgtq_s32(b_.neon_i32, a_.neon_i32)) + ); + int32x4x2_t trn = vtrnq_s32(tmp, tmp); + r_.neon_i32 = trn.val[1]; + #elif defined(SIMDE_X86_SSE4_2_NATIVE) + r_.sse_m128i = _mm_cmpgt_epi64(b_.sse_m128i, a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746 */ + r_.sse_m128i = + _mm_shuffle_epi32( + _mm_or_si128( + _mm_and_si128( + _mm_cmpeq_epi32(b_.sse_m128i, a_.sse_m128i), + _mm_sub_epi64(a_.sse_m128i, b_.sse_m128i) + ), + _mm_cmpgt_epi32( + b_.sse_m128i, + a_.sse_m128i + ) + ), + _MM_SHUFFLE(3, 3, 1, 1) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) tmp = + vec_or( + vec_and( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(b_.altivec_i32, a_.altivec_i32)), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_sub( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.altivec_i32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), b_.altivec_i32) + )) + ), + vec_cmpgt(b_.altivec_i32, a_.altivec_i32) + ); + r_.altivec_i32 = vec_mergeo(tmp, tmp); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_lt(a, b) simde_wasm_i64x2_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(a_.altivec_u8, b_.altivec_u8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_subs_epu8(b_.sse_m128i, a_.sse_m128i); + r_.sse_m128i = _mm_adds_epu8(tmp, _mm_sub_epi8(_mm_setzero_si128(), tmp)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 < b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? ~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_lt(a, b) simde_wasm_u8x16_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmplt(a_.altivec_u16, b_.altivec_u16)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_subs_epu16(b_.sse_m128i, a_.sse_m128i); + r_.sse_m128i = _mm_adds_epu16(tmp, _mm_sub_epi16(_mm_setzero_si128(), tmp)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 < b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_lt(a, b) simde_wasm_u16x8_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_xor_si128( + _mm_cmpgt_epi32(b_.sse_m128i, a_.sse_m128i), + _mm_srai_epi32(_mm_xor_si128(b_.sse_m128i, a_.sse_m128i), 31) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmplt(a_.altivec_u32, b_.altivec_u32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 < b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_lt(a, b) simde_wasm_u32x4_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmplt_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 < b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_lt(a, b) simde_wasm_f32x4_lt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_lt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_lt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmplt_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 < b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_lt(a, b) simde_wasm_f64x2_lt((a), (b)) +#endif + +/* gt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_gt(a, b); + #else + return simde_wasm_i8x16_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_gt(a, b) simde_wasm_i8x16_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_gt(a, b); + #else + return simde_wasm_i16x8_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_gt(a, b) simde_wasm_i16x8_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_gt(a, b); + #else + return simde_wasm_i32x4_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_gt(a, b) simde_wasm_i32x4_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_gt(a, b); + #else + return simde_wasm_i64x2_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_gt(a, b) simde_wasm_i64x2_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_gt(a, b); + #else + return simde_wasm_u8x16_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_gt(a, b) simde_wasm_u8x16_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_gt(a, b); + #else + return simde_wasm_u16x8_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_gt(a, b) simde_wasm_u16x8_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_gt(a, b); + #else + return simde_wasm_u32x4_lt(b, a); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_gt(a, b) simde_wasm_u32x4_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_gt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpgt_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 > b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_gt(a, b) simde_wasm_f32x4_gt((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_gt (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_gt(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpgt_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.f64 > b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_gt(a, b) simde_wasm_f64x2_gt((a), (b)) +#endif + +/* le */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(a_.sse_m128i, _mm_min_epi8(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_le(a, b) simde_wasm_i8x16_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(a_.sse_m128i, _mm_min_epi16(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_le(a, b) simde_wasm_i16x8_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(a_.sse_m128i, _mm_min_epi32(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_le(a, b) simde_wasm_i32x4_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi64(a_.sse_m128i, _mm_min_epi64(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_le(a, b) simde_wasm_i64x2_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_le(a, b) simde_wasm_u8x16_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_le(a, b) simde_wasm_u16x8_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_le(a, b) simde_wasm_u32x4_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmple_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 <= b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_le(a, b) simde_wasm_f32x4_le((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_le (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_le(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmple_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 <= b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_le(a, b) simde_wasm_f64x2_le((a), (b)) +#endif + +/* ge */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(_mm_min_epi8(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_ge(a, b) simde_wasm_i8x16_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(_mm_min_epi16(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_ge(a, b) simde_wasm_i16x8_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(_mm_min_epi32(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_ge(a, b) simde_wasm_i32x4_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi64(_mm_min_epi64(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_ge(a, b) simde_wasm_i64x2_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi8(_mm_min_epu8(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? ~UINT8_C(0) : UINT8_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_ge(a, b) simde_wasm_u8x16_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi16(_mm_min_epu16(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_ge(a, b) simde_wasm_u16x8_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cmpeq_epi32(_mm_min_epu32(a_.sse_m128i, b_.sse_m128i), b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_ge(a, b) simde_wasm_u32x4_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cmpge_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 >= b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_ge(a, b) simde_wasm_f32x4_ge((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_ge (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_ge(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cmpge_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f64 >= b_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_ge(a, b) simde_wasm_f64x2_ge((a), (b)) +#endif + +/* not */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_not (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_not(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(a_.sse_m128i, _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_not(a) simde_wasm_v128_not((a)) +#endif + +/* and */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_and (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_and_si128(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_and(a, b) simde_wasm_v128_and((a), (b)) +#endif + +/* or */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_or (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_or_si128(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_or(a, b) simde_wasm_v128_or((a), (b)) +#endif + +/* xor */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_xor (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_xor(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if 
defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_xor(a, b) simde_wasm_v128_xor((a), (b)) +#endif + +/* andnot */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_andnot (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_andnot(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_andnot_si128(b_.sse_m128i, a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = a_.i32f & ~b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & ~b_.i32f[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_andnot(a, b) simde_wasm_v128_andnot((a), (b)) +#endif + +/* bitselect */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_bitselect (simde_v128_t a, simde_v128_t b, simde_v128_t mask) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_bitselect(a, b, mask); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + mask_ = simde_v128_to_private(mask), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_ternarylogic_epi32(mask_.sse_m128i, a_.sse_m128i, b_.sse_m128i, 0xca); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128 (mask_.sse_m128i, a_.sse_m128i), + _mm_andnot_si128(mask_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(b_.altivec_i32, a_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32f = (a_.i32f & mask_.i32f) | (b_.i32f & ~mask_.i32f); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = (a_.i32f[i] & mask_.i32f[i]) | (b_.i32f[i] & ~mask_.i32f[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_bitselect(a, b, c) simde_wasm_v128_bitselect((a), (b), (c)) +#endif + +/* bitmask */ + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i8x16_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_epi8(a_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i8[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_bitmask(a) simde_wasm_i8x16_bitmask((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i16x8_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_epi8(_mm_packs_epi16(a_.sse_m128i, _mm_setzero_si128()))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint16_t md[8] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + uint16x8_t extended = vreinterpretq_u16_s16(vshrq_n_s16(a_.neon_i16, 15)); + uint16x8_t masked = vandq_u16(vld1q_u16(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(masked); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(masked)); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 112, 96, 80, 64, 48, 32, 16, 0, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, 
vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i16[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_bitmask(a) simde_wasm_i16x8_bitmask((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i32x4_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_ps(a_.sse_m128)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_bitmask(a) simde_wasm_i32x4_bitmask((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_wasm_i64x2_bitmask (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_bitmask(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + uint32_t r = 0; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r = HEDLEY_STATIC_CAST(uint32_t, _mm_movemask_pd(a_.sse_m128d)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(uint32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(uint32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.i64[i] < 0) << i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_bitmask(a) simde_wasm_i64x2_bitmask((a)) +#endif + +/* abs */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_abs_epi8(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_VECTOR_SCALAR) + __typeof__(r_.i8) mask = HEDLEY_REINTERPRET_CAST(__typeof__(mask), a_.i8 < 0); + r_.i8 = (-a_.i8 & mask) | (a_.i8 & ~mask); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT8_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_abs(a) simde_wasm_i8x16_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_abs_epi16(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT8_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_abs(a) simde_wasm_i16x8_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_abs_epi32(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32) z = { 0, }; + __typeof__(r_.i32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < z); + r_.i32 = (-a_.i32 & m) | (a_.i32 & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_abs(a) simde_wasm_i32x4_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_abs_epi64(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vabsq_s64(a_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_abs(a_.altivec_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i64) z = { 0, }; + __typeof__(r_.i64) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < z); + r_.i64 = (-a_.i64 & m) | (a_.i64 & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_abs(a) simde_wasm_i64x2_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_andnot_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_signbit(a_.f32[i]) ? 
-a_.f32[i] : a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_abs(a) simde_wasm_f32x4_abs((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_abs (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_abs(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_andnot_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_signbit(a_.f64[i]) ? -a_.f64[i] : a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_abs(a) simde_wasm_f64x2_abs((a)) +#endif + +/* neg */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi8(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vnegq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_i8 = vec_neg(a_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = -a_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = -a_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_neg(a) simde_wasm_i8x16_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi16(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vnegq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i16 = vec_neg(a_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = -a_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = -a_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_neg(a) simde_wasm_i16x8_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi32(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vnegq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i32 = vec_neg(a_.altivec_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = -a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / 
sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = -a_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_neg(a) simde_wasm_i32x4_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi64(_mm_setzero_si128(), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vnegq_s64(a_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_neg(a_.altivec_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = -a_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = -a_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_neg(a) simde_wasm_i64x2_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << 31)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_neg(a) simde_wasm_f32x4_neg((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_neg (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_neg(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_xor_si128(_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << 63)), a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_neg(a) simde_wasm_f64x2_neg((a)) +#endif + +/* any_true */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_v128_any_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_any_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + simde_bool r = 0; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r = !_mm_test_all_zeros(a_.sse_m128i, _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r = _mm_movemask_epi8(_mm_cmpeq_epi8(a_.sse_m128i, _mm_setzero_si128())) != 0xffff; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = !!vmaxvq_u32(a_.neon_u32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + 
uint32x2_t tmp = vpmax_u32(vget_low_u32(a_.u32), vget_high_u32(a_.u32)); + r = vget_lane_u32(tmp, 0); + r |= vget_lane_u32(tmp, 1); + r = !!r; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = HEDLEY_STATIC_CAST(simde_bool, vec_any_ne(a_.altivec_i32, vec_splats(0))); + #else + int_fast32_t ri = 0; + SIMDE_VECTORIZE_REDUCTION(|:ri) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + ri |= (a_.i32f[i]); + } + r = !!ri; + #endif + + return r; + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_any_true(a) simde_wasm_v128_any_true((a)) +#endif + +/* all_true */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i8x16_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi8(a_.sse_m128i, _mm_set1_epi8(INT8_C(0))), _mm_set1_epi8(~INT8_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_epi8(_mm_cmpeq_epi8(a_.sse_m128i, _mm_setzero_si128())) == 0; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vmaxvq_u8(vceqzq_u8(a_.neon_u8)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t zeroes = vdupq_n_u8(0); + uint8x16_t false_set = vceqq_u8(a_.neon_u8, vdupq_n_u8(0)); + uint32x4_t d_all_true = vceqq_u32(vreinterpretq_u32_u8(false_set), vreinterpretq_u32_u8(zeroes)); + uint32x2_t q_all_true = vpmin_u32(vget_low_u32(d_all_true), vget_high_u32(d_all_true)); + + return !!( + vget_lane_u32(q_all_true, 0) & + vget_lane_u32(q_all_true, 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(signed char, 0)))); + #else + int8_t r = !INT8_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r &= !!(a_.i8[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_all_true(a) simde_wasm_i8x16_all_true((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i16x8_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi16(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi16(~INT16_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_epi8(_mm_cmpeq_epi16(a_.sse_m128i, _mm_setzero_si128())) == 0; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vmaxvq_u16(vceqzq_u16(a_.neon_u16)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t zeroes = vdupq_n_u16(0); + uint16x8_t false_set = vceqq_u16(a_.neon_u16, vdupq_n_u16(0)); + uint32x4_t d_all_true = vceqq_u32(vreinterpretq_u32_u16(false_set), vreinterpretq_u32_u16(zeroes)); + uint32x2_t q_all_true = vpmin_u32(vget_low_u32(d_all_true), vget_high_u32(d_all_true)); + + return !!( + vget_lane_u32(q_all_true, 0) & + vget_lane_u32(q_all_true, 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(signed short, 0)))); + #else + int16_t r = !INT16_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r &= !!(a_.i16[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + 
#define wasm_i16x8_all_true(a) simde_wasm_i16x8_all_true((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i32x4_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi32(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a_.sse_m128i, _mm_setzero_si128()))) == 0; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vmaxvq_u32(vceqzq_u32(a_.neon_u32)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t d_all_true = vmvnq_u32(vceqq_u32(a_.neon_u32, vdupq_n_u32(0))); + uint32x2_t q_all_true = vpmin_u32(vget_low_u32(d_all_true), vget_high_u32(d_all_true)); + + return !!( + vget_lane_u32(q_all_true, 0) & + vget_lane_u32(q_all_true, 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(signed int, 0)))); + #else + int32_t r = !INT32_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r &= !!(a_.i32[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_all_true(a) simde_wasm_i32x4_all_true((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_bool +simde_wasm_i64x2_all_true (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) && defined(__wasm_unimplemented_simd128__) + return wasm_i64x2_all_true(a); + #else + simde_v128_private a_ = simde_v128_to_private(a); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(_mm_cmpeq_epi64(a_.sse_m128i, _mm_setzero_si128()), _mm_set1_epi32(~INT32_C(0))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(_mm_cmpeq_pd(a_.sse_m128d, _mm_setzero_pd())) == 0; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return HEDLEY_STATIC_CAST(simde_bool, vec_all_ne(a_.altivec_i64, HEDLEY_REINTERPRET_CAST(__typeof__(a_.altivec_i64), vec_splats(0)))); + #else + int64_t r = !INT32_C(0); + + SIMDE_VECTORIZE_REDUCTION(&:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r &= !!(a_.i64[i]); + } + + return r; + #endif + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(__wasm_unimplemented_simd128__)) + #define wasm_i64x2_all_true(a) simde_wasm_i64x2_all_true((a)) +#endif + +/* shl */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vshlq_s8(a_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, count & 7))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_sl(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i8 = a_.i8 << (count & 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] << (count & 7)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_shl(a, count) 
simde_wasm_i8x16_shl((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a_.sse_m128i, _mm_cvtsi32_si128(count & 15)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count & 15))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_sl(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i16 = a_.i16 << (count & 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (count & 15)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_shl(a, count) simde_wasm_i16x8_shl((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a_.sse_m128i, _mm_cvtsi32_si128(count & 31)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count & 31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_sl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i32 = a_.i32 << (count & 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (count & 31)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_shl(a, count) simde_wasm_i32x4_shl((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_shl (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_BUG_CLANG_60655) + count = count & 63; + #endif + return wasm_i64x2_shl(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a_.sse_m128i, _mm_cvtsi32_si128(count & 63)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshlq_s64(a_.neon_i64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, count & 63))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_sl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i64 = a_.i64 << (count & 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (count & 63)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_shl(a, count) simde_wasm_i64x2_shl((a), (count)) +#endif + +/* shr */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_shr (simde_v128_t a, 
uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vshlq_s8(a_.neon_i8, vdupq_n_s8(-HEDLEY_STATIC_CAST(int8_t, count & 7))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_sra(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i8 = a_.i8 >> (count & 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> (count & 7)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_shr(a, count) simde_wasm_i8x16_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a_.sse_m128i, _mm_cvtsi32_si128(count & 15)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(-HEDLEY_STATIC_CAST(int16_t, count & 15))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_sra(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i16 = a_.i16 >> (count & 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> (count & 15)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_shr(a, count) simde_wasm_i16x8_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_sra_epi32(a_.sse_m128i, _mm_cvtsi32_si128(count & 31)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-HEDLEY_STATIC_CAST(int32_t, count & 31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_sra(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i32 = a_.i32 >> (count & 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> (count & 31)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_shr(a, count) simde_wasm_i32x4_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_BUG_CLANG_60655) + count = count & 63; + #endif + return wasm_i64x2_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_sra_epi64(a_.sse_m128i, _mm_cvtsi32_si128(count 
& 63)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshlq_s64(a_.neon_i64, vdupq_n_s64(-HEDLEY_STATIC_CAST(int64_t, count & 63))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_sra(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.i64 = a_.i64 >> (count & 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> (count & 63)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_shr(a, count) simde_wasm_i64x2_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vshlq_u8(a_.neon_u8, vdupq_n_s8(-HEDLEY_STATIC_CAST(int8_t, count & 7))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_sr(a_.altivec_u8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, count & 7))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u8 = a_.u8 >> (count & 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> (count & 7)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_shr(a, count) simde_wasm_u8x16_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a_.sse_m128i, _mm_cvtsi32_si128(count & 15)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(-HEDLEY_STATIC_CAST(int16_t, count & 15))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_sra(a_.altivec_i16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, count & 15))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u16 = a_.u16 >> (count & 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> (count & 15)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_shr(a, count) simde_wasm_u16x8_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_srl_epi32(a_.sse_m128i, _mm_cvtsi32_si128(count & 31)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(-HEDLEY_STATIC_CAST(int32_t, count & 31))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_sra(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, count & 31))); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u32 = a_.u32 >> (count & 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> (count & 31)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_shr(a, count) simde_wasm_u32x4_shr((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_shr (simde_v128_t a, uint32_t count) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + #if defined(SIMDE_BUG_CLANG_60655) + count = count & 63; + #endif + return wasm_u64x2_shr(a, count); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_srl_epi64(a_.sse_m128i, _mm_cvtsi32_si128(count & 63)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-HEDLEY_STATIC_CAST(int64_t, count & 63))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_sra(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, count & 63))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_VECTOR_SCALAR) + r_.u64 = a_.u64 >> (count & 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> (count & 63)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_shr(a, count) simde_wasm_u64x2_shr((a), (count)) +#endif + +/* add */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_add(a, b) simde_wasm_i8x16_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_add(a, b) simde_wasm_i16x8_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi32(a_.sse_m128i, 
b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_add(a, b) simde_wasm_i32x4_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_add_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_add(a, b) simde_wasm_i64x2_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_add_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_add(a, b) simde_wasm_f32x4_add((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_add (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_add(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_add_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_add(a, b) simde_wasm_f64x2_add((a), (b)) +#endif + +/* sub */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_sub(a, b) simde_wasm_i8x16_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_sub (simde_v128_t a, 
simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_sub(a, b) simde_wasm_i16x8_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_sub(a, b) simde_wasm_i32x4_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_sub_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_sub(a, b) simde_wasm_i64x2_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_sub_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_sub(a, b) simde_wasm_f32x4_sub((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_sub (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_sub(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_sub_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + 
#endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_sub(a, b) simde_wasm_f64x2_sub((a), (b)) +#endif + +/* mul */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_pack( + vec_mule(a_.altivec_i16, b_.altivec_i16), + vec_mulo(a_.altivec_i16, b_.altivec_i16) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i16 = a_.i16 * b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] * b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_mul(a, b) simde_wasm_i16x8_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_mullo_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = a_.i32 * b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] * b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_mul(a, b) simde_wasm_i32x4_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.sse_m128i = _mm_mullo_epi64(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_mul(a, b) simde_wasm_i64x2_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_mul_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_mul(a, b) 
simde_wasm_f32x4_mul((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_mul (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_mul(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_mul_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_mul(a, b) simde_wasm_f64x2_mul((a), (b)) +#endif + +/* q15mulr_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_q15mulr_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_q15mulr_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + /* https://github.com/WebAssembly/simd/pull/365 */ + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqrdmulhq_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + tmp += UINT32_C(0x4000); + tmp >>= 15; + r_.i16[i] = (tmp < INT16_MIN) ? INT16_MIN : ((tmp > INT16_MAX) ? (INT16_MAX) : HEDLEY_STATIC_CAST(int16_t, tmp)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_q15mulr_sat(a, b) simde_wasm_i16x8_q15mulr_sat((a), (b)) +#endif + +/* min */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmplt_epi8(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(m, a_.sse_m128i), + _mm_andnot_si128(m, b_.sse_m128i) + ); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_min(a, b) simde_wasm_i8x16_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_min_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_min(a, b) simde_wasm_i16x8_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmplt_epi32(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(m, a_.sse_m128i), + _mm_andnot_si128(m, b_.sse_m128i) + ); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_min(a, b) simde_wasm_i32x4_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_min_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_min(a, b) simde_wasm_u8x16_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.sse_m128i = _mm_sub_epi16(a, _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_min(a, b) simde_wasm_u16x8_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_min_epu32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i i32_min = _mm_set1_epi32(INT32_MIN); + const __m128i difference = _mm_sub_epi32(a_.sse_m128i, b_.sse_m128i); + __m128i m = + _mm_cmpeq_epi32( + /* _mm_subs_epu32(a_.sse_m128i, b_.sse_m128i) */ + _mm_and_si128( + difference, + _mm_xor_si128( + _mm_cmpgt_epi32( + _mm_xor_si128(difference, i32_min), + _mm_xor_si128(a_.sse_m128i, i32_min) + ), + _mm_set1_epi32(~INT32_C(0)) + ) + ), + _mm_setzero_si128() + ); + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(m, a_.sse_m128i), + _mm_andnot_si128(m, b_.sse_m128i) + ); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_min(a, b) simde_wasm_u32x4_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L202 + simde_v128_private scratch; + scratch.sse_m128 = a_.sse_m128; + scratch.sse_m128 = _mm_min_ps(scratch.sse_m128, b_.sse_m128); + r_.sse_m128 = b_.sse_m128; + r_.sse_m128 = _mm_min_ps(r_.sse_m128, a_.sse_m128); + scratch.sse_m128 = _mm_or_ps(scratch.sse_m128, r_.sse_m128); + r_.sse_m128 = _mm_cmpunord_ps(r_.sse_m128, scratch.sse_m128); + scratch.sse_m128 = _mm_or_ps(scratch.sse_m128, r_.sse_m128); + r_.sse_m128i = _mm_srli_epi32(r_.sse_m128i, 10); + r_.sse_m128 = _mm_andnot_ps(r_.sse_m128, scratch.sse_m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_WASM_SIMD128_FMIN(a_.f32[i], b_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_min(a, b) simde_wasm_f32x4_min((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_min (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_min(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L263 + simde_v128_private scratch; + scratch.sse_m128d = a_.sse_m128d; + scratch.sse_m128d = _mm_min_pd(scratch.sse_m128d, b_.sse_m128d); + r_.sse_m128d = b_.sse_m128d; + r_.sse_m128d = _mm_min_pd(r_.sse_m128d, a_.sse_m128d); + scratch.sse_m128d = _mm_or_pd(scratch.sse_m128d, r_.sse_m128d); + r_.sse_m128d = _mm_cmpunord_pd(r_.sse_m128d, scratch.sse_m128d); + scratch.sse_m128d = _mm_or_pd(scratch.sse_m128d, r_.sse_m128d); + r_.sse_m128i = _mm_srli_epi64(r_.sse_m128i, 13); + r_.sse_m128d = _mm_andnot_pd(r_.sse_m128d, scratch.sse_m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = SIMDE_WASM_SIMD128_FMIN(a_.f64[i], b_.f64[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_min(a, b) simde_wasm_f64x2_min((a), (b)) +#endif + +/* max */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = 
vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i8) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + r_.i8 = (m & a_.i8) | (~m & b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_max(a, b) simde_wasm_i8x16_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_max_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i16) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16); + r_.i16 = (m & a_.i16) | (~m & b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_max(a, b) simde_wasm_i16x8_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a_.sse_m128i, b_.sse_m128i); + r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + r_.i32 = (m & a_.i32) | (~m & b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_max(a, b) simde_wasm_i32x4_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_max_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.u8) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 > b_.u8); + r_.u8 = (m & a_.u8) | (~m & b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_max(a, b) simde_wasm_u8x16_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + r_.sse_m128i = _mm_add_epi16(b, _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.u16) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 > b_.u16); + r_.u16 = (m & a_.u16) | (~m & b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_max(a, b) simde_wasm_u16x8_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_max_epu32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-886057227 */ + __m128i m = + _mm_xor_si128( + _mm_cmpgt_epi32(a_.sse_m128i, b_.sse_m128i), + _mm_srai_epi32(_mm_xor_si128(a_.sse_m128i, b_.sse_m128i), 31) + ); + r_.sse_m128i = _mm_or_si128(_mm_and_si128(m, a_.sse_m128i), _mm_andnot_si128(m, b_.sse_m128i)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.u32) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 > b_.u32); + r_.u32 = (m & a_.u32) | (~m & b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_max(a, b) simde_wasm_u32x4_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L231 + simde_v128_private scratch; + scratch.sse_m128 = a_.sse_m128; + scratch.sse_m128 = _mm_max_ps(scratch.sse_m128, b_.sse_m128); + r_.sse_m128 = b_.sse_m128; + r_.sse_m128 = _mm_max_ps(r_.sse_m128, a_.sse_m128); + r_.sse_m128 = _mm_xor_ps(r_.sse_m128, scratch.sse_m128); + scratch.sse_m128 = _mm_or_ps(scratch.sse_m128, r_.sse_m128); + scratch.sse_m128 = _mm_sub_ps(scratch.sse_m128, r_.sse_m128); + r_.sse_m128 = _mm_cmpunord_ps(r_.sse_m128, scratch.sse_m128); + r_.sse_m128i = _mm_srli_epi32(r_.sse_m128i, 10); + r_.sse_m128 = _mm_andnot_ps(r_.sse_m128, scratch.sse_m128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_WASM_SIMD128_FMAX(a_.f32[i], b_.f32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_max(a, b) simde_wasm_f32x4_max((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_max (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_max(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + // Inspired by 
https://github.com/v8/v8/blob/c750b6c85bd1ad1d27f7acc1812165f465515144/src/codegen/shared-ia32-x64/macro-assembler-shared-ia32-x64.cc#L301 + simde_v128_private scratch; + scratch.sse_m128d = a_.sse_m128d; + scratch.sse_m128d = _mm_max_pd(scratch.sse_m128d, b_.sse_m128d); + r_.sse_m128d = b_.sse_m128d; + r_.sse_m128d = _mm_max_pd(r_.sse_m128d, a_.sse_m128d); + r_.sse_m128d = _mm_xor_pd(r_.sse_m128d, scratch.sse_m128d); + scratch.sse_m128d = _mm_or_pd(scratch.sse_m128d, r_.sse_m128d); + scratch.sse_m128d = _mm_sub_pd(scratch.sse_m128d, r_.sse_m128d); + r_.sse_m128d = _mm_cmpunord_pd(r_.sse_m128d, scratch.sse_m128d); + r_.sse_m128i = _mm_srli_epi64(r_.sse_m128i, 13); + r_.sse_m128d = _mm_andnot_pd(r_.sse_m128d, scratch.sse_m128d); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = SIMDE_WASM_SIMD128_FMAX(a_.f64[i], b_.f64[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_max(a, b) simde_wasm_f64x2_max((a), (b)) +#endif + +/* add_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(a_.u8) r1, r2, m; + r1 = a_.u8 + b_.u8; + r2 = (a_.u8 >> 7) + INT8_MAX; + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (r2 ^ b_.u8) | ~(b_.u8 ^ r1)) < 0); + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (r1 & m) | (r2 & ~m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_add_sat(a, b) simde_wasm_i8x16_add_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(a_.u16) r1, r2, m; + r1 = a_.u16 + b_.u16; + r2 = (a_.u16 >> 15) + INT16_MAX; + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (r2 ^ b_.u16) | ~(b_.u16 ^ r1)) < 0); + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (r1 & m) | (r2 & ~m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if 
defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_add_sat(a, b) simde_wasm_i16x8_add_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 + b_.u8; + r_.u8 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), r_.u8 < a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_add_sat(a, b) simde_wasm_u8x16_add_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_add_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_add_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_adds_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 + b_.u16; + r_.u16 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), r_.u16 < a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_add_sat(a, b) simde_wasm_u16x8_add_sat((a), (b)) +#endif + +/* avgr */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_avgr (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_avgr(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_avg_epu8(a_.sse_m128i, b_.sse_m128i); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_avgr(a, b) simde_wasm_u8x16_avgr((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_avgr (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_avgr(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_avg_epu16(a_.sse_m128i, b_.sse_m128i); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return 
simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_avgr(a, b) simde_wasm_u16x8_avgr((a), (b)) +#endif + +/* sub_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epi8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_subs(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.i8) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (b_.i8 > a_.i8) ^ INT8_MAX); + const __typeof__(r_.i8) diff = a_.i8 - b_.i8; + const __typeof__(r_.i8) saturate = diff_sat ^ diff; + const __typeof__(r_.i8) m = saturate >> 7; + r_.i8 = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_sub_sat(a, b) simde_wasm_i8x16_sub_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_subs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + const __typeof__(r_.i16) diff_sat = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (b_.i16 > a_.i16) ^ INT16_MAX); + const __typeof__(r_.i16) diff = a_.i16 - b_.i16; + const __typeof__(r_.i16) saturate = diff_sat ^ diff; + const __typeof__(r_.i16) m = saturate >> 15; + r_.i16 = (diff_sat & m) | (diff & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_sub_sat(a, b) simde_wasm_i16x8_sub_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epu8(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u8 = a_.u8 - b_.u8; + r_.u8 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), r_.u8 <= a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / 
sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_sub_sat(a, b) simde_wasm_u8x16_sub_sat((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_sub_sat (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_sub_sat(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_subs_epu16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 - b_.u16; + r_.u16 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), r_.u16 <= a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_sub_sat(a, b) simde_wasm_u16x8_sub_sat((a), (b)) +#endif + +/* pmin */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_pmin (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_pmin(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_min_ps(b_.sse_m128, a_.sse_m128); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = + vbslq_f32( + vcltq_f32(b_.neon_f32, a_.neon_f32), + b_.neon_f32, + a_.neon_f32 + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = + vec_sel( + a_.altivec_f32, + b_.altivec_f32, + vec_cmpgt(a_.altivec_f32, b_.altivec_f32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (b_.f32[i] < a_.f32[i]) ? 
b_.f32[i] : a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_pmin(a, b) simde_wasm_f32x4_pmin((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_pmin (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_pmin(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_min_pd(b_.sse_m128d, a_.sse_m128d); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = + vbslq_f64( + vcltq_f64(b_.neon_f64, a_.neon_f64), + b_.neon_f64, + a_.neon_f64 + ); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = + vec_sel( + a_.altivec_f64, + b_.altivec_f64, + vec_cmpgt(a_.altivec_f64, b_.altivec_f64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (b_.f64[i] < a_.f64[i]) ? b_.f64[i] : a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_pmin(a, b) simde_wasm_f64x2_pmin((a), (b)) +#endif + +/* pmax */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_pmax (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_pmax(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_max_ps(b_.sse_m128, a_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcltq_f32(a_.neon_f32, b_.neon_f32), b_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + int32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( + ( m & HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32)) | + (~m & HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32)) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
b_.f32[i] : a_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_pmax(a, b) simde_wasm_f32x4_pmax((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_pmax (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_pmax(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_max_pd(b_.sse_m128d, a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vbslq_f64(vcltq_f64(a_.neon_f64, b_.neon_f64), b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sel(a_.altivec_f64, b_.altivec_f64, vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + int64_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f64 < b_.f64); + r_.f64 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f64), + ( + ( m & HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f64)) | + (~m & HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f64)) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? b_.f64[i] : a_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_pmax(a, b) simde_wasm_f64x2_pmax((a), (b)) +#endif + +/* div */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_div (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_div(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_div_ps(a_.sse_m128, b_.sse_m128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_div(a, b) simde_wasm_f32x4_div((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_div (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_div(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_div_pd(a_.sse_m128d, b_.sse_m128d); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_div(a, b) simde_wasm_f64x2_div((a), (b)) +#endif + +/* shuffle */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1, const int c2, const int c3, const int c4, const int c5, const int c6, const int c7, + const int c8, const int c9, const int c10, const int c11, const int c12, const int c13, const int c14, const int c15) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = 
simde_v128_to_private(b), + r_; + + r_.i8[ 0] = ( c0 < 16) ? a_.i8[ c0] : b_.i8[ c0 & 15]; + r_.i8[ 1] = ( c1 < 16) ? a_.i8[ c1] : b_.i8[ c1 & 15]; + r_.i8[ 2] = ( c2 < 16) ? a_.i8[ c2] : b_.i8[ c2 & 15]; + r_.i8[ 3] = ( c3 < 16) ? a_.i8[ c3] : b_.i8[ c3 & 15]; + r_.i8[ 4] = ( c4 < 16) ? a_.i8[ c4] : b_.i8[ c4 & 15]; + r_.i8[ 5] = ( c5 < 16) ? a_.i8[ c5] : b_.i8[ c5 & 15]; + r_.i8[ 6] = ( c6 < 16) ? a_.i8[ c6] : b_.i8[ c6 & 15]; + r_.i8[ 7] = ( c7 < 16) ? a_.i8[ c7] : b_.i8[ c7 & 15]; + r_.i8[ 8] = ( c8 < 16) ? a_.i8[ c8] : b_.i8[ c8 & 15]; + r_.i8[ 9] = ( c9 < 16) ? a_.i8[ c9] : b_.i8[ c9 & 15]; + r_.i8[10] = (c10 < 16) ? a_.i8[c10] : b_.i8[c10 & 15]; + r_.i8[11] = (c11 < 16) ? a_.i8[c11] : b_.i8[c11 & 15]; + r_.i8[12] = (c12 < 16) ? a_.i8[c12] : b_.i8[c12 & 15]; + r_.i8[13] = (c13 < 16) ? a_.i8[c13] : b_.i8[c13 & 15]; + r_.i8[14] = (c14 < 16) ? a_.i8[c14] : b_.i8[c14 & 15]; + r_.i8[15] = (c15 < 16) ? a_.i8[c15] : b_.i8[c15 & 15]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i8x16_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + wasm_i8x16_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i8x16_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(8, 16, \ + HEDLEY_REINTERPRET_CAST(int8_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int8_t SIMDE_VECTOR(16), b), \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i8x16_shuffle(a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7, \ + c8, c9, c10, c11, c12, c13, c14, c15) \ + simde_wasm_i8x16_shuffle((a), (b), \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7), \ + (c8), (c9), (c10), (c11), (c12), (c13), (c14), (c15)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1, const int c2, const int c3, const int c4, const int c5, const int c6, const int c7) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + r_.i16[ 0] = (c0 < 8) ? a_.i16[ c0] : b_.i16[ c0 & 7]; + r_.i16[ 1] = (c1 < 8) ? a_.i16[ c1] : b_.i16[ c1 & 7]; + r_.i16[ 2] = (c2 < 8) ? a_.i16[ c2] : b_.i16[ c2 & 7]; + r_.i16[ 3] = (c3 < 8) ? a_.i16[ c3] : b_.i16[ c3 & 7]; + r_.i16[ 4] = (c4 < 8) ? a_.i16[ c4] : b_.i16[ c4 & 7]; + r_.i16[ 5] = (c5 < 8) ? a_.i16[ c5] : b_.i16[ c5 & 7]; + r_.i16[ 6] = (c6 < 8) ? a_.i16[ c6] : b_.i16[ c6 & 7]; + r_.i16[ 7] = (c7 < 8) ? 
a_.i16[ c7] : b_.i16[ c7 & 7]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i16x8_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + wasm_i16x8_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i16x8_shuffle( \ + a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(16, 16, \ + HEDLEY_REINTERPRET_CAST(int16_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int16_t SIMDE_VECTOR(16), b), \ + c0, c1, c2, c3, c4, c5, c6, c7)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i16x8_shuffle(a, b, \ + c0, c1, c2, c3, c4, c5, c6, c7) \ + simde_wasm_i16x8_shuffle((a), (b), \ + (c0), (c1), (c2), (c3), (c4), (c5), (c6), (c7)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1, const int c2, const int c3) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + r_.i32[ 0] = (c0 < 4) ? a_.i32[ c0] : b_.i32[ c0 & 3]; + r_.i32[ 1] = (c1 < 4) ? a_.i32[ c1] : b_.i32[ c1 & 3]; + r_.i32[ 2] = (c2 < 4) ? a_.i32[ c2] : b_.i32[ c2 & 3]; + r_.i32[ 3] = (c3 < 4) ? a_.i32[ c3] : b_.i32[ c3 & 3]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i32x4_shuffle( \ + a, b, \ + c0, c1, c2, c3) \ + wasm_i32x4_shuffle( \ + a, b, \ + c0, c1, c2, c3) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i32x4_shuffle( \ + a, b, \ + c0, c1, c2, c3) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(32, 16, \ + HEDLEY_REINTERPRET_CAST(int32_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int32_t SIMDE_VECTOR(16), b), \ + c0, c1, c2, c3)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i32x4_shuffle(a, b, \ + c0, c1, c2, c3) \ + simde_wasm_i32x4_shuffle((a), (b), \ + (c0), (c1), (c2), (c3)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_shuffle ( + simde_v128_t a, simde_v128_t b, + const int c0, const int c1) { + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + r_.i64[ 0] = (c0 < 2) ? a_.i64[ c0] : b_.i64[ c0 & 1]; + r_.i64[ 1] = (c1 < 2) ? 
a_.i64[ c1] : b_.i64[ c1 & 1]; + + return simde_v128_from_private(r_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define \ + simde_wasm_i64x2_shuffle( \ + a, b, \ + c0, c1) \ + wasm_i64x2_shuffle( \ + a, b, \ + c0, c1) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define \ + simde_wasm_i64x2_shuffle( \ + a, b, \ + c0, c1) \ + (__extension__ ({ \ + HEDLEY_REINTERPRET_CAST(simde_v128_t, SIMDE_SHUFFLE_VECTOR_(64, 16, \ + HEDLEY_REINTERPRET_CAST(int64_t SIMDE_VECTOR(16), a), \ + HEDLEY_REINTERPRET_CAST(int64_t SIMDE_VECTOR(16), b), \ + c0, c1)); \ + })) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define \ + wasm_i64x2_shuffle(a, b, \ + c0, c1) \ + simde_wasm_i64x2_shuffle((a), (b), \ + (c0), (c1)) +#endif + +/* swizzle */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_swizzle (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_swizzle(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8x2_t tmp = { { vget_low_s8(a_.neon_i8), vget_high_s8(a_.neon_i8) } }; + r_.neon_i8 = vcombine_s8( + vtbl2_s8(tmp, vget_low_s8(b_.neon_i8)), + vtbl2_s8(tmp, vget_high_s8(b_.neon_i8)) + ); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + /* https://github.com/WebAssembly/simd/issues/68#issuecomment-470825324 */ + r_.sse_m128i = + _mm_shuffle_epi8( + a_.sse_m128i, + _mm_adds_epu8( + _mm_set1_epi8(0x70), + b_.sse_m128i)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_perm( + a_.altivec_i8, + a_.altivec_i8, + b_.altivec_u8 + ); + r_.altivec_i8 = vec_and(r_.altivec_i8, vec_cmple(b_.altivec_u8, vec_splat_u8(15))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.u8[i] > 15) ? 
INT8_C(0) : a_.i8[b_.u8[i]]; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_swizzle(a, b) simde_wasm_i8x16_swizzle((a), (b)) +#endif + +/* narrow */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_narrow_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_narrow_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_narrow_i16x8(a, b) simde_wasm_i8x16_narrow_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_narrow_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_narrow_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_narrow_i32x4(a, b) simde_wasm_i16x8_narrow_i32x4((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u8x16_narrow_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u8x16_narrow_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packus_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u8x16_narrow_i16x8(a, b) simde_wasm_u8x16_narrow_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_narrow_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_narrow_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_packus_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a_.sse_m128i, 31), a_.sse_m128i); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b_.sse_m128i, 31), b_.sse_m128i); + r_.sse_m128i = + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_narrow_i32x4(a, b) simde_wasm_u16x8_narrow_i32x4((a), (b)) +#endif + +/* demote */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_demote_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_demote_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cvtpd_ps(a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_floate(a_.altivec_f64); + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_pack( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_f32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0)) + ) + ); + #else + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0x00, 0x01, 0x02, 0x03, /* 0 */ + 0x08, 0x09, 0x0a, 0x0b, /* 2 */ + 0x10, 0x11, 0x12, 0x13, /* 4 */ + 0x18, 0x19, 0x1a, 0x1b /* 6 */ + }; + r_.altivec_f32 = vec_perm(r_.altivec_f32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_splat_s32(0)), perm); + #endif + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = __builtin_shufflevector(__builtin_convertvector(a_.f64, __typeof__(z)), z, 0, 1, 2, 3); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_demote_f64x2_zero(a) simde_wasm_f32x4_demote_f64x2_zero((a)) +#endif + +/* extend_low */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extend_low_i8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extend_low_i8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmovl_s8(vget_low_s8(a_.neon_i8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi8_epi16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_sra( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(short), vec_mergeh(a_.altivec_i8, a_.altivec_i8)), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 8) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int8_t v SIMDE_VECTOR(8) = { + a_.i8[0], 
a_.i8[1], a_.i8[2], a_.i8[3], + a_.i8[4], a_.i8[5], a_.i8[6], a_.i8[7] + }; + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extend_low_i8x16(a) simde_wasm_i16x8_extend_low_i8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extend_low_i16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extend_low_i16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi16_epi32(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi32(_mm_unpacklo_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int), vec_mergeh(a_.altivec_i16, a_.altivec_i16)), + vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int16_t v SIMDE_VECTOR(8) = { a_.i16[0], a_.i16[1], a_.i16[2], a_.i16[3] }; + + SIMDE_CONVERT_VECTOR_(r_.i32, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extend_low_i16x8(a) simde_wasm_i32x4_extend_low_i16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extend_low_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extend_low_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi32_epi64(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_unpacklo_epi32(a_.sse_m128i, _mm_cmpgt_epi32(_mm_setzero_si128(), a_.sse_m128i)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_i64 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_mergeh(a_.altivec_i32, a_.altivec_i32)), + vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 32)) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_mergeh( + a_.altivec_i32, + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(int), + vec_cmpgt(vec_splat_s32(0), a_.altivec_i32) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int32_t v SIMDE_VECTOR(8) = { a_.i32[0], a_.i32[1] }; + + SIMDE_CONVERT_VECTOR_(r_.i64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extend_low_i32x4(a) simde_wasm_i64x2_extend_low_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extend_low_u8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return 
wasm_u16x8_extend_low_u8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmovl_u8(vget_low_u8(a_.neon_u8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu8_epi16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_mergeh(a_.altivec_i8, vec_splat_s8(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint8_t v SIMDE_VECTOR(8) = { + a_.u8[0], a_.u8[1], a_.u8[2], a_.u8[3], + a_.u8[4], a_.u8[5], a_.u8[6], a_.u8[7] + }; + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extend_low_u8x16(a) simde_wasm_u16x8_extend_low_u8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extend_low_u16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extend_low_u16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu16_epi32(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi32(_mm_unpacklo_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_mergeh(a_.altivec_i16, vec_splat_s16(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint16_t v SIMDE_VECTOR(8) = { a_.u16[0], a_.u16[1], a_.u16[2], a_.u16[3] }; + + SIMDE_CONVERT_VECTOR_(r_.i32, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extend_low_u16x8(a) simde_wasm_u32x4_extend_low_u16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extend_low_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extend_low_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu32_epi64(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i =_mm_unpacklo_epi32(a_.sse_m128i, _mm_setzero_si128()); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_mergeh(a_.altivec_i32, vec_splat_s32(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint32_t v SIMDE_VECTOR(8) = { a_.u32[0], a_.u32[1] }; + + SIMDE_CONVERT_VECTOR_(r_.u64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(int64_t, a_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extend_low_u32x4(a) simde_wasm_u64x2_extend_low_u32x4((a)) +#endif + +/* promote */ 
+ +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_promote_low_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_promote_low_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128d = _mm_cvtps_pd(a_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_unpackh(a_.altivec_f32); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.f32, a_.f32, 0, 1), __typeof__(r_.f64)); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, a_.f32[0]); + r_.f64[1] = HEDLEY_STATIC_CAST(simde_float64, a_.f32[1]); + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_promote_low_f32x4(a) simde_wasm_f64x2_promote_low_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extend_high_i8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extend_high_i8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmovl_s8(vget_high_s8(a_.neon_i8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi8_epi16(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_sra( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(short), vec_mergel(a_.altivec_i8, a_.altivec_i8)), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 8) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int8_t v SIMDE_VECTOR(8) = { + a_.i8[ 8], a_.i8[ 9], a_.i8[10], a_.i8[11], + a_.i8[12], a_.i8[13], a_.i8[14], a_.i8[15] + }; + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extend_high_i8x16(a) simde_wasm_i16x8_extend_high_i8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extend_high_i16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extend_high_i16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_high_s16(a_.neon_i16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi16_epi32(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srai_epi32(_mm_unpackhi_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int), vec_mergel(a_.altivec_i16, a_.altivec_i16)), + vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int16_t v SIMDE_VECTOR(8) = { a_.i16[4], a_.i16[5], a_.i16[6], a_.i16[7] }; + + SIMDE_CONVERT_VECTOR_(r_.i32, 
v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extend_high_i16x8(a) simde_wasm_i32x4_extend_high_i16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extend_high_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extend_high_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_high_s32(a_.neon_i32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepi32_epi64(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_unpackhi_epi32(a_.sse_m128i, _mm_cmpgt_epi32(_mm_setzero_si128(), a_.sse_m128i)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_i64 = + vec_sra(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_mergel(a_.altivec_i32, a_.altivec_i32)), + vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 32)) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_mergel( + a_.altivec_i32, + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(int), + vec_cmpgt(vec_splat_s32(0), a_.altivec_i32) + ) + ); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const int32_t v SIMDE_VECTOR(8) = { a_.i32[2], a_.i32[3] }; + + SIMDE_CONVERT_VECTOR_(r_.i64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extend_high_i32x4(a) simde_wasm_i64x2_extend_high_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extend_high_u8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extend_high_u8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmovl_u8(vget_high_u8(a_.neon_u8)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu8_epi16(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_mergel(a_.altivec_i8, vec_splat_s8(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint8_t v SIMDE_VECTOR(8) = { + a_.u8[ 8], a_.u8[ 9], a_.u8[10], a_.u8[11], + a_.u8[12], a_.u8[13], a_.u8[14], a_.u8[15] + }; + + SIMDE_CONVERT_VECTOR_(r_.u16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extend_high_u8x16(a) simde_wasm_u16x8_extend_high_u8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extend_high_u16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extend_high_u16x8(a); + #else + simde_v128_private + a_ = 
simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_high_u16(a_.neon_u16)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu16_epi32(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_srli_epi32(_mm_unpackhi_epi16(a_.sse_m128i, a_.sse_m128i), 16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_mergel(a_.altivec_i16, vec_splat_s16(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint16_t v SIMDE_VECTOR(8) = { a_.u16[4], a_.u16[5], a_.u16[6], a_.u16[7] }; + + SIMDE_CONVERT_VECTOR_(r_.u32, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extend_high_u16x8(a) simde_wasm_u32x4_extend_high_u16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extend_high_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extend_high_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_high_u32(a_.neon_u32)); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = _mm_cvtepu32_epi64(_mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 2, 3, 2))); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i =_mm_unpackhi_epi32(a_.sse_m128i, _mm_setzero_si128()); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_mergel(a_.altivec_i32, vec_splat_s32(0)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + const uint32_t v SIMDE_VECTOR(8) = { a_.u32[2], a_.u32[3] }; + + SIMDE_CONVERT_VECTOR_(r_.u64, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extend_high_u32x4(a) simde_wasm_u64x2_extend_high_u32x4((a)) +#endif + +/* extmul_low */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extmul_low_i8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extmul_low_i8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmull_s8(vget_low_s8(a_.neon_i8), vget_low_s8(b_.neon_i8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_i8, a_.altivec_i8); + bshuf = vec_mergeh(b_.altivec_i8, b_.altivec_i8); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + }; + ashuf = vec_perm(a_.altivec_i8, a_.altivec_i8, perm); + bshuf = vec_perm(b_.altivec_i8, b_.altivec_i8, perm); + #endif + + r_.altivec_i16 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mullo_epi16( + _mm_srai_epi16(_mm_unpacklo_epi8(a_.sse_m128i, a_.sse_m128i), 8), + _mm_srai_epi16(_mm_unpacklo_epi8(b_.sse_m128i, b_.sse_m128i), 8) + ); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i16 = + __builtin_convertvector( + __builtin_shufflevector(a_.i8, a_.i8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.i16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i8, b_.i8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.i16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extmul_low_i8x16(a, b) simde_wasm_i16x8_extmul_low_i8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extmul_low_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extmul_low_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed short) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_i16, a_.altivec_i16); + bshuf = vec_mergeh(b_.altivec_i16, b_.altivec_i16); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 4, 5, 4, 5, + 6, 7, 6, 7 + }; + ashuf = vec_perm(a_.altivec_i16, a_.altivec_i16, perm); + bshuf = vec_perm(b_.altivec_i16, b_.altivec_i16, perm); + #endif + + r_.altivec_i32 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpacklo_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i32 = + __builtin_convertvector( + __builtin_shufflevector(a_.i16, a_.i16, 0, 1, 2, 3), + __typeof__(r_.i32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i16, b_.i16, 0, 1, 2, 3), + __typeof__(r_.i32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extmul_low_i16x8(a, b) simde_wasm_i32x4_extmul_low_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extmul_low_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extmul_low_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmull_s32(vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed int) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_i32, a_.altivec_i32); + bshuf = vec_mergeh(b_.altivec_i32, b_.altivec_i32); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 0, 1, 2, 3, + 4, 5, 6, 7, 4, 5, 6, 7 + }; + ashuf = vec_perm(a_.altivec_i32, a_.altivec_i32, 
perm); + bshuf = vec_perm(b_.altivec_i32, b_.altivec_i32, perm); + #endif + + r_.altivec_i64 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = + _mm_mul_epi32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i64 = + __builtin_convertvector( + __builtin_shufflevector(a_.i32, a_.i32, 0, 1), + __typeof__(r_.i64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i32, b_.i32, 0, 1), + __typeof__(r_.i64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extmul_low_i32x4(a, b) simde_wasm_i64x2_extmul_low_i32x4((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extmul_low_u8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extmul_low_u8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmull_u8(vget_low_u8(a_.neon_u8), vget_low_u8(b_.neon_u8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_u8, a_.altivec_u8); + bshuf = vec_mergeh(b_.altivec_u8, b_.altivec_u8); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + }; + ashuf = vec_perm(a_.altivec_u8, a_.altivec_u8, perm); + bshuf = vec_perm(b_.altivec_u8, b_.altivec_u8, perm); + #endif + + r_.altivec_u16 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u16 = + __builtin_convertvector( + __builtin_shufflevector(a_.u8, a_.u8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.u16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u8, b_.u8, 0, 1, 2, 3, 4, 5, 6, 7), + __typeof__(r_.u16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i]) * HEDLEY_STATIC_CAST(uint16_t, b_.u8[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extmul_low_u8x16(a, b) simde_wasm_u16x8_extmul_low_u8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extmul_low_u16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extmul_low_u16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmull_u16(vget_low_u16(a_.neon_u16), vget_low_u16(b_.neon_u16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_u16, a_.altivec_u16); + bshuf = vec_mergeh(b_.altivec_u16, b_.altivec_u16); + #else + 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 0, 1, + 2, 3, 2, 3, + 4, 5, 4, 5, + 6, 7, 6, 7 + }; + ashuf = vec_perm(a_.altivec_u16, a_.altivec_u16, perm); + bshuf = vec_perm(b_.altivec_u16, b_.altivec_u16, perm); + #endif + + r_.altivec_u32 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpacklo_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epu16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u32 = + __builtin_convertvector( + __builtin_shufflevector(a_.u16, a_.u16, 0, 1, 2, 3), + __typeof__(r_.u32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u16, b_.u16, 0, 1, 2, 3), + __typeof__(r_.u32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extmul_low_u16x8(a, b) simde_wasm_u32x4_extmul_low_u16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extmul_low_u32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extmul_low_u32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmull_u32(vget_low_u32(a_.neon_u32), vget_low_u32(b_.neon_u32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergeh(a_.altivec_u32, a_.altivec_u32); + bshuf = vec_mergeh(b_.altivec_u32, b_.altivec_u32); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 0, 1, 2, 3, + 4, 5, 6, 7, 4, 5, 6, 7 + }; + ashuf = vec_perm(a_.altivec_u32, a_.altivec_u32, perm); + bshuf = vec_perm(b_.altivec_u32, b_.altivec_u32, perm); + #endif + + r_.altivec_u64 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mul_epu32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(1, 1, 0, 0)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u64 = + __builtin_convertvector( + __builtin_shufflevector(a_.u32, a_.u32, 0, 1), + __typeof__(r_.u64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u32, b_.u32, 0, 1), + __typeof__(r_.u64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extmul_low_u32x4(a, b) simde_wasm_u64x2_extmul_low_u32x4((a), (b)) +#endif + +/* extmul_high */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extmul_high_i8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extmul_high_i8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vmull_high_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmull_s8(vget_high_s8(a_.neon_i8), vget_high_s8(b_.neon_i8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = + vec_mule( + vec_mergel(a_.altivec_i8, a_.altivec_i8), + vec_mergel(b_.altivec_i8, b_.altivec_i8) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mullo_epi16( + _mm_srai_epi16(_mm_unpackhi_epi8(a_.sse_m128i, a_.sse_m128i), 8), + _mm_srai_epi16(_mm_unpackhi_epi8(b_.sse_m128i, b_.sse_m128i), 8) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i16 = + __builtin_convertvector( + __builtin_shufflevector(a_.i8, a_.i8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.i16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i8, b_.i8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.i16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i + 8]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extmul_high_i8x16(a, b) simde_wasm_i16x8_extmul_high_i8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extmul_high_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extmul_high_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = + vec_mule( + vec_mergel(a_.altivec_i16, a_.altivec_i16), + vec_mergel(b_.altivec_i16, b_.altivec_i16) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpackhi_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epi16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i32 = + __builtin_convertvector( + __builtin_shufflevector(a_.i16, a_.i16, 4, 5, 6, 7), + __typeof__(r_.i32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i16, b_.i16, 4, 5, 6, 7), + __typeof__(r_.i32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 4]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extmul_high_i16x8(a, b) simde_wasm_i32x4_extmul_high_i16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_extmul_high_i32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_extmul_high_i32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vmull_high_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmull_s32(vget_high_s32(a_.neon_i32), vget_high_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) ashuf; + SIMDE_POWER_ALTIVEC_VECTOR(signed 
int) bshuf; + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + ashuf = vec_mergel(a_.altivec_i32, a_.altivec_i32); + bshuf = vec_mergel(b_.altivec_i32, b_.altivec_i32); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 8, 9, 10, 11, 8, 9, 10, 11, + 12, 13, 14, 15, 12, 13, 14, 15 + }; + ashuf = vec_perm(a_.altivec_i32, a_.altivec_i32, perm); + bshuf = vec_perm(b_.altivec_i32, b_.altivec_i32, perm); + #endif + + r_.altivec_i64 = vec_mule(ashuf, bshuf); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = + _mm_mul_epi32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.i64 = + __builtin_convertvector( + __builtin_shufflevector(a_.i32, a_.i32, 2, 3), + __typeof__(r_.i64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.i32, b_.i32, 2, 3), + __typeof__(r_.i64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i + 2]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_extmul_high_i32x4(a, b) simde_wasm_i64x2_extmul_high_i32x4((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extmul_high_u8x16 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extmul_high_u8x16(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u16 = vmull_high_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmull_u8(vget_high_u8(a_.neon_u8), vget_high_u8(b_.neon_u8)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = + vec_mule( + vec_mergel(a_.altivec_u8, a_.altivec_u8), + vec_mergel(b_.altivec_u8, b_.altivec_u8) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u16 = + __builtin_convertvector( + __builtin_shufflevector(a_.u8, a_.u8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.u16) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u8, b_.u8, 8, 9, 10, 11, 12, 13, 14, 15), + __typeof__(r_.u16) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[i + 8]) * HEDLEY_STATIC_CAST(uint16_t, b_.u8[i + 8]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extmul_high_u8x16(a, b) simde_wasm_u16x8_extmul_high_u8x16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extmul_high_u16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extmul_high_u16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmull_u16(vget_high_u16(a_.neon_u16), vget_high_u16(b_.neon_u16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32 = + vec_mule( + vec_mergel(a_.altivec_u16, a_.altivec_u16), + vec_mergel(b_.altivec_u16, 
b_.altivec_u16) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_unpackhi_epi16( + _mm_mullo_epi16(a_.sse_m128i, b_.sse_m128i), + _mm_mulhi_epu16(a_.sse_m128i, b_.sse_m128i) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u32 = + __builtin_convertvector( + __builtin_shufflevector(a_.u16, a_.u16, 4, 5, 6, 7), + __typeof__(r_.u32) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u16, b_.u16, 4, 5, 6, 7), + __typeof__(r_.u32) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[i + 4]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i + 4]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extmul_high_u16x8(a, b) simde_wasm_u32x4_extmul_high_u16x8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_extmul_high_u32x4 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_extmul_high_u32x4(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vmull_high_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmull_u32(vget_high_u32(a_.neon_u32), vget_high_u32(b_.neon_u32)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u64 = + vec_mule( + vec_mergel(a_.altivec_u32, a_.altivec_u32), + vec_mergel(b_.altivec_u32, b_.altivec_u32) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_mul_epu32( + _mm_shuffle_epi32(a_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)), + _mm_shuffle_epi32(b_.sse_m128i, _MM_SHUFFLE(3, 3, 2, 2)) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.u64 = + __builtin_convertvector( + __builtin_shufflevector(a_.u32, a_.u32, 2, 3), + __typeof__(r_.u64) + ) + * + __builtin_convertvector( + __builtin_shufflevector(b_.u32, b_.u32, 2, 3), + __typeof__(r_.u64) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i + 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i + 2]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_extmul_high_u32x4(a, b) simde_wasm_u64x2_extmul_high_u32x4((a), (b)) +#endif + +/* extadd_pairwise */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_extadd_pairwise_i8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_extadd_pairwise_i8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vpaddlq_s8(a_.neon_i8); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddw_epi8(a_.sse_m128i); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.sse_m128i = _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a_.sse_m128i); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1); + r_.altivec_i16 = + vec_add( + vec_mule(a_.altivec_i8, one), + vec_mulo(a_.altivec_i8, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = + ((a_.i16 << 8) >> 8) + + ((a_.i16 >> 8) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + 
r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2)]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_extadd_pairwise_i8x16(a) simde_wasm_i16x8_extadd_pairwise_i8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_extadd_pairwise_i16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_extadd_pairwise_i16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vpaddlq_s16(a_.neon_i16); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddd_epi16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_madd_epi16(a_.sse_m128i, _mm_set1_epi16(INT8_C(1))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1); + r_.altivec_i32 = + vec_add( + vec_mule(a_.altivec_i16, one), + vec_mulo(a_.altivec_i16, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = + ((a_.i32 << 16) >> 16) + + ((a_.i32 >> 16) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2)]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_extadd_pairwise_i16x8(a) simde_wasm_i32x4_extadd_pairwise_i16x8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_extadd_pairwise_u8x16 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_extadd_pairwise_u8x16(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vpaddlq_u8(a_.neon_u8); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddw_epu8(a_.sse_m128i); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + r_.sse_m128i = _mm_maddubs_epi16(a_.sse_m128i, _mm_set1_epi8(INT8_C(1))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1); + r_.altivec_u16 = + vec_add( + vec_mule(a_.altivec_u8, one), + vec_mulo(a_.altivec_u8, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = + ((a_.u16 << 8) >> 8) + + ((a_.u16 >> 8) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2)]) + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_extadd_pairwise_u8x16(a) simde_wasm_u16x8_extadd_pairwise_u8x16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_extadd_pairwise_u16x8 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_extadd_pairwise_u16x8(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vpaddlq_u16(a_.neon_u16); + #elif defined(SIMDE_X86_XOP_NATIVE) + r_.sse_m128i = _mm_haddd_epu16(a_.sse_m128i); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = + _mm_add_epi32( + _mm_srli_epi32(a_.sse_m128i, 16), + _mm_and_si128(a_.sse_m128i, _mm_set1_epi32(INT32_C(0x0000ffff))) + ); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1); + r_.altivec_u32 = + vec_add( + vec_mule(a_.altivec_u16, one), + vec_mulo(a_.altivec_u16, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = + ((a_.u32 << 16) >> 16) + + ((a_.u32 >> 16) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2)]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_extadd_pairwise_u16x8(a) simde_wasm_u32x4_extadd_pairwise_u16x8((a)) +#endif + +/* X_load_Y */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i16x8_load8x8 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i16x8_load8x8(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + int8_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_ALIGN_TO_16 int8_t v[8]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i16x8_load8x8(mem) simde_wasm_i16x8_load8x8((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_load16x4 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_load16x4(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + int16_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.i32, v); + #else + SIMDE_ALIGN_TO_16 int16_t v[4]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_load16x4(mem) simde_wasm_i32x4_load16x4((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i64x2_load32x2 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i64x2_load32x2(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) && !defined(SIMDE_BUG_CLANG_50893) + int32_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.i64, v); + #else + SIMDE_ALIGN_TO_16 int32_t v[2]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i64x2_load32x2(mem) simde_wasm_i64x2_load32x2((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u16x8_load8x8 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u16x8_load8x8(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + uint8_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.u16, v); + #else + SIMDE_ALIGN_TO_16 
uint8_t v[8]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u16x8_load8x8(mem) simde_wasm_u16x8_load8x8((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_load16x4 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_load16x4(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + uint16_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.u32, v); + #else + SIMDE_ALIGN_TO_16 uint16_t v[4]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_load16x4(mem) simde_wasm_u32x4_load16x4((mem)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u64x2_load32x2 (const void * mem) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u64x2_load32x2(mem); + #else + simde_v128_private r_; + + #if defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100762) + uint32_t v SIMDE_VECTOR(8); + simde_memcpy(&v, mem, sizeof(v)); + SIMDE_CONVERT_VECTOR_(r_.u64, v); + #else + SIMDE_ALIGN_TO_16 uint32_t v[2]; + simde_memcpy(v, mem, sizeof(v)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, v[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u64x2_load32x2(mem) simde_wasm_u64x2_load32x2((mem)) +#endif + +/* load*_zero */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load32_zero (const void * a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load32_zero(a); + #else + simde_v128_private r_; + + int32_t a_; + simde_memcpy(&a_, a, sizeof(a_)); + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_cvtsi32_si128(a_); + #else + r_.i32[0] = a_; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load32_zero(a) simde_wasm_v128_load32_zero((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load64_zero (const void * a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_load64_zero(a); + #else + simde_v128_private r_; + + int64_t a_; + simde_memcpy(&a_, a, sizeof(a_)); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + r_.sse_m128i = _mm_cvtsi64_si128(a_); + #else + r_.i64[0] = a_; + r_.i64[1] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load64_zero(a) simde_wasm_v128_load64_zero((a)) +#endif + +/* load*_lane */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load8_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + #if defined(SIMDE_BUG_CLANG_50901) + simde_v128_private r_ = simde_v128_to_private(vec); + r_.altivec_i8 = 
vec_insert(*HEDLEY_REINTERPRET_CAST(const signed char *, a), a_.altivec_i8, lane); + return simde_v128_from_private(r_); + #else + a_.i8[lane] = *HEDLEY_REINTERPRET_CAST(const int8_t *, a); + return simde_v128_from_private(a_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load8_lane(a, vec, lane) wasm_v128_load8_lane(HEDLEY_CONST_CAST(int8_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load8_lane(a, vec, lane) simde_wasm_v128_load8_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load16_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + int16_t tmp = 0; + simde_memcpy(&tmp, a, sizeof(int16_t)); + a_.i16[lane] = tmp; + + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load16_lane(a, vec, lane) wasm_v128_load16_lane(HEDLEY_CONST_CAST(int16_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load16_lane(a, vec, lane) simde_wasm_v128_load16_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load32_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + int32_t tmp = 0; + simde_memcpy(&tmp, a, sizeof(int32_t)); + a_.i32[lane] = tmp; + + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load32_lane(a, vec, lane) wasm_v128_load32_lane(HEDLEY_CONST_CAST(int32_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load32_lane(a, vec, lane) simde_wasm_v128_load32_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_v128_load64_lane (const void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_v128_private + a_ = simde_v128_to_private(vec); + + int64_t tmp = 0; + simde_memcpy(&tmp, a, sizeof(int64_t)); + a_.i64[lane] = tmp; + + return simde_v128_from_private(a_); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_load64_lane(a, vec, lane) wasm_v128_load64_lane(HEDLEY_CONST_CAST(int64_t *, (a)), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_load64_lane(a, vec, lane) simde_wasm_v128_load64_lane((a), (vec), (lane)) +#endif + +/* store*_lane */ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store8_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 15) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int8_t tmp = vec_.i8[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_store8_lane(a, vec, lane) wasm_v128_store8_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store8_lane(a, vec, lane) simde_wasm_v128_store8_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store16_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int16_t tmp = vec_.i16[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define 
simde_wasm_v128_store16_lane(a, vec, lane) wasm_v128_store16_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store16_lane(a, vec, lane) simde_wasm_v128_store16_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store32_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int32_t tmp = vec_.i32[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_store32_lane(a, vec, lane) wasm_v128_store32_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store32_lane(a, vec, lane) simde_wasm_v128_store32_lane((a), (vec), (lane)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_wasm_v128_store64_lane (void * a, simde_v128_t vec, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { + simde_v128_private + vec_ = simde_v128_to_private(vec); + + int64_t tmp = vec_.i64[lane]; + simde_memcpy(a, &tmp, sizeof(tmp)); +} +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_wasm_v128_store64_lane(a, vec, lane) wasm_v128_store64_lane((a), (vec), (lane)) +#endif +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_v128_store64_lane(a, vec, lane) simde_wasm_v128_store64_lane((a), (vec), (lane)) +#endif + +/* convert */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_convert_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_convert_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128 = _mm_cvtepi32_ps(a_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A32V7) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_convert_i32x4(a) simde_wasm_f32x4_convert_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_convert_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_convert_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_convert_u32x4(a) simde_wasm_f32x4_convert_u32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_convert_low_i32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_convert_low_i32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && 
HEDLEY_HAS_BUILTIN(__builtin_convertvector) + r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.i32, a_.i32, 0, 1), __typeof__(r_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_convert_low_i32x4(a) simde_wasm_f64x2_convert_low_i32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_convert_low_u32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_convert_low_u32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + r_.f64 = __builtin_convertvector(__builtin_shufflevector(a_.u32, a_.u32, 0, 1), __typeof__(r_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.u32[i]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_convert_low_u32x4(a) simde_wasm_f64x2_convert_low_u32x4((a)) +#endif + +/* trunc_sat */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_trunc_sat_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_trunc_sat_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i i32_max_mask = _mm_castps_si128(_mm_cmpgt_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(2147483520.0)))); + const __m128 clamped = _mm_max_ps(a_.sse_m128, _mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))); + r_.sse_m128i = _mm_cvttps_epi32(clamped); + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128i = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(r_.sse_m128i), + _mm_castsi128_ps(_mm_set1_epi32(INT32_MAX)), + _mm_castsi128_ps(i32_max_mask) + ) + ); + #else + r_.sse_m128i = + _mm_or_si128( + _mm_and_si128(i32_max_mask, _mm_set1_epi32(INT32_MAX)), + _mm_andnot_si128(i32_max_mask, r_.sse_m128i) + ); + #endif + r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpord_ps(a_.sse_m128, a_.sse_m128))); + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + const __typeof__(a_.f32) max_representable = { SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0), SIMDE_FLOAT32_C(2147483520.0) }; + __typeof__(r_.i32) max_mask = HEDLEY_REINTERPRET_CAST(__typeof__(max_mask), a_.f32 > max_representable); + __typeof__(r_.i32) max_i32 = { INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX }; + r_.i32 = (max_i32 & max_mask) | (r_.i32 & ~max_mask); + + const __typeof__(a_.f32) min_representable = { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) }; + __typeof__(r_.i32) min_mask = HEDLEY_REINTERPRET_CAST(__typeof__(min_mask), a_.f32 < min_representable); + __typeof__(r_.i32) min_i32 = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN 
}; + r_.i32 = (min_i32 & min_mask) | (r_.i32 & ~min_mask); + + r_.i32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == a_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + if (simde_math_isnanf(a_.f32[i])) { + r_.i32[i] = INT32_C(0); + } else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) { + r_.i32[i] = INT32_MIN; + } else if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)) { + r_.i32[i] = INT32_MAX; + } else { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f32[i]); + } + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_trunc_sat_f32x4(a) simde_wasm_i32x4_trunc_sat_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_trunc_sat_f32x4 (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_trunc_sat_f32x4(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcvtq_u32_f32(a_.neon_f32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + #if defined(SIMDE_X86_AVX512VL_NATIVE) + r_.sse_m128i = _mm_cvttps_epu32(a_.sse_m128); + #else + __m128 first_oob_high = _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0)); + __m128 neg_zero_if_too_high = + _mm_castsi128_ps( + _mm_slli_epi32( + _mm_castps_si128(_mm_cmple_ps(first_oob_high, a_.sse_m128)), + 31 + ) + ); + r_.sse_m128i = + _mm_xor_si128( + _mm_cvttps_epi32( + _mm_sub_ps(a_.sse_m128, _mm_and_ps(neg_zero_if_too_high, first_oob_high)) + ), + _mm_castps_si128(neg_zero_if_too_high) + ); + #endif + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpgt_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(0.0))))); + r_.sse_m128i = _mm_or_si128 (r_.sse_m128i, _mm_castps_si128(_mm_cmpge_ps(a_.sse_m128, _mm_set1_ps(SIMDE_FLOAT32_C(4294967296.0))))); + #endif + + #if !defined(SIMDE_FAST_NANS) + r_.sse_m128i = _mm_and_si128(r_.sse_m128i, _mm_castps_si128(_mm_cmpord_ps(a_.sse_m128, a_.sse_m128))); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_IEEE754_STORAGE) + SIMDE_CONVERT_VECTOR_(r_.u32, a_.f32); + + const __typeof__(a_.f32) max_representable = { SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0), SIMDE_FLOAT32_C(4294967040.0) }; + r_.u32 |= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 > max_representable); + + const __typeof__(a_.f32) min_representable = { SIMDE_FLOAT32_C(0.0), }; + r_.u32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 > min_representable); + + r_.u32 &= HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.f32 == a_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + if (simde_math_isnan(a_.f32[i]) || + a_.f32[i] < SIMDE_FLOAT32_C(0.0)) { + r_.u32[i] = UINT32_C(0); + } else if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)) { + r_.u32[i] = UINT32_MAX; + } else { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f32[i]); + } + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_trunc_sat_f32x4(a) simde_wasm_u32x4_trunc_sat_f32x4((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_trunc_sat_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_trunc_sat_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vcombine_s32(vqmovn_s64(vcvtq_s64_f64(a_.neon_f64)), vdup_n_s32(INT32_C(0))); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(double) in_not_nan = + vec_and(a_.altivec_f64, vec_cmpeq(a_.altivec_f64, a_.altivec_f64)); + r_.altivec_i32 = vec_signede(in_not_nan); + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i32 = + vec_pack( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), r_.altivec_i32), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(long long), vec_splat_s32(0)) + ); + #else + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { + 0, 1, 2, 3, 4, 5, 6, 7, + 16, 17, 18, 19, 20, 21, 22, 23 + }; + r_.altivec_i32 = + HEDLEY_REINTERPRET_CAST( + SIMDE_POWER_ALTIVEC_VECTOR(signed int), + vec_perm( + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), r_.altivec_i32), + vec_splat_s8(0), + perm + ) + ); + #endif + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (simde_math_isnan(a_.f64[i])) { + r_.i32[i] = INT32_C(0); + } else if (a_.f64[i] < HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) { + r_.i32[i] = INT32_MIN; + } else if (a_.f64[i] > HEDLEY_STATIC_CAST(simde_float64, INT32_MAX)) { + r_.i32[i] = INT32_MAX; + } else { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.f64[i]); + } + } + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i32x4_trunc_sat_f64x2_zero(a) simde_wasm_i32x4_trunc_sat_f64x2_zero((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_u32x4_trunc_sat_f64x2_zero (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_u32x4_trunc_sat_f64x2_zero(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vcombine_u32(vqmovn_u64(vcvtq_u64_f64(a_.neon_f64)), vdup_n_u32(UINT32_C(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (simde_math_isnanf(a_.f64[i]) || + a_.f64[i] < SIMDE_FLOAT64_C(0.0)) { + r_.u32[i] = UINT32_C(0); + } else if (a_.f64[i] > HEDLEY_STATIC_CAST(simde_float64, UINT32_MAX)) { + r_.u32[i] = UINT32_MAX; + } else { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.f64[i]); + } + } + r_.u32[2] = 0; + r_.u32[3] = 0; + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_u32x4_trunc_sat_f64x2_zero(a) simde_wasm_u32x4_trunc_sat_f64x2_zero((a)) +#endif + +/* popcnt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i8x16_popcnt (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i8x16_popcnt(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcntq_s8(a_.neon_i8); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BITALG_NATIVE) + r_.sse_m128i = _mm_popcnt_epi8(a_.sse_m128i); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m128i tmp0 = _mm_set1_epi8(0x0f); + __m128i tmp1 = _mm_andnot_si128(tmp0, a_.sse_m128i); + __m128i y = _mm_and_si128(tmp0, a_.sse_m128i); + tmp0 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp1 = _mm_srli_epi16(tmp1, 4); + y = _mm_shuffle_epi8(tmp0, y); + tmp1 = _mm_shuffle_epi8(tmp0, tmp1); + return _mm_add_epi8(y, tmp1); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i tmp0 = 
_mm_set1_epi8(0x0f); + __m128i tmp1 = _mm_and_si128(a_.sse_m128i, tmp0); + tmp0 = _mm_andnot_si128(tmp0, a_.sse_m128i); + __m128i y = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp0 = _mm_srli_epi16(tmp0, 4); + y = _mm_shuffle_epi8(y, tmp1); + tmp1 = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + tmp1 = _mm_shuffle_epi8(tmp1, tmp0); + return _mm_add_epi8(y, tmp1); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp0 = _mm_and_si128(_mm_srli_epi16(a_.sse_m128i, 1), _mm_set1_epi8(0x55)); + __m128i tmp1 = _mm_sub_epi8(a_.sse_m128i, tmp0); + tmp0 = tmp1; + tmp1 = _mm_and_si128(tmp1, _mm_set1_epi8(0x33)); + tmp0 = _mm_and_si128(_mm_srli_epi16(tmp0, 2), _mm_set1_epi8(0x33)); + tmp1 = _mm_add_epi8(tmp1, tmp0); + tmp0 = _mm_srli_epi16(tmp1, 4); + tmp1 = _mm_add_epi8(tmp1, tmp0); + r_.sse_m128i = _mm_and_si128(tmp1, _mm_set1_epi8(0x0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a_.altivec_i8))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); + v = v - ((v >> 1) & (85)); + v = (v & (51)) + ((v >> (2)) & (51)); + v = (v + (v >> (4))) & (15); + r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_i8x16_popcnt(a) simde_wasm_i8x16_popcnt((a)) +#endif + +/* dot */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_i32x4_dot_i16x8 (simde_v128_t a, simde_v128_t b) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_i32x4_dot_i16x8(a, b); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + b_ = simde_v128_to_private(b), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_madd_epi16(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define 
wasm_i32x4_dot_i16x8(a, b) simde_wasm_i32x4_dot_i16x8((a), (b)) +#endif + +/* ceil */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_ceil (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_ceil(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128 = _mm_round_ps(a_.sse_m128, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/WebAssembly/simd/pull/232 */ + const __m128i input_as_i32 = _mm_cvttps_epi32(a_.sse_m128); + const __m128i i32_min = _mm_set1_epi32(INT32_MIN); + const __m128i input_is_out_of_range = _mm_or_si128(_mm_cmpeq_epi32(input_as_i32, i32_min), i32_min); + const __m128 truncated = + _mm_or_ps( + _mm_andnot_ps( + _mm_castsi128_ps(input_is_out_of_range), + _mm_cvtepi32_ps(input_as_i32) + ), + _mm_castsi128_ps( + _mm_castps_si128( + _mm_and_ps( + _mm_castsi128_ps(input_is_out_of_range), + a_.sse_m128 + ) + ) + ) + ); + + const __m128 trunc_is_ge_input = + _mm_or_ps( + _mm_cmple_ps(a_.sse_m128, truncated), + _mm_castsi128_ps(i32_min) + ); + r_.sse_m128 = + _mm_or_ps( + _mm_andnot_ps( + trunc_is_ge_input, + _mm_add_ps(truncated, _mm_set1_ps(SIMDE_FLOAT32_C(1.0))) + ), + _mm_and_ps(trunc_is_ge_input, truncated) + ); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ceil(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_ceilf(a_.f32[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_ceil(a) simde_wasm_f32x4_ceil((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_ceil (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_ceil(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128d = _mm_round_pd(a_.sse_m128d, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_ceil(a_.altivec_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_ceil(a_.f64[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_ceil(a) simde_wasm_f64x2_ceil((a)) +#endif + +/* floor */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_floor (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_floor(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.sse_m128 = _mm_floor_ps(a_.sse_m128); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i vint_min = _mm_set1_epi32(INT_MIN); + const __m128i input_as_int = _mm_cvttps_epi32(a_.sse_m128); + const __m128 input_truncated = _mm_cvtepi32_ps(input_as_int); + const __m128i oor_all_or_neg = _mm_or_si128(_mm_cmpeq_epi32(input_as_int, vint_min), vint_min); + const __m128 tmp = + _mm_castsi128_ps( + _mm_or_si128( + _mm_andnot_si128( + oor_all_or_neg, + _mm_castps_si128(input_truncated) + ), + _mm_and_si128( + oor_all_or_neg, + _mm_castps_si128(a_.sse_m128) + ) + 
) + ); + r_.sse_m128 = + _mm_sub_ps( + tmp, + _mm_and_ps( + _mm_cmplt_ps( + a_.sse_m128, + tmp + ), + _mm_set1_ps(SIMDE_FLOAT32_C(1.0)) + ) + ); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t input_as_int = vcvtq_s32_f32(a_.f32); + const float32x4_t input_truncated = vcvtq_f32_s32(input_as_int); + const float32x4_t tmp = + vbslq_f32( + vbicq_u32( + vcagtq_f32( + vreinterpretq_f32_u32(vdupq_n_u32(UINT32_C(0x4B000000))), + a_.f32 + ), + vdupq_n_u32(UINT32_C(0x80000000)) + ), + input_truncated, + a_.f32); + r_.neon_f32 = + vsubq_f32( + tmp, + vreinterpretq_f32_u32( + vandq_u32( + vcgtq_f32( + tmp, + a_.f32 + ), + vdupq_n_u32(UINT32_C(0x3F800000)) + ) + ) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_floor(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_floorf(a_.f32[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_floor(a) simde_wasm_f32x4_floor((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_floor (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_floor(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_floor(a_.f64[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_floor(a) simde_wasm_f64x2_floor((a)) +#endif + +/* trunc */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_trunc (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_trunc(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_truncf(a_.f32[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_trunc(a) simde_wasm_f32x4_trunc((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_trunc (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_trunc(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_trunc(a_.f64[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_trunc(a) simde_wasm_f64x2_trunc((a)) +#endif + +/* nearest */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_nearest (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_nearest(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_nearbyintf(a_.f32[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_nearest(a) simde_wasm_f32x4_nearest((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_v128_t +simde_wasm_f64x2_nearest (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_nearest(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_nearbyint(a_.f64[i])); + } + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_nearest(a) simde_wasm_f64x2_nearest((a)) +#endif + +/* sqrt */ + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f32x4_sqrt (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_sqrt(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.sse_m128 = _mm_sqrt_ps(a_.sse_m128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_quietf(simde_math_sqrtf(a_.f32[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f32x4_sqrt(a) simde_wasm_f32x4_sqrt((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_v128_t +simde_wasm_f64x2_sqrt (simde_v128_t a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_sqrt(a); + #else + simde_v128_private + a_ = simde_v128_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE_NATIVE) + r_.sse_m128d = _mm_sqrt_pd(a_.sse_m128d); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_quiet(simde_math_sqrt(a_.f64[i])); + } + #endif + + return simde_v128_from_private(r_); + #endif +} +#if defined(SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES) + #define wasm_f64x2_sqrt(a) simde_wasm_f64x2_sqrt((a)) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_WASM_SIMD128_H) */ +/* :: End simde/wasm/simd128.h :: */ diff --git a/include/simde/x86/avx.h b/include/simde/x86/avx.h new file mode 100644 index 00000000..8230fdcc --- /dev/null +++ b/include/simde/x86/avx.h @@ -0,0 +1,33765 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
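 + * + * As a rough illustration of how the macros defined below are meant to be + * consumed (the version numbers here are arbitrary placeholders, not a real + * workaround taken from this file): + * + * #if SIMDE_DETECT_CLANG_VERSION_NOT(9, 0, 0) + * // e.g. avoid a construct that older clang-based compilers mishandle + * #endif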
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
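+ * + * For reference, writing a = 0.14829094707965850830078125 (the constant used below) and u = ln(1 - x*x), + * the expression this function evaluates is sgn(x) * sqrt( sqrt( (2/(pi*a) + u/2)^2 - u/a ) - (2/(pi*a) + u/2) ); + * in the code, tt1 = 2/(pi*a) + u/2 and tt2 = u/a, and the result is sgn * sqrt(-tt1 + sqrt(tt1*tt1 - tt2)).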
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a significant performance + * hit (a switch statement is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function is constant, pretty + * much any compiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions whose arguments the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters. + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...)
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \
+  simde_assert_type(double, "g", a, op, b)
+#define simde_assert_ptr(a, op, b) \
+  simde_assert_type(const void*, "p", a, op, b)
+
+#define simde_assert_int8(a, op, b) \
+  simde_assert_type(int8_t, PRIi8, a, op, b)
+#define simde_assert_uint8(a, op, b) \
+  simde_assert_type(uint8_t, PRIu8, a, op, b)
+#define simde_assert_int16(a, op, b) \
+  simde_assert_type(int16_t, PRIi16, a, op, b)
+#define simde_assert_uint16(a, op, b) \
+  simde_assert_type(uint16_t, PRIu16, a, op, b)
+#define simde_assert_int32(a, op, b) \
+  simde_assert_type(int32_t, PRIi32, a, op, b)
+#define simde_assert_uint32(a, op, b) \
+  simde_assert_type(uint32_t, PRIu32, a, op, b)
+#define simde_assert_int64(a, op, b) \
+  simde_assert_type(int64_t, PRIi64, a, op, b)
+#define simde_assert_uint64(a, op, b) \
+  simde_assert_type(uint64_t, PRIu64, a, op, b)
+
+#define simde_assert_ptr_equal(a, b) \
+  simde_assert_ptr(a, ==, b)
+#define simde_assert_ptr_not_equal(a, b) \
+  simde_assert_ptr(a, !=, b)
+#define simde_assert_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+#define simde_assert_ptr_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_ptr_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+
+#endif /* !defined(SIMDE_CHECK_H) */
+/* :: End simde/check.h :: */
+
+/* GCC/clang have a bunch of functionality in builtins which we would
+ * like to access, but the suffixes indicate whether they operate on
+ * int, long, or long long, not fixed width types (e.g., int32_t).
+ * We use these macros to attempt to map from fixed-width to the
+ * names GCC uses. Note that you should still cast the input(s) and
+ * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
+ * types are the same size they may not be compatible according to the
+ * compiler. For example, on x86 long and long long are generally
+ * both 64 bits, but platforms vary on whether an int64_t is mapped
+ * to a long or long long.
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include <windows.h> +#endif + +#if defined(__ARM_ACLE) + #include <arm_acle.h> +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_x_mm_broadcastlow_ps(simde__m128 a) {
+  /* This function broadcasts the first element in the input vector to
+   * all lanes. It is used to avoid generating spurious exceptions in
+   * *_ss functions since there may be garbage in the upper lanes. */
+
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_shuffle_ps(a, a, 0);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a);
+
+    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+      r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_splat(a_.altivec_f32, 0);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]);
+    #elif defined(SIMDE_SHUFFLE_VECTOR_)
+      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[0];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ps (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ps(a, b);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32);
+    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+      r_.f32 = a_.f32 + b_.f32;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[i] + b_.f32[i];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ss (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ss(a, b);
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+    return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+    return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
+      float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
+      // the upper values in the result must be the remnants of a.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
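+/* Illustrative usage sketch (not part of the upstream SIMDe sources; it
+ * assumes a SIMDe header providing these functions has been included).
+ * The *_ss fallbacks above first broadcast the low lane with
+ * simde_x_mm_broadcastlow_ps() so the full-width operation cannot raise
+ * spurious exceptions on garbage in the upper lanes, then merge lane 0
+ * back into the destination with simde_mm_move_ss():
+ *
+ *   simde__m128 a = simde_mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
+ *   simde__m128 b = simde_mm_set_ps(8.0f, 7.0f, 6.0f, 5.0f);
+ *   simde__m128 r = simde_mm_add_ss(a, b);
+ *   // lane 0 of r is 1.0f + 5.0f == 6.0f; lanes 1..3 keep 2.0f, 3.0f,
+ *   // 4.0f from a, matching the behaviour of the hardware _mm_add_ss.
+ */
+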
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
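
A minimal stand-alone sketch of how the wrappers above are meant to be consumed: the simde_mm_* functions and SIMDE_MM_TRANSPOSE4_PS behave as drop-in replacements for the corresponding SSE intrinsics, picking NEON, AltiVec, WASM, LSX or plain scalar code at compile time. The sketch assumes the repository's include/ directory is on the include path and uses simde_mm_loadu_ps, which is defined earlier in sse.h.

#include "simde/x86/sse.h"
#include <stdio.h>

int main(void) {
  float m[4][4] = {
    { 0.f,  1.f,  2.f,  3.f},
    { 4.f,  5.f,  6.f,  7.f},
    { 8.f,  9.f, 10.f, 11.f},
    {12.f, 13.f, 14.f, 15.f},
  };
  /* Load the four rows, transpose them in place, store them back. */
  simde__m128 r0 = simde_mm_loadu_ps(m[0]);
  simde__m128 r1 = simde_mm_loadu_ps(m[1]);
  simde__m128 r2 = simde_mm_loadu_ps(m[2]);
  simde__m128 r3 = simde_mm_loadu_ps(m[3]);
  SIMDE_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
  simde_mm_storeu_ps(m[0], r0);
  simde_mm_storeu_ps(m[1], r1);
  simde_mm_storeu_ps(m[2], r2);
  simde_mm_storeu_ps(m[3], r3);
  printf("%g %g %g %g\n", m[1][0], m[1][1], m[1][2], m[1][3]); /* 1 5 9 13 */
  return 0;
}

The same source builds unchanged on x86 (where the native intrinsics are used) and on the ARM/POWER/WASM targets the fallbacks above cover.
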
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ?
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
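+    /* The per-lane operation is the same as the 128-bit simde_mm_mulhrs_epi16
+     * above; a minimal scalar sketch of what every branch below computes is
+     *
+     *   r = (int16_t)(((int32_t)a * (int32_t)b + 0x4000) >> 15);
+     *
+     * i.e. a Q15 fixed-point multiply rounded to nearest, which is exactly
+     * what the portable fallback loop spells out. */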
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
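+/* simde_x_mm_blendv_epi16 above and the epi32/epi64 helpers below share the
+ * per-lane rule of _mm_blendv_epi8: a lane is taken from `b` when the sign
+ * bit of the corresponding mask lane is set, otherwise from `a`.  A minimal
+ * scalar sketch of the 32-bit case (a hypothetical helper, not part of the
+ * SIMDe API):
+ *
+ *   static int32_t blendv1_i32(int32_t a, int32_t b, int32_t mask) {
+ *     int32_t m = mask >> 31;        // 0 or ~0, spread from the sign bit
+ *     return (m & b) | (~m & a);
+ *   }
+ *
+ * Each SIMD path is this, vectorized: widen the sign bit into a full-lane
+ * mask (shift, compare-less-than-zero, ...) and then bit-select b over a. */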
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
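+    /* Sign-extend the low four bytes without SSE4.1: unpacking `a` with
+     * itself puts each byte in both halves of a 16-bit lane, a second
+     * unpack widens that to 32 bits with the original byte in the top
+     * position, and the arithmetic shift right by 24 then drags its sign
+     * bit down, leaving (int32_t)(int8_t)a[i] in every lane. */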
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
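+/* The remaining widening conversions follow the pattern established above:
+ * only the low lanes of `a` participate, the cvtepiX variants sign-extend
+ * and the cvtepuX variants zero-extend.  As a minimal scalar model
+ * (mirroring the portable fallback loops below),
+ *
+ *   r.i32[i] = a.u16[i];   // cvtepu16_epi32: zero-extend
+ *   r.i64[i] = a.i16[i];   // cvtepi16_epi64: sign-extend
+ *
+ * while the NEON/WASM/vector-extension branches perform the same extension
+ * with their native widening or shuffle instructions. */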
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
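+      // _mm_mul_epi32 multiplies only the low signed 32 bits of each 64-bit
+      // lane and keeps the full 64-bit product, so narrowing each operand with
+      // vmovn_s64 (which keeps exactly those low halves) and widening the
+      // product back up with vmull_s32 matches that behaviour.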
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
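+  /* Mirrors the ZF result of PTEST: the function is meant to report whether
+   * the bitwise AND of a and b is all zeros (1 if so, 0 otherwise). */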
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+  r_.u8[ 6] = e6;
+  r_.u8[ 7] = e7;
+  r_.u8[ 8] = e8;
+  r_.u8[ 9] = e9;
+  r_.u8[10] = e10;
+  r_.u8[11] = e11;
+  r_.u8[12] = e12;
+  r_.u8[13] = e13;
+  r_.u8[14] = e14;
+  r_.u8[15] = e15;
+  r_.u8[16] = e16;
+  r_.u8[17] = e17;
+  r_.u8[18] = e18;
+  r_.u8[19] = e19;
+  r_.u8[20] = e20;
+  r_.u8[21] = e21;
+  r_.u8[22] = e22;
+  r_.u8[23] = e23;
+  r_.u8[24] = e24;
+  r_.u8[25] = e25;
+  r_.u8[26] = e26;
+  r_.u8[27] = e27;
+  r_.u8[28] = e28;
+  r_.u8[29] = e29;
+  r_.u8[30] = e30;
+  r_.u8[31] = e31;
+
+  return simde__m256i_from_private(r_);
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m256i
+simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12,
+                         uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8,
+                         uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4,
+                         uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) {
+  simde__m256i_private r_;
+
+  r_.u16[ 0] = e0;
+  r_.u16[ 1] = e1;
+  r_.u16[ 2] = e2;
+  r_.u16[ 3] = e3;
+  r_.u16[ 4] = e4;
+  r_.u16[ 5] = e5;
+  r_.u16[ 6] = e6;
+  r_.u16[ 7] = e7;
+  r_.u16[ 8] = e8;
+  r_.u16[ 9] = e9;
+  r_.u16[10] = e10;
+  r_.u16[11] = e11;
+  r_.u16[12] = e12;
+  r_.u16[13] = e13;
+  r_.u16[14] = e14;
+  r_.u16[15] = e15;
+
+  return simde__m256i_from_private(r_);
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m256i
+simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4,
+                         uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) {
+  #if defined(SIMDE_X86_AVX_NATIVE)
+    return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4),
+                            HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0));
+  #else
+    simde__m256i_private r_;
+
+    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
+      r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0));
+      r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4));
+    #else
+      r_.u32[ 0] = e0;
+      r_.u32[ 1] = e1;
+      r_.u32[ 2] = e2;
+      r_.u32[ 3] = e3;
+      r_.u32[ 4] = e4;
+      r_.u32[ 5] = e5;
+      r_.u32[ 6] = e6;
+      r_.u32[ 7] = e7;
+    #endif
+
+    return simde__m256i_from_private(r_);
+  #endif
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m256i
+simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) {
+  simde__m256i_private r_;
+
+  r_.u64[0] = e0;
+  r_.u64[1] = e1;
+  r_.u64[2] = e2;
+  r_.u64[3] = e3;
+
+  return simde__m256i_from_private(r_);
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m256
+simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4,
+                    simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) {
+  #if defined(SIMDE_X86_AVX_NATIVE)
+    return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0);
+  #else
+    simde__m256_private r_;
+
+    #if SIMDE_NATURAL_VECTOR_SIZE_LE(128)
+      r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0);
+      r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4);
+    #else
+      r_.f32[0] = e0;
+      r_.f32[1] = e1;
+      r_.f32[2] = e2;
+      r_.f32[3] = e3;
+      r_.f32[4] = e4;
+      r_.f32[5] = e5;
+      r_.f32[6] = e6;
+      r_.f32[7] = e7;
+    #endif
+
+    return simde__m256_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES)
+  #undef _mm256_set_ps
+  #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \
+    simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ diff --git a/include/simde/x86/avx2.h b/include/simde/x86/avx2.h new file mode 100644 index 00000000..c2293b53 --- /dev/null +++ b/include/simde/x86/avx2.h @@ -0,0 +1,39524 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/x86/avx2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2019-2020 Michael R. Crusoe + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX2_H) +#define SIMDE_X86_AVX2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define 
HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define 
HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef 
HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) +# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION +#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define 
HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP 
__pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define 
HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION 
_Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) 
|| \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) +#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if 
defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + 
HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
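+ *
+ * (A minimal illustrative sketch of the pattern this policy enables,
+ * using Hedley's push/pop macros together with one of the
+ * SIMDE_DIAGNOSTIC_DISABLE_* macros defined below; it is a sketch,
+ * not a quote of any particular SIMDe function:
+ *
+ *   HEDLEY_DIAGNOSTIC_PUSH
+ *   SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
+ *   // ... code that deliberately compares floats for equality ...
+ *   HEDLEY_DIAGNOSTIC_POP
+ *
+ * so code which includes SIMDe keeps its own warning settings.)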
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
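+ *
+ * (Illustrative invocations only; the exact flag depends on the
+ * toolchain:
+ *
+ *   icc -qopenmp-simd -DSIMDE_ENABLE_OPENMP ...    Intel
+ *   cc  -fopenmp-simd -DSIMDE_ENABLE_OPENMP ...    GCC / clang
+ * )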
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
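+ *
+ * (Illustrative sketch, with a hypothetical function name: given
+ *
+ *   static int helper(void) __attribute__((unused));
+ *
+ * a later call to helper() can trigger -Wused-but-marked-unused.)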
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
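+ *
+ * (Illustrative only, with a hypothetical type name: a declaration
+ * such as
+ *
+ *   typedef long long example_i64;
+ *
+ * is what trips this diagnostic when clang runs with -Weverything in
+ * a pre-C++11 mode, even though the compiler accepts 'long long'.)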
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
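+ *
+ * (Illustrative use of the helper macros defined just below; a
+ * sketch, not a quote of real SIMDe code:
+ *
+ *   #if SIMDE_NATURAL_VECTOR_SIZE_GE(256)
+ *     // take a 256-bit code path
+ *   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
+ *     // take a 128-bit code path
+ *   #else
+ *     // plain serial code
+ *   #endif
+ *
+ * Both the _LE and _GE helpers evaluate to 0 when the natural size
+ * is 0, i.e. when no vectorization is expected at all.)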
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
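+ *
+ * (For instance, when SIMDE_IEEE754_STORAGE is defined, some of the
+ * fallbacks later in this header classify values by masking the raw
+ * bits directly: simde_math_issubnormalf() checks that the exponent
+ * field (0x7F800000) is all zeros while the mantissa field
+ * (0x007FFFFF) is non-zero.)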
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
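+ *
+ * A brief illustrative sketch of the three groups (here `buf' and
+ * `my_type' are placeholders, not names provided by this header):
+ *
+ *   SIMDE_ALIGN_TO_16 int32_t buf[4];
+ *   SIMDE_ALIGN_LIKE_16(my_type) int32_t buf2[4];
+ *   ptr = SIMDE_ALIGN_ASSUME_LIKE(ptr, my_type);
+ *
+ * The first declares storage with a hard-coded 16-byte alignment, the
+ * second borrows my_type's alignment (falling back to 16 on MSVC), and
+ * the third is purely an optimization hint about an existing pointer.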
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
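+ *
+ * Concretely (an illustrative sketch):
+ *
+ *   SIMDE_ALIGN_TO(16) int32_t a[4];
+ *   SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(double)) double b;
+ *
+ * The first line is portable; the second works with the GNU attribute
+ * and with _Alignas/alignas, but MSVC's __declspec(align(...)) rejects
+ * it, which is why the numeric _8/_16/_32/_64 variants below exist.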
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
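+ *
+ * For example (compiler invocations shown only as a sketch):
+ *
+ *   cc -O2 -DSIMDE_FAST_MATH ...                 enable every trade-off
+ *   cc -O2 -DSIMDE_FAST_NANS ...                 enable a single trade-off
+ *   cc -O2 -ffast-math -DSIMDE_NO_FAST_NANS ...  fast-math, but keep NaN fidelity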
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (round, ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to not generate them when we should.
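+ * (A hypothetical illustration: code like
+ *
+ *   feclearexcept(FE_ALL_EXCEPT);
+ *   r = simde_mm_cvtps_epi32(v);
+ *   if (fetestexcept(FE_INVALID)) { ... }
+ *
+ * may see FE_INVALID raised, or not raised, in cases where the native
+ * instruction would do the opposite, unless these fast paths are left
+ * disabled.)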
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
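+ * For example (a sketch; the particular diagnostic macro chosen here is
+ * only an illustration):
+ *
+ *   int eq = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(
+ *     SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);
+ *
+ * Note that it is only defined when statement expressions are available
+ * (i.e., when SIMDE_STATEMENT_EXPR_ is defined).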
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
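+ * (Until then, both types can be overridden when compiling; purely as an
+ * illustration:
+ *
+ *   cc -DSIMDE_FLOAT32_TYPE=_Float32 -DSIMDE_FLOAT64_TYPE=_Float64 ...
+ *
+ * in which case the SIMDE_FLOAT32_C()/SIMDE_FLOAT64_C() literal macros
+ * below fall back to plain casts.)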
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
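+ * (The detection below can be short-circuited by pre-defining the macros
+ * before any SIMDe header is included; an illustrative sketch, where
+ * my_memcpy/my_memset/my_memcmp stand in for the user's own routines:
+ *
+ *   #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+ *   #define simde_memset(s, c, n)      my_memset((s), (c), (n))
+ *   #define simde_memcmp(s1, s2, n)    my_memcmp((s1), (s2), (n))
+ *
+ * Otherwise we try the compiler builtins, then string.h when it looks
+ * usable, and finally the simple byte-by-byte loops at the end of this
+ * block.)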
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
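/* Usage sketch for the typed assertion macros defined here (illustrative):
 * the comparison operator is passed as its own argument so a failure message
 * can print both operands with the matching printf format, e.g.
 *
 *   int lanes = 8;
 *   size_t bytes = 16;
 *   simde_assert_int(lanes, ==, 8);
 *   simde_assert_size(bytes, >=, sizeof(int64_t));
 *
 * Note that SIMDE_NDEBUG defaults to 1 (see the top of check.h above), so
 * these compile to assume-hints or no-ops; defining SIMDE_DEBUG turns them
 * into real runtime checks that call simde_errorf() on failure. */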
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
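/* A note on the pattern used by this and every following function
 * (illustrative): arguments are copied into simde__m64_private so the
 * implementation can operate on whichever view of the union (i8, u16,
 * neon_i8, mmi_i8, ...) matches the selected backend, then the result is
 * copied back out:
 *
 *   simde__m64_private a_ = simde__m64_to_private(a);    // reinterpret input
 *   a_.i8[0] = HEDLEY_STATIC_CAST(int8_t, a_.i8[0] + 1); // work on one view
 *   return simde__m64_from_private(a_);                  // repack the result
 *
 * Because the conversions are simde_memcpy()-based they avoid strict-aliasing
 * problems, and optimising compilers typically elide the copies entirely. */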
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
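/* Worked example for the saturating forms (illustrative): unlike the wrapping
 * simde_mm_add_pi8() above, simde_mm_adds_pi8() clamps each lane to the
 * int8_t range, e.g.
 *
 *   simde__m64 a = simde_mm_set1_pi8(100);
 *   simde__m64 b = simde_mm_set1_pi8(100);
 *   simde__m64 wrap = simde_mm_add_pi8(a, b);  // each lane is -56 (200 wraps mod 256)
 *   simde__m64 sat  = simde_mm_adds_pi8(a, b); // each lane is 127 (INT8_MAX)
 *
 * The _pu8/_pu16 variants that follow do the same with unsigned saturation,
 * clamping to 0 and UINT8_MAX/UINT16_MAX. */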
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
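/* Worked example (illustrative): each 16x16 multiplication produces a 32-bit
 * product, and mulhi/mullo return its two halves. With lanes a = 0x1234 and
 * b = 0x0100 the full product is 0x00123400, so
 *
 *   simde_mm_mulhi_pi16(a, b)  ->  0x0012 in that lane (bits 31..16)
 *   simde_mm_mullo_pi16(a, b)  ->  0x3400 in that lane (bits 15..0)
 *
 * which is exactly what the (x >> 16) and (x & 0xffff) expressions in the
 * portable fallbacks compute. */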
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
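+               /* unaligned variant: unlike simde_x_mm_store_si64 above, no
+                * alignment is assumed for mem_addr, so any address is fine */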
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
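+        /* counts above 15 shift every bit out, so force the result to zero,
+         * which is what the hardware psllw instruction does as well */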
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
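+/* arithmetic shift: the sign bit is replicated, so srai on an int16 lane
+ * holding -6 with a count of 2 should give -2, not the 16382 a logical
+ * shift would produce */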
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
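+      /* clamp as psrad does: counts above 31 behave like 31, leaving each
+       * lane holding only copies of its sign bit (0 or -1) */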
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
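+    /* unsigned saturating subtraction: e.g. 10 - 250 in a u16 lane should
+     * come out as 0 here rather than wrapping around to 65296 */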
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
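+      /* portable fallback: interleave the upper 32-bit lanes into { a[1], b[1] },
+       * e.g. unpackhi of {1, 2} and {3, 4} is {2, 4} */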
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
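+    /* the remaining directions map straight onto the C99 fenv macros; a
+     * value matching no case hits the default below and returns without
+     * touching the rounding mode */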
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
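+   * Each rounding case below first tries a native vector rounding
+   * instruction (AltiVec, NEON, LSX or WASM) and otherwise falls back to
+   * the scalar simde_math_* helpers one lane at a time.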
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_x_mm_broadcastlow_ps(simde__m128 a) {
+  /* This function broadcasts the first element in the input vector to
+   * all lanes. It is used to avoid generating spurious exceptions in
+   * *_ss functions since there may be garbage in the upper lanes. */
+
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_shuffle_ps(a, a, 0);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a);
+
+    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+      r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_splat(a_.altivec_f32, 0);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]);
+    #elif defined(SIMDE_SHUFFLE_VECTOR_)
+      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[0];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ps (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ps(a, b);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32);
+    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+      r_.f32 = a_.f32 + b_.f32;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[i] + b_.f32[i];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ss (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ss(a, b);
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+    return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+    return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
+      float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
+      // the upper values in the result must be the remnants of a.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
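+/* Usage sketch (illustrative only, not part of the upstream SIMDe sources):
+ * the *_ss helpers above operate on lane 0 and carry lanes 1..3 over from
+ * the first argument, which is why simde_x_mm_broadcastlow_ps is used to
+ * keep garbage in the upper lanes from raising spurious FP exceptions.
+ *
+ *   simde__m128 a = simde_mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
+ *   simde__m128 b = simde_mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);
+ *   simde__m128 r = simde_mm_add_ss(a, b);
+ *   // lane 0: 1.0f + 10.0f = 11.0f; lanes 1..3: 2.0f, 3.0f, 4.0f (from a)
+ */
+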
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
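+   *
+   * A minimal sketch of the intended use (illustrative, not from the
+   * upstream comment): produce the mask with a full-width compare so every
+   * lane is exactly 0 or ~0, e.g.
+   *
+   *   simde__m128 m = simde_mm_cmplt_ps(x, y);         // each lane 0 or ~0
+   *   simde__m128 r = simde_x_mm_select_ps(a, b, m);   // b where x < y, else a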
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
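+/* Editor's illustrative sketch (not part of upstream SIMDe, never compiled):
+ * a minimal example of how the SIMDE_MM_TRANSPOSE4_PS macro defined above is
+ * meant to be used. It assumes a row-major 4x4 float matrix `m`; the helper
+ * name simde_example_transpose4 is hypothetical and exists only to show the
+ * load / transpose-in-place / store pattern. */
+#if 0 /* example only */
+static void simde_example_transpose4 (simde_float32 m[4][4]) {
+  /* Load the four rows, transpose them in place, then store them back. */
+  simde__m128 row0 = simde_mm_loadu_ps(m[0]);
+  simde__m128 row1 = simde_mm_loadu_ps(m[1]);
+  simde__m128 row2 = simde_mm_loadu_ps(m[2]);
+  simde__m128 row3 = simde_mm_loadu_ps(m[3]);
+  SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3);
+  simde_mm_storeu_ps(m[0], row0);
+  simde_mm_storeu_ps(m[1], row1);
+  simde_mm_storeu_ps(m[2], row2);
+  simde_mm_storeu_ps(m[3], row3);
+}
+#endif /* example only */
+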
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi8(a); + #else + simde__m256i_private + r_, + a_ = 
simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi8 + #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi16(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi16 + #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi32(simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi32(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi32 + #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi8 + #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return 
simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi16 + #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi16(a, b); + #else + return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi16 + #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi32 + #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi32(a, b); + #else + return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi32 + #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi64 + #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm256_setzero_si256(); + + for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.m128i_private[h].i8[i] = 0; + } else if (srcpos 
> 15) { + r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; + } else { + r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; + } + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) +# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_alignr_epi8(a, b, count) \ + simde_mm256_set_m128i( \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_alignr_epi8 + #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_and_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_si256 + #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_andnot_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_si256 + #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi8 + #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi16(a, b); + #else 
+ simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi16 + #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadds_epi16(a, b); + #else + return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadds_epi16 + #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu8 + #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu16 + #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu8 + #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu16(a, b); + #else + simde__m256i_private 
+ r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu16 + #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) +# define simde_mm_blend_epi32(a, b, imm8) \ + simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi32 + #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) +#elif defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi16(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi16 + #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi32(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi32 + #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_blendv_epi8(a, b, mask); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + mask_ = simde__m256i_to_private(mask); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); + r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(mask_.i8) tmp = mask_.i8 >> 7; + r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + int8_t tmp = mask_.i8[i] >> 7; + r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_epi8 + #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastb_epi8(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastb_epi8 + #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastb_epi8(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastb_epi8 + #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastw_epi16(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + 
#undef _mm_broadcastw_epi16 + #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastw_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastw_epi16 + #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastd_epi32(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastd_epi32 + #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastd_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastd_epi32 + #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastq_epi64(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastq_epi64 + #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastq_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastq_epi64 + #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastss_ps(a); + #elif defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_shuffle_ps(a, a, 0); + #else + simde__m128_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { 
+ r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastss_ps + #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastss_ps(a); + #else + simde__m256_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + __m128 tmp = _mm_permute_ps(a_.n, 0); + r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); + #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) + r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastss_ps + #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_broadcastsd_pd (simde__m128d a) { + return simde_mm_movedup_pd(a); +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastsd_pd + #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastsd_pd(a); + #else + simde__m256d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsd_pd + #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) + return _mm256_broadcastsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = a_; + r_.m128i_private[1] = a_; + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = a_.i64[1]; + r_.i64[2] = a_.i64[0]; + r_.i64[3] = a_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsi128_si256 + #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) + #undef _mm_broadcastsi128_si256 + #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i - imm8; + if(i >= (ssize/2)) { + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e 
>= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i + imm8; + if(i < (ssize/2)) { + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi8 + #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi16 + #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi32 + #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi64 + #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi8 + #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 > b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi16 + #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi32 + #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi64 + #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi16 + #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi32 + #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi64 + #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi32 + #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi64 + #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_epi64 + #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi16 + #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi32 + #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi64 + #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi32 + #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if 
defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi64 + #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu32_epi64 + #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi8 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31){ + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i8[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi8 + #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi16 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i16[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi16 + #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extracti128_si256 + #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return 
simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi32 + #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi32 + #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi32 + #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi32 + #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi32 + #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { 
+ r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi32 + #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi32 + #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi32 + #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi64 + #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi64 + #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + 
simde__m256i_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi64 + #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi64 + #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi64 + #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi64 + #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi64 + #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi64 + #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_ps + #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_ps + #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_ps + #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + src_ = simde__m256_to_private(src), + mask_ = simde__m256_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const 
uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_ps + #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_ps + #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, 
SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_ps + #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_ps + #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_ps + #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + 
HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_pd + #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_pd + #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m256d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, 
base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_pd + #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_pd + #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_pd + #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = 
simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_pd + #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_pd + #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + 
#define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_pd + #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[ imm8 & 1 ] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_inserti128_si256 + #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_madd_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); + SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); + + SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); + SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); + product = a32x16 * b32x16; + + even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); + odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); + + r_.i32 = even + odd; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_madd_epi16 + #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maddubs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maddubs_epi16 + #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi32(mem_addr, mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi32 + #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi32(mem_addr, mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi32 + #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = mask_shr_.i64[i] ? mem_addr[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi64 + #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : INT64_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi64 + #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi32(mem_addr, mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi32 + #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi32(mem_addr, mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi32 + #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi64 + #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi64 + #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_max_epi8(a, b); + #else + 
simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi8 + #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu8 + #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu16 + #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu32 + #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi16 + #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi32 + #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_min_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi8 + #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi16 + #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi32 + #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu8 + #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu16 + #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu32 + #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_movemask_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_movemask_epi8(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + uint32_t r = 0; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); + } + #else + r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); + } + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_epi8 + #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + const int a_offset1 = imm8 & 4; + const int b_offset1 = (imm8 & 3) << 2; + const int a_offset2 = (imm8 >> 3) & 4; + const int b_offset2 = ((imm8 >> 3) & 3) << 2; + + #if defined(simde_math_abs) + const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; + for (int i = 0 ; i < halfway_point ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); + r_.u16[halfway_point + i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_mpsadbw_epu8 + #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhrs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / 
sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi16(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi16 + #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi32(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi32 + #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_or_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_si256 + #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; + const size_t 
quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); + r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); + r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi16 + #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi32(a, b); + #else + simde__m256i_private + r_, + v_[] = { + simde__m256i_to_private(a), + simde__m256i_to_private(b) + }; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); + r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi32 + #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); + r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); + r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); + r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi16 + #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); + r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); + r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); + r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi32 + #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2x128_si256 + #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; + r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; + r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; + r_.i64[3] = (imm8 & 0x80) ? 
a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_epi64 + #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; + r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; + r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; + r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_pd + #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 7]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_epi32 + #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); + #else + return _mm256_permutevar8x32_ps(a, idx); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[idx_.i32[i] & 7]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_ps + #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sad_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sad_epu8 + #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_shuffle_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { + r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; + r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi8 + #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_shuffle_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 32, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi32 + #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 
3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4, \ + 8, 9, 10, 11, \ + ((((imm8) ) & 3) + 8 + 4), \ + ((((imm8) >> 2) & 3) + 8 + 4), \ + ((((imm8) >> 4) & 3) + 8 + 4), \ + ((((imm8) >> 6) & 3) + 8 + 4) \ + ) }); })) +#else +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflehi_epi16 + #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7, \ + ((((imm8) ) & 3) + 8), \ + ((((imm8) >> 2) & 3) + 8), \ + ((((imm8) >> 4) & 3) + 8), \ + ((((imm8) >> 6) & 3) + 8), \ + 12, 13, 14, 15) }); })) +#else +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflelo_epi16 + #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi8 + #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi16 + #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi32 + #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi16 + #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 31) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi32 + #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = 
simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 63) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi64 + #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* Note: There is no consistency in how compilers handle values outside of + the expected range, hence the discrepancy between what we allow and what + Intel specifies. Some compilers will return 0, others seem to just mask + off everything outside of the range. */ + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { + r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi16 + #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { + r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi32 + #define _mm256_slli_epi32(a, imm8) 
simde_mm256_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi64 + #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) - imm8; + r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_si256 + #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); + r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi32 + #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi32 + #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); + r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi64 + #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi64 + #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi16 + #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi32 + #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi16 + #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi32 + #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_srav_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); + r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srav_epi32 + #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srav_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + count_ = simde__m256i_to_private(count); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); + r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + if (shift > 31) shift = 31; + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srav_epi32 + #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi16 + #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi32 + #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi64 + #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + if (imm8 > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { + r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); + } + #else + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi16 + #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { + r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi32 + #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi64 + #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = imm8 + HEDLEY_STATIC_CAST(int, i); + r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_si256 + #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi32 + #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi32 + #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi64 + #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi64 + #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi8 + #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ 
= simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi16 + #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi16(a, b); + #else + return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi16 + #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi32 + #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi32(a, b); + #else + return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi32 + #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi64 + #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + 
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi8 + #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi16 + #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsubs_epi16(a, b); + #else + return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsubs_epi16 + #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu8 + #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + 
+ #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu16 + #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_x_mm256_test_all_ones (simde__m256i a) { + simde__m256i_private a_ = simde__m256i_to_private(a); + int r; + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi8 + #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi16 + #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); + 
#elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 0, 8, 1, 9, 4, 12, 5, 13); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi32 + #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i]; + r_.i64[2 * i + 1] = b_.i64[2 * i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi64 + #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi8 + #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 4, 20, 5, 21, 6, 22, 7, 23, + 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi16 + #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 2, 10, 3, 11, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi32 + #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i + 1]; + r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi64 + #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_xor_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_si256 + #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX2_H) */ +/* :: End simde/x86/avx2.h :: */ diff --git a/include/simde/x86/avx512.h b/include/simde/x86/avx512.h new file mode 100644 index 00000000..16c85809 --- /dev/null +++ b/include/simde/x86/avx512.h @@ -0,0 +1,96026 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of 
charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_H) +#define SIMDE_X86_AVX512_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_TYPES_H) +#define SIMDE_X86_AVX512_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
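As an editor's illustrative sketch (not part of the imported header), the feature-detection macros defined just below are typically consumed by guarding version-specific workarounds; the guarded fallback shown here is hypothetical:

    #if SIMDE_DETECT_CLANG_VERSION_NOT(9, 0, 0)
      // hypothetical: use a slower fallback on clang derivatives older than upstream 9.0
      #define EXAMPLE_USE_SCALAR_FALLBACK 1
    #endif

    #if SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0)
      // hypothetical: rely on behaviour first available in upstream clang 12
    #endif

Both macros expand to (0) when the compiler is not a clang derivative, so such guards stay valid on other compilers.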
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
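A minimal editor's sketch (not part of the imported header) of the push/disable/pop pattern these SIMDE_DIAGNOSTIC_DISABLE_* macros are built for, using the float-equality macro defined further down; the exact-comparison helper is a hypothetical stand-in:

    HEDLEY_DIAGNOSTIC_PUSH
    SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
    static int example_exactly_equal(float a, float b) {
      return a == b;  // intentional exact comparison; the disable macro above silences -Wfloat-equal
    }
    HEDLEY_DIAGNOSTIC_POP

The surrounding push/pop keeps the suppression local, so translation units that include SIMDe keep their own warning settings.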
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
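+ *
+ * A minimal illustrative use (the buffer name is just an example, and it
+ * assumes the data really is aligned as claimed):
+ *
+ *   float* p = SIMDE_ALIGN_ASSUME_TO(buf, 16);
+ *
+ * The result is the same pointer with the alignment assumption attached,
+ * so it can be used anywhere the original pointer could be.
+ *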
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
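+ *
+ * For example, a build already using -ffast-math picks all of these up
+ * automatically, while something like
+ *
+ *   cc -O2 -DSIMDE_FAST_NANS -c foo.c
+ *
+ * (file name for illustration only) opts into just the NaN shortcut and
+ * keeps the other trade-offs disabled.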
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
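+ *
+ * For instance (expression and diagnostic chosen purely for
+ * illustration), something along the lines of
+ *
+ *   r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);
+ *
+ * evaluates the comparison with that one warning suppressed (when
+ * statement expressions are available) and then restores the previous
+ * diagnostic state.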
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
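+ * (If the detection below does not work for your environment you can,
+ * for example, add -Dsimde_memcpy=my_memcpy to your compiler flags, or
+ * #define simde_memcpy before the first SIMDe include; my_memcpy is
+ * just a placeholder name here.)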
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
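+ *
+ * For example, on a typical LP64 target where int64_t is long,
+ * SIMDE_BUILTIN_SUFFIX_64_ ends up as l, so SIMDE_BUILTIN_64_(popcount)
+ * expands to __builtin_popcountl (shown only as an illustration of how
+ * the mapping is meant to be used).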
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
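+ *
+ * For example (expression chosen purely for illustration),
+ * SIMDE_BUG_IGNORE_SIGN_CONVERSION(a + b) evaluates a + b with
+ * -Wsign-conversion suppressed on the affected compilers and is a plain
+ * pass-through everywhere else.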
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of a.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
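+/* Illustrative sketch only, not part of the upstream SIMDe sources: the
+ * bitwise helpers defined above (simde_mm_and_ps, simde_mm_xor_ps) are the
+ * building blocks for a branchless per-lane select of the classic form
+ * a ^ ((a ^ b) & mask), assuming every lane of `mask` is either all-ones
+ * or all-zeros. The function name below is hypothetical. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+example_select_by_mask_ps (simde__m128 a, simde__m128 b, simde__m128 mask) {
+  /* Where a mask lane is all-ones, (a ^ b) survives the AND and the outer
+   * XOR yields b; where it is all-zeros, the AND gives 0 and a is kept. */
+  return simde_mm_xor_ps(a, simde_mm_and_ps(simde_mm_xor_ps(a, b), mask));
+}
+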
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
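+/* Illustrative usage sketch of the extract/insert and scalar-load wrappers
+ * above (not upstream SIMDe code; variable names and values are hypothetical):
+ *
+ *   simde__m128i v  = simde_mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
+ *   int32_t lane3   = simde_mm_extract_epi16(v, 3);     // 3, zero-extended to 32 bits
+ *   v               = simde_mm_insert_epi16(v, 42, 3);  // lane 3 becomes 42
+ *   double d        = 1.5;
+ *   simde__m128d sd = simde_mm_load_sd(&d);             // { 1.5, 0.0 }
+ */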
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
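+/* Sketch of the unaligned double/integer loads defined above (not upstream
+ * SIMDe code; the buffers are hypothetical and need no particular alignment,
+ * unlike simde_mm_load_pd):
+ *
+ *   double d[2]     = { 1.0, 2.0 };
+ *   simde__m128d v  = simde_mm_loadu_pd(d);          // lane 0 = 1.0, lane 1 = 2.0
+ *   v               = simde_mm_loadh_pd(v, &d[0]);   // high lane reloaded: { 1.0, 1.0 }
+ *   int16_t h[8]    = { 1, 2, 3, 4, 5, 6, 7, 8 };
+ *   simde__m128i w  = simde_mm_loadu_epi16(h);       // eight 16-bit lanes
+ */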
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
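+/* Sketch: simde_mm_madd_epi16 (being defined here) multiplies matching signed
+ * 16-bit lanes and sums adjacent products into four 32-bit lanes (illustrative
+ * only; the buffer is hypothetical):
+ *
+ *   int16_t buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+ *   simde__m128i a = simde_mm_loadu_si128(buf);
+ *   simde__m128i p = simde_mm_madd_epi16(a, a);   // { 1*1 + 2*2, 3*3 + 4*4, ... }
+ */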
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
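+/* Sketch: simde_mm_movemask_epi8 packs the sign bit of each byte lane into an
+ * integer, which pairs naturally with a population count (illustrative only;
+ * x and y are hypothetical vectors, simde_mm_cmpeq_epi8 is defined earlier in
+ * this header, and __builtin_popcount assumes a GCC/Clang-style compiler):
+ *
+ *   simde__m128i eq = simde_mm_cmpeq_epi8(x, y);
+ *   int mask        = simde_mm_movemask_epi8(eq);   // one bit per byte lane
+ *   int equal_bytes = __builtin_popcount(HEDLEY_STATIC_CAST(unsigned int, mask));
+ */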
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
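+/* Sketch of the per-lane min/max wrappers above (illustrative only; values are
+ * hypothetical). simde_mm_min_epi16/simde_mm_max_epi16 compare 16-bit lanes as
+ * signed, while simde_mm_min_epu8/simde_mm_max_epu8 compare byte lanes as
+ * unsigned:
+ *
+ *   simde__m128i a  = simde_mm_set1_epi16(-3);
+ *   simde__m128i b  = simde_mm_set1_epi16(7);
+ *   simde__m128i mn = simde_mm_min_epi16(a, b);   // every lane -3
+ *   simde__m128i mx = simde_mm_max_epi16(a, b);   // every lane  7
+ */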
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
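+/* Sketch: simde_mm_mul_epu32 multiplies the even-indexed (0 and 2) unsigned
+ * 32-bit lanes into two 64-bit products, and simde_mm_mulhi_epi16 keeps the
+ * high 16 bits of each 16x16 product (illustrative only; values are
+ * hypothetical):
+ *
+ *   simde__m128i a = simde_mm_set_epi32(0, 3, 0, 100000);
+ *   simde__m128i b = simde_mm_set_epi32(0, 4, 0, 100000);
+ *   simde__m128i p = simde_mm_mul_epu32(a, b);   // u64 lanes { 10000000000, 12 }
+ */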
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
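+/* Sketch of the saturating narrowing conversions above (illustrative only;
+ * values are hypothetical):
+ *
+ *   simde__m128i w   = simde_mm_set1_epi32(100000);
+ *   simde__m128i s16 = simde_mm_packs_epi32(w, w);     // every i16 lane saturates to 32767
+ *   simde__m128i s8  = simde_mm_packs_epi16(s16, s16); // every i8 lane saturates to 127
+ */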
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
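+/* Sketch of the set/load constructors above: arguments are listed high lane
+ * first, so lane 0 receives the last argument (illustrative only; values are
+ * hypothetical):
+ *
+ *   simde__m128i v = simde_mm_set_epi32(3, 2, 1, 0);               // lane 0 = 0, lane 3 = 3
+ *   simde__m128i w = simde_mm_set_epi64x(INT64_C(-1), INT64_C(5)); // lane 0 = 5, lane 1 = -1
+ *   int32_t x = 7;
+ *   simde__m128i z = simde_mm_loadu_si32(&x);                      // lanes { 7, 0, 0, 0 }
+ */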
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
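+  /* The _mm256_cast*_* family above and below only reinterprets the 256
+   * bits in place; no conversion work is done.  The portable branch simply
+   * re-reads the same object through a pointer to the target type. */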
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
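+   *
+   * A rough usage sketch (x and y here are arbitrary inputs, named only
+   * for illustration): a comparison already produces such a mask, so
+   *
+   *   simde__m256d m = simde_mm256_cmp_pd(x, y, SIMDE_CMP_LT_OQ);
+   *   simde__m256d r = simde_x_mm256_select_pd(x, y, m);
+   *
+   * keeps x in lanes where the compare was false and takes y where it was
+   * true.  Unlike _mm256_blendv_pd, a mask with only the sign bit set in
+   * a lane is not guaranteed to work here.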
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
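+/* As with Intel's _mm256_set_* intrinsics, the set helpers above and below
+ * take their arguments from the highest element down to e0, so e0 always
+ * ends up in element 0, i.e. the lowest lane and the lowest address when
+ * the vector is stored to memory. */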
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
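+      /* per 128-bit half: the even-indexed elements of a, followed by the
+       * even-indexed elements of b */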
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
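+    /* horizontal add: { a0+a1, b0+b1, a2+a3, b2+b3 }.  The portable branch
+     * below builds the same result by adding the even-indexed elements
+     * { a0, b0, a2, b2 } to the odd-indexed ones { a1, b1, a3, b3 } via the
+     * deinterleave helpers defined earlier. */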
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for + * __m512/__m512i/__m512d. 
Since our private union has an __m512 member it will be 64-byte + * aligned even if we reduce the alignment requirements of other members. + * + * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the + * to/from private functions will break, and I'm not willing to change their APIs to use + * pointers (which would also require more verbose code on the caller side) just to make + * MSVC happy. + * + * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, + * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to + * fix this without requiring API changes (except transparently through macros), patches + * are welcome. + */ + +# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) +# if defined(SIMDE_X86_AVX512F_NATIVE) +# undef SIMDE_X86_AVX512F_NATIVE +# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") +# endif +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 +# else +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 +# endif + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_ALIGN_TO_16 __m128bh n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + 
v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_BF16_NATIVE) + SIMDE_ALIGN_TO_32 __m256bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) 
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_AVX512_ALIGN __m512bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t 
u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) 
SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; + SIMDE_AVX512_ALIGN simde__m128d m128d[4]; + SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; + SIMDE_AVX512_ALIGN simde__m256d m256d[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + 
SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; + SIMDE_AVX512_ALIGN simde__m128i m128i[4]; + SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; + SIMDE_AVX512_ALIGN simde__m256i m256i[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512i_private; + +/* Intel uses the same header (immintrin.h) for everything AVX and + * later. If native aliases are enabled, and the machine has native + * support for AVX imintrin.h will already have been included, which + * means simde__m512* will already have been defined. So, even + * if the machine doesn't support AVX512F we need to use the native + * type; it has already been defined. + * + * However, we also can't just assume that including immintrin.h does + * actually define these. It could be a compiler which supports AVX + * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we + * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, + * so we assume that if it's present AVX-512F has already been + * declared. + * + * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC + * uses the preprocessor to define all the _MM_CMPINT_* members, + * in most compilers they are simply normal enum members. However, + * all compilers I've looked at use an object-like macro for + * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT + * is included in case a compiler does the reverse, though I haven't + * run into one which does. + * + * As for the ICC check, unlike other compilers, merely using the + * AVX-512 types causes ICC to generate AVX-512 instructions. 
*/ +#if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && (defined(SIMDE_X86_AVX512F_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m512 simde__m512; + typedef __m512i simde__m512i; + typedef __m512d simde__m512d; + + typedef __mmask8 simde__mmask8; + typedef __mmask16 simde__mmask16; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m512_private simde__m512; + typedef simde__m512i_private simde__m512i; + typedef simde__m512d_private simde__m512d; + #endif + + typedef uint8_t simde__mmask8; + typedef uint16_t simde__mmask16; +#endif + +#if (defined(_AVX512BF16INTRIN_H_INCLUDED) || defined(__AVX512BF16INTRIN_H)) && (defined(SIMDE_X86_AVX512BF16_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m128bh simde__m128bh; + typedef __m256bh simde__m256bh; + typedef __m512bh simde__m512bh; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128bh SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m256bh SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m512bh SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m128bh_private simde__m128bh; + typedef simde__m256bh_private simde__m256bh; + typedef simde__m512bh_private simde__m512bh; + #endif +#endif + +/* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is + * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang + * both are in avx512bwintrin.h), not AVX-512F. However, we don't have + * a good (not-compiler-specific) way to detect if these headers have + * been included. In compilers which support AVX-512F but not + * AVX-512BW/VL (e.g., GCC 4.9) we need typedefs since __mmask{32,64) + * won't exist. + * + * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t + * in all compilers, so it's safe to use these instead of typedefs to + * __mmask{16,32}. If you run into a problem with this please file an + * issue and we'll try to figure out a work-around. 
*/ +typedef uint32_t simde__mmask32; +typedef uint64_t simde__mmask64; +#if !defined(__mmask32) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef uint32_t __mmask32; + #else + #define __mmask32 uint32_t; + #endif +#endif +#if !defined(__mmask64) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + #if defined(HEDLEY_GCC_VERSION) + typedef unsigned long long __mmask64; + #else + typedef uint64_t __mmask64; + #endif + #else + #define __mmask64 uint64_t; + #endif +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m512 __m512; + typedef simde__m512i __m512i; + typedef simde__m512d __m512d; + #else + #define __m512 simde__m512 + #define __m512i simde__m512i + #define __m512d simde__m512d + #endif +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m128bh __m128bh; + typedef simde__m256bh __m256bh; + typedef simde__m512bh __m512bh; + #else + #define __m128bh simde__m128bh + #define __m256bh simde__m256bh + #define __m512bh simde__m512bh + #endif +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh), "simde__m128bh size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh_private), "simde__m128bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh), "simde__m256bh size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh_private), "simde__m256bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh), "simde__m512bh size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh_private), "simde__m512bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh) == 16, "simde__m128bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh_private) == 16, "simde__m128bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh) == 32, "simde__m256bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh_private) == 32, "simde__m256bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh) == 32, "simde__m512bh is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh_private) == 32, "simde__m512bh_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); 
+HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); +#endif + +#define SIMDE_MM_CMPINT_EQ 0 +#define SIMDE_MM_CMPINT_LT 1 +#define SIMDE_MM_CMPINT_LE 2 +#define SIMDE_MM_CMPINT_FALSE 3 +#define SIMDE_MM_CMPINT_NE 4 +#define SIMDE_MM_CMPINT_NLT 5 +#define SIMDE_MM_CMPINT_NLE 6 +#define SIMDE_MM_CMPINT_TRUE 7 +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && !defined(_MM_CMPINT_EQ) +#define _MM_CMPINT_EQ SIMDE_MM_CMPINT_EQ +#define _MM_CMPINT_LT SIMDE_MM_CMPINT_LT +#define _MM_CMPINT_LE SIMDE_MM_CMPINT_LE +#define _MM_CMPINT_FALSE SIMDE_MM_CMPINT_FALSE +#define _MM_CMPINT_NE SIMDE_MM_CMPINT_NE +#define _MM_CMPINT_NLT SIMDE_MM_CMPINT_NLT +#define _MM_CMPINT_NLE SIMDE_MM_CMPINT_NLE +#define _MM_CMPINT_TRUE SIMDE_CMPINT_TRUE +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh +simde__m128bh_from_private(simde__m128bh_private v) { + simde__m128bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh_private +simde__m128bh_to_private(simde__m128bh v) { + simde__m128bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh +simde__m256bh_from_private(simde__m256bh_private v) { + simde__m256bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh_private +simde__m256bh_to_private(simde__m256bh v) { + simde__m256bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh +simde__m512bh_from_private(simde__m512bh_private v) { + simde__m512bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh_private +simde__m512bh_to_private(simde__m512bh v) { + simde__m512bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde__m512_from_private(simde__m512_private v) { + simde__m512 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512_private +simde__m512_to_private(simde__m512 v) { + simde__m512_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde__m512i_from_private(simde__m512i_private v) { + simde__m512i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i_private +simde__m512i_to_private(simde__m512i v) { + simde__m512i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde__m512d_from_private(simde__m512d_private v) { + simde__m512d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d_private +simde__m512d_to_private(simde__m512d v) { + simde__m512d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TYPES_H) */ +/* :: End simde/x86/avx512/types.h :: */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/2intersect.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Ashleigh Newman-Jones + */ + +#if !defined(SIMDE_X86_AVX512_2INTERSECT_H) +#define SIMDE_X86_AVX512_2INTERSECT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_2intersect_epi32(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) { + #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + _mm_2intersect_epi32(a, b, k1, k2); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + simde__mmask8 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { + const int32_t m = a_.i32[i] == b_.i32[j]; + k1_ |= m << i; + k2_ |= m << j; + } + } + + *k1 = k1_; + *k2 = k2_; + #endif +} +#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef __mm_2intersect_epi32 + #define __mm_2intersect_epi32(a,b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_2intersect_epi64(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) { + #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + _mm_2intersect_epi64(a, b, k1, k2); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + simde__mmask8 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) { + const int32_t m = a_.i64[i] == b_.i64[j]; + k1_ |= m << i; + k2_ |= m << j; + } + } + + *k1 = k1_; + *k2 = k2_; + #endif +} +#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef __mm_2intersect_epi64 + #define __mm_2intersect_epi64(a,b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_2intersect_epi32(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) { + #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + _mm256_2intersect_epi32(a, b, k1, k2); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + simde__mmask8 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i32) 
/ sizeof(a_.i32[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { + const int32_t m = a_.i32[i] == b_.i32[j]; + k1_ |= m << i; + k2_ |= m << j; + } + } + + *k1 = k1_; + *k2 = k2_; + #endif +} +#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_2intersect_epi32 + #define _mm256_2intersect_epi32(a,b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_2intersect_epi64(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) { + #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + _mm256_2intersect_epi64(a, b, k1, k2); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + simde__mmask8 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) { + const int32_t m = a_.i64[i] == b_.i64[j]; + k1_ |= m << i; + k2_ |= m << j; + } + } + + *k1 = k1_; + *k2 = k2_; + #endif +} +#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_2intersect_epi64 + #define _mm256_2intersect_epi64(a,b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_2intersect_epi32(simde__m512i a, simde__m512i b, simde__mmask16 *k1, simde__mmask16 *k2) { + #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) + _mm512_2intersect_epi32(a, b, k1, k2); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask16 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { + const int32_t m = a_.i32[i] == b_.i32[j]; + k1_ |= m << i; + k2_ |= m << j; + } + } + + *k1 = k1_; + *k2 = k2_; + #endif +} +#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) + #undef _mm512_2intersect_epi32 + #define _mm512_2intersect_epi32(a, b, k1, k2) simde_mm512_2intersect_epi32(a, b, k1, k2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_2intersect_epi64(simde__m512i a, simde__m512i b, simde__mmask8 *k1, simde__mmask8 *k2) { + #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) + _mm512_2intersect_epi64(a, b, k1, k2); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask8 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) { + const int32_t m = a_.i64[i] == b_.i64[j]; + k1_ |= m << i; + k2_ |= m << j; + } + } + + *k1 = k1_; + *k2 = k2_; + #endif +} +#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) + #undef _mm512_2intersect_epi64 + #define _mm512_2intersect_epi64(a, b, k1, k2) simde_mm512_2intersect_epi64(a, b, k1, k2) 
+#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_2INTERSECT_H) */ +/* :: End simde/x86/avx512/2intersect.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/4dpwssd.h :: */ +#if !defined(SIMDE_X86_AVX512_4DPWSSD_H) +#define SIMDE_X86_AVX512_4DPWSSD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/dpwssd.h :: */ +#if !defined(SIMDE_X86_AVX512_DPWSSD_H) +#define SIMDE_X86_AVX512_DPWSSD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mov.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_MOV_H) +#define SIMDE_X86_AVX512_MOV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cast.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_CAST_H) +#define SIMDE_X86_AVX512_CAST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castpd_ps (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_ps + #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castpd_si512 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_si512 + #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castps_pd (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_pd + #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castps_si512 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_si512 + #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castsi512_ps (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_ps + #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castsi512_pd (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_pd + #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd128_pd512 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd128_pd512(a); + #else + simde__m512d_private r_; + r_.m128d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd128_pd512 + #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd256_pd512 (simde__m256d a) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd256_pd512(a); + #else + simde__m512d_private r_; + r_.m256d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd256_pd512 + #define _mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm512_castpd512_pd128 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd128(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd128 + #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm512_castpd512_pd256 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd256(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m256d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd256 + #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps128_ps512 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps128_ps512(a); + #else + simde__m512_private r_; + r_.m128[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps128_ps512 + #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps256_ps512 (simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps256_ps512(a); + #else + simde__m512_private r_; + r_.m256[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps256_ps512 + #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm512_castps512_ps128 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps128(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps128 + #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm512_castps512_ps256 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps256(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m256[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps256 + #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi128_si512 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi128_si512(a); + #else + simde__m512i_private r_; + r_.m128i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi128_si512 + #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi256_si512 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi256_si512(a); + #else + simde__m512i_private r_; + r_.m256i[0] = a; + return 
simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi256_si512 + #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_castsi512_si128 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si128(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si128 + #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_castsi512_si256 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si256(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m256i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si256 + #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ +/* :: End simde/x86/avx512/cast.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SET_H) +#define SIMDE_X86_AVX512_SET_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/load.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_LOAD_H) +#define SIMDE_X86_AVX512_LOAD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_load_pd (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_pd(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d)); + #else + simde__m512d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_pd + #define _mm512_load_pd(a) simde_mm512_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_load_ps (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_ps(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512)); + #else + simde__m512 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_ps + #define _mm512_load_ps(a) simde_mm512_load_ps(a) +#endif +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_load_si512 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); + #else + simde__m512i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); + return r; + #endif +} +#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_epi8 + #undef _mm512_load_epi16 + #undef _mm512_load_epi32 + #undef _mm512_load_epi64 + #undef _mm512_load_si512 + #define _mm512_load_si512(a) simde_mm512_load_si512(a) + #define _mm512_load_epi8(a) simde_mm512_load_si512(a) + #define _mm512_load_epi16(a) simde_mm512_load_si512(a) + #define _mm512_load_epi32(a) simde_mm512_load_si512(a) + #define _mm512_load_epi64(a) simde_mm512_load_si512(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ +/* :: End simde/x86/avx512/load.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, + int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + 
simde__m512i_private r_; + + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + r_.i16[16] = e16; + r_.i16[17] = e17; + r_.i16[18] = e18; + r_.i16[19] = e19; + r_.i16[20] = e20; + r_.i16[21] = e21; + r_.i16[22] = e22; + r_.i16[23] = e23; + r_.i16[24] = e24; + r_.i16[25] = e25; + r_.i16[26] = e26; + r_.i16[27] = e27; + r_.i16[28] = e28; + r_.i16[29] = e29; + r_.i16[30] = e30; + r_.i16[31] = e31; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi16 + #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + simde__m512i_private r_; + + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + r_.i32[ 8] = e8; + r_.i32[ 9] = e9; + r_.i32[10] = e10; + r_.i32[11] = e11; + r_.i32[12] = e12; + r_.i32[13] = e13; + r_.i32[14] = e14; + r_.i32[15] = e15; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi32 + #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + simde__m512i_private r_; + + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + r_.i64[4] = e4; + r_.i64[5] = e5; + r_.i64[6] = e6; + r_.i64[7] = e7; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi64 + #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, + uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, + uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, + uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, + uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t 
e0) { + simde__m512i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; + r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + r_.u8[32] = e32; + r_.u8[33] = e33; + r_.u8[34] = e34; + r_.u8[35] = e35; + r_.u8[36] = e36; + r_.u8[37] = e37; + r_.u8[38] = e38; + r_.u8[39] = e39; + r_.u8[40] = e40; + r_.u8[41] = e41; + r_.u8[42] = e42; + r_.u8[43] = e43; + r_.u8[44] = e44; + r_.u8[45] = e45; + r_.u8[46] = e46; + r_.u8[47] = e47; + r_.u8[48] = e48; + r_.u8[49] = e49; + r_.u8[50] = e50; + r_.u8[51] = e51; + r_.u8[52] = e52; + r_.u8[53] = e53; + r_.u8[54] = e54; + r_.u8[55] = e55; + r_.u8[56] = e56; + r_.u8[57] = e57; + r_.u8[58] = e58; + r_.u8[59] = e59; + r_.u8[60] = e60; + r_.u8[61] = e61; + r_.u8[62] = e62; + r_.u8[63] = e63; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, + uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, + uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m512i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + r_.u16[16] = e16; + r_.u16[17] = e17; + r_.u16[18] = e18; + r_.u16[19] = e19; + r_.u16[20] = e20; + r_.u16[21] = e21; + r_.u16[22] = e22; + r_.u16[23] = e23; + r_.u16[24] = e24; + r_.u16[25] = e25; + r_.u16[26] = e26; + r_.u16[27] = e27; + r_.u16[28] = e28; + r_.u16[29] = e29; + r_.u16[30] = e30; + r_.u16[31] = e31; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, + uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + simde__m512i_private r_; + + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + r_.u32[ 8] = e8; + r_.u32[ 9] = e9; + r_.u32[10] = e10; + r_.u32[11] = e11; + r_.u32[12] = e12; + r_.u32[13] = e13; + r_.u32[14] = e14; + r_.u32[15] = e15; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m512i_private r_; + + r_.u64[ 0] = e0; + r_.u64[ 1] = e1; + r_.u64[ 2] = e2; + r_.u64[ 3] = e3; + r_.u64[ 4] = e4; + r_.u64[ 5] = e5; + r_.u64[ 6] = e6; + r_.u64[ 7] = e7; + + return 
simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, + int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, + int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, + int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (HEDLEY_GCC_VERSION_CHECK(10,0,0) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) + return _mm512_set_epi8( + e63, e62, e61, e60, e59, e58, e57, e56, + e55, e54, e53, e52, e51, e50, e49, e48, + e47, e46, e45, e44, e43, e42, e41, e40, + e39, e38, e37, e36, e35, e34, e33, e32, + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0 + ); + #else + simde__m512i_private r_; + + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + r_.i8[32] = e32; + r_.i8[33] = e33; + r_.i8[34] = e34; + r_.i8[35] = e35; + r_.i8[36] = e36; + r_.i8[37] = e37; + r_.i8[38] = e38; + r_.i8[39] = e39; + r_.i8[40] = e40; + r_.i8[41] = e41; + r_.i8[42] = e42; + r_.i8[43] = e43; + r_.i8[44] = e44; + r_.i8[45] = e45; + r_.i8[46] = e46; + r_.i8[47] = e47; + r_.i8[48] = e48; + r_.i8[49] = e49; + r_.i8[50] = e50; + r_.i8[51] = e51; + r_.i8[52] = e52; + r_.i8[53] = e53; + r_.i8[54] = e54; + r_.i8[55] = e55; + r_.i8[56] = e56; + r_.i8[57] = e57; + r_.i8[58] = e58; + r_.i8[59] = e59; + r_.i8[60] = e60; + r_.i8[61] = e61; + r_.i8[62] = e62; + r_.i8[63] = e63; + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi8 + #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_16(simde__m128i) simde__m128i v[] = { d, c, b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + r_.m128i[0] = d; + r_.m128i[1] = c; + r_.m128i[2] = b; + r_.m128i[3] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_32(simde__m256i) simde__m256i v[] = { b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + r_.m256i[0] = b; + r_.m256i[1] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, + simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + simde__m512_private r_; + + r_.f32[ 0] = e0; + r_.f32[ 1] = e1; + r_.f32[ 2] = e2; + r_.f32[ 3] = e3; + r_.f32[ 4] = e4; + r_.f32[ 5] = e5; + r_.f32[ 6] = e6; + r_.f32[ 7] = e7; + r_.f32[ 8] = e8; + r_.f32[ 9] = e9; + r_.f32[10] = e10; + r_.f32[11] = e11; + r_.f32[12] = e12; + r_.f32[13] = e13; + r_.f32[14] = e14; + r_.f32[15] = e15; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_ps + #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + simde__m512d_private r_; + + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + r_.f64[4] = e4; + r_.f64[5] = e5; + r_.f64[6] = e6; + r_.f64[7] = e7; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_pd + #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET_H) */ +/* :: End simde/x86/avx512/set.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi8(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : src_.i8[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi8 + #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi16(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi16 + #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi32(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi32 + #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi64(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + /* N.B. CM: No fallbacks as there are only two elements */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi64 + #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_mov_pd(simde__m128d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_pd(src, k, a); + #else + return simde_mm_castsi128_pd(simde_mm_mask_mov_epi64(simde_mm_castpd_si128(src), k, simde_mm_castpd_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_pd + #define _mm_mask_mov_pd(src, k, a) simde_mm_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_mov_ps (simde__m128 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_ps(src, k, a); + #else + return simde_mm_castsi128_ps(simde_mm_mask_mov_epi32(simde_mm_castps_si128(src), k, simde_mm_castps_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_ps + #define _mm_mask_mov_ps(src, k, a) simde_mm_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi8(src, k, a); + #else + simde__m256i_private + r_, + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi8(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi8 + #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi16(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi16 + #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi32(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi32 + #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi64(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi64 + #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_pd(src, k, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_pd + #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_ps(src, k, a); + #else + return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_ps + #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_mov_epi8(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi8 + #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_mov_epi16(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi16 + #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_epi32(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi32 + #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_epi64(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi64 + #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_pd(src, k, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_pd + #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_ps(src, k, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_ps + #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi8(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi8 + #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi16(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi16 + #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi32(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi32 + #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi64(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + /* N.B. CM: No fallbacks as there are only two elements */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi64 + #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_pd(k, a); + #else + return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_pd + #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_ps(k, a); + #else + return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_ps + #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi8(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi8 + #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi16(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi16 + #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi32(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi32 + #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi64(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi64 + #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_pd(k, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_pd + #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_ps(k, a); + #else + return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_ps + #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_mov_epi8(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi8 + #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_mov_epi16(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi16 + #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_epi32(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi32 + #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_epi64(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi64 + #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_pd(k, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_pd + #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_ps(k, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_ps + #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ +/* :: End simde/x86/avx512/mov.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_dpwssd_epi32(simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_dpwssd_epi32(src, a, b); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) + int32_t x1_ SIMDE_VECTOR(32); + int32_t x2_ SIMDE_VECTOR(32); + simde__m128i_private + r1_[2], + r2_[2]; + + a_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 16, + a_.i16, a_.i16, + 0, 2, 4, 6, + 1, 3, 5, 7 + ); + b_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 16, + b_.i16, b_.i16, + 0, 2, 4, 6, + 1, 3, 5, 7 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.i16); + SIMDE_CONVERT_VECTOR_(x2_, b_.i16); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + src_.i32 += + (r1_[0].i32 * r2_[0].i32) + + (r1_[1].i32 * r2_[1].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.i16[0])) ; i++) { + src_.i32[i / 2] += HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + } + #endif + + return simde__m128i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_dpwssd_epi32 + #define _mm_dpwssd_epi32(src, a, b) simde_mm_dpwssd_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_dpwssd_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_mask_dpwssd_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_dpwssd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_dpwssd_epi32 + #define _mm_mask_dpwssd_epi32(src, k, a, b) simde_mm_mask_dpwssd_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_dpwssd_epi32(simde__mmask8 k, simde__m128i 
src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_maskz_dpwssd_epi32(k, src, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_dpwssd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_dpwssd_epi32 + #define _mm_maskz_dpwssd_epi32(k, src, a, b) simde_mm_maskz_dpwssd_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_dpwssd_epi32(simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_dpwssd_epi32(src, a, b); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) + int32_t x1_ SIMDE_VECTOR(64); + int32_t x2_ SIMDE_VECTOR(64); + simde__m256i_private + r1_[2], + r2_[2]; + + a_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 32, + a_.i16, a_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, + 1, 3, 5, 7, 9, 11, 13, 15 + ); + b_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 32, + b_.i16, b_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, + 1, 3, 5, 7, 9, 11, 13, 15 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.i16); + SIMDE_CONVERT_VECTOR_(x2_, b_.i16); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + src_.i32 += + (r1_[0].i32 * r2_[0].i32) + + (r1_[1].i32 * r2_[1].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.i16[0])) ; i++) { + src_.i32[i / 2] += HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + } + #endif + + return simde__m256i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_dpwssd_epi32 + #define _mm256_dpwssd_epi32(src, a, b) simde_mm256_dpwssd_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_dpwssd_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_mask_dpwssd_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_dpwssd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_dpwssd_epi32 + #define _mm256_mask_dpwssd_epi32(src, k, a, b) simde_mm256_mask_dpwssd_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_dpwssd_epi32(simde__mmask8 k, simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_maskz_dpwssd_epi32(k, src, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_dpwssd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_dpwssd_epi32 + #define _mm256_maskz_dpwssd_epi32(k, src, a, b) simde_mm256_maskz_dpwssd_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_dpwssd_epi32(simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_dpwssd_epi32(src, a, b); + #else + simde__m512i_private + src_ = 
simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) + int32_t x1_ SIMDE_VECTOR(128); + int32_t x2_ SIMDE_VECTOR(128); + simde__m512i_private + r1_[2], + r2_[2]; + + a_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 64, + a_.i16, a_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 + ); + b_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 64, + b_.i16, b_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.i16); + SIMDE_CONVERT_VECTOR_(x2_, b_.i16); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + src_.i32 += + (r1_[0].i32 * r2_[0].i32) + + (r1_[1].i32 * r2_[1].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.i16[0])) ; i++) { + src_.i32[i / 2] += HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + } + #endif + + return simde__m512i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_dpwssd_epi32 + #define _mm512_dpwssd_epi32(src, a, b) simde_mm512_dpwssd_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_dpwssd_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_mask_dpwssd_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_dpwssd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_dpwssd_epi32 + #define _mm512_mask_dpwssd_epi32(src, k, a, b) simde_mm512_mask_dpwssd_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_dpwssd_epi32(simde__mmask16 k, simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_maskz_dpwssd_epi32(k, src, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_dpwssd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_dpwssd_epi32 + #define _mm512_maskz_dpwssd_epi32(k, src, a, b) simde_mm512_maskz_dpwssd_epi32(k, src, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DPWSSD_H) */ +/* :: End simde/x86/avx512/dpwssd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SET1_H) +#define SIMDE_X86_AVX512_SET1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi8(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi8 + #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_set1_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi8 + #define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_set1_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi8 + #define _mm512_maskz_set1_epi8(k, a) simde_mm512_maskz_set1_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi16(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi16 + #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_set1_epi16(src, k, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi16 + #define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_set1_epi16(k, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi16 + #define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m512i +simde_mm512_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi32(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi32 + #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_set1_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi32 + #define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_set1_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi32 + #define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi64 (int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi64(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi64 + #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_set1_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi64 + #define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_set1_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi64 + #define _mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu8 (uint8_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu16 (uint16_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu32 (uint32_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a; 
+ } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu64 (uint64_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_ps(a); + #else + simde__m512_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_ps + #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_pd(a); + #else + simde__m512d_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_pd + #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET1_H) */ +/* :: End simde/x86/avx512/set1.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/add.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_ADD_H) +#define SIMDE_X86_AVX512_ADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2019-2020 Michael R. Crusoe + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX2_H) +#define SIMDE_X86_AVX2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi8(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi8 + #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi16(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi16 + #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi32(simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi32(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi32 + #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi8 + #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi16 + #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi16(a, b); + #else + return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi16 + #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi32 + #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi32(a, b); + #else + return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi32 + #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi64 + #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm256_setzero_si256(); + + for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.m128i_private[h].i8[i] = 0; + } else if (srcpos > 15) { + r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; + } else { + r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; + } + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) +# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_alignr_epi8(a, b, count) \ + simde_mm256_set_m128i( \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_alignr_epi8 + #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_and_si256(a, b); + #else + 
simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_si256 + #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_andnot_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_si256 + #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi8 + #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi16 + #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadds_epi16(a, b); + #else + return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadds_epi16 + #define 
_mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu8 + #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu16 + #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu8 + #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu16 + #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) +# define simde_mm_blend_epi32(a, b, imm8) \ + simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi32 + #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) +#elif defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi16(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi16 + #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi32(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi32 + #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_blendv_epi8(a, b, mask); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + mask_ = simde__m256i_to_private(mask); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); + r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(mask_.i8) tmp = mask_.i8 >> 7; + r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + int8_t tmp = mask_.i8[i] >> 7; + r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_epi8 + #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastb_epi8(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastb_epi8 + #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastb_epi8(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastb_epi8 + #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastw_epi16(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + 
#undef _mm_broadcastw_epi16 + #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastw_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastw_epi16 + #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastd_epi32(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastd_epi32 + #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastd_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastd_epi32 + #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastq_epi64(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastq_epi64 + #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastq_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastq_epi64 + #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastss_ps(a); + #elif defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_shuffle_ps(a, a, 0); + #else + simde__m128_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { 
+ r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastss_ps + #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastss_ps(a); + #else + simde__m256_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + __m128 tmp = _mm_permute_ps(a_.n, 0); + r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); + #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) + r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastss_ps + #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_broadcastsd_pd (simde__m128d a) { + return simde_mm_movedup_pd(a); +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastsd_pd + #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastsd_pd(a); + #else + simde__m256d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsd_pd + #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) + return _mm256_broadcastsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = a_; + r_.m128i_private[1] = a_; + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = a_.i64[1]; + r_.i64[2] = a_.i64[0]; + r_.i64[3] = a_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsi128_si256 + #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) + #undef _mm_broadcastsi128_si256 + #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i - imm8; + if(i >= (ssize/2)) { + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e 
>= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i + imm8; + if(i < (ssize/2)) { + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi8 + #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi16 + #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi32 + #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi64 + #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi8 + #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 > b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi16 + #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi32 + #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi64 + #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi16 + #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi32 + #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi64 + #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi32 + #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi64 + #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_epi64 + #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi16 + #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi32 + #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi64 + #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi32 + #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if 
defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi64 + #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu32_epi64 + #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi8 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31){ + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i8[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi8 + #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi16 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i16[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi16 + #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extracti128_si256 + #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return 
simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi32 + #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi32 + #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi32 + #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi32 + #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi32 + #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { 
+ r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi32 + #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi32 + #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi32 + #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi64 + #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi64 + #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + 
simde__m256i_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi64 + #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi64 + #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi64 + #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi64 + #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi64 + #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi64 + #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_ps + #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_ps + #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_ps + #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + src_ = simde__m256_to_private(src), + mask_ = simde__m256_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const 
uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_ps + #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_ps + #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, 
SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_ps + #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_ps + #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_ps + #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + 
HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_pd + #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_pd + #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m256d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, 
base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_pd + #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_pd + #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_pd + #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = 
simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_pd + #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_pd + #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + 
#define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_pd + #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[ imm8 & 1 ] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_inserti128_si256 + #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_madd_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); + SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); + + SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); + SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); + product = a32x16 * b32x16; + + even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); + odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); + + r_.i32 = even + odd; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_madd_epi16 + #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maddubs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maddubs_epi16 + #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi32(mem_addr, mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi32 + #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi32(mem_addr, mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi32 + #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = mask_shr_.i64[i] ? mem_addr[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi64 + #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : INT64_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi64 + #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi32(mem_addr, mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi32 + #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi32(mem_addr, mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi32 + #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi64 + #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi64 + #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_max_epi8(a, b); + #else + 
simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi8 + #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu8 + #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu16 + #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu32 + #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi16 + #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi32 + #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_min_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi8 + #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi16 + #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi32 + #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu8 + #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu16 + #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu32 + #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_movemask_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_movemask_epi8(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + uint32_t r = 0; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); + } + #else + r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); + } + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_epi8 + #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + const int a_offset1 = imm8 & 4; + const int b_offset1 = (imm8 & 3) << 2; + const int a_offset2 = (imm8 >> 3) & 4; + const int b_offset2 = ((imm8 >> 3) & 3) << 2; + + #if defined(simde_math_abs) + const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; + for (int i = 0 ; i < halfway_point ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); + r_.u16[halfway_point + i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_mpsadbw_epu8 + #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhrs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / 
sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi16(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi16 + #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi32(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi32 + #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_or_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_si256 + #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; + const size_t 
quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); + r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); + r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi16 + #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi32(a, b); + #else + simde__m256i_private + r_, + v_[] = { + simde__m256i_to_private(a), + simde__m256i_to_private(b) + }; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); + r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi32 + #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); + r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); + r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); + r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi16 + #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); + r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); + r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); + r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi32 + #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2x128_si256 + #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; + r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; + r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; + r_.i64[3] = (imm8 & 0x80) ? 
a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_epi64 + #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; + r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; + r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; + r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_pd + #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 7]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_epi32 + #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); + #else + return _mm256_permutevar8x32_ps(a, idx); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[idx_.i32[i] & 7]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_ps + #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sad_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sad_epu8 + #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_shuffle_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { + r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; + r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi8 + #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_shuffle_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 32, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi32 + #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 
3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4, \ + 8, 9, 10, 11, \ + ((((imm8) ) & 3) + 8 + 4), \ + ((((imm8) >> 2) & 3) + 8 + 4), \ + ((((imm8) >> 4) & 3) + 8 + 4), \ + ((((imm8) >> 6) & 3) + 8 + 4) \ + ) }); })) +#else +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflehi_epi16 + #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7, \ + ((((imm8) ) & 3) + 8), \ + ((((imm8) >> 2) & 3) + 8), \ + ((((imm8) >> 4) & 3) + 8), \ + ((((imm8) >> 6) & 3) + 8), \ + 12, 13, 14, 15) }); })) +#else +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflelo_epi16 + #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi8 + #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi16 + #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi32 + #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi16 + #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 31) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi32 + #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = 
simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 63) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi64 + #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* Note: There is no consistency in how compilers handle values outside of + the expected range, hence the discrepancy between what we allow and what + Intel specifies. Some compilers will return 0, others seem to just mask + off everything outside of the range. */ + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { + r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi16 + #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { + r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi32 + #define _mm256_slli_epi32(a, imm8) 
simde_mm256_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi64 + #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) - imm8; + r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_si256 + #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); + r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi32 + #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi32 + #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); + r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi64 + #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi64 + #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi16 + #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi32 + #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi16 + #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi32 + #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_srav_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); + r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srav_epi32 + #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srav_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + count_ = simde__m256i_to_private(count); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); + r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + if (shift > 31) shift = 31; + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srav_epi32 + #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi16 + #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi32 + #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi64 + #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + if (imm8 > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { + r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); + } + #else + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi16 + #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { + r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi32 + #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi64 + #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = imm8 + HEDLEY_STATIC_CAST(int, i); + r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_si256 + #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi32 + #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi32 + #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi64 + #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi64 + #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi8 + #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ 
= simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi16 + #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi16(a, b); + #else + return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi16 + #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi32 + #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi32(a, b); + #else + return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi32 + #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi64 + #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + 
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi8 + #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi16 + #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsubs_epi16(a, b); + #else + return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsubs_epi16 + #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu8 + #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + 
+ #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu16 + #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_x_mm256_test_all_ones (simde__m256i a) { + simde__m256i_private a_ = simde__m256i_to_private(a); + int r; + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi8 + #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi16 + #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); + 
#elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 0, 8, 1, 9, 4, 12, 5, 13); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi32 + #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i]; + r_.i64[2 * i + 1] = b_.i64[2 * i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi64 + #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi8 + #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 4, 20, 5, 21, 6, 22, 7, 23, + 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi16 + #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 2, 10, 3, 11, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi32 + #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i + 1]; + r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi64 + #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_xor_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_si256 + #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX2_H) */ +/* :: End simde/x86/avx2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + 
return _mm_mask_add_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi8 + #define _mm_mask_add_epi8(src, k, a, b) simde_mm_mask_add_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_add_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi8 + #define _mm_maskz_add_epi8(k, a, b) simde_mm_maskz_add_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_add_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi16 + #define _mm_mask_add_epi16(src, k, a, b) simde_mm_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_add_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi16 + #define _mm_maskz_add_epi16(k, a, b) simde_mm_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_add_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi32 + #define _mm_mask_add_epi32(src, k, a, b) simde_mm_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_add_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi32 + #define _mm_maskz_add_epi32(k, a, b) simde_mm_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_add_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi64 + #define _mm_mask_add_epi64(src, k, a, b) simde_mm_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_add_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef 
_mm_maskz_add_epi64 + #define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return _mm_mask_add_ss(src, k, a, b); + #elif 1 + simde__m128_private + src_ = simde__m128_to_private(src), + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + r_ = simde__m128_to_private(a); + + r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : src_.f32[0]; + + return simde__m128_from_private(r_); + #else + return simde_mm_move_ss(a, simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_ss + #define _mm_mask_add_ss(src, k, a, b) simde_mm_mask_add_ss(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return _mm_maskz_add_ss(k, a, b); + #elif 1 + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + r_ = simde__m128_to_private(a); + + r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : 0.0f; + + return simde__m128_from_private(r_); + #else + return simde_mm_move_ss(a, simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_ss + #define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_add_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi16 + #define _mm256_mask_add_epi16(src, k, a, b) simde_mm256_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_add_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi16 + #define _mm256_maskz_add_epi16(k, a, b) simde_mm256_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_add_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi32 + #define _mm256_mask_add_epi32(src, k, a, b) simde_mm256_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_add_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_add_epi32(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi32 + #define _mm256_maskz_add_epi32(k, a, b) simde_mm256_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_add_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi64 + #define _mm256_mask_add_epi64(src, k, a, b) simde_mm256_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_add_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi64 + #define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_add_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi8 + #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_add_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi8 + #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_add_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi8 + #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_add_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi16 + #define _mm512_add_epi16(a, b) 
simde_mm512_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_add_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi16 + #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_add_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi16 + #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_ARM_SVE_NATIVE) + const size_t n = sizeof(a_.i32) / sizeof(a_.i32[0]); + size_t i = 0; + svbool_t pg = svwhilelt_b32(i, n); + do { + svint32_t + va = svld1_s32(pg, &(a_.i32[i])), + vb = svld1_s32(pg, &(b_.i32[i])); + svst1_s32(pg, &(r_.i32[i]), svadd_s32_x(pg, va, vb)); + i += svcntw(); + pg = svwhilelt_b32(i, n); + } while (svptest_any(svptrue_b32(), pg)); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi32 + #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi32 + #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi32 + #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = 
simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi64 + #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi64 + #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi64 + #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_add_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_ps + #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_ps + #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_ps + #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_add_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = 
simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_pd + #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_pd + #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_pd + #define _mm512_maskz_add_pd(k, a, b) simde_mm512_maskz_add_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ADD_H) */ +/* :: End simde/x86/avx512/add.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_4dpwssd_epi32 (simde__m512i src, simde__m512i a0, simde__m512i a1, simde__m512i a2, simde__m512i a3, simde__m128i* b) { + #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + return _mm512_4dpwssd_epi32(src, a0, a1, a2, a3, b); + #else + simde__m128i_private bv = simde__m128i_to_private(simde_mm_loadu_epi32(b)); + simde__m512i r; + + r = simde_mm512_dpwssd_epi32(src, a0, simde_mm512_set1_epi32(bv.i32[0])); + r = simde_mm512_add_epi32(simde_mm512_dpwssd_epi32(src, a1, simde_mm512_set1_epi32(bv.i32[1])), r); + r = simde_mm512_add_epi32(simde_mm512_dpwssd_epi32(src, a2, simde_mm512_set1_epi32(bv.i32[2])), r); + r = simde_mm512_add_epi32(simde_mm512_dpwssd_epi32(src, a3, simde_mm512_set1_epi32(bv.i32[3])), r); + + return r; + #endif +} +#if defined(SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES) + #undef simde_mm512_4dpwssd_epi32 + #define _mm512_4dpwssd_epi32(src, a0, a1, a2, a3, b) simde_mm512_4dpwssd_epi32(src, a0, a1, a2, a3, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_4dpwssd_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a0, simde__m512i a1, simde__m512i a2, simde__m512i a3, simde__m128i* b) { + #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + return _mm512_mask_4dpwssd_epi32(src, k, a0, a1, a2, a3, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_4dpwssd_epi32(src, a0, a1, a2, a3, b)); + #endif +} +#if defined(SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES) + #undef simde_mm512_mask_4dpwssd_epi32 + #define _mm512_mask_4dpwssd_epi32(src, k, a0, a1, a2, a3, b) simde_mm512_mask_4dpwssd_epi32(src, k, a0, a1, a2, a3, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_4dpwssd_epi32 (simde__mmask16 k, simde__m512i src, simde__m512i a0, simde__m512i a1, simde__m512i a2, simde__m512i a3, simde__m128i* b) { + #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + return _mm512_mask_4dpwssd_epi32(k, src, a0, a1, 
a2, a3, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_4dpwssd_epi32(src, a0, a1, a2, a3, b)); + #endif +} +#if defined(SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES) + #undef simde_mm512_maskz_4dpwssd_epi32 + #define _mm512_maskz_4dpwssd_epi32(k, src, a0, a1, a2, a3, b) simde_mm512_maskz_4dpwssd_epi32(k, src, a0, a1, a2, a3, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_4DPWSSD_H) */ +/* :: End simde/x86/avx512/4dpwssd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/4dpwssds.h :: */ +#if !defined(SIMDE_X86_AVX512_4DPWSSDS_H) +#define SIMDE_X86_AVX512_4DPWSSDS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/dpwssds.h :: */ +#if !defined(SIMDE_X86_AVX512_DPWSSDS_H) +#define SIMDE_X86_AVX512_DPWSSDS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_dpwssds_epi32 (simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_dpwssds_epi32(src, a, b); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t x1_ SIMDE_VECTOR(32); + int32_t x2_ SIMDE_VECTOR(32); + simde__m128i_private + r1_[2], + r2_[2]; + + a_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 16, + a_.i16, a_.i16, + 0, 2, 4, 6, + 1, 3, 5, 7 + ); + b_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 16, + b_.i16, b_.i16, + 0, 2, 4, 6, + 1, 3, 5, 7 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.i16); + SIMDE_CONVERT_VECTOR_(x2_, b_.i16); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + uint32_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(src_.u32), ((r1_[0].i32 * r2_[0].i32) + (r1_[1].i32 * r2_[1].i32))); + uint32_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(src_.u32), src_.i32); + uint32_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + src_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0]) / 2) ; i++) { + src_.i32[i] = + simde_math_adds_i32( + src_.i32[i], + HEDLEY_STATIC_CAST(int32_t, a_.i16[(2 * i) ]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[(2 * i) ]) + + HEDLEY_STATIC_CAST(int32_t, a_.i16[(2 * i) + 1]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[(2 * i) + 1]) + ); + } + #endif + + return simde__m128i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_dpwssds_epi32 + #define _mm_dpwssds_epi32(src, a, b) simde_mm_dpwssds_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_dpwssds_epi32 (simde__m128i 
src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_dpwssds_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_dpwssds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_dpwssds_epi32 + #define _mm_mask_dpwssds_epi32(src, k, a, b) simde_mm_mask_dpwssds_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_dpwssds_epi32 (simde__mmask8 k, simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_dpwssds_epi32(k, src, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_dpwssds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_dpwssds_epi32 + #define _mm_maskz_dpwssds_epi32(k, src, a, b) simde_mm_maskz_dpwssds_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_dpwssds_epi32 (simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_dpwssds_epi32(src, a, b); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t x1_ SIMDE_VECTOR(64); + int32_t x2_ SIMDE_VECTOR(64); + simde__m256i_private + r1_[2], + r2_[2]; + + a_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 32, + a_.i16, a_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, + 1, 3, 5, 7, 9, 11, 13, 15 + ); + b_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 32, + b_.i16, b_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, + 1, 3, 5, 7, 9, 11, 13, 15 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.i16); + SIMDE_CONVERT_VECTOR_(x2_, b_.i16); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + uint32_t au SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(src_.u32), ((r1_[0].i32 * r2_[0].i32) + (r1_[1].i32 * r2_[1].i32))); + uint32_t bu SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(src_.u32), src_.i32); + uint32_t ru SIMDE_VECTOR(32) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + src_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0]) / 2) ; i++) { + src_.i32[i] = + simde_math_adds_i32( + src_.i32[i], + HEDLEY_STATIC_CAST(int32_t, a_.i16[(2 * i) ]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[(2 * i) ]) + + HEDLEY_STATIC_CAST(int32_t, a_.i16[(2 * i) + 1]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[(2 * i) + 1]) + ); + } + #endif + + return simde__m256i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_dpwssds_epi32 + #define _mm256_dpwssds_epi32(src, a, b) simde_mm256_dpwssds_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_dpwssds_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_dpwssds_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_dpwssds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_dpwssds_epi32 + #define _mm256_mask_dpwssds_epi32(src, k, a, b) simde_mm256_mask_dpwssds_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_dpwssds_epi32 (simde__mmask8 k, simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_dpwssds_epi32(k, src, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_dpwssds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_dpwssds_epi32 + #define _mm256_maskz_dpwssds_epi32(k, src, a, b) simde_mm256_maskz_dpwssds_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_dpwssds_epi32 (simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_dpwssds_epi32(src, a, b); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t x1_ SIMDE_VECTOR(128); + int32_t x2_ SIMDE_VECTOR(128); + simde__m512i_private + r1_[2], + r2_[2]; + + a_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 64, + a_.i16, a_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 + ); + b_.i16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 64, + b_.i16, b_.i16, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.i16); + SIMDE_CONVERT_VECTOR_(x2_, b_.i16); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + uint32_t au SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(src_.u32), ((r1_[0].i32 * r2_[0].i32) + (r1_[1].i32 * r2_[1].i32))); + uint32_t bu SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(src_.u32), src_.i32); + uint32_t ru SIMDE_VECTOR(64) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + src_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0]) / 2) ; i++) { + src_.i32[i] = + simde_math_adds_i32( + src_.i32[i], + HEDLEY_STATIC_CAST(int32_t, a_.i16[(2 * i) ]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[(2 * i) ]) + + HEDLEY_STATIC_CAST(int32_t, a_.i16[(2 * i) + 1]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[(2 * i) + 1]) + ); + } + #endif + + return simde__m512i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_dpwssds_epi32 + #define _mm512_dpwssds_epi32(src, a, b) simde_mm512_dpwssds_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_dpwssds_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_mask_dpwssds_epi32(src, k, a, 
b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_dpwssds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_dpwssds_epi32 + #define _mm512_mask_dpwssds_epi32(src, k, a, b) simde_mm512_mask_dpwssds_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_dpwssds_epi32 (simde__mmask16 k, simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_maskz_dpwssds_epi32(k, src, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_dpwssds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_dpwssds_epi32 + #define _mm512_maskz_dpwssds_epi32(k, src, a, b) simde_mm512_maskz_dpwssds_epi32(k, src, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DPWSSDS_H) */ +/* :: End simde/x86/avx512/dpwssds.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/adds.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_ADDS_H) +#define SIMDE_X86_AVX512_ADDS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_adds_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_adds_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_adds_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_adds_epi8 + #define _mm_mask_adds_epi8(src, k, a, b) simde_mm_mask_adds_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_adds_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_adds_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_adds_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_adds_epi8 + #define _mm_maskz_adds_epi8(k, a, b) simde_mm_maskz_adds_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_adds_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_adds_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_adds_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_adds_epi16 + #define _mm_mask_adds_epi16(src, k, a, b) simde_mm_mask_adds_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_adds_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_adds_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_adds_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_adds_epi16 + #define _mm_maskz_adds_epi16(k, a, b) simde_mm_maskz_adds_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_adds_epi8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_adds_epi8(src, k, a, b); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_adds_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_adds_epi8 + #define _mm256_mask_adds_epi8(src, k, a, b) simde_mm256_mask_adds_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_adds_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_adds_epi8(k, a, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_adds_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_adds_epi8 + #define _mm256_maskz_adds_epi8(k, a, b) 
simde_mm256_maskz_adds_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_adds_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_adds_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_adds_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_adds_epi16 + #define _mm256_mask_adds_epi16(src, k, a, b) simde_mm256_mask_adds_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_adds_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_adds_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_adds_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_adds_epi16 + #define _mm256_maskz_adds_epi16(k, a, b) simde_mm256_maskz_adds_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_adds_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_adds_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_adds_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_adds_epi8 + #define _mm512_adds_epi8(a, b) simde_mm512_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_adds_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_adds_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_adds_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_adds_epi8 + #define _mm512_mask_adds_epi8(src, k, a, b) simde_mm512_mask_adds_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_adds_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_adds_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_adds_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_adds_epi8 + #define _mm512_maskz_adds_epi8(k, a, b) simde_mm512_maskz_adds_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_adds_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_adds_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_adds_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = 
simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_adds_epi16 + #define _mm512_adds_epi16(a, b) simde_mm512_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_adds_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_adds_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_adds_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_adds_epi16 + #define _mm512_mask_adds_epi16(src, k, a, b) simde_mm512_mask_adds_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_adds_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_adds_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_adds_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_adds_epi16 + #define _mm512_maskz_adds_epi16(k, a, b) simde_mm512_maskz_adds_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_adds_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_adds_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_adds_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_adds_epu8 + #define _mm512_adds_epu8(a, b) simde_mm512_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_adds_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_adds_epu8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_adds_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_adds_epu8 + #define _mm512_mask_adds_epu8(src, k, a, b) simde_mm512_mask_adds_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_adds_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_adds_epu8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_adds_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_adds_epu8 + #define _mm512_maskz_adds_epu8(k, a, b) simde_mm512_maskz_adds_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_adds_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_adds_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_adds_epu16(a_.m256i[i], b_.m256i[i]); 
+ } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_adds_epu16 + #define _mm512_adds_epu16(a, b) simde_mm512_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_adds_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_adds_epu16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_adds_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_adds_epu16 + #define _mm512_mask_adds_epu16(src, k, a, b) simde_mm512_mask_adds_epu16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_adds_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_adds_epu16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_adds_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_adds_epu16 + #define _mm512_maskz_adds_epu16(k, a, b) simde_mm512_maskz_adds_epu16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_adds_epi32(simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vqaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6) + r_.altivec_i32 = vec_adds(a_.altivec_i32, b_.altivec_i32); + #else + #if defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/56544654/501126 */ + const __m128i int_max = _mm_set1_epi32(INT32_MAX); + + /* normal result (possibly wraps around) */ + const __m128i sum = _mm_add_epi32(a_.n, b_.n); + + /* If result saturates, it has the same sign as both a and b */ + const __m128i sign_bit = _mm_srli_epi32(a_.n, 31); /* shift sign to lowest bit */ + + #if defined(SIMDE_X86_AVX512VL_NATIVE) + const __m128i overflow = _mm_ternarylogic_epi32(a_.n, b_.n, sum, 0x42); + #else + const __m128i sign_xor = _mm_xor_si128(a_.n, b_.n); + const __m128i overflow = _mm_andnot_si128(sign_xor, _mm_xor_si128(a_.n, sum)); + #endif + + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + r_.n = _mm_mask_add_epi32(sum, _mm_movepi32_mask(overflow), int_max, sign_bit); + #else + const __m128i saturated = _mm_add_epi32(int_max, sign_bit); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + r_.n = + _mm_castps_si128( + _mm_blendv_ps( + _mm_castsi128_ps(sum), + _mm_castsi128_ps(saturated), + _mm_castsi128_ps(overflow) + ) + ); + #else + const __m128i overflow_mask = _mm_srai_epi32(overflow, 31); + r_.n = + _mm_or_si128( + _mm_and_si128(overflow_mask, saturated), + _mm_andnot_si128(overflow_mask, sum) + ); + #endif + #endif + #elif defined(SIMDE_VECTOR_SCALAR) + uint32_t au SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.i32); + uint32_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.i32); + uint32_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (au & ~m) | (ru & m)); + 
#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = simde_math_adds_i32(a_.i32[i], b_.i32[i]); + } + #endif + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_adds_epi32(simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_adds_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SCALAR) + uint32_t au SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.i32); + uint32_t bu SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.i32); + uint32_t ru SIMDE_VECTOR(32) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = simde_math_adds_i32(a_.i32[i], b_.i32[i]); + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_adds_epi32(simde__m512i a, simde__m512i b) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_adds_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_adds_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SCALAR) + uint32_t au SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(au), a_.i32); + uint32_t bu SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), b_.i32); + uint32_t ru SIMDE_VECTOR(64) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = simde_math_adds_i32(a_.i32[i], b_.i32[i]); + } + #endif + + return simde__m512i_from_private(r_); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ADDS_H) */ +/* :: End simde/x86/avx512/adds.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_4dpwssds_epi32 (simde__m512i src, simde__m512i a0, simde__m512i a1, simde__m512i a2, simde__m512i a3, simde__m128i* b) { + #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + return _mm512_4dpwssds_epi32(src, a0, a1, a2, a3, b); + #else + simde__m128i_private bv = simde__m128i_to_private(simde_mm_loadu_epi32(b)); + simde__m512i r; + + r = simde_mm512_dpwssds_epi32(src, a0, simde_mm512_set1_epi32(bv.i32[0])); + r = simde_x_mm512_adds_epi32(simde_mm512_dpwssds_epi32(src, a1, simde_mm512_set1_epi32(bv.i32[1])), r); + r = simde_x_mm512_adds_epi32(simde_mm512_dpwssds_epi32(src, a2, simde_mm512_set1_epi32(bv.i32[2])), r); + r = 
simde_x_mm512_adds_epi32(simde_mm512_dpwssds_epi32(src, a3, simde_mm512_set1_epi32(bv.i32[3])), r); + + return r; + #endif +} +#if defined(SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES) + #undef simde_mm512_4dpwssds_epi32 + #define _mm512_4dpwssds_epi32(src, a0, a1, a2, a3, b) simde_mm512_4dpwssds_epi32(src, a0, a1, a2, a3, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_4dpwssds_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a0, simde__m512i a1, simde__m512i a2, simde__m512i a3, simde__m128i* b) { + #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + return _mm512_mask_4dpwssds_epi32(src, k, a0, a1, a2, a3, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_4dpwssds_epi32(src, a0, a1, a2, a3, b)); + #endif +} +#if defined(SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES) + #undef simde_mm512_mask_4dpwssds_epi32 + #define _mm512_mask_4dpwssds_epi32(src, k, a0, a1, a2, a3, b) simde_mm512_mask_4dpwssds_epi32(src, k, a0, a1, a2, a3, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_4dpwssds_epi32 (simde__mmask16 k, simde__m512i src, simde__m512i a0, simde__m512i a1, simde__m512i a2, simde__m512i a3, simde__m128i* b) { + #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + return _mm512_mask_4dpwssds_epi32(k, src, a0, a1, a2, a3, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_4dpwssds_epi32(src, a0, a1, a2, a3, b)); + #endif +} +#if defined(SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES) + #undef simde_mm512_maskz_4dpwssds_epi32 + #define _mm512_maskz_4dpwssds_epi32(k, src, a0, a1, a2, a3, b) simde_mm512_maskz_4dpwssds_epi32(k, src, a0, a1, a2, a3, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_4DPWSSDS_H) */ +/* :: End simde/x86/avx512/4dpwssds.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/abs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_ABS_H) +#define SIMDE_X86_AVX512_ABS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_abs_epi8(src, k, a); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi8 + #define _mm_mask_abs_epi8(src, k, a) simde_mm_mask_abs_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi8(simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_abs_epi8(k, a); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi8 + #define _mm_maskz_abs_epi8(k, a) simde_mm_maskz_abs_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_abs_epi16(src, k, a); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi16 + #define _mm_mask_abs_epi16(src, k, a) simde_mm_mask_abs_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi16(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_abs_epi16(k, a); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi16 + #define _mm_maskz_abs_epi16(k, a) simde_mm_maskz_abs_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_abs_epi32(src, k, a); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi32 + #define _mm_mask_abs_epi32(src, k, a) simde_mm_mask_abs_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi32(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_abs_epi32(k, a); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi32 + #define _mm_maskz_abs_epi32(k, a) simde_mm_maskz_abs_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi64(simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_abs_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a, 0xF5), 31); + return 
_mm_sub_epi64(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vabsq_s64(a_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64x2_t m = vshrq_n_s64(a_.neon_i64, 63); + r_.neon_i64 = vsubq_s64(veorq_s64(a_.neon_i64, m), m); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_abs(a_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_abs(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i64) z = { 0, }; + __typeof__(r_.i64) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < z); + r_.i64 = (-a_.i64 & m) | (a_.i64 & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_abs_epi64 + #define _mm_abs_epi64(a) simde_mm_abs_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_abs_epi64(src, k, a); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi64 + #define _mm_mask_abs_epi64(src, k, a) simde_mm_mask_abs_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi64(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_abs_epi64(k, a); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi64 + #define _mm_maskz_abs_epi64(k, a) simde_mm_maskz_abs_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi64(simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_abs_epi64(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_abs_epi64(a_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi64 + #define _mm256_abs_epi64(a) simde_mm256_abs_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_abs_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_abs_epi64(src, k, a); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_abs_epi64 + #define _mm256_mask_abs_epi64(src, k, a) simde_mm256_mask_abs_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_abs_epi64(simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_abs_epi64(k, a); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_abs_epi64 + #define _mm256_maskz_abs_epi64(k, a) simde_mm256_maskz_abs_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_abs_epi8(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi8(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi8 + #define _mm512_abs_epi8(a) simde_mm512_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_abs_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi8 + #define _mm512_mask_abs_epi8(src, k, a) simde_mm512_mask_abs_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi8 (simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_abs_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi8 + #define _mm512_maskz_abs_epi8(k, a) simde_mm512_maskz_abs_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi16 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_abs_epi16(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi16(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi16 + #define _mm512_abs_epi16(a) simde_mm512_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_abs_epi16(src, k, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi16 + #define _mm512_mask_abs_epi16(src, k, a) simde_mm512_mask_abs_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi16 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_abs_epi16(k, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi16 + #define _mm512_maskz_abs_epi16(k, a) simde_mm512_maskz_abs_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi32(simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_abs_epi32(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi32(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi32 + #define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_abs_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi32 + #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_abs_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi32 + #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi64(simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_abs_epi64(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi64(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi64 + #define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_abs_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi64 + #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_abs_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi64 + #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_abs_ps(simde__m512 v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + return _mm512_abs_ps(v2); + #else + simde__m512_private + r_, + v2_ = simde__m512_to_private(v2); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vabsq_f32(v2_.m128_private[i].neon_f32); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = vec_abs(v2_.m128_private[i].altivec_f32); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { + r_.f32[i] = (v2_.f32[i] < INT64_C(0)) ? 
-v2_.f32[i] : v2_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_ps + #define _mm512_abs_ps(v2) simde_mm512_abs_ps(v2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_abs_ps(simde__m512 src, simde__mmask16 k, simde__m512 v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + return _mm512_mask_abs_ps(src, k, v2); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_abs_ps(v2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_ps + #define _mm512_mask_abs_ps(src, k, v2) simde_mm512_mask_abs_ps(src, k, v2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_abs_pd(simde__m512d v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,3,0)) + return _mm512_abs_pd(v2); + #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ + return _mm512_abs_pd(_mm512_castpd_ps(v2)); + #else + simde__m512d_private + r_, + v2_ = simde__m512d_to_private(v2); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vabsq_f64(v2_.m128d_private[i].neon_f64); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = vec_abs(v2_.m128d_private[i].altivec_f64); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { + r_.f64[i] = (v2_.f64[i] < INT64_C(0)) ? 
-v2_.f64[i] : v2_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_pd + #define _mm512_abs_pd(v2) simde_mm512_abs_pd(v2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_abs_pd(simde__m512d src, simde__mmask8 k, simde__m512d v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,3,0)) + return _mm512_mask_abs_pd(src, k, v2); + #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ + return _mm512_mask_abs_pd(src, k, _mm512_castpd_ps(v2)); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_abs_pd(v2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_pd + #define _mm512_mask_abs_pd(src, k, v2) simde_mm512_mask_abs_pd(src, k, v2) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ABS_H) */ +/* :: End simde/x86/avx512/abs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/and.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_AND_H) +#define SIMDE_X86_AVX512_AND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_and_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_and_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_pd + #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_and_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_and_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_ps + #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_and_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_ps + #define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_and_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_ps + #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_and_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + 
#undef _mm512_mask_and_pd + #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_and_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_pd + #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_epi32 + #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_and_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_epi32 + #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_and_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_epi32 + #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_epi64 + #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_and_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_epi64 + #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i 
a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_and_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_epi64 + #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_si512 + #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_AND_H) */ +/* :: End simde/x86/avx512/and.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/andnot.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_ANDNOT_H) +#define SIMDE_X86_AVX512_ANDNOT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) +#else + #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_ps + #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) +#else + #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_ps + #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) +#else + #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_ps + #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) +#else + #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_pd + #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) +#else + #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_pd + #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) +#else + #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_pd + #define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return 
_mm512_andnot_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) +#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_si512 + #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b) + #undef _mm512_andnot_epi32 + #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) + #undef _mm512_andnot_epi64 + #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_andnot_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_epi32 + #define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_andnot_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_epi32 + #define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_andnot_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_epi64 + #define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_andnot_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_epi64 + #define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ANDNOT_H) */ +/* :: End simde/x86/avx512/andnot.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/avg.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including 
without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_AVG_H) +#define SIMDE_X86_AVX512_AVG_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_avg_epu8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_avg_epu8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_avg_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_avg_epu8 + #define _mm_mask_avg_epu8(src, k, a, b) simde_mm_mask_avg_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_avg_epu8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_avg_epu8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_avg_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_avg_epu8 + #define _mm_maskz_avg_epu8(k, a, b) simde_mm_maskz_avg_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_avg_epu16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_avg_epu16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_avg_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_avg_epu16 + #define _mm_mask_avg_epu16(src, k, a, b) simde_mm_mask_avg_epu16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_avg_epu16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_avg_epu16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_avg_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_avg_epu16 + #define _mm_maskz_avg_epu16(k, a, b) simde_mm_maskz_avg_epu16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_avg_epu8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if 
defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_avg_epu8(src, k, a, b); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_avg_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_avg_epu8 + #define _mm256_mask_avg_epu8(src, k, a, b) simde_mm256_mask_avg_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_avg_epu8(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_avg_epu8(k, a, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_avg_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_avg_epu8 + #define _mm256_maskz_avg_epu8(k, a, b) simde_mm256_maskz_avg_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_avg_epu16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_avg_epu16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_avg_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_avg_epu16 + #define _mm256_mask_avg_epu16(src, k, a, b) simde_mm256_mask_avg_epu16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_avg_epu16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_avg_epu16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_avg_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_avg_epu16 + #define _mm256_maskz_avg_epu16(k, a, b) simde_mm256_maskz_avg_epu16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_avg_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_avg_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_avg_epu8 + #define _mm512_avg_epu8(a, b) simde_mm512_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_avg_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_avg_epu8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_avg_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_avg_epu8 + #define _mm512_mask_avg_epu8(src, k, a, b) simde_mm512_mask_avg_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_avg_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_avg_epu8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_avg_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_avg_epu8 + #define _mm512_maskz_avg_epu8(k, a, b) simde_mm512_maskz_avg_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m512i +simde_mm512_avg_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_avg_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_avg_epu16 + #define _mm512_avg_epu16(a, b) simde_mm512_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_avg_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_avg_epu16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_avg_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_avg_epu16 + #define _mm512_mask_avg_epu16(src, k, a, b) simde_mm512_mask_avg_epu16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_avg_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_avg_epu16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_avg_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_avg_epu16 + #define _mm512_maskz_avg_epu16(k, a, b) simde_mm512_maskz_avg_epu16(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_AVG_H) */ +/* :: End simde/x86/avx512/avg.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/bitshuffle.h :: */ +#if !defined(SIMDE_X86_AVX512_BITSHUFFLE_H) +#define SIMDE_X86_AVX512_BITSHUFFLE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_bitshuffle_epi64_mask (simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_bitshuffle_epi64_mask(b, c); + #else + simde__m128i_private + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + simde__mmask16 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(b_.u64) rv = { 0, 0 }; + __typeof__(b_.u64) lshift = { 0, 8 }; + + for (int8_t i = 0 ; i < 8 ; i++) { + __typeof__(b_.u64) ct = (HEDLEY_REINTERPRET_CAST(__typeof__(ct), c_.u8) >> (i * 8)) & 63; + rv |= ((b_.u64 >> ct) & 1) << lshift; + lshift += 1; + } + + r = + HEDLEY_STATIC_CAST(simde__mmask16, rv[0]) | + HEDLEY_STATIC_CAST(simde__mmask16, rv[1]); + #else + for (size_t i = 0 ; i < (sizeof(c_.m64_private) / sizeof(c_.m64_private[0])) ; i++) { + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t j = 0 ; j < (sizeof(c_.m64_private[i].u8) / sizeof(c_.m64_private[i].u8[0])) ; j++) { + r |= (((b_.u64[i] >> (c_.m64_private[i].u8[j]) & 63) & 1) << ((i * 8) + j)); + } + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_bitshuffle_epi64_mask + #define _mm_bitshuffle_epi64_mask(b, c) simde_mm_bitshuffle_epi64_mask(b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 
+simde_mm_mask_bitshuffle_epi64_mask (simde__mmask16 k, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_bitshuffle_epi64_mask(k, b, c); + #else + return (k & simde_mm_bitshuffle_epi64_mask(b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_bitshuffle_epi64_mask + #define _mm_mask_bitshuffle_epi64_mask(k, b, c) simde_mm_mask_bitshuffle_epi64_mask(k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_bitshuffle_epi64_mask (simde__m256i b, simde__m256i c) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_bitshuffle_epi64_mask(b, c); + #else + simde__m256i_private + b_ = simde__m256i_to_private(b), + c_ = simde__m256i_to_private(c); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < sizeof(b_.m128i) / sizeof(b_.m128i[0]) ; i++) { + r |= (HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_bitshuffle_epi64_mask(b_.m128i[i], c_.m128i[i])) << (i * 16)); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(b_.u64) rv = { 0, 0, 0, 0 }; + __typeof__(b_.u64) lshift = { 0, 8, 16, 24 }; + + for (int8_t i = 0 ; i < 8 ; i++) { + __typeof__(b_.u64) ct = (HEDLEY_REINTERPRET_CAST(__typeof__(ct), c_.u8) >> (i * 8)) & 63; + rv |= ((b_.u64 >> ct) & 1) << lshift; + lshift += 1; + } + + r = + HEDLEY_STATIC_CAST(simde__mmask32, rv[0]) | + HEDLEY_STATIC_CAST(simde__mmask32, rv[1]) | + HEDLEY_STATIC_CAST(simde__mmask32, rv[2]) | + HEDLEY_STATIC_CAST(simde__mmask32, rv[3]); + #else + for (size_t i = 0 ; i < (sizeof(c_.m128i_private) / sizeof(c_.m128i_private[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(c_.m128i_private[i].m64_private) / sizeof(c_.m128i_private[i].m64_private[0])) ; j++) { + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t k = 0 ; k < (sizeof(c_.m128i_private[i].m64_private[j].u8) / sizeof(c_.m128i_private[i].m64_private[j].u8[0])) ; k++) { + r |= (((b_.m128i_private[i].u64[j] >> (c_.m128i_private[i].m64_private[j].u8[k]) & 63) & 1) << ((i * 16) + (j * 8) + k)); + } + } + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_bitshuffle_epi64_mask + #define _mm256_bitshuffle_epi64_mask(b, c) simde_mm256_bitshuffle_epi64_mask(b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_bitshuffle_epi64_mask (simde__mmask32 k, simde__m256i b, simde__m256i c) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_bitshuffle_epi64_mask(k, b, c); + #else + return (k & simde_mm256_bitshuffle_epi64_mask(b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_bitshuffle_epi64_mask + #define _mm256_mask_bitshuffle_epi64_mask(k, b, c) simde_mm256_mask_bitshuffle_epi64_mask(k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_bitshuffle_epi64_mask (simde__m512i b, simde__m512i c) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_bitshuffle_epi64_mask(b, c); + #else + simde__m512i_private + b_ = simde__m512i_to_private(b), + c_ = simde__m512i_to_private(c); + simde__mmask64 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(b_.m128i) / sizeof(b_.m128i[0])) ; i++) { + r |= 
(HEDLEY_STATIC_CAST(simde__mmask64, simde_mm_bitshuffle_epi64_mask(b_.m128i[i], c_.m128i[i])) << (i * 16)); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(b_.m256i) / sizeof(b_.m256i[0])) ; i++) { + r |= (HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_bitshuffle_epi64_mask(b_.m256i[i], c_.m256i[i])) << (i * 32)); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(b_.u64) rv = { 0, 0, 0, 0, 0, 0, 0, 0 }; + __typeof__(b_.u64) lshift = { 0, 8, 16, 24, 32, 40, 48, 56 }; + + for (int8_t i = 0 ; i < 8 ; i++) { + __typeof__(b_.u64) ct = (HEDLEY_REINTERPRET_CAST(__typeof__(ct), c_.u8) >> (i * 8)) & 63; + rv |= ((b_.u64 >> ct) & 1) << lshift; + lshift += 1; + } + + r = + HEDLEY_STATIC_CAST(simde__mmask64, rv[0]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[1]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[2]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[3]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[4]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[5]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[6]) | + HEDLEY_STATIC_CAST(simde__mmask64, rv[7]); + #else + for (size_t i = 0 ; i < (sizeof(c_.m128i_private) / sizeof(c_.m128i_private[0])) ; i++) { + for (size_t j = 0 ; j < (sizeof(c_.m128i_private[i].m64_private) / sizeof(c_.m128i_private[i].m64_private[0])) ; j++) { + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t k = 0 ; k < (sizeof(c_.m128i_private[i].m64_private[j].u8) / sizeof(c_.m128i_private[i].m64_private[j].u8[0])) ; k++) { + r |= (((b_.m128i_private[i].u64[j] >> (c_.m128i_private[i].m64_private[j].u8[k]) & 63) & 1) << ((i * 16) + (j * 8) + k)); + } + } + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_bitshuffle_epi64_mask + #define _mm512_bitshuffle_epi64_mask(b, c) simde_mm512_bitshuffle_epi64_mask(b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_bitshuffle_epi64_mask (simde__mmask64 k, simde__m512i b, simde__m512i c) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_mask_bitshuffle_epi64_mask(k, b, c); + #else + return (k & simde_mm512_bitshuffle_epi64_mask(b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_bitshuffle_epi64_mask + #define _mm512_mask_bitshuffle_epi64_mask(k, b, c) simde_mm512_mask_bitshuffle_epi64_mask(k, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_BITSHUFFLE_H) */ +/* :: End simde/x86/avx512/bitshuffle.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/blend.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_BLEND_H) +#define SIMDE_X86_AVX512_BLEND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_blend_epi8(k, a, b); + #else + return simde_mm_mask_mov_epi8(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi8 + #define _mm_mask_blend_epi8(k, a, b) simde_mm_mask_blend_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_blend_epi16(k, a, b); + #else + return simde_mm_mask_mov_epi16(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi16 + #define _mm_mask_blend_epi16(k, a, b) simde_mm_mask_blend_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_epi32(k, a, b); + #else + return simde_mm_mask_mov_epi32(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi32 + #define _mm_mask_blend_epi32(k, a, b) simde_mm_mask_blend_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_epi64(k, a, b); + #else + return simde_mm_mask_mov_epi64(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi64 + #define _mm_mask_blend_epi64(k, a, b) simde_mm_mask_blend_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_blend_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_ps(k, a, b); + #else + return simde_mm_mask_mov_ps(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_ps + #define _mm_mask_blend_ps(k, a, b) simde_mm_mask_blend_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_blend_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_pd(k, a, b); + #else + return simde_mm_mask_mov_pd(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_pd + #define _mm_mask_blend_pd(k, a, b) simde_mm_mask_blend_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return 
_mm256_mask_blend_epi8(k, a, b); + #else + return simde_mm256_mask_mov_epi8(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi8 + #define _mm256_mask_blend_epi8(k, a, b) simde_mm256_mask_blend_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_blend_epi16(k, a, b); + #else + return simde_mm256_mask_mov_epi16(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi16 + #define _mm256_mask_blend_epi16(k, a, b) simde_mm256_mask_blend_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_epi32(k, a, b); + #else + return simde_mm256_mask_mov_epi32(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi32 + #define _mm256_mask_blend_epi32(k, a, b) simde_mm256_mask_blend_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_epi64(k, a, b); + #else + return simde_mm256_mask_mov_epi64(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi64 + #define _mm256_mask_blend_epi64(k, a, b) simde_mm256_mask_blend_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_blend_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_ps(k, a, b); + #else + return simde_mm256_mask_mov_ps(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_ps + #define _mm256_mask_blend_ps(k, a, b) simde_mm256_mask_blend_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_blend_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_pd(k, a, b); + #else + return simde_mm256_mask_mov_pd(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_pd + #define _mm256_mask_blend_pd(k, a, b) simde_mm256_mask_blend_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_blend_epi8(k, a, b); + #else + return simde_mm512_mask_mov_epi8(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi8 + #define _mm512_mask_blend_epi8(k, a, b) simde_mm512_mask_blend_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_blend_epi16(k, a, b); + #else + return simde_mm512_mask_mov_epi16(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi16 + #define _mm512_mask_blend_epi16(k, a, b) simde_mm512_mask_blend_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + 
#if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_epi32(k, a, b); + #else + return simde_mm512_mask_mov_epi32(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi32 + #define _mm512_mask_blend_epi32(k, a, b) simde_mm512_mask_blend_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_epi64(k, a, b); + #else + return simde_mm512_mask_mov_epi64(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi64 + #define _mm512_mask_blend_epi64(k, a, b) simde_mm512_mask_blend_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_blend_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_ps(k, a, b); + #else + return simde_mm512_mask_mov_ps(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_ps + #define _mm512_mask_blend_ps(k, a, b) simde_mm512_mask_blend_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_blend_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_pd(k, a, b); + #else + return simde_mm512_mask_mov_pd(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_pd + #define _mm512_mask_blend_pd(k, a, b) simde_mm512_mask_blend_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_BLEND_H) */ +/* :: End simde/x86/avx512/blend.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/broadcast.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_BROADCAST_H) +#define SIMDE_X86_AVX512_BROADCAST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_f32x2 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_broadcast_f32x2(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_f32x2 + #define _mm256_broadcast_f32x2(a) simde_mm256_broadcast_f32x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_broadcast_f32x2(simde__m256 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_mask_broadcast_f32x2(src, k, a); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_broadcast_f32x2 + #define _mm256_mask_broadcast_f32x2(src, k, a) simde_mm256_mask_broadcast_f32x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_broadcast_f32x2(simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_maskz_broadcast_f32x2(k, a); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_broadcast_f32x2 + #define _mm256_maskz_broadcast_f32x2(k, a) simde_mm256_maskz_broadcast_f32x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcast_f32x2 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_broadcast_f32x2(a); + #else + simde__m512_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f32x2 + #define _mm512_broadcast_f32x2(a) simde_mm512_broadcast_f32x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 
+simde_mm512_mask_broadcast_f32x2(simde__m512 src, simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_broadcast_f32x2(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f32x2 + #define _mm512_mask_broadcast_f32x2(src, k, a) simde_mm512_mask_broadcast_f32x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcast_f32x2(simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_broadcast_f32x2(k, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f32x2 + #define _mm512_maskz_broadcast_f32x2(k, a) simde_mm512_maskz_broadcast_f32x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcast_f32x8 (simde__m256 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_broadcast_f32x8(a); + #else + simde__m512_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=8) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + r_.f32[i + 2] = a_.f32[2]; + r_.f32[i + 3] = a_.f32[3]; + r_.f32[i + 4] = a_.f32[4]; + r_.f32[i + 5] = a_.f32[5]; + r_.f32[i + 6] = a_.f32[6]; + r_.f32[i + 7] = a_.f32[7]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f32x8 + #define _mm512_broadcast_f32x8(a) simde_mm512_broadcast_f32x8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcast_f32x8(simde__m512 src, simde__mmask16 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_broadcast_f32x8(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f32x8 + #define _mm512_mask_broadcast_f32x8(src, k, a) simde_mm512_mask_broadcast_f32x8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcast_f32x8(simde__mmask16 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_broadcast_f32x8(k, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f32x8 + #define _mm512_maskz_broadcast_f32x8(k, a) simde_mm512_maskz_broadcast_f32x8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_broadcast_f64x2 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_broadcast_f64x2(a); + #else + simde__m512d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[0]; + r_.f64[i + 1] = a_.f64[1]; + } + #endif + + 
return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f64x2 + #define _mm512_broadcast_f64x2(a) simde_mm512_broadcast_f64x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_broadcast_f64x2(simde__m512d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_broadcast_f64x2(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f64x2 + #define _mm512_mask_broadcast_f64x2(src, k, a) simde_mm512_mask_broadcast_f64x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_broadcast_f64x2(k, a); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f64x2 + #define _mm512_maskz_broadcast_f64x2(k, a) simde_mm512_maskz_broadcast_f64x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_f32x4 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_broadcast_f32x4(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = a_; + r_.m128_private[1] = a_; + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 0, 1, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 4) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + r_.f32[i + 2] = a_.f32[2]; + r_.f32[i + 3] = a_.f32[3]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_f32x4 + #define _mm256_broadcast_f32x4(a) simde_mm256_broadcast_f32x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_broadcast_f32x4(simde__m256 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_broadcast_f32x4(src, k, a); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_broadcast_f32x4 + #define _mm256_mask_broadcast_f32x4(src, k, a) simde_mm256_mask_broadcast_f32x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_broadcast_f32x4(simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_broadcast_f32x4(k, a); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_broadcast_f32x4 + #define _mm256_maskz_broadcast_f32x4(k, a) simde_mm256_maskz_broadcast_f32x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_f64x2 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_broadcast_f64x2(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + /* I don't have a bug # for this, but when compiled with clang-10 without optimization on aarch64 + * the 
__builtin_shufflevector version doesn't work correctly. clang 9 and 11 aren't a problem */ + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION < 100000 || SIMDE_DETECT_CLANG_VERSION > 100000)) + r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[0]; + r_.f64[i + 1] = a_.f64[1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_f64x2 + #define _mm256_broadcast_f64x2(a) simde_mm256_broadcast_f64x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_broadcast_f64x2(simde__m256d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_mask_broadcast_f64x2(src, k, a); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_broadcast_f64x2 + #define _mm256_mask_broadcast_f64x2(src, k, a) simde_mm256_mask_broadcast_f64x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_maskz_broadcast_f64x2(k, a); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_broadcast_f64x2 + #define _mm256_maskz_broadcast_f64x2(k, a) simde_mm256_maskz_broadcast_f64x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcast_f32x4 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_f32x4(a); + #else + simde__m512_private r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256[1] = r_.m256[0] = simde_mm256_castsi256_ps(simde_mm256_broadcastsi128_si256(simde_mm_castps_si128(a))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = a; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f32x4 + #define _mm512_broadcast_f32x4(a) simde_mm512_broadcast_f32x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcast_f32x4(simde__m512 src, simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_f32x4(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f32x4 + #define _mm512_mask_broadcast_f32x4(src, k, a) simde_mm512_mask_broadcast_f32x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcast_f32x4(simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_f32x4(k, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f32x4 + #define _mm512_maskz_broadcast_f32x4(k, a) simde_mm512_maskz_broadcast_f32x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_broadcast_f64x4 (simde__m256d a) 
{ + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_f64x4(a); + #else + simde__m512d_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = a; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f64x4 + #define _mm512_broadcast_f64x4(a) simde_mm512_broadcast_f64x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_broadcast_f64x4(simde__m512d src, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_f64x4(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f64x4 + #define _mm512_mask_broadcast_f64x4(src, k, a) simde_mm512_mask_broadcast_f64x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_broadcast_f64x4(simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_f64x4(k, a); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f64x4 + #define _mm512_maskz_broadcast_f64x4(k, a) simde_mm512_maskz_broadcast_f64x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcast_i32x4 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_i32x4(a); + #else + simde__m512i_private r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = a; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_i32x4 + #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcast_i32x4(simde__m512i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_i32x4(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcast_i32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_i32x4 + #define _mm512_mask_broadcast_i32x4(src, k, a) simde_mm512_mask_broadcast_i32x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcast_i32x4(simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_i32x4(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcast_i32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_i32x4 + #define _mm512_maskz_broadcast_i32x4(k, a) simde_mm512_maskz_broadcast_i32x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcast_i64x4 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_i64x4(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_i64x4 + #define _mm512_broadcast_i64x4(a) simde_mm512_broadcast_i64x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcast_i64x4(simde__m512i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_i64x4(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcast_i64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_i64x4 + #define _mm512_mask_broadcast_i64x4(src, k, a) simde_mm512_mask_broadcast_i64x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcast_i64x4(simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_i64x4(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcast_i64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_i64x4 + #define _mm512_maskz_broadcast_i64x4(k, a) simde_mm512_maskz_broadcast_i64x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastd_epi32(a); + #else + simde__m512i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastd_epi32 + #define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcastd_epi32(simde__m512i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastd_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcastd_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastd_epi32 + #define _mm512_mask_broadcastd_epi32(src, k, a) simde_mm512_mask_broadcastd_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcastd_epi32(simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastd_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcastd_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastd_epi32 + #define _mm512_maskz_broadcastd_epi32(k, a) simde_mm512_maskz_broadcastd_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastq_epi64(a); + #else + simde__m512i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastq_epi64 + #define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcastq_epi64(simde__m512i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastq_epi64(src, k, a); 
+ #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcastq_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastq_epi64 + #define _mm512_mask_broadcastq_epi64(src, k, a) simde_mm512_mask_broadcastq_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcastq_epi64(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastq_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcastq_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastq_epi64 + #define _mm512_maskz_broadcastq_epi64(k, a) simde_mm512_maskz_broadcastq_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastss_ps(a); + #else + simde__m512_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastss_ps + #define _mm512_broadcastss_ps(a) simde_mm512_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcastss_ps(simde__m512 src, simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastss_ps(src, k, a); + #else + simde__m512_private + src_ = simde__m512_to_private(src), + r_; + simde__m128_private + a_ = simde__m128_to_private(a); + + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((k >> i) & 1) ? a_.f32[0] : src_.f32[i]; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastss_ps + #define _mm512_mask_broadcastss_ps(src, k, a) simde_mm512_mask_broadcastss_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcastss_ps(simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastss_ps(k, a); + #else + simde__m512_private + r_; + simde__m128_private + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((k >> i) & 1) ? 
a_.f32[0] : INT32_C(0); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastss_ps + #define _mm512_maskz_broadcastss_ps(k, a) simde_mm512_maskz_broadcastss_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastsd_pd(a); + #else + simde__m512d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastsd_pd + #define _mm512_broadcastsd_pd(a) simde_mm512_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_broadcastsd_pd(simde__m512d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastsd_pd(src, k, a); + #else + simde__m512d_private + src_ = simde__m512d_to_private(src), + r_; + simde__m128d_private + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((k >> i) & 1) ? a_.f64[0] : src_.f64[i]; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastsd_pd + #define _mm512_mask_broadcastsd_pd(src, k, a) simde_mm512_mask_broadcastsd_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_broadcastsd_pd(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastsd_pd(k, a); + #else + simde__m512d_private + r_; + simde__m128d_private + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((k >> i) & 1) ? 
a_.f64[0] : INT64_C(0); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastsd_pd + #define _mm512_maskz_broadcastsd_pd(k, a) simde_mm512_maskz_broadcastsd_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_broadcastb_epi8(a); + #else + simde__m128i_private a_= simde__m128i_to_private(a); + return simde_mm512_set1_epi8(a_.i8[0]); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastb_epi8 + #define _mm512_broadcastb_epi8(a) simde_mm512_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcastb_epi8 (simde__m512i src, simde__mmask64 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_broadcastb_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_broadcastb_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastb_epi8 + #define _mm512_mask_broadcastb_epi8(src, k, a) simde_mm512_mask_broadcastb_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcastb_epi8 (simde__mmask64 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_broadcastb_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_broadcastb_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastb_epi8 + #define _mm512_maskz_broadcastb_epi8(k, a) simde_mm512_maskz_broadcastb_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_broadcastw_epi16(a); + #else + simde__m128i_private a_= simde__m128i_to_private(a); + return simde_mm512_set1_epi16(a_.i16[0]); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastw_epi16 + #define _mm512_broadcastw_epi16(a) simde_mm512_broadcastw_epi16(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_BROADCAST_H) */ +/* :: End simde/x86/avx512/broadcast.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmp.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_CMP_H) +#define SIMDE_X86_AVX512_CMP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mov_mask.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_MOV_MASK_H) +#define SIMDE_X86_AVX512_MOV_MASK_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_movepi8_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movepi8_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movemask_epi8(a)); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask16 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi8_mask + #define _mm_movepi8_mask(a) simde_mm_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi16_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movepi16_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* There is no 32-bit _mm_movemask_* function, so we use + * _mm_movemask_epi8 then extract the odd bits. */ + uint_fast16_t r = HEDLEY_STATIC_CAST(uint_fast16_t, simde_mm_movemask_epi8(a)); + r = ( (r >> 1)) & UINT32_C(0x5555); + r = (r | (r >> 1)) & UINT32_C(0x3333); + r = (r | (r >> 2)) & UINT32_C(0x0f0f); + r = (r | (r >> 4)) & UINT32_C(0x00ff); + return HEDLEY_STATIC_CAST(simde__mmask8, r); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi16_mask + #define _mm_movepi16_mask(a) simde_mm_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi32_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movepi32_mask(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_ps(simde_mm_castsi128_ps(a))); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi32_mask + #define _mm_movepi32_mask(a) simde_mm_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi64_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movepi64_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_pd(simde_mm_castsi128_pd(a))); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi64_mask + #define _mm_movepi64_mask(a) simde_mm_movepi64_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_movepi8_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_movepi8_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_movepi8_mask(a_.m128i[i])) << (i * 16); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return HEDLEY_STATIC_CAST(simde__mmask32, r); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi8_mask + #define _mm256_movepi8_mask(a) simde_mm256_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_movepi16_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_movepi16_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask16 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi16_mask(a_.m128i[i])) << (i * 8); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi16_mask + #define _mm256_movepi16_mask(a) simde_mm256_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_movepi32_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movepi32_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi32_mask(a_.m128i[i])) << (i * 4); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi32_mask + #define _mm256_movepi32_mask(a) simde_mm256_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_movepi64_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movepi64_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movepi64_mask(a_.m128i[i])) << (i * 2); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi64_mask + #define _mm256_movepi64_mask(a) simde_mm256_movepi64_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_movepi8_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movepi8_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask64 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_movepi8_mask(a_.m256i[i])) << (i * 32); + } + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return HEDLEY_STATIC_CAST(simde__mmask64, r); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi8_mask + #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_movepi16_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movepi16_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm256_movepi16_mask(a_.m256i[i])) << (i * 16); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi16_mask + #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_movepi32_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movepi32_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask16 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm256_movepi32_mask(a_.m256i[i])) << (i * 8); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi32_mask + #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_movepi64_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movepi64_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= simde_mm256_movepi64_mask(a_.m256i[i]) << (i * 4); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? 
(UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi64_mask + #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOV_MASK_H) */ +/* :: End simde/x86/avx512/mov_mask.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setzero.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SETZERO_H) +#define SIMDE_X86_AVX512_SETZERO_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setzero_si512(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_si512(); + #else + simde__m512i r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_si512 + #define _mm512_setzero_si512() simde_mm512_setzero_si512() + #undef _mm512_setzero_epi32 + #define _mm512_setzero_epi32() simde_mm512_setzero_si512() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_setzero_ps(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_ps(); + #else + return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_ps + #define _mm512_setzero_ps() simde_mm512_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_setzero_pd(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_pd(); + #else + return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_pd + #define _mm512_setzero_pd() simde_mm512_setzero_pd() +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ +/* :: End simde/x86/avx512/setzero.h :: */ 
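+/*
+ * Illustrative aside (not part of the imported simde sources): a hedged
+ * sketch of how the simde_mm*_movepi*_mask helpers from mov_mask.h above
+ * behave.  The include path and the test values here are assumptions made
+ * for this example only; the sketch is kept inside a comment so it does not
+ * change the imported header.
+ *
+ *   #include <assert.h>
+ *   #include "simde/x86/avx512.h"   // assumed include path for this import
+ *
+ *   static void movepi8_mask_example(void) {
+ *     // Elements 0 and 2 are negative, so bits 0 and 2 of the mask are set.
+ *     simde__m128i v = simde_mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+ *                                        0, 0, 0, 0, 0, -1, 0, -1);
+ *     simde__mmask16 m = simde_mm_movepi8_mask(v);
+ *     assert(m == 0x0005);
+ *   }
+ *
+ * The SSE2 fallback of simde_mm_movepi16_mask above compacts the 16-bit
+ * _mm_movemask_epi8 result by keeping only the odd bits (one sign bit per
+ * 16-bit lane).  A minimal scalar restatement of that compaction step,
+ * assuming a plain uint16_t input, is:
+ *
+ *   static inline uint8_t compact_odd_bits(uint16_t m) {
+ *     m = ( (m >> 1)) & 0x5555;     // odd bits move down to even positions
+ *     m = (m | (m >> 1)) & 0x3333;  // pack the kept bits in adjacent pairs
+ *     m = (m | (m >> 2)) & 0x0f0f;  // pack pairs into nibbles
+ *     m = (m | (m >> 4)) & 0x00ff;  // pack nibbles: bit i is lane i's sign
+ *     return (uint8_t) m;
+ *   }
+ */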
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setone.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SETONE_H) +#define SIMDE_X86_AVX512_SETONE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_setone_si512(void) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } + + return simde__m512i_from_private(r_); +} +#define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_setone_ps(void) { + return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_setone_pd(void) { + return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETONE_H) */ +/* :: End simde/x86/avx512/setone.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m512_to_private(simde_mm512_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_))); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_cmp_ps_mask(a, b, imm8) _mm512_cmp_ps_mask((a), (b), (imm8)) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ + simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m128) / sizeof(simde_mm512_cmp_ps_mask_r_.m128[0])) ; i++) { \ + simde_mm512_cmp_ps_mask_r_.m128[i] = simde_mm_cmp_ps(simde_mm512_cmp_ps_mask_a_.m128[i], simde_mm512_cmp_ps_mask_b_.m128[i], (imm8)); \ + } \ + \ + simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ + })) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) + #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ + simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m256) / sizeof(simde_mm512_cmp_ps_mask_r_.m256[0])) ; i++) { \ + simde_mm512_cmp_ps_mask_r_.m256[i] = simde_mm256_cmp_ps(simde_mm512_cmp_ps_mask_a_.m256[i], simde_mm512_cmp_ps_mask_b_.m256[i], (imm8)); \ + } \ + \ + simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ + })) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_ps_mask + #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_cmp_ps_mask(a, b, imm8) _mm256_cmp_ps_mask((a), (b), (imm8)) +#else + #define simde_mm256_cmp_ps_mask(a, b, 
imm8) simde_mm256_movepi32_mask(simde_mm256_castps_si256(simde_mm256_cmp_ps((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps_mask + #define _mm256_cmp_ps_mask(a, b, imm8) simde_mm256_cmp_ps_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_cmp_ps_mask(a, b, imm8) _mm_cmp_ps_mask((a), (b), (imm8)) +#else + #define simde_mm_cmp_ps_mask(a, b, imm8) simde_mm_movepi32_mask(simde_mm_castps_si128(simde_mm_cmp_ps((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps_mask + #define _mm_cmp_ps_mask(a, b, imm8) simde_mm_cmp_ps_mask((a), (b), (imm8)) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_cmp_pd_mask(a, b, imm8) _mm512_cmp_pd_mask((a), (b), (imm8)) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ + simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ + \ + for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m128d) / sizeof(simde_mm512_cmp_pd_mask_r_.m128d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ + simde_mm512_cmp_pd_mask_r_.m128d[simde_mm512_cmp_pd_mask_i] = simde_mm_cmp_pd(simde_mm512_cmp_pd_mask_a_.m128d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m128d[simde_mm512_cmp_pd_mask_i], (imm8)); \ + } \ + \ + simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ + })) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) + #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ + simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ + \ + for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m256d) / sizeof(simde_mm512_cmp_pd_mask_r_.m256d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ + simde_mm512_cmp_pd_mask_r_.m256d[simde_mm512_cmp_pd_mask_i] = simde_mm256_cmp_pd(simde_mm512_cmp_pd_mask_a_.m256d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m256d[simde_mm512_cmp_pd_mask_i], (imm8)); \ + } \ + \ + simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ + })) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_pd_mask + #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_cmp_pd_mask(a, b, imm8) _mm256_cmp_pd_mask((a), (b), (imm8)) +#else + #define simde_mm256_cmp_pd_mask(a, b, imm8) simde_mm256_movepi64_mask(simde_mm256_castpd_si256(simde_mm256_cmp_pd((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd_mask + #define _mm256_cmp_pd_mask(a, b, imm8) simde_mm256_cmp_pd_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_cmp_pd_mask(a, b, imm8) _mm_cmp_pd_mask((a), (b), (imm8)) +#else + #define simde_mm_cmp_pd_mask(a, b, imm8) simde_mm_movepi64_mask(simde_mm_castpd_si128(simde_mm_cmp_pd((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd_mask + #define _mm_cmp_pd_mask(a, b, imm8) simde_mm_cmp_pd_mask((a), (b), (imm8)) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES 
+simde__mmask32 +simde_mm512_cmp_epu16_mask (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + switch (imm8) { + case SIMDE_MM_CMPINT_EQ: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 == b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_LT: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 < b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_LE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 <= b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_FALSE: + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); + break; + + + case SIMDE_MM_CMPINT_NE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 != b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_NLT: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 < b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = !(a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_NLE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 <= b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = !(a_.u16[i] <= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_TRUE: + r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_cmp_epu16_mask(a, b, imm8) _mm512_cmp_epu16_mask((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_epu16_mask + #define _mm512_cmp_epu16_mask(a, b, imm8) simde_mm512_cmp_epu16_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) +#else + #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu16_mask(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmp_epu16_mask +#define _mm512_mask_cmp_epu16_mask(a, b, imm8) simde_mm512_mask_cmp_epu16_mask((a), (b), (imm8)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMP_H) */ +/* :: End simde/x86/avx512/cmp.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpeq.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_CMPEQ_H) +#define SIMDE_X86_AVX512_CMPEQ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpeq_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[i], b_.m256i[i]))); + r |= HEDLEY_STATIC_CAST(uint64_t, t) << (i * 32); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 == b_.i8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[i] == b_.u8[i]) ? (UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epi8_mask + #define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmpeq_epi8_mask(simde__mmask64 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpeq_epi8_mask(k1, a, b); + #else + return simde_mm512_cmpeq_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epi8_mask + #define _mm512_mask_cmpeq_epi8_mask(k1, a, b) simde_mm512_mask_cmpeq_epi8_mask((k1), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpeq_epi32_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epi32_mask + #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpeq_epi32_mask(k1, a, b); + #else + return simde_mm512_cmpeq_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epi32_mask + 
#define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpeq_epi64_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epi64_mask + #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpeq_epi64_mask(k1, a, b); + #else + return simde_mm512_cmpeq_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epi64_mask + #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpeq_epu16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpeq_epu16_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask32 r; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.u16), a_.u16 == b_.u16); + r = simde_mm512_movepi16_mask(simde__m512i_from_private(tmp)); + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r |= (a_.u16[i] == b_.u16[i]) ? 
(UINT16_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epu31_mask + #define _mm512_cmpeq_epu32_mask(a, b) simde_mm512_cmpeq_epu32_mask(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmpeq_epu16_mask(simde__mmask32 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpeq_epu16_mask(k1, a, b); + #else + return k1 & simde_mm512_cmpeq_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epu16_mask + #define _mm512_mask_cmpeq_epu16_mask(k1, a, b) simde_mm512_mask_cmpeq_epu16_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpeq_ps_mask (simde__m512 a, simde__m512 b) { + return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_ps_mask + #define _mm512_cmpeq_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpeq_pd_mask (simde__m512d a, simde__m512d b) { + return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_pd_mask + #define _mm512_cmpeq_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPEQ_H) */ +/* :: End simde/x86/avx512/cmpeq.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpge.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Christopher Moore + * 2021 Andrew Rodriguez + */ + +#if !defined(SIMDE_X86_AVX512_CMPGE_H) +#define SIMDE_X86_AVX512_CMPGE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/movm.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_MOVM_H) +#define SIMDE_X86_AVX512_MOVM_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi8 (simde__mmask16 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_movm_epi8(k); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + const simde__m128i zero = simde_mm_setzero_si128(); + const simde__m128i bits = simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); + const simde__m128i shuffle = simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_shuffle_epi8(r, shuffle); + r = simde_mm_cmpgt_epi8(zero, r); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int8_t pos_data[] = { 7, 6, 5, 4, 3, 2, 1, 0 }; + int8x8_t pos = vld1_s8(pos_data); + r_.neon_i8 = vcombine_s8( + vshr_n_s8(vshl_s8(vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, k)), pos), 7), + vshr_n_s8(vshl_s8(vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, k >> 8)), pos), 7)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi8 + #define _mm_movm_epi8(k) simde_mm_movm_epi8(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi8 (simde__mmask32 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_movm_epi8(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const simde__m256i zero = simde_mm256_setzero_si256(); + const simde__m256i bits = simde_mm256_broadcastsi128_si256(simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80)); + const simde__m256i shuffle = simde_mm256_broadcastsi128_si256(simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); + simde__m256i r; + + r = simde_mm256_set_m128i(_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k >> 16)), _mm_set1_epi16(HEDLEY_STATIC_CAST(short, k))); + r = simde_mm256_mullo_epi16(r, bits); + r = simde_mm256_shuffle_epi8(r, shuffle); + r = simde_mm256_cmpgt_epi8(zero, r); + + return r; + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k)); + r_.m128i[1] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi8 + #define _mm256_movm_epi8(k) simde_mm256_movm_epi8(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi8 (simde__mmask64 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movm_epi8(k); + #else + simde__m512i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k)); + r_.m256i[1] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi8 + #define _mm512_movm_epi8(k) simde_mm512_movm_epi8(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi16 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_movm_epi16(k); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const simde__m128i bits = simde_mm_set_epi16(0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_srai_epi16(r, 15); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int16_t pos_data[] = { 15, 14, 13, 12, 11, 10, 9, 8 }; + const int16x8_t pos = vld1q_s16(pos_data); + r_.neon_i16 = vshrq_n_s16(vshlq_s16(vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, k)), pos), 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi16 + #define _mm_movm_epi16(k) simde_mm_movm_epi16(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi16 (simde__mmask16 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_movm_epi16(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i bits = _mm256_set_epi16(0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, + 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); + __m256i r; + + r = _mm256_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = _mm256_mullo_epi16(r, bits); + r = _mm256_srai_epi16(r, 15); + + return r; + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k)); + r_.m128i[1] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi16 + #define _mm256_movm_epi16(k) simde_mm256_movm_epi16(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi16 (simde__mmask32 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm512_movm_epi16(k); + #else + simde__m512i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k)); + r_.m256i[1] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi16 + #define _mm512_movm_epi16(k) simde_mm512_movm_epi16(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi32 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movm_epi32(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m128i shifts = _mm_set_epi32(28, 29, 30, 31); + __m128i r; + + r = _mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm_sllv_epi32(r, shifts); + r = _mm_srai_epi32(r, 31); + + return r; + #elif defined(SIMDE_X86_SSE2_NATIVE) + const simde__m128i bits = simde_mm_set_epi32(0x10000000, 0x20000000, 0x40000000, INT32_MIN /* 0x80000000 */); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_srai_epi32(r, 31); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int32_t pos_data[] = { 31, 30, 29, 28 }; + const int32x4_t pos = vld1q_s32(pos_data); + r_.neon_i32 = vshrq_n_s32(vshlq_s32(vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, k)), pos), 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi32 + #define _mm_movm_epi32(k) simde_mm_movm_epi32(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi32 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movm_epi32(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i shifts = _mm256_set_epi32(24, 25, 26, 27, 28, 29, 30, 31); + __m256i r; + + r = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm256_sllv_epi32(r, shifts); + r = _mm256_srai_epi32(r, 31); + + return r; + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi32(k ); + r_.m128i[1] = simde_mm_movm_epi32(k >> 4); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi32 + #define _mm256_movm_epi32(k) simde_mm256_movm_epi32(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi32 (simde__mmask16 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movm_epi32(k); + #else + simde__m512i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k )); + r_.m256i[1] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi32 + #define _mm512_movm_epi32(k) simde_mm512_movm_epi32(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi64 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_movm_epi64(k); + /* N.B. CM: These fallbacks may not be faster as there are only two elements */ + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m128i shifts = _mm_set_epi32(30, 30, 31, 31); + __m128i r; + + r = _mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm_sllv_epi32(r, shifts); + r = _mm_srai_epi32(r, 31); + + return r; + #elif defined(SIMDE_X86_SSE2_NATIVE) + const simde__m128i bits = simde_mm_set_epi32(0x40000000, 0x40000000, INT32_MIN /* 0x80000000 */, INT32_MIN /* 0x80000000 */); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_srai_epi32(r, 31); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int64_t pos_data[] = { 63, 62 }; + const int64x2_t pos = vld1q_s64(pos_data); + r_.neon_i64 = vshrq_n_s64(vshlq_s64(vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, k)), pos), 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi64 + #define _mm_movm_epi64(k) simde_mm_movm_epi64(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi64 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_movm_epi64(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i shifts = _mm256_set_epi32(28, 28, 29, 29, 30, 30, 31, 31); + __m256i r; + + r = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm256_sllv_epi32(r, shifts); + r = _mm256_srai_epi32(r, 31); + + return r; + #else + simde__m256i_private r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi64(k ); + r_.m128i[1] = simde_mm_movm_epi64(k >> 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi64 + #define _mm256_movm_epi64(k) simde_mm256_movm_epi64(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi64 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movm_epi64(k); + #else + simde__m512i_private r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi64(k ); + r_.m256i[1] = simde_mm256_movm_epi64(k >> 4); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi64 + #define _mm512_movm_epi64(k) simde_mm512_movm_epi64(k) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOVM_H) */ +/* :: End simde/x86/avx512/movm.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi8(_mm_cmpge_epi8_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpge(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmpge_epi8_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epi8_mask(a, b); + #else + return simde_mm_movepi8_mask(simde_x_mm_cmpge_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi8_mask + #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmpge_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epi8_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi8_mask + #define _mm_mask_cmpge_epi8_mask(src, k, a, b) simde_mm_mask_cmpge_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi8(_mm256_cmpge_epi8_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi8(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmpge_epi8_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epi8_mask(a, b); + #else + return simde_mm256_movepi8_mask(simde_x_mm256_cmpge_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi8_mask + #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmpge_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epi8_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi8_mask + #define _mm256_mask_cmpge_epi8_mask(src, k, a, b) simde_mm256_mask_cmpge_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi8(_mm512_cmpge_epi8_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi8(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi8(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpge_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epi8_mask(a, b); + #else + return simde_mm512_movepi8_mask(simde_x_mm512_cmpge_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi8_mask + #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmpge_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epi8_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi8_mask + #define _mm512_mask_cmpge_epi8_mask(src, k, a, b) simde_mm512_mask_cmpge_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi8(_mm_cmpge_epu8_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a_.altivec_u8, b_.altivec_u8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmpge_epu8_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epu8_mask(a, b); + #else + return simde_mm_movepi8_mask(simde_x_mm_cmpge_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu8_mask + #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmpge_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epu8_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu8_mask + #define _mm_mask_cmpge_epu8_mask(src, k, a, b) simde_mm_mask_cmpge_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi8(_mm256_cmpge_epu8_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu8(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmpge_epu8_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epu8_mask(a, b); + #else + return simde_mm256_movepi8_mask(simde_x_mm256_cmpge_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu8_mask + #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmpge_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epu8_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu8_mask + #define _mm256_mask_cmpge_epu8_mask(src, k, a, b) simde_mm256_mask_cmpge_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi8(_mm512_cmpge_epu8_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu8(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu8(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpge_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epu8_mask(a, b); + #else + return simde_mm512_movepi8_mask(simde_x_mm512_cmpge_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu8_mask + #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmpge_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epu8_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu8_mask + #define _mm512_mask_cmpge_epu8_mask(src, k, a, b) simde_mm512_mask_cmpge_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi16(_mm_cmpge_epi16_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpge(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epi16_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epi16_mask(a, b); + #else + return simde_mm_movepi16_mask(simde_x_mm_cmpge_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi16_mask + #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epi16_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi16_mask + #define _mm_mask_cmpge_epi16_mask(src, k, a, b) simde_mm_mask_cmpge_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi16(_mm256_cmpge_epi16_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmpge_epi16_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epi16_mask(a, b); + #else + return simde_mm256_movepi16_mask(simde_x_mm256_cmpge_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi16_mask + #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmpge_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epi16_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi16_mask + #define _mm256_mask_cmpge_epi16_mask(src, k, a, b) simde_mm256_mask_cmpge_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi16(_mm512_cmpge_epi16_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpge_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epi16_mask(a, b); + #else + return simde_mm512_movepi16_mask(simde_x_mm512_cmpge_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi16_mask + #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmpge_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epi16_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi16_mask + #define _mm512_mask_cmpge_epi16_mask(src, k, a, b) simde_mm512_mask_cmpge_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi16(_mm_cmpge_epu16_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a_.altivec_u16, b_.altivec_u16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epu16_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epu16_mask(a, b); + #else + return simde_mm_movepi16_mask(simde_x_mm_cmpge_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu16_mask + #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epu16_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu16_mask + #define _mm_mask_cmpge_epu16_mask(src, k, a, b) simde_mm_mask_cmpge_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi16(_mm256_cmpge_epu16_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmpge_epu16_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epu16_mask(a, b); + #else + return simde_mm256_movepi16_mask(simde_x_mm256_cmpge_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu16_mask + #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmpge_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epu16_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu16_mask + #define _mm256_mask_cmpge_epu16_mask(src, k, a, b) simde_mm256_mask_cmpge_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi16(_mm512_cmpge_epu16_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu16(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpge_epu16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epu16_mask(a, b); + #else + return simde_mm512_movepi16_mask(simde_x_mm512_cmpge_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu16_mask + #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmpge_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epu16_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu16_mask + #define _mm512_mask_cmpge_epu16_mask(src, k, a, b) simde_mm512_mask_cmpge_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi32(_mm_cmpge_epi32_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpge(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epi32_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epi32_mask(a, b); + #else + return simde_mm_movepi32_mask(simde_x_mm_cmpge_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi32_mask + #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epi32_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi32_mask + #define _mm_mask_cmpge_epi32_mask(src, k, a, b) simde_mm_mask_cmpge_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi32(_mm256_cmpge_epi32_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epi32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epi32_mask(a, b); + #else + return simde_mm256_movepi32_mask(simde_x_mm256_cmpge_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi32_mask + #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epi32_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi32_mask + #define _mm256_mask_cmpge_epi32_mask(src, k, a, b) simde_mm256_mask_cmpge_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi32(_mm512_cmpge_epi32_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpge_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epi32_mask(a, b); + #else + return simde_mm512_movepi32_mask(simde_x_mm512_cmpge_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi32_mask + #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpge_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epi32_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi32_mask + #define _mm512_mask_cmpge_epi32_mask(src, k, a, b) simde_mm512_mask_cmpge_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi32(_mm_cmpge_epu32_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a_.altivec_u32, b_.altivec_u32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epu32_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epu32_mask(a, b); + #else + return simde_mm_movepi32_mask(simde_x_mm_cmpge_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu32_mask + #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epu32_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu32_mask + #define _mm_mask_cmpge_epu32_mask(src, k, a, b) simde_mm_mask_cmpge_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi32(_mm256_cmpge_epu32_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epu32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epu32_mask(a, b); + #else + return simde_mm256_movepi32_mask(simde_x_mm256_cmpge_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu32_mask + #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epu32_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu32_mask + #define _mm256_mask_cmpge_epu32_mask(src, k, a, b) simde_mm256_mask_cmpge_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi32(_mm512_cmpge_epu32_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu32(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpge_epu32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epu32_mask(a, b); + #else + return simde_mm512_movepi32_mask(simde_x_mm512_cmpge_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu32_mask + #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpge_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epu32_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu32_mask + #define _mm512_mask_cmpge_epu32_mask(src, k, a, b) simde_mm512_mask_cmpge_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi64(_mm_cmpge_epi64_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpge(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epi64_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epi64_mask(a, b); + #else + return simde_mm_movepi64_mask(simde_x_mm_cmpge_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpge_epi64_mask + #define _mm_cmpge_epi64_mask(a, b) simde_mm_cmpge_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epi64_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi64_mask + #define _mm_mask_cmpge_epi64_mask(src, k, a, b) simde_mm_mask_cmpge_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi64(_mm256_cmpge_epi64_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi64(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epi64_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epi64_mask(a, b); + #else + return simde_mm256_movepi64_mask(simde_x_mm256_cmpge_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpge_epi64_mask + #define _mm256_cmpge_epi64_mask(a, b) simde_mm256_cmpge_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epi64_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi64_mask + #define _mm256_mask_cmpge_epi64_mask(src, k, a, b) simde_mm256_mask_cmpge_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi64(_mm512_cmpge_epi64_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi64(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpge_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epi64_mask(a, b); + #else + return simde_mm512_movepi64_mask(simde_x_mm512_cmpge_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi64_mask + #define _mm512_cmpge_epi64_mask(a, b) simde_mm512_cmpge_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epi64_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi64_mask + #define _mm512_mask_cmpge_epi64_mask(src, k, a, b) simde_mm512_mask_cmpge_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi64(_mm_cmpge_epu64_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a_.altivec_u64, b_.altivec_u64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epu64_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epu64_mask(a, b); + #else + return simde_mm_movepi64_mask(simde_x_mm_cmpge_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu64_mask + #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epu64_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu64_mask + #define _mm_mask_cmpge_epu64_mask(src, k, a, b) simde_mm_mask_cmpge_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi64(_mm256_cmpge_epu64_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu64(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epu64_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epu64_mask(a, b); + #else + return simde_mm256_movepi64_mask(simde_x_mm256_cmpge_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu64_mask + #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epu64_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu64_mask + #define _mm256_mask_cmpge_epu64_mask(src, k, a, b) simde_mm256_mask_cmpge_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm512_movm_epi64(_mm512_cmpge_epu64_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu64(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu64(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpge_epu64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epu64_mask(a, b); + #else + return simde_mm512_movepi64_mask(simde_x_mm512_cmpge_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu64_mask + #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epu64_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu64_mask + #define _mm512_mask_cmpge_epu64_mask(src, k, a, b) simde_mm512_mask_cmpge_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPGE_H) */ +/* :: End simde/x86/avx512/cmpge.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpgt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_CMPGT_H) +#define SIMDE_X86_AVX512_CMPGT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpgt_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpgt_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpgt_epi8(a_.m256i[i], b_.m256i[i]))); + r |= HEDLEY_STATIC_CAST(uint64_t, t) << HEDLEY_STATIC_CAST(uint64_t, i * 32); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 > b_.i8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] > b_.i8[i]) ? (UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi8_mask + #define _mm512_cmpgt_epi8_mask(a, b) simde_mm512_cmpgt_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpgt_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpgt_epu8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 > b_.u8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[i] > b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epu8_mask + #define _mm512_cmpgt_epu8_mask(a, b) simde_mm512_cmpgt_epu8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpgt_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpgt_epi16_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpgt_epi16(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi16_mask + #define _mm512_cmpgt_epi16_mask(a, b) simde_mm512_cmpgt_epi16_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpgt_epi32_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi32_mask + #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpgt_epi32_mask(k1, a, b); + #else + return simde_mm512_cmpgt_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpgt_epi32_mask + #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpgt_epi64_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi64_mask + #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpgt_epi64_mask(k1, a, b); + #else + return simde_mm512_cmpgt_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpgt_epi64_mask + #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPGT_H) */ +/* :: End simde/x86/avx512/cmpgt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/x86/avx512/cmple.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_CMPLE_H) +#define SIMDE_X86_AVX512_CMPLE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi8(_mm_cmple_epi8_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmple(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmple_epi8_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmple_epi8_mask(a, b); + #else + return simde_mm_movepi8_mask(simde_x_mm_cmple_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi8_mask + #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmple_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmple_epi8_mask(k, a, b); + #else + return k & simde_mm_cmple_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epi8_mask + #define _mm_mask_cmple_epi8_mask(src, k, a, b) simde_mm_mask_cmple_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi8(_mm256_cmple_epi8_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi8(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmple_epi8_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmple_epi8_mask(a, b); + #else + return simde_mm256_movepi8_mask(simde_x_mm256_cmple_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi8_mask + #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmple_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmple_epi8_mask(k, a, b); + #else + return k & simde_mm256_cmple_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epi8_mask + #define _mm256_mask_cmple_epi8_mask(src, k, a, b) simde_mm256_mask_cmple_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi8(_mm512_cmple_epi8_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi8(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epi8(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmple_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmple_epi8_mask(a, b); + #else + return simde_mm512_movepi8_mask(simde_x_mm512_cmple_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi8_mask + #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmple_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmple_epi8_mask(k, a, b); + #else + return k & simde_mm512_cmple_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epi8_mask + #define _mm512_mask_cmple_epi8_mask(src, k, a, b) simde_mm512_mask_cmple_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi8(_mm_cmple_epu8_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmple(a_.altivec_u8, b_.altivec_u8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmple_epu8_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmple_epu8_mask(a, b); + #else + return simde_mm_movepi8_mask(simde_x_mm_cmple_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu8_mask + #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmple_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmple_epu8_mask(k, a, b); + #else + return k & simde_mm_cmple_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epu8_mask + #define _mm_mask_cmple_epu8_mask(src, k, a, b) simde_mm_mask_cmple_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi8(_mm256_cmple_epu8_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu8(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmple_epu8_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmple_epu8_mask(a, b); + #else + return simde_mm256_movepi8_mask(simde_x_mm256_cmple_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu8_mask + #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmple_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmple_epu8_mask(k, a, b); + #else + return k & simde_mm256_cmple_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epu8_mask + #define _mm256_mask_cmple_epu8_mask(src, k, a, b) simde_mm256_mask_cmple_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi8(_mm512_cmple_epu8_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu8(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epu8(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmple_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmple_epu8_mask(a, b); + #else + return simde_mm512_movepi8_mask(simde_x_mm512_cmple_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu8_mask + #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmple_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmple_epu8_mask(k, a, b); + #else + return k & simde_mm512_cmple_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epu8_mask + #define _mm512_mask_cmple_epu8_mask(src, k, a, b) simde_mm512_mask_cmple_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi16(_mm_cmple_epi16_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmple(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmple_epi16_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmple_epi16_mask(a, b); + #else + return simde_mm_movepi16_mask(simde_x_mm_cmple_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi16_mask + #define _mm512_cmple_epi16_mask(a, b) simde_mm512_cmple_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmple_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmple_epi16_mask(k, a, b); + #else + return k & simde_mm_cmple_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epi16_mask + #define _mm_mask_cmple_epi16_mask(src, k, a, b) simde_mm_mask_cmple_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi16(_mm256_cmple_epi16_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmple_epi16_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmple_epi16_mask(a, b); + #else + return simde_mm256_movepi16_mask(simde_x_mm256_cmple_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi16_mask + #define _mm512_cmple_epi16_mask(a, b) simde_mm512_cmple_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmple_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmple_epi16_mask(k, a, b); + #else + return k & simde_mm256_cmple_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epi16_mask + #define _mm256_mask_cmple_epi16_mask(src, k, a, b) simde_mm256_mask_cmple_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi16(_mm512_cmple_epi16_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epi16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmple_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmple_epi16_mask(a, b); + #else + return simde_mm512_movepi16_mask(simde_x_mm512_cmple_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi16_mask + #define _mm512_cmple_epi16_mask(a, b) simde_mm512_cmple_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmple_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmple_epi16_mask(k, a, b); + #else + return k & simde_mm512_cmple_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epi16_mask + #define _mm512_mask_cmple_epi16_mask(src, k, a, b) simde_mm512_mask_cmple_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi16(_mm_cmple_epu16_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmple(a_.altivec_u16, b_.altivec_u16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmple_epu16_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmple_epu16_mask(a, b); + #else + return simde_mm_movepi16_mask(simde_x_mm_cmple_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu16_mask + #define _mm512_cmple_epu16_mask(a, b) simde_mm512_cmple_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmple_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmple_epu16_mask(k, a, b); + #else + return k & simde_mm_cmple_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epu16_mask + #define _mm_mask_cmple_epu16_mask(src, k, a, b) simde_mm_mask_cmple_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi16(_mm256_cmple_epu16_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmple_epu16_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmple_epu16_mask(a, b); + #else + return simde_mm256_movepi16_mask(simde_x_mm256_cmple_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu16_mask + #define _mm512_cmple_epu16_mask(a, b) simde_mm512_cmple_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmple_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmple_epu16_mask(k, a, b); + #else + return k & simde_mm256_cmple_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epu16_mask + #define _mm256_mask_cmple_epu16_mask(src, k, a, b) simde_mm256_mask_cmple_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi16(_mm512_cmple_epu16_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu16(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epu16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmple_epu16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmple_epu16_mask(a, b); + #else + return simde_mm512_movepi16_mask(simde_x_mm512_cmple_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu16_mask + #define _mm512_cmple_epu16_mask(a, b) simde_mm512_cmple_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmple_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmple_epu16_mask(k, a, b); + #else + return k & simde_mm512_cmple_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epu16_mask + #define _mm512_mask_cmple_epu16_mask(src, k, a, b) simde_mm512_mask_cmple_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi32(_mm_cmple_epi32_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmple(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmple_epi32_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmple_epi32_mask(a, b); + #else + return simde_mm_movepi32_mask(simde_x_mm_cmple_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi32_mask + #define _mm512_cmple_epi32_mask(a, b) simde_mm512_cmple_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmple_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmple_epi32_mask(k, a, b); + #else + return k & simde_mm_cmple_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epi32_mask + #define _mm_mask_cmple_epi32_mask(src, k, a, b) simde_mm_mask_cmple_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi32(_mm256_cmple_epi32_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmple_epi32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmple_epi32_mask(a, b); + #else + return simde_mm256_movepi32_mask(simde_x_mm256_cmple_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi32_mask + #define _mm512_cmple_epi32_mask(a, b) simde_mm512_cmple_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmple_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmple_epi32_mask(k, a, b); + #else + return k & simde_mm256_cmple_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epi32_mask + #define _mm256_mask_cmple_epi32_mask(src, k, a, b) simde_mm256_mask_cmple_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi32(_mm512_cmple_epi32_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmple_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmple_epi32_mask(a, b); + #else + return simde_mm512_movepi32_mask(simde_x_mm512_cmple_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi32_mask + #define _mm512_cmple_epi32_mask(a, b) simde_mm512_cmple_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmple_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmple_epi32_mask(k, a, b); + #else + return k & simde_mm512_cmple_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epi32_mask + #define _mm512_mask_cmple_epi32_mask(src, k, a, b) simde_mm512_mask_cmple_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi32(_mm_cmple_epu32_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmple(a_.altivec_u32, b_.altivec_u32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmple_epu32_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmple_epu32_mask(a, b); + #else + return simde_mm_movepi32_mask(simde_x_mm_cmple_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu32_mask + #define _mm512_cmple_epu32_mask(a, b) simde_mm512_cmple_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmple_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmple_epu32_mask(k, a, b); + #else + return k & simde_mm_cmple_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epu32_mask + #define _mm_mask_cmple_epu32_mask(src, k, a, b) simde_mm_mask_cmple_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi32(_mm256_cmple_epu32_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmple_epu32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmple_epu32_mask(a, b); + #else + return simde_mm256_movepi32_mask(simde_x_mm256_cmple_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu32_mask + #define _mm512_cmple_epu32_mask(a, b) simde_mm512_cmple_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmple_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmple_epu32_mask(k, a, b); + #else + return k & simde_mm256_cmple_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epu32_mask + #define _mm256_mask_cmple_epu32_mask(src, k, a, b) simde_mm256_mask_cmple_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi32(_mm512_cmple_epu32_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epu32(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmple_epu32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmple_epu32_mask(a, b); + #else + return simde_mm512_movepi32_mask(simde_x_mm512_cmple_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu32_mask + #define _mm512_cmple_epu32_mask(a, b) simde_mm512_cmple_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmple_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmple_epu32_mask(k, a, b); + #else + return k & simde_mm512_cmple_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epu32_mask + #define _mm512_mask_cmple_epu32_mask(src, k, a, b) simde_mm512_mask_cmple_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi64(_mm_cmple_epi64_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmple(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmple_epi64_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmple_epi64_mask(a, b); + #else + return simde_mm_movepi64_mask(simde_x_mm_cmple_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmple_epi64_mask + #define _mm_cmple_epi64_mask(a, b) simde_mm_cmple_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmple_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmple_epi64_mask(k, a, b); + #else + return k & simde_mm_cmple_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epi64_mask + #define _mm_mask_cmple_epi64_mask(src, k, a, b) simde_mm_mask_cmple_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi64(_mm256_cmple_epi64_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi64(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmple_epi64_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmple_epi64_mask(a, b); + #else + return simde_mm256_movepi64_mask(simde_x_mm256_cmple_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmple_epi64_mask + #define _mm256_cmple_epi64_mask(a, b) simde_mm256_cmple_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmple_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmple_epi64_mask(k, a, b); + #else + return k & simde_mm256_cmple_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epi64_mask + #define _mm256_mask_cmple_epi64_mask(src, k, a, b) simde_mm256_mask_cmple_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi64(_mm512_cmple_epi64_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epi64(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmple_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmple_epi64_mask(a, b); + #else + return simde_mm512_movepi64_mask(simde_x_mm512_cmple_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epi64_mask + #define _mm512_cmple_epi64_mask(a, b) simde_mm512_cmple_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmple_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmple_epi64_mask(k, a, b); + #else + return k & simde_mm512_cmple_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epi64_mask + #define _mm512_mask_cmple_epi64_mask(src, k, a, b) simde_mm512_mask_cmple_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmple_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi64(_mm_cmple_epu64_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmple(a_.altivec_u64, b_.altivec_u64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmple_epu64_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmple_epu64_mask(a, b); + #else + return simde_mm_movepi64_mask(simde_x_mm_cmple_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu64_mask + #define _mm512_cmple_epu64_mask(a, b) simde_mm512_cmple_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmple_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmple_epu64_mask(k, a, b); + #else + return k & simde_mm_cmple_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmple_epu64_mask + #define _mm_mask_cmple_epu64_mask(src, k, a, b) simde_mm_mask_cmple_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmple_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi64(_mm256_cmple_epu64_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu64(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmple_epu64_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmple_epu64_mask(a, b); + #else + return simde_mm256_movepi64_mask(simde_x_mm256_cmple_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu64_mask + #define _mm512_cmple_epu64_mask(a, b) simde_mm512_cmple_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmple_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmple_epu64_mask(k, a, b); + #else + return k & simde_mm256_cmple_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmple_epu64_mask + #define _mm256_mask_cmple_epu64_mask(src, k, a, b) simde_mm256_mask_cmple_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmple_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi64(_mm512_cmple_epu64_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmple_epu64(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmple_epu64(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmple_epu64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmple_epu64_mask(a, b); + #else + return simde_mm512_movepi64_mask(simde_x_mm512_cmple_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmple_epu64_mask + #define _mm512_cmple_epu64_mask(a, b) simde_mm512_cmple_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmple_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmple_epu64_mask(k, a, b); + #else + return k & simde_mm512_cmple_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmple_epu64_mask + #define _mm512_mask_cmple_epu64_mask(src, k, a, b) simde_mm512_mask_cmple_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPLE_H) */ +/* :: End simde/x86/avx512/cmple.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmplt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_CMPLT_H) +#define SIMDE_X86_AVX512_CMPLT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) { + return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_ps_mask + #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmplt_pd_mask (simde__m512d a, simde__m512d b) { + return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_pd_mask + #define _mm512_cmplt_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmplt_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmplt_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 < b_.i8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < b_.i8[i]) ? (UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_epi8_mask + #define _mm512_cmplt_epi8_mask(a, b) simde_mm512_cmplt_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmplt_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmplt_epu8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 < b_.u8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[i] < b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_epu8_mask + #define _mm512_cmplt_epu8_mask(a, b) simde_mm512_cmplt_epu8_mask(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPLT_H) */ +/* :: End simde/x86/avx512/cmplt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpneq.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_CMPNEQ_H) +#define SIMDE_X86_AVX512_CMPNEQ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmpneq_epi8_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpneq_epi8_mask(a, b); + #else + return ~simde_mm_movepi8_mask(simde_mm_cmpeq_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epi8_mask + #define _mm_cmpneq_epi8_mask(a, b) simde_mm_cmpneq_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmpneq_epi8_mask(simde__mmask16 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpneq_epi8_mask(k1, a, b); + #else + return simde_mm_cmpneq_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epi8_mask + #define _mm_mask_cmpneq_epi8_mask(a, b) simde_mm_mask_cmpneq_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmpneq_epu8_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpneq_epu8_mask(a, b); + 
#else + return simde_mm_cmpneq_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epu8_mask + #define _mm_cmpneq_epu8_mask(a, b) simde_mm_cmpneq_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmpneq_epu8_mask(simde__mmask16 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpneq_epu8_mask(k1, a, b); + #else + return simde_mm_mask_cmpneq_epi8_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epu8_mask + #define _mm_mask_cmpneq_epu8_mask(a, b) simde_mm_mask_cmpneq_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpneq_epi16_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpneq_epi16_mask(a, b); + #else + return ~simde_mm_movepi16_mask(simde_mm_cmpeq_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epi16_mask + #define _mm_cmpneq_epi16_mask(a, b) simde_mm_cmpneq_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpneq_epi16_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpneq_epi16_mask(k1, a, b); + #else + return simde_mm_cmpneq_epi16_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epi16_mask + #define _mm_mask_cmpneq_epi16_mask(a, b) simde_mm_mask_cmpneq_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpneq_epu16_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpneq_epu16_mask(a, b); + #else + return simde_mm_cmpneq_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epu16_mask + #define _mm_cmpneq_epu16_mask(a, b) simde_mm_cmpneq_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpneq_epu16_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpneq_epu16_mask(k1, a, b); + #else + return simde_mm_mask_cmpneq_epi16_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epu16_mask + #define _mm_mask_cmpneq_epu16_mask(a, b) simde_mm_mask_cmpneq_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpneq_epi32_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpneq_epi32_mask(a, b); + #else + return (~simde_mm_movepi32_mask(simde_mm_cmpeq_epi32(a, b))) & 15; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epi32_mask + #define _mm_cmpneq_epi32_mask(a, b) simde_mm_cmpneq_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpneq_epi32_mask(simde__mmask8 k1, simde__m128i a, 
simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpneq_epi32_mask(k1, a, b); + #else + return simde_mm_cmpneq_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epi32_mask + #define _mm_mask_cmpneq_epi32_mask(a, b) simde_mm_mask_cmpneq_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpneq_epu32_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpneq_epu32_mask(a, b); + #else + return simde_mm_cmpneq_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epu32_mask + #define _mm_cmpneq_epu32_mask(a, b) simde_mm_cmpneq_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpneq_epu32_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpneq_epu32_mask(k1, a, b); + #else + return simde_mm_mask_cmpneq_epi32_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epu32_mask + #define _mm_mask_cmpneq_epu32_mask(a, b) simde_mm_mask_cmpneq_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpneq_epi64_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpneq_epi64_mask(a, b); + #else + return (~simde_mm_movepi64_mask(simde_mm_cmpeq_epi64(a, b))) & 3; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epi64_mask + #define _mm_cmpneq_epi64_mask(a, b) simde_mm_cmpneq_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpneq_epi64_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpneq_epi64_mask(k1, a, b); + #else + return simde_mm_cmpneq_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epi64_mask + #define _mm_mask_cmpneq_epi64_mask(a, b) simde_mm_mask_cmpneq_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpneq_epu64_mask(simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpneq_epu64_mask(a, b); + #else + return simde_mm_cmpneq_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpneq_epu64_mask + #define _mm_cmpneq_epu64_mask(a, b) simde_mm_cmpneq_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpneq_epu64_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpneq_epu64_mask(k1, a, b); + #else + return simde_mm_mask_cmpneq_epi64_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpneq_epu64_mask + #define _mm_mask_cmpneq_epu64_mask(a, b) simde_mm_mask_cmpneq_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmpneq_epi8_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpneq_epi8_mask(a, b); + #else + return ~simde_mm256_movepi8_mask(simde_mm256_cmpeq_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epi8_mask + #define _mm256_cmpneq_epi8_mask(a, b) 
simde_mm256_cmpneq_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmpneq_epi8_mask(simde__mmask32 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpneq_epi8_mask(k1, a, b); + #else + return simde_mm256_cmpneq_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epi8_mask + #define _mm256_mask_cmpneq_epi8_mask(a, b) simde_mm256_mask_cmpneq_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmpneq_epu8_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpneq_epu8_mask(a, b); + #else + return simde_mm256_cmpneq_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epu8_mask + #define _mm256_cmpneq_epu8_mask(a, b) simde_mm256_cmpneq_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmpneq_epu8_mask(simde__mmask32 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpneq_epu8_mask(k1, a, b); + #else + return simde_mm256_mask_cmpneq_epi8_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epu8_mask + #define _mm256_mask_cmpneq_epu8_mask(a, b) simde_mm256_mask_cmpneq_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmpneq_epi16_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpneq_epi16_mask(a, b); + #else + return ~simde_mm256_movepi16_mask(simde_mm256_cmpeq_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epi16_mask + #define _mm256_cmpneq_epi16_mask(a, b) simde_mm256_cmpneq_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmpneq_epi16_mask(simde__mmask16 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpneq_epi16_mask(k1, a, b); + #else + return simde_mm256_cmpneq_epi16_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epi16_mask + #define _mm256_mask_cmpneq_epi16_mask(a, b) simde_mm256_mask_cmpneq_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmpneq_epu16_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpneq_epu16_mask(a, b); + #else + return simde_mm256_cmpneq_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epu16_mask + #define _mm256_cmpneq_epu16_mask(a, b) simde_mm256_cmpneq_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmpneq_epu16_mask(simde__mmask16 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && 
defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpneq_epu16_mask(k1, a, b); + #else + return simde_mm256_mask_cmpneq_epi16_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epu16_mask + #define _mm256_mask_cmpneq_epu16_mask(a, b) simde_mm256_mask_cmpneq_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpneq_epi32_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpneq_epi32_mask(a, b); + #else + return (~simde_mm256_movepi32_mask(simde_mm256_cmpeq_epi32(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epi32_mask + #define _mm256_cmpneq_epi32_mask(a, b) simde_mm256_cmpneq_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpneq_epi32_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpneq_epi32_mask(k1, a, b); + #else + return simde_mm256_cmpneq_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epi32_mask + #define _mm256_mask_cmpneq_epi32_mask(a, b) simde_mm256_mask_cmpneq_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpneq_epu32_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpneq_epu32_mask(a, b); + #else + return simde_mm256_cmpneq_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epu32_mask + #define _mm256_cmpneq_epu32_mask(a, b) simde_mm256_cmpneq_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpneq_epu32_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpneq_epu32_mask(k1, a, b); + #else + return simde_mm256_mask_cmpneq_epi32_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epu32_mask + #define _mm256_mask_cmpneq_epu32_mask(a, b) simde_mm256_mask_cmpneq_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpneq_epi64_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpneq_epi64_mask(a, b); + #else + return (~simde_mm256_movepi64_mask(simde_mm256_cmpeq_epi64(a, b))) & 15; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpneq_epi64_mask + #define _mm256_cmpneq_epi64_mask(a, b) simde_mm256_cmpneq_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpneq_epi64_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpneq_epi64_mask(k1, a, b); + #else + return simde_mm256_cmpneq_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epi64_mask + #define _mm256_mask_cmpneq_epi64_mask(a, b) simde_mm256_mask_cmpneq_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpneq_epu64_mask(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpneq_epu64_mask(a, b); + #else + return simde_mm256_cmpneq_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_cmpneq_epu64_mask + #define _mm256_cmpneq_epu64_mask(a, b) simde_mm256_cmpneq_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpneq_epu64_mask(simde__mmask8 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpneq_epu64_mask(k1, a, b); + #else + return simde_mm256_mask_cmpneq_epi64_mask(k1, a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpneq_epu64_mask + #define _mm256_mask_cmpneq_epu64_mask(a, b) simde_mm256_mask_cmpneq_epu64_mask((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPNEQ_H) */ +/* :: End simde/x86/avx512/cmpneq.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/compress.h :: */ +#if !defined(SIMDE_X86_AVX512_COMPRESS_H) +#define SIMDE_X86_AVX512_COMPRESS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_compress_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_mask_compress_pd(src, k, a); + #else + simde__m256d_private + a_ = simde__m256d_to_private(a), + src_ = simde__m256d_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if ((k >> i) & 1) { + a_.f64[ri++] = a_.f64[i]; + } + } + + for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; ri++) { + a_.f64[ri] = src_.f64[ri]; + } + + return simde__m256d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compress_pd + #define _mm256_mask_compress_pd(src, k, a) simde_mm256_mask_compress_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_mask_compressstoreu_pd (void* base_addr, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm256_mask_compressstoreu_pd(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask8 store_mask = _pext_u32(-1, k); + _mm256_mask_storeu_pd(base_addr, store_mask, _mm256_maskz_compress_pd(k, a)); + #else + simde__m256d_private + a_ = simde__m256d_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if ((k >> i) & 1) { + a_.f64[ri++] = a_.f64[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.f64[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compressstoreu_pd + #define _mm256_mask_compressstoreu_pd(base_addr, k, a) simde_mm256_mask_compressstoreu_pd(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_compress_pd (simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_maskz_compress_pd(k, a); + #else + simde__m256d_private + a_ = simde__m256d_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if ((k 
>> i) & 1) { + a_.f64[ri++] = a_.f64[i]; + } + } + + for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])); ri++) { + a_.f64[ri] = SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_compress_pd + #define _mm256_maskz_compress_pd(k, a) simde_mm256_maskz_compress_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_compress_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_mask_compress_ps(src, k, a); + #else + simde__m256_private + a_ = simde__m256_to_private(a), + src_ = simde__m256_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if ((k >> i) & 1) { + a_.f32[ri++] = a_.f32[i]; + } + } + + for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; ri++) { + a_.f32[ri] = src_.f32[ri]; + } + + return simde__m256_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compress_ps + #define _mm256_mask_compress_ps(src, k, a) simde_mm256_mask_compress_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_mask_compressstoreu_ps (void* base_addr, simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm256_mask_compressstoreu_ps(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask8 store_mask = _pext_u32(-1, k); + _mm256_mask_storeu_ps(base_addr, store_mask, _mm256_maskz_compress_ps(k, a)); + #else + simde__m256_private + a_ = simde__m256_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if ((k >> i) & 1) { + a_.f32[ri++] = a_.f32[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.f32[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compressstoreu_pd + #define _mm256_mask_compressstoreu_ps(base_addr, k, a) simde_mm256_mask_compressstoreu_ps(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_compress_ps (simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_maskz_compress_ps(k, a); + #else + simde__m256_private + a_ = simde__m256_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if ((k >> i) & 1) { + a_.f32[ri++] = a_.f32[i]; + } + } + + for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])); ri++) { + a_.f32[ri] = SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_compress_ps + #define _mm256_maskz_compress_ps(k, a) simde_mm256_maskz_compress_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_compress_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_mask_compress_epi32(src, k, a); + #else + simde__m256i_private + a_ = 
simde__m256i_to_private(a), + src_ = simde__m256i_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if ((k >> i) & 1) { + a_.i32[ri++] = a_.i32[i]; + } + } + + for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; ri++) { + a_.i32[ri] = src_.i32[ri]; + } + + return simde__m256i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compress_epi32 + #define _mm256_mask_compress_epi32(src, k, a) simde_mm256_mask_compress_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_mask_compressstoreu_epi32 (void* base_addr, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm256_mask_compressstoreu_epi32(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask8 store_mask = _pext_u32(-1, k); + _mm256_mask_storeu_epi32(base_addr, store_mask, _mm256_maskz_compress_epi32(k, a)); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if ((k >> i) & 1) { + a_.i32[ri++] = a_.i32[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.i32[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compressstoreu_epi32 + #define _mm256_mask_compressstoreu_epi32(base_addr, k, a) simde_mm256_mask_compressstoreu_epi32(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_compress_epi32 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_maskz_compress_epi32(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if ((k >> i) & 1) { + a_.i32[ri++] = a_.i32[i]; + } + } + + for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])); ri++) { + a_.f32[ri] = INT32_C(0); + } + + return simde__m256i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_compress_epi32 + #define _mm256_maskz_compress_epi32(k, a) simde_mm256_maskz_compress_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_compress_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_mask_compress_epi64(src, k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + src_ = simde__m256i_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if ((k >> i) & 1) { + a_.i64[ri++] = a_.i64[i]; + } + } + + for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; ri++) { + a_.i64[ri] = src_.i64[ri]; + } + + return simde__m256i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compress_epi64 + #define _mm256_mask_compress_epi64(src, k, a) simde_mm256_mask_compress_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void 
+simde_mm256_mask_compressstoreu_epi64 (void* base_addr, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm256_mask_compressstoreu_epi64(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask8 store_mask = _pext_u32(-1, k); + _mm256_mask_storeu_epi64(base_addr, store_mask, _mm256_maskz_compress_epi64(k, a)); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if ((k >> i) & 1) { + a_.i64[ri++] = a_.i64[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.i64[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_compressstoreu_epi64 + #define _mm256_mask_compressstoreu_epi64(base_addr, k, a) simde_mm256_mask_compressstoreu_epi64(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_compress_epi64 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm256_maskz_compress_epi64(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if ((k >> i) & 1) { + a_.i64[ri++] = a_.i64[i]; + } + } + + for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])); ri++) { + a_.i64[ri] = INT64_C(0); + } + + return simde__m256i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_compress_epi64 + #define _mm256_maskz_compress_epi64(k, a) simde_mm256_maskz_compress_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_compress_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_compress_pd(src, k, a); + #else + simde__m512d_private + a_ = simde__m512d_to_private(a), + src_ = simde__m512d_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if ((k >> i) & 1) { + a_.f64[ri++] = a_.f64[i]; + } + } + + for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; ri++) { + a_.f64[ri] = src_.f64[ri]; + } + + return simde__m512d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compress_pd + #define _mm512_mask_compress_pd(src, k, a) simde_mm512_mask_compress_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_compressstoreu_pd (void* base_addr, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm512_mask_compressstoreu_pd(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask8 store_mask = _pext_u32(-1, k); + _mm512_mask_storeu_pd(base_addr, store_mask, _mm512_maskz_compress_pd(k, a)); + #else + simde__m512d_private + a_ = simde__m512d_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if ((k >> i) & 1) { + a_.f64[ri++] = 
a_.f64[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.f64[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compressstoreu_pd + #define _mm512_mask_compressstoreu_pd(base_addr, k, a) simde_mm512_mask_compressstoreu_pd(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_compress_pd (simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_compress_pd(k, a); + #else + simde__m512d_private + a_ = simde__m512d_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if ((k >> i) & 1) { + a_.f64[ri++] = a_.f64[i]; + } + } + + for ( ; ri < (sizeof(a_.f64) / sizeof(a_.f64[0])); ri++) { + a_.f64[ri] = SIMDE_FLOAT64_C(0.0); + } + + return simde__m512d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_compress_pd + #define _mm512_maskz_compress_pd(k, a) simde_mm512_maskz_compress_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_compress_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_compress_ps(src, k, a); + #else + simde__m512_private + a_ = simde__m512_to_private(a), + src_ = simde__m512_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if ((k >> i) & 1) { + a_.f32[ri++] = a_.f32[i]; + } + } + + for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; ri++) { + a_.f32[ri] = src_.f32[ri]; + } + + return simde__m512_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compress_ps + #define _mm512_mask_compress_ps(src, k, a) simde_mm512_mask_compress_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_compressstoreu_ps (void* base_addr, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm512_mask_compressstoreu_ps(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask16 store_mask = _pext_u32(-1, k); + _mm512_mask_storeu_ps(base_addr, store_mask, _mm512_maskz_compress_ps(k, a)); + #else + simde__m512_private + a_ = simde__m512_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if ((k >> i) & 1) { + a_.f32[ri++] = a_.f32[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.f32[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compressstoreu_pd + #define _mm512_mask_compressstoreu_ps(base_addr, k, a) simde_mm512_mask_compressstoreu_ps(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_compress_ps (simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_compress_ps(k, a); + #else + simde__m512_private + a_ = simde__m512_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if ((k >> i) & 1) { + a_.f32[ri++] = a_.f32[i]; + } + } + + for ( ; ri < (sizeof(a_.f32) / sizeof(a_.f32[0])); ri++) { + a_.f32[ri] = SIMDE_FLOAT32_C(0.0); + } + + return simde__m512_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_compress_ps + #define _mm512_maskz_compress_ps(k, a) simde_mm512_maskz_compress_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_compress_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_compress_epi32(src, k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + src_ = simde__m512i_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if ((k >> i) & 1) { + a_.i32[ri++] = a_.i32[i]; + } + } + + for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; ri++) { + a_.i32[ri] = src_.i32[ri]; + } + + return simde__m512i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compress_epi32 + #define _mm512_mask_compress_epi32(src, k, a) simde_mm512_mask_compress_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_compressstoreu_epi16 (void* base_addr, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(__znver4__) + _mm512_mask_compressstoreu_epi16(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VBMI2_NATIVE) && defined(__znver4__) + simde__mmask32 store_mask = _pext_u32(-1, k); + _mm512_mask_storeu_epi16(base_addr, store_mask, _mm512_maskz_compress_epi16(k, a)); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + if ((k >> i) & 1) { + a_.i16[ri++] = a_.i16[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.i16[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compressstoreu_epi16 + #define _mm512_mask_compressstoreu_epi16(base_addr, k, a) simde_mm512_mask_compressstoreu_epi16(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_compressstoreu_epi32 (void* base_addr, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm512_mask_compressstoreu_epi32(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask16 store_mask = _pext_u32(-1, k); + _mm512_mask_storeu_epi32(base_addr, store_mask, _mm512_maskz_compress_epi32(k, a)); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if ((k >> i) & 1) { + a_.i32[ri++] = a_.i32[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.i32[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compressstoreu_epi32 + #define _mm512_mask_compressstoreu_epi32(base_addr, k, a) simde_mm512_mask_compressstoreu_epi32(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i 
+simde_mm512_maskz_compress_epi32 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_compress_epi32(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if ((k >> i) & 1) { + a_.i32[ri++] = a_.i32[i]; + } + } + + for ( ; ri < (sizeof(a_.i32) / sizeof(a_.i32[0])); ri++) { + a_.f32[ri] = INT32_C(0); + } + + return simde__m512i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_compress_epi32 + #define _mm512_maskz_compress_epi32(k, a) simde_mm512_maskz_compress_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_compress_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_compress_epi64(src, k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + src_ = simde__m512i_to_private(src); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if ((k >> i) & 1) { + a_.i64[ri++] = a_.i64[i]; + } + } + + for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; ri++) { + a_.i64[ri] = src_.i64[ri]; + } + + return simde__m512i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compress_epi64 + #define _mm512_mask_compress_epi64(src, k, a) simde_mm512_mask_compress_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_compressstoreu_epi64 (void* base_addr, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && !defined(__znver4__) + _mm512_mask_compressstoreu_epi64(base_addr, k, a); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) && defined(__znver4__) + simde__mmask8 store_mask = _pext_u32(-1, k); + _mm512_mask_storeu_epi64(base_addr, store_mask, _mm512_maskz_compress_epi64(k, a)); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if ((k >> i) & 1) { + a_.i64[ri++] = a_.i64[i]; + } + } + + simde_memcpy(base_addr, &a_, ri * sizeof(a_.i64[0])); + + return; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_compressstoreu_epi64 + #define _mm512_mask_compressstoreu_epi64(base_addr, k, a) simde_mm512_mask_compressstoreu_epi64(base_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_compress_epi64 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_compress_epi64(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a); + size_t ri = 0; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if ((k >> i) & 1) { + a_.i64[ri++] = a_.i64[i]; + } + } + + for ( ; ri < (sizeof(a_.i64) / sizeof(a_.i64[0])); ri++) { + a_.i64[ri] = INT64_C(0); + } + + return simde__m512i_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_compress_epi64 + #define _mm512_maskz_compress_epi64(k, a) simde_mm512_maskz_compress_epi64(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_COMPRESS_H) */ +/* :: End simde/x86/avx512/compress.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/conflict.h :: */ +#if !defined(SIMDE_X86_AVX512_CONFLICT_H) +#define SIMDE_X86_AVX512_CONFLICT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_conflict_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_conflict_epi32(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = + simde_mm_movemask_ps( + simde_mm_castsi128_ps( + simde_mm_cmpeq_epi32(simde_mm_set1_epi32(a_.i32[i]), a) + ) + ) & ((1 << i) - 1); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_conflict_epi32 + #define _mm_conflict_epi32(a) simde_mm_conflict_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_conflict_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_mask_conflict_epi32(src, k, a); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_conflict_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_conflict_epi32 + #define _mm_mask_conflict_epi32(src, k, a) simde_mm_mask_conflict_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_conflict_epi32 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_maskz_conflict_epi32(k, a); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_conflict_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_conflict_epi32 + #define _mm_maskz_conflict_epi32(k, a) simde_mm_maskz_conflict_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_conflict_epi32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm256_conflict_epi32(a); + #else + simde__m256i_private + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()), + a_ = simde__m256i_to_private(a); + + for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = + simde_mm256_movemask_ps( + simde_mm256_castsi256_ps( + 
simde_mm256_cmpeq_epi32(simde_mm256_set1_epi32(a_.i32[i]), a) + ) + ) & ((1 << i) - 1); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm256_conflict_epi32 + #define _mm256_conflict_epi32(a) simde_mm256_conflict_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_conflict_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm256_mask_conflict_epi32(src, k, a); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_conflict_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_conflict_epi32 + #define _mm256_mask_conflict_epi32(src, k, a) simde_mm256_mask_conflict_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_conflict_epi32 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm256_maskz_conflict_epi32(k, a); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_conflict_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_conflict_epi32 + #define _mm256_maskz_conflict_epi32(k, a) simde_mm256_maskz_conflict_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_conflict_epi32 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm512_conflict_epi32(a); + #else + simde__m512i_private + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()), + a_ = simde__m512i_to_private(a); + + for (size_t i = 1 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = + HEDLEY_STATIC_CAST( + int32_t, + simde_mm512_cmpeq_epi32_mask(simde_mm512_set1_epi32(a_.i32[i]), a) + ) & ((1 << i) - 1); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm512_conflict_epi32 + #define _mm512_conflict_epi32(a) simde_mm512_conflict_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_conflict_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm512_mask_conflict_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_conflict_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_conflict_epi32 + #define _mm512_mask_conflict_epi32(src, k, a) simde_mm512_mask_conflict_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_conflict_epi32 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm512_maskz_conflict_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_conflict_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_conflict_epi32 + #define _mm512_maskz_conflict_epi32(k, a) simde_mm512_maskz_conflict_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_conflict_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_conflict_epi64(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + for 
(size_t i = 1 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST( + int64_t, + simde_mm_movemask_pd( + simde_mm_castsi128_pd( + simde_mm_cmpeq_epi64(simde_mm_set1_epi64x(a_.i64[i]), a) + ) + ) + ) & ((1 << i) - 1); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_conflict_epi64 + #define _mm_conflict_epi64(a) simde_mm_conflict_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_conflict_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_mask_conflict_epi64(src, k, a); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_conflict_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_conflict_epi64 + #define _mm_mask_conflict_epi64(src, k, a) simde_mm_mask_conflict_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_conflict_epi64 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_maskz_conflict_epi64(k, a); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_conflict_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_conflict_epi64 + #define _mm_maskz_conflict_epi64(k, a) simde_mm_maskz_conflict_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_conflict_epi64 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm256_conflict_epi64(a); + #else + simde__m256i_private + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()), + a_ = simde__m256i_to_private(a); + + for (size_t i = 1 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST( + int64_t, + simde_mm256_movemask_pd( + simde_mm256_castsi256_pd( + simde_mm256_cmpeq_epi64(simde_mm256_set1_epi64x(a_.i64[i]), a) + ) + ) + ) & ((1 << i) - 1); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm256_conflict_epi64 + #define _mm256_conflict_epi64(a) simde_mm256_conflict_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_conflict_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm256_mask_conflict_epi64(src, k, a); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_conflict_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_conflict_epi64 + #define _mm256_mask_conflict_epi64(src, k, a) simde_mm256_mask_conflict_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_conflict_epi64 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm256_maskz_conflict_epi64(k, a); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_conflict_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_maskz_conflict_epi64 + #define _mm256_maskz_conflict_epi64(k, a) simde_mm256_maskz_conflict_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_conflict_epi64 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm512_conflict_epi64(a); + #else + simde__m512i_private + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()), + a_ = simde__m512i_to_private(a); + + for (size_t i = 1 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST( + int64_t, + simde_mm512_cmpeq_epi64_mask(simde_mm512_set1_epi64(a_.i64[i]), a) + ) & ((1 << i) - 1); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm512_conflict_epi64 + #define _mm512_conflict_epi64(a) simde_mm512_conflict_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_conflict_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm512_mask_conflict_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_conflict_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_conflict_epi64 + #define _mm512_mask_conflict_epi64(src, k, a) simde_mm512_mask_conflict_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_conflict_epi64 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm512_maskz_conflict_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_conflict_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_conflict_epi64 + #define _mm512_maskz_conflict_epi64(k, a) simde_mm512_maskz_conflict_epi64(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CONFLICT_H) */ +/* :: End simde/x86/avx512/conflict.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/copysign.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_COPYSIGN_H) +#define SIMDE_X86_AVX512_COPYSIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/xor.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_XOR_H) +#define SIMDE_X86_AVX512_XOR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_xor_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + /* TODO: generate reduced case to give to Intel */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_xor_ps(a_.m256[i], b_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_ps + #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_xor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_xor_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_xor_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_ps + #define _mm512_mask_xor_ps(src, k, a, b) simde_mm512_mask_xor_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_xor_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_xor_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_xor_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_ps + #define _mm512_maskz_xor_ps(k, a, b) simde_mm512_maskz_xor_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_xor_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_pd + #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_xor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if 
defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_xor_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_xor_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_pd + #define _mm512_mask_xor_pd(src, k, a, b) simde_mm512_mask_xor_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_xor_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_xor_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_xor_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_pd + #define _mm512_maskz_xor_pd(k, a, b) simde_mm512_maskz_xor_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_epi32 + #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_xor_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_xor_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_epi32 + #define _mm512_mask_xor_epi32(src, k, v2, v3) simde_mm512_mask_xor_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_xor_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_xor_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_xor_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_epi32 + #define _mm512_maskz_xor_epi32(k, a, b) simde_mm512_maskz_xor_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_epi64 + #define 
_mm512_xor_epi64(a, b) simde_mm512_xor_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_xor_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_xor_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_xor_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_epi64 + #define _mm512_mask_xor_epi64(src, k, a, b) simde_mm512_mask_xor_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_xor_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_xor_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_xor_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_epi64 + #define _mm512_maskz_xor_epi64(k, a, b) simde_mm512_maskz_xor_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]); + r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_si512 + #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_XOR_H) */ +/* :: End simde/x86/avx512/xor.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_copysign_ps(simde__m512 dest, simde__m512 src) { + simde__m512_private + r_, + dest_ = simde__m512_to_private(dest), + src_ = simde__m512_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m512 sgnbit = simde_mm512_xor_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm512_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm512_xor_ps(simde_mm512_and_ps(sgnbit, src), simde_mm512_andnot_ps(sgnbit, dest)); + #endif + + return simde__m512_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_copysign_pd(simde__m512d dest, simde__m512d src) { + simde__m512d_private + r_, + dest_ = simde__m512d_to_private(dest), + src_ = simde__m512d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m512d sgnbit = 
simde_mm512_xor_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm512_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm512_xor_pd(simde_mm512_and_pd(sgnbit, src), simde_mm512_andnot_pd(sgnbit, dest)); + #endif + + return simde__m512d_from_private(r_); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_COPYSIGN_H) */ +/* :: End simde/x86/avx512/copysign.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cvt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2021 Andrew Rodriguez + */ + +#if !defined(SIMDE_X86_AVX512_CVT_H) +#define SIMDE_X86_AVX512_CVT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/f16c.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-f16.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_FLOAT16_H) +#define SIMDE_FLOAT16_H + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* Portable version which should work on pretty much any compiler. + * Obviously you can't rely on compiler support for things like + * conversion to/from 32-bit floats, so make sure you always use the + * functions and macros in this file! + * + * The portable implementations are (heavily) based on CC0 code by + * Fabian Giesen: (see also + * ). + * I have basically just modified it to get rid of some UB (lots of + * aliasing, right shifting a negative value), use fixed-width types, + * and work in C. */ +#define SIMDE_FLOAT16_API_PORTABLE 1 +/* _Float16, per C standard (TS 18661-3; + * ). */ +#define SIMDE_FLOAT16_API_FLOAT16 2 +/* clang >= 6.0 supports __fp16 as an interchange format on all + * targets, but only allows you to use them for arguments and return + * values on targets which have defined an ABI. We get around the + * restriction by wrapping the __fp16 in a struct, but we can't do + * that on Arm since it would break compatibility with the NEON F16 + * functions. */ +#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 +/* This is basically __fp16 as specified by Arm, where arugments and + * return values are raw __fp16 values not structs. */ +#define SIMDE_FLOAT16_API_FP16 4 + +/* Choosing an implementation. This is a bit rough, but I don't have + * any ideas on how to improve it. If you do, patches are definitely + * welcome. */ +#if !defined(SIMDE_FLOAT16_API) + #if 0 && !defined(__cplusplus) + /* I haven't found a way to detect this. 
It seems like defining + * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then + * checking for defined(FLT16_MAX) should work, but both gcc and + * clang will define the constants even if _Float16 is not + * supported. Ideas welcome. */ + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 + #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(SIMDE_ARM_NEON_FP16) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 + #elif defined(__FLT16_MIN__) && (defined(__clang__) && (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI + #else + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE + #endif +#endif + +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 + typedef _Float16 simde_float16; + #define SIMDE_FLOAT16_C(value) value##f16 +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI + typedef struct { __fp16 value; } simde_float16; + #if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) + #else + #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) + #endif +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 + typedef __fp16 simde_float16; + #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE + typedef struct { uint16_t value; } simde_float16; +#else + #error No 16-bit floating point API. +#endif + +#if \ + defined(SIMDE_VECTOR_OPS) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) + #define SIMDE_FLOAT16_VECTOR +#endif + +/* Reinterpret -- you *generally* shouldn't need these, they're really + * intended for internal use. However, on x86 half-precision floats + * get stuffed into a __m128i/__m256i, so it may be useful. */ + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) + +#define SIMDE_NANHF simde_uint16_as_float16(0x7E00) +#define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) + +/* Conversion -- convert between single-precision and half-precision + * floats. */ + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float16 +simde_float16_from_float32 (simde_float32 value) { + simde_float16 res; + + #if \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) + res = HEDLEY_STATIC_CAST(simde_float16, value); + #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) + res.value = HEDLEY_STATIC_CAST(__fp16, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint32_t f32u = simde_float32_as_uint32(value); + static const uint32_t f32u_infty = UINT32_C(255) << 23; + static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; + static const uint32_t denorm_magic = + ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; + uint16_t f16u; + + uint32_t sign = f32u & (UINT32_C(1) << 31); + f32u ^= sign; + + /* NOTE all the integer compares in this function cast the operands + * to signed values to help compilers vectorize to SSE2, which lacks + * unsigned comparison instructions. This is fine since all + * operands are below 0x80000000 (we clear the sign bit). 
*/ + + if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ + f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ + } else { /* (De)normalized number or zero */ + if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ + /* use a magic value to align our 10 mantissa bits at the bottom of + * the float. as long as FP addition is round-to-nearest-even this + * just works. */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); + + /* and one integer subtract of the bias later, we have our final float! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); + } else { + uint32_t mant_odd = (f32u >> 13) & 1; + + /* update exponent, rounding bias part 1 */ + f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); + /* rounding bias part 2 */ + f32u += mant_odd; + /* take the bits! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); + } + } + + f16u |= sign >> 16; + res = simde_uint16_as_float16(f16u); + #endif + + return res; +} + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float32 +simde_float16_to_float32 (simde_float16 value) { + simde_float32 res; + + #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) + res = HEDLEY_STATIC_CAST(simde_float32, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint16_t half = simde_float16_as_uint16(value); + const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); + const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ + uint32_t f32u; + + f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ + uint32_t exp = shifted_exp & f32u; /* just the exponent */ + f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ + + /* handle exponent special cases */ + if (exp == shifted_exp) /* Inf/NaN? */ + f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ + else if (exp == 0) { /* Zero/Denormal? 
*/ + f32u += (1) << 23; /* extra exp adjust */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ + } + + f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ + res = simde_uint32_as_float32(f32u); + #endif + + return res; +} + +#ifdef SIMDE_FLOAT16_C + #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) +#else + #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_FLOAT16_H) */ +/* :: End simde/simde-f16.h :: */ + +#if !defined(SIMDE_X86_F16C_H) +#define SIMDE_X86_F16C_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_X86_PF16C_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_PF16C_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_ph(simde__m128 a, const int imm8) { + simde__m128_private a_ = simde__m128_to_private(a); + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + + HEDLEY_STATIC_CAST(void, imm8); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + r_.neon_f16 = vcombine_f16(vcvt_f16_f32(a_.neon_f32), vdup_n_f16(SIMDE_FLOAT16_C(0.0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i])); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_F16C_NATIVE) + #define simde_mm_cvtps_ph(a, imm8) _mm_cvtps_ph(a, imm8) +#endif +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_ph(a, sae) simde_mm_cvtps_ph(a, sae) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtph_ps(simde__m128i a) { + #if defined(SIMDE_X86_F16C_NATIVE) + return _mm_cvtph_ps(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + r_.neon_f32 = vcvt_f32_f16(vget_low_f16(a_.neon_f16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm_cvtph_ps(a) simde_mm_cvtph_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtps_ph(simde__m256 a, const int imm8) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128i_private r_; + + HEDLEY_STATIC_CAST(void, imm8); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i])); + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_F16C_NATIVE) + #define simde_mm256_cvtps_ph(a, imm8) _mm256_cvtps_ph(a, imm8) +#endif +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm256_cvtps_ph(a, imm8) simde_mm256_cvtps_ph(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cvtph_ps(simde__m128i a) { + #if defined(SIMDE_X86_F16C_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtph_ps(a); + #elif defined(SIMDE_X86_F16C_NATIVE) + return _mm256_setr_m128( + _mm_cvtph_ps(a), + 
_mm_cvtph_ps(_mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(a), 0xee))) + ); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__m256_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm256_cvtph_ps(a) simde_mm256_cvtph_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_F16C_H) */ +/* :: End simde/x86/f16c.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi64_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_cvtepi64_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/questions/41144668/how-to-efficiently-perform-double-int64-conversions-with-sse-avx */ + __m128i xH = _mm_srai_epi32(a_.n, 16); + #if defined(SIMDE_X86_SSE4_2_NATIVE) + xH = _mm_blend_epi16(xH, _mm_setzero_si128(), 0x33); + #else + xH = _mm_and_si128(xH, _mm_set_epi16(~INT16_C(0), ~INT16_C(0), INT16_C(0), INT16_C(0), ~INT16_C(0), ~INT16_C(0), INT16_C(0), INT16_C(0))); + #endif + xH = _mm_add_epi64(xH, _mm_castpd_si128(_mm_set1_pd(442721857769029238784.0))); + const __m128i e = _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)); + #if defined(SIMDE_X86_SSE4_2_NATIVE) + __m128i xL = _mm_blend_epi16(a_.n, e, 0x88); + #else + __m128i m = _mm_set_epi16(INT16_C(0), ~INT16_C(0), ~INT16_C(0), ~INT16_C(0), INT16_C(0), ~INT16_C(0), ~INT16_C(0), ~INT16_C(0)); + __m128i xL = _mm_or_si128(_mm_and_si128(m, a_.n), _mm_andnot_si128(m, e)); + #endif + __m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(442726361368656609280.0)); + return _mm_add_pd(f, _mm_castsi128_pd(xL)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi64_pd + #define _mm_cvtepi64_pd(a) simde_mm_cvtepi64_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_cvtepi64_pd(simde__m128d src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_mask_cvtepi64_pd(src, k, a); + #else + return simde_mm_mask_mov_pd(src, k, simde_mm_cvtepi64_pd(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cvtepi64_pd + #define _mm_mask_cvtepi64_pd(src, k, a) simde_mm_mask_cvtepi64_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_cvtepi64_pd(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_maskz_cvtepi64_pd(k, a); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_cvtepi64_pd(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_cvtepi64_pd + #define _mm_maskz_cvtepi64_pd(k, a) simde_mm_maskz_cvtepi64_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i 
+simde_mm512_cvtepi16_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cvtepi16_epi8(a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i8, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtepi16_epi8 + #define _mm512_cvtepi16_epi8(a) simde_mm512_cvtepi16_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_mask_cvtepi16_epi8 (simde__m256i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cvtepi16_epi8(src, k, a); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm512_cvtepi16_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtepi16_epi8 + #define _mm512_mask_cvtepi16_epi8(src, k, a) simde_mm512_mask_cvtepi16_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_maskz_cvtepi16_epi8 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_cvtepi16_epi8(k, a); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm512_cvtepi16_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtepi16_epi8 + #define _mm512_maskz_cvtepi16_epi8(k, a) simde_mm512_maskz_cvtepi16_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_cvtepi8_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cvtepi8_epi16(a); + #else + simde__m512i_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtepi8_epi16 + #define _mm512_cvtepi8_epi16(a) simde_mm512_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cvtepi32_ps (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtepi32_ps(a); + #else + simde__m512_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtepi32_ps + #define _mm512_cvtepi32_ps(a) simde_mm512_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_cvtepi64_epi32 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtepi64_epi32(a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i64[i]); + } + 
#endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtepi64_epi32 + #define _mm512_cvtepi64_epi32(a) simde_mm512_cvtepi64_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cvtepu32_ps (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtepu32_ps(a); + #else + simde__m512_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + /* https://stackoverflow.com/a/34067907/501126 */ + const __m128 tmp = _mm_cvtepi32_ps(_mm_srli_epi32(a_.m128i[i], 1)); + r_.m128[i] = + _mm_add_ps( + _mm_add_ps(tmp, tmp), + _mm_cvtepi32_ps(_mm_and_si128(a_.m128i[i], _mm_set1_epi32(1))) + ); + } + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(float, a_.u32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtepu32_ps + #define _mm512_cvtepu32_ps(a) simde_mm512_cvtepu32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cvtph_ps(simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtph_ps(a); + #endif + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m512_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); + } + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtph_ps + #define _mm512_cvtph_ps(a) simde_mm512_cvtph_ps(a) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CVT_H) */ +/* :: End simde/x86/avx512/cvt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cvtt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_CVTT_H) +#define SIMDE_X86_AVX512_CVTT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi64 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_cvttpd_epi64(a); + #else + simde__m128i_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + r_.n = + _mm_set_epi64x( + _mm_cvttsd_si64(_mm_unpackhi_pd(a_.n, a_.n)), + _mm_cvttsd_si64(a_.n) + ); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vcvtq_s64_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_signed(a_.altivec_f64); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.f64[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_cvttpd_epi64 + #define _mm_cvttpd_epi64(a) simde_mm_cvttpd_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_cvttpd_epi64(simde__m128i src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_mask_cvttpd_epi64(src, k, a); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_cvttpd_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cvttpd_epi64 + #define _mm_mask_cvttpd_epi64(src, k, a) simde_mm_mask_cvttpd_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_cvttpd_epi64(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_maskz_cvttpd_epi64(k, a); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_cvttpd_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_cvttpd_epi64 + #define _mm_maskz_cvttpd_epi64(k, a) simde_mm_maskz_cvttpd_epi64(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CVTT_H) */ +/* :: End simde/x86/avx512/cvtt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cvts.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_CVTS_H) +#define SIMDE_X86_AVX512_CVTS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsepi16_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cvtsepi16_epi8(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i8[i] = + (a_.i16[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i16[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtsepi16_epi8 + #define _mm_cvtsepi16_epi8(a) simde_mm_cvtsepi16_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtsepi16_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cvtsepi16_epi8(a); + #else + simde__m128i_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = + (a_.i16[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i16[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsepi16_epi8 + #define _mm256_cvtsepi16_epi8(a) simde_mm256_cvtsepi16_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsepi32_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cvtsepi32_epi8(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i8[i] = + (a_.i32[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i32[i] > INT8_MAX) + ? 
(INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtsepi32_epi8 + #define _mm_cvtsepi32_epi8(a) simde_mm_cvtsepi32_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtsepi32_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cvtsepi32_epi8(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i8[i] = + (a_.i32[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i32[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsepi32_epi8 + #define _mm256_cvtsepi32_epi8(a) simde_mm256_cvtsepi32_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsepi32_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cvtsepi32_epi16(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i16[i] = + (a_.i32[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i32[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtsepi32_epi16 + #define _mm_cvtsepi32_epi16(a) simde_mm_cvtsepi32_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtsepi32_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cvtsepi32_epi16(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i16[i] = + (a_.i32[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i32[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsepi32_epi16 + #define _mm256_cvtsepi32_epi16(a) simde_mm256_cvtsepi32_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsepi64_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cvtsepi64_epi8(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i8[i] = + (a_.i64[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i64[i] > INT8_MAX) + ? 
(INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtsepi64_epi8 + #define _mm_cvtsepi64_epi8(a) simde_mm_cvtsepi64_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtsepi64_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cvtsepi64_epi8(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i8[i] = + (a_.i64[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i64[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsepi64_epi8 + #define _mm256_cvtsepi64_epi8(a) simde_mm256_cvtsepi64_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_cvtsepi16_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cvtsepi16_epi8(a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = + (a_.i16[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i16[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtsepi16_epi8 + #define _mm512_cvtsepi16_epi8(a) simde_mm512_cvtsepi16_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_mask_cvtsepi16_epi8 (simde__m256i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cvtsepi16_epi8(src, k, a); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm512_cvtsepi16_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtsepi16_epi8 + #define _mm512_mask_cvtsepi16_epi8(src, k, a) simde_mm512_mask_cvtsepi16_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_maskz_cvtsepi16_epi8 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_cvtsepi16_epi8(k, a); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm512_cvtsepi16_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtsepi16_epi8 + #define _mm512_maskz_cvtsepi16_epi8(k, a) simde_mm512_maskz_cvtsepi16_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_cvtsepi32_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtsepi32_epi8(a); + #else + simde__m128i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i8[i] = + (a_.i32[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i32[i] > INT8_MAX) + ? 
(INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i32[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtsepi32_epi8 + #define _mm512_cvtsepi32_epi8(a) simde_mm512_cvtsepi32_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_mask_cvtsepi32_epi8 (simde__m128i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cvtsepi32_epi8(src, k, a); + #else + simde__m128i_private r_; + simde__m128i_private src_ = simde__m128i_to_private(src); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i8[i] = ((k>>i) &1 ) ? + ((a_.i32[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i32[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]))) : src_.i8[i] ; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtsepi32_epi8 + #define _mm512_mask_cvtsepi32_epi8(src, k, a) simde_mm512_mask_cvtsepi32_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_maskz_cvtsepi32_epi8 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_cvtsepi32_epi8(k, a); + #else + simde__m128i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i8[i] = ((k>>i) &1 ) ? + ((a_.i32[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i32[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i32[i]))) : INT8_C(0) ; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtsepi32_epi8 + #define _mm512_maskz_cvtsepi32_epi8(k, a) simde_mm512_maskz_cvtsepi32_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_cvtsepi32_epi16 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtsepi32_epi16(a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i16[i] = + (a_.i32[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i32[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtsepi32_epi16 + #define _mm512_cvtsepi32_epi16(a) simde_mm512_cvtsepi32_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_mask_cvtsepi32_epi16 (simde__m256i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cvtsepi32_epi16(src, k, a); + #else + simde__m256i_private r_; + simde__m256i_private src_ = simde__m256i_to_private(src); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i16[i] = ((k>>i) &1 ) ? + ((a_.i32[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i32[i] > INT16_MAX) + ? 
(INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : src_.i16[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtsepi32_epi16 + #define _mm512_mask_cvtsepi32_epi16(src, k, a) simde_mm512_mask_cvtsepi32_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_maskz_cvtsepi32_epi16 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_cvtsepi32_epi16(k, a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i16[i] = ((k>>i) &1 ) ? + ((a_.i32[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i32[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i32[i]))) : INT16_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtsepi32_epi16 + #define _mm512_maskz_cvtsepi32_epi16(k, a) simde_mm512_maskz_cvtsepi32_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_cvtsepi64_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtsepi64_epi8(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i8[i] = + (a_.i64[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i64[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i64[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtsepi64_epi8 + #define _mm512_cvtsepi64_epi8(a) simde_mm512_cvtsepi64_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_mask_cvtsepi64_epi8 (simde__m128i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cvtsepi64_epi8(src, k, a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m128i_private src_ = simde__m128i_to_private(src); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i8[i] = ((k>>i) &1 ) ? + ((a_.i64[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i64[i] > INT8_MAX) + ? (INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : src_.i8[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtsepi64_epi8 + #define _mm512_mask_cvtsepi64_epi8(src, k, a) simde_mm512_mask_cvtsepi64_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_maskz_cvtsepi64_epi8 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_cvtsepi64_epi8(k, a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i8[i] = ((k>>i) &1 ) ? + ((a_.i64[i] < INT8_MIN) + ? (INT8_MIN) + : ((a_.i64[i] > INT8_MAX) + ? 
(INT8_MAX) + : HEDLEY_STATIC_CAST(int8_t, a_.i64[i]))) : INT8_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtsepi64_epi8 + #define _mm512_maskz_cvtsepi64_epi8(k, a) simde_mm512_maskz_cvtsepi64_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_cvtsepi64_epi16 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtsepi64_epi16(a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i16[i] = + (a_.i64[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i64[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i64[i])); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtsepi64_epi16 + #define _mm512_cvtsepi64_epi16(a) simde_mm512_cvtsepi64_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_mask_cvtsepi64_epi16 (simde__m128i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cvtsepi64_epi16(src, k, a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m128i_private src_ = simde__m128i_to_private(src); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i16[i] = ((k>>i) & 1) ? + ((a_.i64[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i64[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : src_.i16[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtsepi64_epi16 + #define _mm512_mask_cvtsepi64_epi16(src, k, a) simde_mm512_mask_cvtsepi64_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_maskz_cvtsepi64_epi16 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_cvtsepi64_epi16(k, a); + #else + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i16[i] = ((k>>i) & 1) ? + ((a_.i64[i] < INT16_MIN) + ? (INT16_MIN) + : ((a_.i64[i] > INT16_MAX) + ? (INT16_MAX) + : HEDLEY_STATIC_CAST(int16_t, a_.i64[i]))) : INT16_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtsepi64_epi16 + #define _mm512_maskz_cvtsepi64_epi16(k, a) simde_mm512_maskz_cvtsepi64_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_cvtsepi64_epi32 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtsepi64_epi32(a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i32[i] = + (a_.i64[i] < INT32_MIN) + ? (INT32_MIN) + : ((a_.i64[i] > INT32_MAX) + ? 
(INT32_MAX) + : HEDLEY_STATIC_CAST(int32_t, a_.i64[i])); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cvtsepi64_epi32 + #define _mm512_cvtsepi64_epi32(a) simde_mm512_cvtsepi64_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_mask_cvtsepi64_epi32 (simde__m256i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cvtsepi64_epi32(src, k, a); + #else + simde__m256i_private r_; + simde__m256i_private src_ = simde__m256i_to_private(src); + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i32[i] = ((k>>i) & 1) ? + ((a_.i64[i] < INT32_MIN) + ? (INT32_MIN) + : ((a_.i64[i] > INT32_MAX) + ? (INT32_MAX) + : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : src_.i32[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cvtsepi64_epi32 + #define _mm512_mask_cvtsepi64_epi32(src, k, a) simde_mm512_mask_cvtsepi64_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_maskz_cvtsepi64_epi32 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_cvtsepi64_epi32(k, a); + #else + simde__m256i_private r_; + simde__m512i_private a_ = simde__m512i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i32[i] = ((k>>i) & 1) ? + ((a_.i64[i] < INT32_MIN) + ? (INT32_MIN) + : ((a_.i64[i] > INT32_MAX) + ? (INT32_MAX) + : HEDLEY_STATIC_CAST(int32_t, a_.i64[i]))) : INT32_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_cvtsepi64_epi32 + #define _mm512_maskz_cvtsepi64_epi32(k, a) simde_mm512_maskz_cvtsepi64_epi32(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CVTS_H) */ +/* :: End simde/x86/avx512/cvts.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/dbsad.h :: */ +#if !defined(SIMDE_X86_AVX512_DBSAD_H) +#define SIMDE_X86_AVX512_DBSAD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/shuffle.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SHUFFLE_H) +#define SIMDE_X86_AVX512_SHUFFLE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_shuffle_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_shuffle_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)]; + } + #endif + + return simde__m512i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_epi8 + #define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_shuffle_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_shuffle_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_shuffle_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_shuffle_epi8 + #define _mm512_mask_shuffle_epi8(src, k, a, b) simde_mm512_mask_shuffle_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_shuffle_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_shuffle_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_shuffle_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_shuffle_epi8 + #define _mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i[0] = a_.m128i[ imm8 & 1]; + r_.m128i[1] = b_.m128i[(imm8 >> 1) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_shuffle_i32x4(a, b, imm8) _mm256_shuffle_i32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_i32x4 + #define _mm256_shuffle_i32x4(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) +#endif + +#define simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_shuffle_i32x4(a, b, imm8)) +#define simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_shuffle_i32x4(a, b, imm8)) + +#define simde_mm256_shuffle_f32x4(a, b, imm8) simde_mm256_castsi256_ps(simde_mm256_shuffle_i32x4(simde_mm256_castps_si256(a), simde_mm256_castps_si256(b), imm8)) +#define simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_shuffle_f32x4(a, b, imm8)) +#define simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_shuffle_f32x4(a, b, imm8)) + +#define simde_mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) +#define simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_shuffle_i64x2(a, b, imm8)) +#define simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_shuffle_i64x2(a, b, imm8)) + +#define simde_mm256_shuffle_f64x2(a, b, imm8) simde_mm256_castsi256_pd(simde_mm256_shuffle_i64x2(simde_mm256_castpd_si256(a), simde_mm256_castpd_si256(b), imm8)) +#define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) +#define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + r_.m128i[0] = a_.m128i[ imm8 & 3]; + r_.m128i[1] = a_.m128i[(imm8 >> 2) & 3]; + r_.m128i[2] = b_.m128i[(imm8 >> 4) & 3]; + r_.m128i[3] = b_.m128i[(imm8 >> 6) & 3]; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_i32x4(a, b, imm8) _mm512_shuffle_i32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_i32x4 + #define _mm512_shuffle_i32x4(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) +#endif + +#define simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_shuffle_i32x4(a, b, imm8)) +#define simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_shuffle_i32x4(a, b, imm8)) + +#define simde_mm512_shuffle_f32x4(a, b, imm8) simde_mm512_castsi512_ps(simde_mm512_shuffle_i32x4(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b), imm8)) +#define simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_shuffle_f32x4(a, b, imm8)) +#define simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_shuffle_f32x4(a, b, imm8)) + +#define simde_mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) +#define simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_shuffle_i64x2(a, b, imm8)) +#define simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_shuffle_i64x2(a, b, imm8)) + +#define simde_mm512_shuffle_f64x2(a, b, imm8) simde_mm512_castsi512_pd(simde_mm512_shuffle_i64x2(simde_mm512_castpd_si512(a), 
simde_mm512_castpd_si512(b), imm8)) +#define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) +#define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_ps(a, b, imm8) _mm512_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ + simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ + \ + simde_mm512_shuffle_ps_a_.m256[0] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[0], simde_mm512_shuffle_ps_b_.m256[0], imm8); \ + simde_mm512_shuffle_ps_a_.m256[1] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[1], simde_mm512_shuffle_ps_b_.m256[1], imm8); \ + \ + simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ + simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ + \ + simde_mm512_shuffle_ps_a_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 64, \ + simde_mm512_shuffle_ps_a_.f32, \ + simde_mm512_shuffle_ps_b_.f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 16, \ + (((imm8) >> 6) & 3) + 16, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 20, \ + (((imm8) >> 6) & 3) + 20, \ + (((imm8) ) & 3) + 8, \ + (((imm8) >> 2) & 3) + 8, \ + (((imm8) >> 4) & 3) + 24, \ + (((imm8) >> 6) & 3) + 24, \ + (((imm8) ) & 3) + 12, \ + (((imm8) >> 2) & 3) + 12, \ + (((imm8) >> 4) & 3) + 28, \ + (((imm8) >> 6) & 3) + 28 \ + ); \ + \ + simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_shuffle_ps(simde__m512 a, simde__m512 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + const size_t halfway = (sizeof(r_.m128_private[0].f32) / sizeof(r_.m128_private[0].f32[0]) / 2); + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + SIMDE_VECTORIZE + for (size_t j = 0 ; j < halfway ; j++) { + r_.m128_private[i].f32[j] = a_.m128_private[i].f32[(imm8 >> (j * 2)) & 3]; + r_.m128_private[i].f32[halfway + j] = b_.m128_private[i].f32[(imm8 >> ((halfway + j) * 2)) & 3]; + } + } + + return simde__m512_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_ps + #define _mm512_shuffle_ps(a, b, imm8) simde_mm512_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_shuffle_pd(simde__m512d a, simde__m512d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.f64) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[i * 2] = (imm8 & ( 1 << (i*2) )) ? a_.f64[i * 2 + 1]: a_.f64[i * 2]; + r_.f64[i * 2 + 1] = (imm8 & ( 1 << (i*2+1) )) ? 
b_.f64[i * 2 + 1]: b_.f64[i * 2]; + } + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_pd(a, b, imm8) _mm512_shuffle_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_pd + #define _mm512_shuffle_pd(a, b, imm8) simde_mm512_shuffle_pd(a, b, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SHUFFLE_H) */ +/* :: End simde/x86/avx512/shuffle.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_dbsad_epu8(a, b, imm8) _mm_dbsad_epu8((a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128i + simde_mm_dbsad_epu8_internal_ (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + uint8_t a1 SIMDE_VECTOR(16) = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, a_.u8, a_.u8, + 0, 1, 0, 1, + 4, 5, 4, 5, + 8, 9, 8, 9, + 12, 13, 12, 13); + uint8_t b1 SIMDE_VECTOR(16) = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, b_.u8, b_.u8, + 0, 1, 1, 2, + 2, 3, 3, 4, + 8, 9, 9, 10, + 10, 11, 11, 12); + + __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1); + __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask)); + + r_.u16 = + __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14), __typeof__(r_.u16)) + + __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15), __typeof__(r_.u16)); + + uint8_t a2 SIMDE_VECTOR(16) = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, a_.u8, a_.u8, + 2, 3, 2, 3, + 6, 7, 6, 7, + 10, 11, 10, 11, + 14, 15, 14, 15); + uint8_t b2 SIMDE_VECTOR(16) = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, b_.u8, b_.u8, + 2, 3, 3, 4, + 4, 5, 5, 6, + 10, 11, 11, 12, + 12, 13, 13, 14); + + __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2); + __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask)); + + r_.u16 += + __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14), __typeof__(r_.u16)) + + __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15), __typeof__(r_.u16)); + #else + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = 0; + for (size_t j = 0 ; j < 4 ; j++) { + uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[((i << 1) & 12) + j]); + uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8)) + j]); + r_.u16[i] += (A < B) ? 
(B - A) : (A - B); + } + } + #endif + + return simde__m128i_from_private(r_); + } + #define simde_mm_dbsad_epu8(a, b, imm8) simde_mm_dbsad_epu8_internal_((a), simde_mm_shuffle_epi32((b), (imm8))) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_dbsad_epu8 + #define _mm_dbsad_epu8(a, b, imm8) simde_mm_dbsad_epu8(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_dbsad_epu8(src, k, a, b, imm8) _mm_mask_dbsad_epu8((src), (k), (a), (b), (imm8)) +#else + #define simde_mm_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm_mask_mov_epi16(src, k, simde_mm_dbsad_epu8(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_dbsad_epu8 + #define _mm_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm_mask_dbsad_epu8(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_dbsad_epu8(k, a, b, imm8) _mm_maskz_dbsad_epu8((k), (a), (b), (imm8)) +#else + #define simde_mm_maskz_dbsad_epu8(k, a, b, imm8) simde_mm_maskz_mov_epi16(k, simde_mm_dbsad_epu8(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_dbsad_epu8 + #define _mm_maskz_dbsad_epu8(k, a, b, imm8) simde_mm_maskz_dbsad_epu8(k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_dbsad_epu8(a, b, imm8) _mm256_dbsad_epu8((a), (b), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_dbsad_epu8(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256i_private \ + simde_mm256_dbsad_epu8_a_ = simde__m256i_to_private(a), \ + simde_mm256_dbsad_epu8_b_ = simde__m256i_to_private(b); \ + \ + simde_mm256_dbsad_epu8_a_.m128i[0] = simde_mm_dbsad_epu8(simde_mm256_dbsad_epu8_a_.m128i[0], simde_mm256_dbsad_epu8_b_.m128i[0], imm8); \ + simde_mm256_dbsad_epu8_a_.m128i[1] = simde_mm_dbsad_epu8(simde_mm256_dbsad_epu8_a_.m128i[1], simde_mm256_dbsad_epu8_b_.m128i[1], imm8); \ + \ + simde__m256i_from_private(simde_mm256_dbsad_epu8_a_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256i + simde_mm256_dbsad_epu8_internal_ (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + uint8_t a1 SIMDE_VECTOR(32) = + SIMDE_SHUFFLE_VECTOR_( + 8, 32, a_.u8, a_.u8, + 0, 1, 0, 1, + 4, 5, 4, 5, + 8, 9, 8, 9, + 12, 13, 12, 13, + 16, 17, 16, 17, + 20, 21, 20, 21, + 24, 25, 24, 25, + 28, 29, 28, 29); + uint8_t b1 SIMDE_VECTOR(32) = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, b_.u8, b_.u8, + 0, 1, 1, 2, + 2, 3, 3, 4, + 8, 9, 9, 10, + 10, 11, 11, 12, + 16, 17, 17, 18, + 18, 19, 19, 20, + 24, 25, 25, 26, + 26, 27, 27, 28); + + __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1); + __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask)); + + r_.u16 = + __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30), __typeof__(r_.u16)) + + __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31), __typeof__(r_.u16)); + + uint8_t a2 
SIMDE_VECTOR(32) = + SIMDE_SHUFFLE_VECTOR_( + 8, 32, a_.u8, a_.u8, + 2, 3, 2, 3, + 6, 7, 6, 7, + 10, 11, 10, 11, + 14, 15, 14, 15, + 18, 19, 18, 19, + 22, 23, 22, 23, + 26, 27, 26, 27, + 30, 31, 30, 31); + uint8_t b2 SIMDE_VECTOR(32) = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, b_.u8, b_.u8, + 2, 3, 3, 4, + 4, 5, 5, 6, + 10, 11, 11, 12, + 12, 13, 13, 14, + 18, 19, 19, 20, + 20, 21, 21, 22, + 26, 27, 27, 28, + 28, 29, 29, 30); + + __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2); + __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask)); + + r_.u16 += + __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30), __typeof__(r_.u16)) + + __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31), __typeof__(r_.u16)); + #else + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = 0; + for (size_t j = 0 ; j < 4 ; j++) { + uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(((i << 1) & 12) | ((i & 8) << 1)) + j]); + uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8) | ((i & 8) << 1)) + j]); + r_.u16[i] += (A < B) ? (B - A) : (A - B); + } + } + #endif + + return simde__m256i_from_private(r_); + } + #define simde_mm256_dbsad_epu8(a, b, imm8) simde_mm256_dbsad_epu8_internal_((a), simde_mm256_shuffle_epi32(b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_dbsad_epu8 + #define _mm256_dbsad_epu8(a, b, imm8) simde_mm256_dbsad_epu8(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_dbsad_epu8(src, k, a, b, imm8) _mm256_mask_dbsad_epu8((src), (k), (a), (b), (imm8)) +#else + #define simde_mm256_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm256_mask_mov_epi16(src, k, simde_mm256_dbsad_epu8(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_dbsad_epu8 + #define _mm256_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm256_mask_dbsad_epu8(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_dbsad_epu8(k, a, b, imm8) _mm256_maskz_dbsad_epu8((k), (a), (b), (imm8)) +#else + #define simde_mm256_maskz_dbsad_epu8(k, a, b, imm8) simde_mm256_maskz_mov_epi16(k, simde_mm256_dbsad_epu8(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_dbsad_epu8 + #define _mm256_maskz_dbsad_epu8(k, a, b, imm8) simde_mm256_maskz_dbsad_epu8(k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_dbsad_epu8(a, b, imm8) _mm512_dbsad_epu8((a), (b), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_dbsad_epu8(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512i_private \ + simde_mm512_dbsad_epu8_a_ = simde__m512i_to_private(a), \ + simde_mm512_dbsad_epu8_b_ = simde__m512i_to_private(b); \ + \ + simde_mm512_dbsad_epu8_a_.m256i[0] = simde_mm256_dbsad_epu8(simde_mm512_dbsad_epu8_a_.m256i[0], simde_mm512_dbsad_epu8_b_.m256i[0], imm8); \ + simde_mm512_dbsad_epu8_a_.m256i[1] = simde_mm256_dbsad_epu8(simde_mm512_dbsad_epu8_a_.m256i[1], simde_mm512_dbsad_epu8_b_.m256i[1], imm8); \ + \ + 
simde__m512i_from_private(simde_mm512_dbsad_epu8_a_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512i + simde_mm512_dbsad_epu8_internal_ (simde__m512i a, simde__m512i b) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + uint8_t a1 SIMDE_VECTOR(64) = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, a_.u8, a_.u8, + 0, 1, 0, 1, + 4, 5, 4, 5, + 8, 9, 8, 9, + 12, 13, 12, 13, + 16, 17, 16, 17, + 20, 21, 20, 21, + 24, 25, 24, 25, + 28, 29, 28, 29, + 32, 33, 32, 33, + 36, 37, 36, 37, + 40, 41, 40, 41, + 44, 45, 44, 45, + 48, 49, 48, 49, + 52, 53, 52, 53, + 56, 57, 56, 57, + 60, 61, 60, 61); + uint8_t b1 SIMDE_VECTOR(64) = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, b_.u8, b_.u8, + 0, 1, 1, 2, + 2, 3, 3, 4, + 8, 9, 9, 10, + 10, 11, 11, 12, + 16, 17, 17, 18, + 18, 19, 19, 20, + 24, 25, 25, 26, + 26, 27, 27, 28, + 32, 33, 33, 34, + 34, 35, 35, 36, + 40, 41, 41, 42, + 42, 43, 43, 44, + 48, 49, 49, 50, + 50, 51, 51, 52, + 56, 57, 57, 58, + 58, 59, 59, 60); + + __typeof__(r_.u8) abd1_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd1_mask), a1 < b1); + __typeof__(r_.u8) abd1 = (((b1 - a1) & abd1_mask) | ((a1 - b1) & ~abd1_mask)); + + r_.u16 = + __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62), __typeof__(r_.u16)) + + __builtin_convertvector(__builtin_shufflevector(abd1, abd1, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63), __typeof__(r_.u16)); + + uint8_t a2 SIMDE_VECTOR(64) = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, a_.u8, a_.u8, + 2, 3, 2, 3, + 6, 7, 6, 7, + 10, 11, 10, 11, + 14, 15, 14, 15, + 18, 19, 18, 19, + 22, 23, 22, 23, + 26, 27, 26, 27, + 30, 31, 30, 31, + 34, 35, 34, 35, + 38, 39, 38, 39, + 42, 43, 42, 43, + 46, 47, 46, 47, + 50, 51, 50, 51, + 54, 55, 54, 55, + 58, 59, 58, 59, + 62, 63, 62, 63); + uint8_t b2 SIMDE_VECTOR(64) = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, b_.u8, b_.u8, + 2, 3, 3, 4, + 4, 5, 5, 6, + 10, 11, 11, 12, + 12, 13, 13, 14, + 18, 19, 19, 20, + 20, 21, 21, 22, + 26, 27, 27, 28, + 28, 29, 29, 30, + 34, 35, 35, 36, + 36, 37, 37, 38, + 42, 43, 43, 44, + 44, 45, 45, 46, + 50, 51, 51, 52, + 52, 53, 53, 54, + 58, 59, 59, 60, + 60, 61, 61, 62); + + __typeof__(r_.u8) abd2_mask = HEDLEY_REINTERPRET_CAST(__typeof__(abd2_mask), a2 < b2); + __typeof__(r_.u8) abd2 = (((b2 - a2) & abd2_mask) | ((a2 - b2) & ~abd2_mask)); + + r_.u16 += + __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62), __typeof__(r_.u16)) + + __builtin_convertvector(__builtin_shufflevector(abd2, abd2, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63), __typeof__(r_.u16)); + #else + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = 0; + for (size_t j = 0 ; j < 4 ; j++) { + uint16_t A = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(((i << 1) & 12) | ((i & 8) << 1) | ((i & 16) << 1)) + j]); + uint16_t B = HEDLEY_STATIC_CAST(uint16_t, b_.u8[((i & 3) | ((i << 1) & 8) | ((i & 8) << 1) | ((i & 16) << 1)) + j]); + r_.u16[i] += (A < B) ? 
(B - A) : (A - B); + } + } + #endif + + return simde__m512i_from_private(r_); + } + #define simde_mm512_dbsad_epu8(a, b, imm8) simde_mm512_dbsad_epu8_internal_((a), simde_mm512_castps_si512(simde_mm512_shuffle_ps(simde_mm512_castsi512_ps(b), simde_mm512_castsi512_ps(b), imm8))) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_dbsad_epu8 + #define _mm512_dbsad_epu8(a, b, imm8) simde_mm512_dbsad_epu8(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_mask_dbsad_epu8(src, k, a, b, imm8) _mm512_mask_dbsad_epu8((src), (k), (a), (b), (imm8)) +#else + #define simde_mm512_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm512_mask_mov_epi16(src, k, simde_mm512_dbsad_epu8(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_dbsad_epu8 + #define _mm512_mask_dbsad_epu8(src, k, a, b, imm8) simde_mm512_mask_dbsad_epu8(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_maskz_dbsad_epu8(k, a, b, imm8) _mm512_maskz_dbsad_epu8((k), (a), (b), (imm8)) +#else + #define simde_mm512_maskz_dbsad_epu8(k, a, b, imm8) simde_mm512_maskz_mov_epi16(k, simde_mm512_dbsad_epu8(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_dbsad_epu8 + #define _mm512_maskz_dbsad_epu8(k, a, b, imm8) simde_mm512_maskz_dbsad_epu8(k, a, b, imm8) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DBSAD_H) */ +/* :: End simde/x86/avx512/dbsad.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/div.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_DIV_H) +#define SIMDE_X86_AVX512_DIV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_div_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_ps + #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_ps + #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_div_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_div_ps + #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_div_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_pd + #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_pd + 
#define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_div_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_div_pd + #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DIV_H) */ +/* :: End simde/x86/avx512/div.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/dpbf16.h :: */ +#if !defined(SIMDE_X86_AVX512_DPBF16_H) +#define SIMDE_X86_AVX512_DPBF16_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dpbf16_ps (simde__m128 src, simde__m128bh a, simde__m128bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_dpbf16_ps(src, a, b); + #else + simde__m128_private + src_ = simde__m128_to_private(src); + simde__m128bh_private + a_ = simde__m128bh_to_private(a), + b_ = simde__m128bh_to_private(b); + + #if ! ( defined(SIMDE_ARCH_X86) && defined(HEDLEY_GCC_VERSION) ) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + uint32_t x1 SIMDE_VECTOR(32); + uint32_t x2 SIMDE_VECTOR(32); + simde__m128_private + r1_[2], + r2_[2]; + + a_.u16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 16, + a_.u16, a_.u16, + 0, 2, 4, 6, + 1, 3, 5, 7 + ); + b_.u16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 16, + b_.u16, b_.u16, + 0, 2, 4, 6, + 1, 3, 5, 7 + ); + + SIMDE_CONVERT_VECTOR_(x1, a_.u16); + SIMDE_CONVERT_VECTOR_(x2, b_.u16); + + x1 <<= 16; + x2 <<= 16; + + simde_memcpy(&r1_, &x1, sizeof(x1)); + simde_memcpy(&r2_, &x2, sizeof(x2)); + + src_.f32 += + HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r1_[0].u32) * HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r2_[0].u32) + + HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r1_[1].u32) * HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r2_[1].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + src_.f32[i / 2] += (simde_uint32_as_float32(HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) << 16) * simde_uint32_as_float32(HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) << 16)); + } + #endif + + return simde__m128_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_dpbf16_ps + #define _mm_dpbf16_ps(src, a, b) simde_mm_dpbf16_ps(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_dpbf16_ps (simde__m128 src, simde__mmask8 k, simde__m128bh a, simde__m128bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_dpbf16_ps(src, k, a, b); + #else + return simde_mm_mask_mov_ps(src, k, simde_mm_dpbf16_ps(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_dpbf16_ps + #define _mm_mask_dpbf16_ps(src, k, a, b) 
simde_mm_mask_dpbf16_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_dpbf16_ps (simde__mmask8 k, simde__m128 src, simde__m128bh a, simde__m128bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_dpbf16_ps(k, src, a, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_dpbf16_ps(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_dpbf16_ps + #define _mm_maskz_dpbf16_ps(k, src, a, b) simde_mm_maskz_dpbf16_ps(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_dpbf16_ps (simde__m256 src, simde__m256bh a, simde__m256bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_dpbf16_ps(src, a, b); + #else + simde__m256_private + src_ = simde__m256_to_private(src); + simde__m256bh_private + a_ = simde__m256bh_to_private(a), + b_ = simde__m256bh_to_private(b); + + #if ! ( defined(SIMDE_ARCH_X86) && defined(HEDLEY_GCC_VERSION) ) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + uint32_t x1 SIMDE_VECTOR(64); + uint32_t x2 SIMDE_VECTOR(64); + simde__m256_private + r1_[2], + r2_[2]; + + a_.u16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 32, + a_.u16, a_.u16, + 0, 2, 4, 6, 8, 10, 12, 14, + 1, 3, 5, 7, 9, 11, 13, 15 + ); + b_.u16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 32, + b_.u16, b_.u16, + 0, 2, 4, 6, 8, 10, 12, 14, + 1, 3, 5, 7, 9, 11, 13, 15 + ); + + SIMDE_CONVERT_VECTOR_(x1, a_.u16); + SIMDE_CONVERT_VECTOR_(x2, b_.u16); + + x1 <<= 16; + x2 <<= 16; + + simde_memcpy(&r1_, &x1, sizeof(x1)); + simde_memcpy(&r2_, &x2, sizeof(x2)); + + src_.f32 += + HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r1_[0].u32) * HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r2_[0].u32) + + HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r1_[1].u32) * HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r2_[1].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + src_.f32[i / 2] += (simde_uint32_as_float32(HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) << 16) * simde_uint32_as_float32(HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) << 16)); + } + #endif + + return simde__m256_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_dpbf16_ps + #define _mm256_dpbf16_ps(src, a, b) simde_mm256_dpbf16_ps(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_dpbf16_ps (simde__m256 src, simde__mmask8 k, simde__m256bh a, simde__m256bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_dpbf16_ps(src, k, a, b); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_dpbf16_ps(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_dpbf16_ps + #define _mm256_mask_dpbf16_ps(src, k, a, b) simde_mm256_mask_dpbf16_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_dpbf16_ps (simde__mmask8 k, simde__m256 src, simde__m256bh a, simde__m256bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_dpbf16_ps(k, src, a, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_dpbf16_ps(src, a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_dpbf16_ps + #define _mm256_maskz_dpbf16_ps(k, src, a, b) simde_mm256_maskz_dpbf16_ps(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_dpbf16_ps (simde__m512 src, simde__m512bh a, simde__m512bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + return _mm512_dpbf16_ps(src, a, b); + #else + simde__m512_private + src_ = simde__m512_to_private(src); + simde__m512bh_private + a_ = simde__m512bh_to_private(a), + b_ = simde__m512bh_to_private(b); + + #if ! ( defined(SIMDE_ARCH_X86) && defined(HEDLEY_GCC_VERSION) ) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + uint32_t x1 SIMDE_VECTOR(128); + uint32_t x2 SIMDE_VECTOR(128); + simde__m512_private + r1_[2], + r2_[2]; + + a_.u16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 64, + a_.u16, a_.u16, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 + ); + b_.u16 = + SIMDE_SHUFFLE_VECTOR_( + 16, 64, + b_.u16, b_.u16, + 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, + 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 + ); + + SIMDE_CONVERT_VECTOR_(x1, a_.u16); + SIMDE_CONVERT_VECTOR_(x2, b_.u16); + + x1 <<= 16; + x2 <<= 16; + + simde_memcpy(&r1_, &x1, sizeof(x1)); + simde_memcpy(&r2_, &x2, sizeof(x2)); + + src_.f32 += + HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r1_[0].u32) * HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r2_[0].u32) + + HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r1_[1].u32) * HEDLEY_REINTERPRET_CAST(__typeof__(a_.f32), r2_[1].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + src_.f32[i / 2] += (simde_uint32_as_float32(HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) << 16) * simde_uint32_as_float32(HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) << 16)); + } + #endif + + return simde__m512_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) + #undef _mm512_dpbf16_ps + #define _mm512_dpbf16_ps(src, a, b) simde_mm512_dpbf16_ps(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_dpbf16_ps (simde__m512 src, simde__mmask16 k, simde__m512bh a, simde__m512bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + return _mm512_mask_dpbf16_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_dpbf16_ps(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_dpbf16_ps + #define _mm512_mask_dpbf16_ps(src, k, a, b) simde_mm512_mask_dpbf16_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_dpbf16_ps (simde__mmask16 k, simde__m512 src, simde__m512bh a, simde__m512bh b) { + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + return _mm512_maskz_dpbf16_ps(k, src, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_dpbf16_ps(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_dpbf16_ps + #define _mm512_maskz_dpbf16_ps(k, src, a, b) simde_mm512_maskz_dpbf16_ps(k, src, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DPBF16_H) */ +/* :: End simde/x86/avx512/dpbf16.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/dpbusd.h :: */ +#if !defined(SIMDE_X86_AVX512_DPBUSD_H) +#define 
SIMDE_X86_AVX512_DPBUSD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_dpbusd_epi32(simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_dpbusd_epi32(src, a, b); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t x1_ SIMDE_VECTOR(64); + int32_t x2_ SIMDE_VECTOR(64); + simde__m128i_private + r1_[4], + r2_[4]; + + a_.u8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, + a_.u8, a_.u8, + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15 + ); + b_.i8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, + b_.i8, b_.i8, + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.u8); + SIMDE_CONVERT_VECTOR_(x2_, b_.i8); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + src_.i32 += + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[0].u32) * r2_[0].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[1].u32) * r2_[1].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[2].u32) * r2_[2].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[3].u32) * r2_[3].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + src_.i32[i / 4] += HEDLEY_STATIC_CAST(uint16_t, a_.u8[i]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); + } + #endif + + return simde__m128i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_dpbusd_epi32 + #define _mm_dpbusd_epi32(src, a, b) simde_mm_dpbusd_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_dpbusd_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_mask_dpbusd_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_dpbusd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_dpbusd_epi32 + #define _mm_mask_dpbusd_epi32(src, k, a, b) simde_mm_mask_dpbusd_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_dpbusd_epi32(simde__mmask8 k, simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_maskz_dpbusd_epi32(k, src, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_dpbusd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_dpbusd_epi32 + #define _mm_maskz_dpbusd_epi32(k, src, a, b) simde_mm_maskz_dpbusd_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_dpbusd_epi32(simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_dpbusd_epi32(src, a, b); + #else + simde__m256i_private + src_ 
= simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + src_.m128i[0] = simde_mm_dpbusd_epi32(src_.m128i[0], a_.m128i[0], b_.m128i[0]); + src_.m128i[1] = simde_mm_dpbusd_epi32(src_.m128i[1], a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t x1_ SIMDE_VECTOR(128); + int32_t x2_ SIMDE_VECTOR(128); + simde__m256i_private + r1_[4], + r2_[4]; + + a_.u8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 32, + a_.u8, a_.u8, + 0, 4, 8, 12, 16, 20, 24, 28, + 1, 5, 9, 13, 17, 21, 25, 29, + 2, 6, 10, 14, 18, 22, 26, 30, + 3, 7, 11, 15, 19, 23, 27, 31 + ); + b_.i8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 32, + b_.i8, b_.i8, + 0, 4, 8, 12, 16, 20, 24, 28, + 1, 5, 9, 13, 17, 21, 25, 29, + 2, 6, 10, 14, 18, 22, 26, 30, + 3, 7, 11, 15, 19, 23, 27, 31 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.u8); + SIMDE_CONVERT_VECTOR_(x2_, b_.i8); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + src_.i32 += + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[0].u32) * r2_[0].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[1].u32) * r2_[1].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[2].u32) * r2_[2].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[3].u32) * r2_[3].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + src_.i32[i / 4] += HEDLEY_STATIC_CAST(uint16_t, a_.u8[i]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); + } + #endif + + return simde__m256i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_dpbusd_epi32 + #define _mm256_dpbusd_epi32(src, a, b) simde_mm256_dpbusd_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_dpbusd_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_mask_dpbusd_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_dpbusd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_dpbusd_epi32 + #define _mm256_mask_dpbusd_epi32(src, k, a, b) simde_mm256_mask_dpbusd_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_dpbusd_epi32(simde__mmask8 k, simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_maskz_dpbusd_epi32(k, src, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_dpbusd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_dpbusd_epi32 + #define _mm256_maskz_dpbusd_epi32(k, src, a, b) simde_mm256_maskz_dpbusd_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_dpbusd_epi32(simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_dpbusd_epi32(src, a, b); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + src_.m256i[0] = simde_mm256_dpbusd_epi32(src_.m256i[0], a_.m256i[0], b_.m256i[0]); + 
src_.m256i[1] = simde_mm256_dpbusd_epi32(src_.m256i[1], a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t x1_ SIMDE_VECTOR(256); + int32_t x2_ SIMDE_VECTOR(256); + simde__m512i_private + r1_[4], + r2_[4]; + + a_.u8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, + a_.u8, a_.u8, + 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, + 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, + 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, + 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63 + ); + b_.i8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, + b_.i8, b_.i8, + 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, + 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, + 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, + 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.u8); + SIMDE_CONVERT_VECTOR_(x2_, b_.i8); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + src_.i32 += + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[0].u32) * r2_[0].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[1].u32) * r2_[1].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[2].u32) * r2_[2].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[3].u32) * r2_[3].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + src_.i32[i / 4] += HEDLEY_STATIC_CAST(uint16_t, a_.u8[i]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[i]); + } + #endif + + return simde__m512i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_dpbusd_epi32 + #define _mm512_dpbusd_epi32(src, a, b) simde_mm512_dpbusd_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_dpbusd_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_mask_dpbusd_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_dpbusd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_dpbusd_epi32 + #define _mm512_mask_dpbusd_epi32(src, k, a, b) simde_mm512_mask_dpbusd_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_dpbusd_epi32(simde__mmask16 k, simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_maskz_dpbusd_epi32(k, src, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_dpbusd_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_dpbusd_epi32 + #define _mm512_maskz_dpbusd_epi32(k, src, a, b) simde_mm512_maskz_dpbusd_epi32(k, src, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DPBUSD_H) */ +/* :: End simde/x86/avx512/dpbusd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/dpbusds.h :: */ +#if !defined(SIMDE_X86_AVX512_DPBUSDS_H) +#define SIMDE_X86_AVX512_DPBUSDS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m128i +simde_mm_dpbusds_epi32(simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_dpbusds_epi32(src, a, b); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + uint32_t x1_ SIMDE_VECTOR(64); + int32_t x2_ SIMDE_VECTOR(64); + simde__m128i_private + r1_[4], + r2_[4]; + + a_.u8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, + a_.u8, a_.u8, + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15 + ); + b_.i8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 16, + b_.i8, b_.i8, + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.u8); + SIMDE_CONVERT_VECTOR_(x2_, b_.i8); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + uint32_t au SIMDE_VECTOR(16) = + HEDLEY_REINTERPRET_CAST( + __typeof__(au), + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[0].u32) * r2_[0].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[1].u32) * r2_[1].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[2].u32) * r2_[2].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[3].u32) * r2_[3].i32) + ); + uint32_t bu SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), src_.i32); + uint32_t ru SIMDE_VECTOR(16) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(16) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + src_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0]) / 4) ; i++) { + src_.i32[i] = + simde_math_adds_i32( + src_.i32[i], + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) ]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 1]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 2]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 2]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 3]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 3]) + ); + } + #endif + + return simde__m128i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_dpbusds_epi32 + #define _mm_dpbusds_epi32(src, a, b) simde_mm_dpbusds_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_dpbusds_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_mask_dpbusds_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_dpbusds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_dpbusds_epi32 + #define _mm_mask_dpbusds_epi32(src, k, a, b) simde_mm_mask_dpbusds_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_dpbusds_epi32(simde__mmask8 k, simde__m128i src, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm_maskz_dpbusds_epi32(k, src, a, b); + #else + return simde_mm_maskz_mov_epi32(k, 
simde_mm_dpbusds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_dpbusds_epi32 + #define _mm_maskz_dpbusds_epi32(k, src, a, b) simde_mm_maskz_dpbusds_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_dpbusds_epi32(simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_dpbusds_epi32(src, a, b); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + src_.m128i[0] = simde_mm_dpbusds_epi32(src_.m128i[0], a_.m128i[0], b_.m128i[0]); + src_.m128i[1] = simde_mm_dpbusds_epi32(src_.m128i[1], a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + uint32_t x1_ SIMDE_VECTOR(128); + int32_t x2_ SIMDE_VECTOR(128); + simde__m256i_private + r1_[4], + r2_[4]; + + a_.u8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 32, + a_.u8, a_.u8, + 0, 4, 8, 12, 16, 20, 24, 28, + 1, 5, 9, 13, 17, 21, 25, 29, + 2, 6, 10, 14, 18, 22, 26, 30, + 3, 7, 11, 15, 19, 23, 27, 31 + ); + b_.i8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 32, + b_.i8, b_.i8, + 0, 4, 8, 12, 16, 20, 24, 28, + 1, 5, 9, 13, 17, 21, 25, 29, + 2, 6, 10, 14, 18, 22, 26, 30, + 3, 7, 11, 15, 19, 23, 27, 31 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.u8); + SIMDE_CONVERT_VECTOR_(x2_, b_.i8); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + uint32_t au SIMDE_VECTOR(32) = + HEDLEY_REINTERPRET_CAST( + __typeof__(au), + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[0].u32) * r2_[0].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[1].u32) * r2_[1].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[2].u32) * r2_[2].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[3].u32) * r2_[3].i32) + ); + uint32_t bu SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), src_.i32); + uint32_t ru SIMDE_VECTOR(32) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(32) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + src_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0]) / 4) ; i++) { + src_.i32[i] = + simde_math_adds_i32( + src_.i32[i], + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) ]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 1]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 2]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 2]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 3]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 3]) + ); + } + #endif + + return simde__m256i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_dpbusds_epi32 + #define _mm256_dpbusds_epi32(src, a, b) simde_mm256_dpbusds_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_dpbusds_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_mask_dpbusds_epi32(src, k, a, 
b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_dpbusds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_dpbusds_epi32 + #define _mm256_mask_dpbusds_epi32(src, k, a, b) simde_mm256_mask_dpbusds_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_dpbusds_epi32(simde__mmask8 k, simde__m256i src, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm256_maskz_dpbusds_epi32(k, src, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_dpbusds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_dpbusds_epi32 + #define _mm256_maskz_dpbusds_epi32(k, src, a, b) simde_mm256_maskz_dpbusds_epi32(k, src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_dpbusds_epi32(simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_dpbusds_epi32(src, a, b); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + src_.m256i[0] = simde_mm256_dpbusds_epi32(src_.m256i[0], a_.m256i[0], b_.m256i[0]); + src_.m256i[1] = simde_mm256_dpbusds_epi32(src_.m256i[1], a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + uint32_t x1_ SIMDE_VECTOR(256); + int32_t x2_ SIMDE_VECTOR(256); + simde__m512i_private + r1_[4], + r2_[4]; + + a_.u8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, + a_.u8, a_.u8, + 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, + 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, + 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, + 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63 + ); + b_.i8 = + SIMDE_SHUFFLE_VECTOR_( + 8, 64, + b_.i8, b_.i8, + 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, + 1, 5, 9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61, + 2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62, + 3, 7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63 + ); + + SIMDE_CONVERT_VECTOR_(x1_, a_.u8); + SIMDE_CONVERT_VECTOR_(x2_, b_.i8); + + simde_memcpy(&r1_, &x1_, sizeof(x1_)); + simde_memcpy(&r2_, &x2_, sizeof(x2_)); + + uint32_t au SIMDE_VECTOR(64) = + HEDLEY_REINTERPRET_CAST( + __typeof__(au), + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[0].u32) * r2_[0].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[1].u32) * r2_[1].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[2].u32) * r2_[2].i32) + + (HEDLEY_REINTERPRET_CAST(__typeof__(a_.i32), r1_[3].u32) * r2_[3].i32) + ); + uint32_t bu SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(bu), src_.i32); + uint32_t ru SIMDE_VECTOR(64) = au + bu; + + au = (au >> 31) + INT32_MAX; + + uint32_t m SIMDE_VECTOR(64) = HEDLEY_REINTERPRET_CAST(__typeof__(m), HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au ^ bu) | ~(bu ^ ru)) < 0); + src_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(src_.i32), (au & ~m) | (ru & m)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0]) / 4) ; i++) { + src_.i32[i] = + simde_math_adds_i32( + src_.i32[i], + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) ]) * 
HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) ]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 1]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 2]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 2]) + + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(4 * i) + 3]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[(4 * i) + 3]) + ); + } + #endif + + return simde__m512i_from_private(src_); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_dpbusds_epi32 + #define _mm512_dpbusds_epi32(src, a, b) simde_mm512_dpbusds_epi32(src, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_dpbusds_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_mask_dpbusds_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_dpbusds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_dpbusds_epi32 + #define _mm512_mask_dpbusds_epi32(src, k, a, b) simde_mm512_mask_dpbusds_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_dpbusds_epi32(simde__mmask16 k, simde__m512i src, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VNNI_NATIVE) + return _mm512_maskz_dpbusds_epi32(k, src, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_dpbusds_epi32(src, a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_dpbusds_epi32 + #define _mm512_maskz_dpbusds_epi32(k, src, a, b) simde_mm512_maskz_dpbusds_epi32(k, src, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DPBUSDS_H) */ +/* :: End simde/x86/avx512/dpbusds.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/expand.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Andrew Rodriguez + * 2021 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_EXPAND_H) +#define SIMDE_X86_AVX512_EXPAND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_expand_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_expand_epi32(src, k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + src_ = simde__m256i_to_private(src); + simde__m256i_private r_; + + size_t src_idx = 0; + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + if (k & (UINT64_C(1) << i)) { + r_.i32[i] = a_.i32[src_idx++]; + } else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_expand_epi32 + #define _mm256_mask_expand_epi32(src, k, a) simde_mm256_mask_expand_epi32((src), (k), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_expand_epi32(simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_expand_epi32(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + size_t src_idx = 0; + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + if (k & (UINT64_C(1) << i)) { + r_.i32[i] = a_.i32[src_idx++]; + } else { + r_.i32[i] = INT32_C(0); + } + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_expand_epi32 + #define _mm256_maskz_expand_epi32(k, a) simde_mm256_maskz_expand_epi32((k), (a)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_EXPAND_H) */ +/* :: End simde/x86/avx512/expand.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/extract.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_EXTRACT_H) +#define SIMDE_X86_AVX512_EXTRACT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm512_extractf32x4_ps (simde__m512 a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512_private a_ = simde__m512_to_private(a); + + /* GCC 6 generates an ICE */ + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(7,0,0) + return a_.m128[imm8 & 3]; + #else + simde__m128_private r_; + const size_t offset = HEDLEY_STATIC_CAST(size_t, imm8 & 3) * (sizeof(r_.f32) / sizeof(r_.f32[0])); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i + offset]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_extractf32x4_ps(a, imm8) _mm512_extractf32x4_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extractf32x4_ps + #define _mm512_extractf32x4_ps(a, imm8) simde_mm512_extractf32x4_ps(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) _mm512_mask_extractf32x4_ps(src, k, a, imm8) +#else + #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm_mask_mov_ps(src, k, simde_mm512_extractf32x4_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extractf32x4_ps + #define _mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) _mm512_maskz_extractf32x4_ps(k, a, imm8) +#else + #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm_maskz_mov_ps(k, simde_mm512_extractf32x4_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extractf32x4_ps + #define _mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm512_maskz_extractf32x4_ps(k, a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm512_extractf32x8_ps (simde__m512 a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512_private a_ = simde__m512_to_private(a); + + return a_.m256[imm8 & 1]; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_extractf32x8_ps(a, imm8) _mm512_extractf32x8_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_extractf32x8_ps + #define _mm512_extractf32x8_ps(a, imm8) simde_mm512_extractf32x8_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm512_extractf64x4_pd (simde__m512d a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + return a_.m256d[imm8 & 1]; +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_extractf64x4_pd(a, imm8) _mm512_extractf64x4_pd(a, imm8) +#endif +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extractf64x4_pd + #define _mm512_extractf64x4_pd(a, imm8) simde_mm512_extractf64x4_pd(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) _mm512_mask_extractf64x4_pd(src, k, a, imm8) +#else + #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm512_extractf64x4_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extractf64x4_pd + #define _mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) _mm512_maskz_extractf64x4_pd(k, a, imm8) +#else + #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm512_extractf64x4_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extractf64x4_pd + #define _mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm512_maskz_extractf64x4_pd(k, a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_extracti32x4_epi32 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + return a_.m128i[imm8 & 3]; +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_extracti32x4_epi32(a, imm8) _mm512_extracti32x4_epi32(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extracti32x4_epi32 + #define _mm512_extracti32x4_epi32(a, imm8) simde_mm512_extracti32x4_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) _mm512_mask_extracti32x4_epi32(src, k, a, imm8) +#else + #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm512_extracti32x4_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extracti32x4_epi32 + #define _mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) _mm512_maskz_extracti32x4_epi32(k, a, imm8) +#else + #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm512_extracti32x4_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extracti32x4_epi32 + #define _mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_extracti64x4_epi64 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + return a_.m256i[imm8 & 1]; +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define 
simde_mm512_extracti64x4_epi64(a, imm8) _mm512_extracti64x4_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extracti64x4_epi64 + #define _mm512_extracti64x4_epi64(a, imm8) simde_mm512_extracti64x4_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) _mm512_mask_extracti64x4_epi64(src, k, a, imm8) +#else + #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm512_extracti64x4_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extracti64x4_epi64 + #define _mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) _mm512_maskz_extracti64x4_epi64(k, a, imm8) +#else + #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm512_extracti64x4_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extracti64x4_epi64 + #define _mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_EXTRACT_H) */ +/* :: End simde/x86/avx512/extract.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fixupimm.h :: */ +#if !defined(SIMDE_X86_AVX512_FIXUPIMM_H) +#define SIMDE_X86_AVX512_FIXUPIMM_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/flushsubnormal.h :: */ +#if !defined(SIMDE_X86_AVX512_FLUSHSUBNORMAL_H) +#define SIMDE_X86_AVX512_FLUSHSUBNORMAL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_flushsubnormal_ps (simde__m128 a) { + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + a_.f32[i] = simde_math_issubnormalf(a_.f32[i]) ? 0 : a_.f32[i]; + } + + return simde__m128_from_private(a_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_flushsubnormal_ps (simde__m256 a) { + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + a_.f32[i] = simde_math_issubnormalf(a_.f32[i]) ? 0 : a_.f32[i]; + } + + return simde__m256_from_private(a_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_flushsubnormal_ps (simde__m512 a) { + simde__m512_private a_ = simde__m512_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + a_.f32[i] = simde_math_issubnormalf(a_.f32[i]) ? 
0 : a_.f32[i]; + } + + return simde__m512_from_private(a_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_flushsubnormal_pd (simde__m128d a) { + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + a_.f64[i] = simde_math_issubnormal(a_.f64[i]) ? 0 : a_.f64[i]; + } + + return simde__m128d_from_private(a_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_flushsubnormal_pd (simde__m256d a) { + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + a_.f64[i] = simde_math_issubnormal(a_.f64[i]) ? 0 : a_.f64[i]; + } + + return simde__m256d_from_private(a_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_flushsubnormal_pd (simde__m512d a) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + a_.f64[i] = simde_math_issubnormal(a_.f64[i]) ? 0 : a_.f64[i]; + } + + return simde__m512d_from_private(a_); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FLUSHSUBNORMAL_H) */ +/* :: End simde/x86/avx512/flushsubnormal.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fixupimm_ps (simde__m128 a, simde__m128 b, simde__m128i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + s_ = simde__m128_to_private(simde_x_mm_flushsubnormal_ps(b)); + simde__m128i_private c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t select = 1; + switch (simde_math_fpclassifyf(s_.f32[i])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f32[i] < SIMDE_FLOAT32_C(0.0)) ? 6 : (s_.f32[i] == SIMDE_FLOAT32_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f32[i] > SIMDE_FLOAT32_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i32[i] >> (select << 2)) & 15)) { + case 0: + r_.f32[i] = a_.f32[i]; + break; + case 1: + r_.f32[i] = b_.f32[i]; + break; + case 2: + r_.f32[i] = SIMDE_MATH_NANF; + break; + case 3: + r_.f32[i] = -SIMDE_MATH_NANF; + break; + case 4: + r_.f32[i] = -SIMDE_MATH_INFINITYF; + break; + case 5: + r_.f32[i] = SIMDE_MATH_INFINITYF; + break; + case 6: + r_.f32[i] = s_.f32[i] < SIMDE_FLOAT32_C(0.0) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + break; + case 7: + r_.f32[i] = SIMDE_FLOAT32_C(-0.0); + break; + case 8: + r_.f32[i] = SIMDE_FLOAT32_C(0.0); + break; + case 9: + r_.f32[i] = SIMDE_FLOAT32_C(-1.0); + break; + case 10: + r_.f32[i] = SIMDE_FLOAT32_C(1.0); + break; + case 11: + r_.f32[i] = SIMDE_FLOAT32_C(0.5); + break; + case 12: + r_.f32[i] = SIMDE_FLOAT32_C(90.0); + break; + case 13: + r_.f32[i] = SIMDE_MATH_PIF / 2; + break; + case 14: + r_.f32[i] = SIMDE_MATH_FLT_MAX; + break; + case 15: + r_.f32[i] = -SIMDE_MATH_FLT_MAX; + break; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_fixupimm_ps(a, b, c, imm8) _mm_fixupimm_ps(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_fixupimm_ps + #define _mm_fixupimm_ps(a, b, c, imm8) simde_mm_fixupimm_ps(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_fixupimm_ps(a, k, b, c, imm8) _mm_mask_fixupimm_ps(a, k, b, c, imm8) +#else + #define simde_mm_mask_fixupimm_ps(a, k, b, c, imm8) simde_mm_mask_mov_ps(a, k, simde_mm_fixupimm_ps(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fixupimm_ps + #define _mm_mask_fixupimm_ps(a, k, b, c, imm8) simde_mm_mask_fixupimm_ps(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_fixupimm_ps(k, a, b, c, imm8) _mm_maskz_fixupimm_ps(k, a, b, c, imm8) +#else + #define simde_mm_maskz_fixupimm_ps(k, a, b, c, imm8) simde_mm_maskz_mov_ps(k, simde_mm_fixupimm_ps(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fixupimm_ps + #define _mm_maskz_fixupimm_ps(k, a, b, c, imm8) simde_mm_maskz_fixupimm_ps(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fixupimm_ps (simde__m256 a, simde__m256 b, simde__m256i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + s_ = simde__m256_to_private(simde_x_mm256_flushsubnormal_ps(b)); + simde__m256i_private c_ = simde__m256i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t select = 1; + switch (simde_math_fpclassifyf(s_.f32[i])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f32[i] < SIMDE_FLOAT32_C(0.0)) ? 6 : (s_.f32[i] == SIMDE_FLOAT32_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f32[i] > SIMDE_FLOAT32_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i32[i] >> (select << 2)) & 15)) { + case 0: + r_.f32[i] = a_.f32[i]; + break; + case 1: + r_.f32[i] = b_.f32[i]; + break; + case 2: + r_.f32[i] = SIMDE_MATH_NANF; + break; + case 3: + r_.f32[i] = -SIMDE_MATH_NANF; + break; + case 4: + r_.f32[i] = -SIMDE_MATH_INFINITYF; + break; + case 5: + r_.f32[i] = SIMDE_MATH_INFINITYF; + break; + case 6: + r_.f32[i] = s_.f32[i] < SIMDE_FLOAT32_C(0.0) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + break; + case 7: + r_.f32[i] = SIMDE_FLOAT32_C(-0.0); + break; + case 8: + r_.f32[i] = SIMDE_FLOAT32_C(0.0); + break; + case 9: + r_.f32[i] = SIMDE_FLOAT32_C(-1.0); + break; + case 10: + r_.f32[i] = SIMDE_FLOAT32_C(1.0); + break; + case 11: + r_.f32[i] = SIMDE_FLOAT32_C(0.5); + break; + case 12: + r_.f32[i] = SIMDE_FLOAT32_C(90.0); + break; + case 13: + r_.f32[i] = SIMDE_MATH_PIF / 2; + break; + case 14: + r_.f32[i] = SIMDE_MATH_FLT_MAX; + break; + case 15: + r_.f32[i] = -SIMDE_MATH_FLT_MAX; + break; + } + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_fixupimm_ps(a, b, c, imm8) _mm256_fixupimm_ps(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_fixupimm_ps + #define _mm256_fixupimm_ps(a, b, c, imm8) simde_mm256_fixupimm_ps(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_fixupimm_ps(a, k, b, c, imm8) _mm256_mask_fixupimm_ps(a, k, b, c, imm8) +#else + #define simde_mm256_mask_fixupimm_ps(a, k, b, c, imm8) simde_mm256_mask_mov_ps(a, k, simde_mm256_fixupimm_ps(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_fixupimm_ps + #define _mm256_mask_fixupimm_ps(a, k, b, c, imm8) simde_mm256_mask_fixupimm_ps(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_fixupimm_ps(k, a, b, c, imm8) _mm256_maskz_fixupimm_ps(k, a, b, c, imm8) +#else + #define simde_mm256_maskz_fixupimm_ps(k, a, b, c, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_fixupimm_ps(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_fixupimm_ps + #define _mm256_maskz_fixupimm_ps(k, a, b, c, imm8) simde_mm256_maskz_fixupimm_ps(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fixupimm_ps (simde__m512 a, simde__m512 b, simde__m512i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b), + s_ = simde__m512_to_private(simde_x_mm512_flushsubnormal_ps(b)); + simde__m512i_private c_ = simde__m512i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t select = 1; + switch (simde_math_fpclassifyf(s_.f32[i])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f32[i] < SIMDE_FLOAT32_C(0.0)) ? 6 : (s_.f32[i] == SIMDE_FLOAT32_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f32[i] > SIMDE_FLOAT32_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i32[i] >> (select << 2)) & 15)) { + case 0: + r_.f32[i] = a_.f32[i]; + break; + case 1: + r_.f32[i] = b_.f32[i]; + break; + case 2: + r_.f32[i] = SIMDE_MATH_NANF; + break; + case 3: + r_.f32[i] = -SIMDE_MATH_NANF; + break; + case 4: + r_.f32[i] = -SIMDE_MATH_INFINITYF; + break; + case 5: + r_.f32[i] = SIMDE_MATH_INFINITYF; + break; + case 6: + r_.f32[i] = s_.f32[i] < SIMDE_FLOAT32_C(0.0) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + break; + case 7: + r_.f32[i] = SIMDE_FLOAT32_C(-0.0); + break; + case 8: + r_.f32[i] = SIMDE_FLOAT32_C(0.0); + break; + case 9: + r_.f32[i] = SIMDE_FLOAT32_C(-1.0); + break; + case 10: + r_.f32[i] = SIMDE_FLOAT32_C(1.0); + break; + case 11: + r_.f32[i] = SIMDE_FLOAT32_C(0.5); + break; + case 12: + r_.f32[i] = SIMDE_FLOAT32_C(90.0); + break; + case 13: + r_.f32[i] = SIMDE_MATH_PIF / 2; + break; + case 14: + r_.f32[i] = SIMDE_MATH_FLT_MAX; + break; + case 15: + r_.f32[i] = -SIMDE_MATH_FLT_MAX; + break; + } + } + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_fixupimm_ps(a, b, c, imm8) _mm512_fixupimm_ps(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fixupimm_ps + #define _mm512_fixupimm_ps(a, b, c, imm8) simde_mm512_fixupimm_ps(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) _mm512_mask_fixupimm_ps(a, k, b, c, imm8) +#else + #define simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) simde_mm512_mask_mov_ps(a, k, simde_mm512_fixupimm_ps(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_fixupimm_ps + #define _mm512_mask_fixupimm_ps(a, k, b, c, imm8) simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) _mm512_maskz_fixupimm_ps(k, a, b, c, imm8) +#else + #define simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_fixupimm_ps(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_fixupimm_ps + #define _mm512_maskz_fixupimm_ps(k, a, b, c, imm8) simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fixupimm_ss (simde__m128 a, simde__m128 b, simde__m128i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + s_ = simde__m128_to_private(simde_x_mm_flushsubnormal_ps(b)); + simde__m128i_private c_ = simde__m128i_to_private(c); + + int32_t select = 1; + switch (simde_math_fpclassifyf(s_.f32[0])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f32[0] < SIMDE_FLOAT32_C(0.0)) ? 6 : (s_.f32[0] == SIMDE_FLOAT32_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f32[0] > SIMDE_FLOAT32_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i32[0] >> (select << 2)) & 15)) { + case 0: + b_.f32[0] = a_.f32[0]; + break; + case 2: + b_.f32[0] = SIMDE_MATH_NANF; + break; + case 3: + b_.f32[0] = -SIMDE_MATH_NANF; + break; + case 4: + b_.f32[0] = -SIMDE_MATH_INFINITYF; + break; + case 5: + b_.f32[0] = SIMDE_MATH_INFINITYF; + break; + case 6: + b_.f32[0] = s_.f32[0] < SIMDE_FLOAT32_C(0.0) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + break; + case 7: + b_.f32[0] = SIMDE_FLOAT32_C(-0.0); + break; + case 8: + b_.f32[0] = SIMDE_FLOAT32_C(0.0); + break; + case 9: + b_.f32[0] = SIMDE_FLOAT32_C(-1.0); + break; + case 10: + b_.f32[0] = SIMDE_FLOAT32_C(1.0); + break; + case 11: + b_.f32[0] = SIMDE_FLOAT32_C(0.5); + break; + case 12: + b_.f32[0] = SIMDE_FLOAT32_C(90.0); + break; + case 13: + b_.f32[0] = SIMDE_MATH_PIF / 2; + break; + case 14: + b_.f32[0] = SIMDE_MATH_FLT_MAX; + break; + case 15: + b_.f32[0] = -SIMDE_MATH_FLT_MAX; + break; + } + + return simde__m128_from_private(b_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_fixupimm_ss(a, b, c, imm8) _mm_fixupimm_ss(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_fixupimm_ss + #define _mm_fixupimm_ss(a, b, c, imm8) simde_mm_fixupimm_ss(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) _mm_mask_fixupimm_ss(a, k, b, c, imm8) +#else + #define simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) simde_mm_mask_mov_ps(a, ((k) | 14), simde_mm_fixupimm_ss(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fixupimm_ss + #define _mm_mask_fixupimm_ss(a, k, b, c, imm8) simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) _mm_maskz_fixupimm_ss(k, a, b, c, imm8) +#else + #define simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) simde_mm_maskz_mov_ps(((k) | 14), simde_mm_fixupimm_ss(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fixupimm_ss + #define _mm_maskz_fixupimm_ss(k, a, b, c, imm8) simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fixupimm_pd (simde__m128d a, simde__m128d b, simde__m128i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + s_ = simde__m128d_to_private(simde_x_mm_flushsubnormal_pd(b)); + simde__m128i_private c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + int32_t select = 1; + switch (simde_math_fpclassify(s_.f64[i])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f64[i] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[i] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f64[i] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i64[i] >> (select << 2)) & 15)) { + case 0: + r_.f64[i] = a_.f64[i]; + break; + case 1: + r_.f64[i] = b_.f64[i]; + break; + case 2: + r_.f64[i] = SIMDE_MATH_NAN; + break; + case 3: + r_.f64[i] = -SIMDE_MATH_NAN; + break; + case 4: + r_.f64[i] = -SIMDE_MATH_INFINITY; + break; + case 5: + r_.f64[i] = SIMDE_MATH_INFINITY; + break; + case 6: + r_.f64[i] = s_.f64[i] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; + break; + case 7: + r_.f64[i] = SIMDE_FLOAT64_C(-0.0); + break; + case 8: + r_.f64[i] = SIMDE_FLOAT64_C(0.0); + break; + case 9: + r_.f64[i] = SIMDE_FLOAT64_C(-1.0); + break; + case 10: + r_.f64[i] = SIMDE_FLOAT64_C(1.0); + break; + case 11: + r_.f64[i] = SIMDE_FLOAT64_C(0.5); + break; + case 12: + r_.f64[i] = SIMDE_FLOAT64_C(90.0); + break; + case 13: + r_.f64[i] = SIMDE_MATH_PI / 2; + break; + case 14: + r_.f64[i] = SIMDE_MATH_DBL_MAX; + break; + case 15: + r_.f64[i] = -SIMDE_MATH_DBL_MAX; + break; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_fixupimm_pd(a, b, c, imm8) _mm_fixupimm_pd(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_fixupimm_pd + #define _mm_fixupimm_pd(a, b, c, imm8) simde_mm_fixupimm_pd(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_fixupimm_pd(a, k, b, c, imm8) _mm_mask_fixupimm_pd(a, k, b, c, imm8) +#else + #define simde_mm_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm_mask_mov_pd(a, k, simde_mm_fixupimm_pd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fixupimm_pd + #define _mm_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm_mask_fixupimm_pd(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_fixupimm_pd(k, a, b, c, imm8) _mm_maskz_fixupimm_pd(k, a, b, c, imm8) +#else + #define simde_mm_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm_maskz_mov_pd(k, simde_mm_fixupimm_pd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fixupimm_pd + #define _mm_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm_maskz_fixupimm_pd(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fixupimm_pd (simde__m256d a, simde__m256d b, simde__m256i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + s_ = simde__m256d_to_private(simde_x_mm256_flushsubnormal_pd(b)); + simde__m256i_private c_ = simde__m256i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + int32_t select = 1; + switch (simde_math_fpclassify(s_.f64[i])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f64[i] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[i] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f64[i] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i64[i] >> (select << 2)) & 15)) { + case 0: + r_.f64[i] = a_.f64[i]; + break; + case 1: + r_.f64[i] = b_.f64[i]; + break; + case 2: + r_.f64[i] = SIMDE_MATH_NAN; + break; + case 3: + r_.f64[i] = -SIMDE_MATH_NAN; + break; + case 4: + r_.f64[i] = -SIMDE_MATH_INFINITY; + break; + case 5: + r_.f64[i] = SIMDE_MATH_INFINITY; + break; + case 6: + r_.f64[i] = s_.f64[i] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; + break; + case 7: + r_.f64[i] = SIMDE_FLOAT64_C(-0.0); + break; + case 8: + r_.f64[i] = SIMDE_FLOAT64_C(0.0); + break; + case 9: + r_.f64[i] = SIMDE_FLOAT64_C(-1.0); + break; + case 10: + r_.f64[i] = SIMDE_FLOAT64_C(1.0); + break; + case 11: + r_.f64[i] = SIMDE_FLOAT64_C(0.5); + break; + case 12: + r_.f64[i] = SIMDE_FLOAT64_C(90.0); + break; + case 13: + r_.f64[i] = SIMDE_MATH_PI / 2; + break; + case 14: + r_.f64[i] = SIMDE_MATH_DBL_MAX; + break; + case 15: + r_.f64[i] = -SIMDE_MATH_DBL_MAX; + break; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_fixupimm_pd(a, b, c, imm8) _mm256_fixupimm_pd(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_fixupimm_pd + #define _mm256_fixupimm_pd(a, b, c, imm8) simde_mm256_fixupimm_pd(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_fixupimm_pd(a, k, b, c, imm8) _mm256_mask_fixupimm_pd(a, k, b, c, imm8) +#else + #define simde_mm256_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm256_mask_mov_pd(a, k, simde_mm256_fixupimm_pd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_fixupimm_pd + #define _mm256_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm256_mask_fixupimm_pd(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_fixupimm_pd(k, a, b, c, imm8) _mm256_maskz_fixupimm_pd(k, a, b, c, imm8) +#else + #define simde_mm256_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_fixupimm_pd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_fixupimm_pd + #define _mm256_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm256_maskz_fixupimm_pd(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fixupimm_pd (simde__m512d a, simde__m512d b, simde__m512i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b), + s_ = simde__m512d_to_private(simde_x_mm512_flushsubnormal_pd(b)); + simde__m512i_private c_ = simde__m512i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + int32_t select = 1; + switch (simde_math_fpclassify(s_.f64[i])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f64[i] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[i] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f64[i] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i64[i] >> (select << 2)) & 15)) { + case 0: + r_.f64[i] = a_.f64[i]; + break; + case 1: + r_.f64[i] = b_.f64[i]; + break; + case 2: + r_.f64[i] = SIMDE_MATH_NAN; + break; + case 3: + r_.f64[i] = -SIMDE_MATH_NAN; + break; + case 4: + r_.f64[i] = -SIMDE_MATH_INFINITY; + break; + case 5: + r_.f64[i] = SIMDE_MATH_INFINITY; + break; + case 6: + r_.f64[i] = s_.f64[i] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; + break; + case 7: + r_.f64[i] = SIMDE_FLOAT64_C(-0.0); + break; + case 8: + r_.f64[i] = SIMDE_FLOAT64_C(0.0); + break; + case 9: + r_.f64[i] = SIMDE_FLOAT64_C(-1.0); + break; + case 10: + r_.f64[i] = SIMDE_FLOAT64_C(1.0); + break; + case 11: + r_.f64[i] = SIMDE_FLOAT64_C(0.5); + break; + case 12: + r_.f64[i] = SIMDE_FLOAT64_C(90.0); + break; + case 13: + r_.f64[i] = SIMDE_MATH_PI / 2; + break; + case 14: + r_.f64[i] = SIMDE_MATH_DBL_MAX; + break; + case 15: + r_.f64[i] = -SIMDE_MATH_DBL_MAX; + break; + } + } + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_fixupimm_pd(a, b, c, imm8) _mm512_fixupimm_pd(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fixupimm_pd + #define _mm512_fixupimm_pd(a, b, c, imm8) simde_mm512_fixupimm_pd(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) _mm512_mask_fixupimm_pd(a, k, b, c, imm8) +#else + #define simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm512_mask_mov_pd(a, k, simde_mm512_fixupimm_pd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_fixupimm_pd + #define _mm512_mask_fixupimm_pd(a, k, b, c, imm8) simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) _mm512_maskz_fixupimm_pd(k, a, b, c, imm8) +#else + #define simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_fixupimm_pd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_fixupimm_pd + #define _mm512_maskz_fixupimm_pd(k, a, b, c, imm8) simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fixupimm_sd (simde__m128d a, simde__m128d b, simde__m128i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + s_ = simde__m128d_to_private(simde_x_mm_flushsubnormal_pd(b)); + simde__m128i_private c_ = simde__m128i_to_private(c); + + int32_t select = 1; + switch (simde_math_fpclassify(s_.f64[0])) { + case SIMDE_MATH_FP_NORMAL: + select = (s_.f64[0] < SIMDE_FLOAT64_C(0.0)) ? 6 : (s_.f64[0] == SIMDE_FLOAT64_C(1.0)) ? 3 : 7; + break; + case SIMDE_MATH_FP_ZERO: + select = 2; + break; + case SIMDE_MATH_FP_NAN: + select = 0; + break; + case SIMDE_MATH_FP_INFINITE: + select = ((s_.f64[0] > SIMDE_FLOAT64_C(0.0)) ? 5 : 4); + break; + } + + switch (((c_.i64[0] >> (select << 2)) & 15)) { + case 0: + b_.f64[0] = a_.f64[0]; + break; + case 1: + b_.f64[0] = b_.f64[0]; + break; + case 2: + b_.f64[0] = SIMDE_MATH_NAN; + break; + case 3: + b_.f64[0] = -SIMDE_MATH_NAN; + break; + case 4: + b_.f64[0] = -SIMDE_MATH_INFINITY; + break; + case 5: + b_.f64[0] = SIMDE_MATH_INFINITY; + break; + case 6: + b_.f64[0] = s_.f64[0] < SIMDE_FLOAT64_C(0.0) ? 
-SIMDE_MATH_INFINITY : SIMDE_MATH_INFINITY; + break; + case 7: + b_.f64[0] = SIMDE_FLOAT64_C(-0.0); + break; + case 8: + b_.f64[0] = SIMDE_FLOAT64_C(0.0); + break; + case 9: + b_.f64[0] = SIMDE_FLOAT64_C(-1.0); + break; + case 10: + b_.f64[0] = SIMDE_FLOAT64_C(1.0); + break; + case 11: + b_.f64[0] = SIMDE_FLOAT64_C(0.5); + break; + case 12: + b_.f64[0] = SIMDE_FLOAT64_C(90.0); + break; + case 13: + b_.f64[0] = SIMDE_MATH_PI / 2; + break; + case 14: + b_.f64[0] = SIMDE_MATH_DBL_MAX; + break; + case 15: + b_.f64[0] = -SIMDE_MATH_DBL_MAX; + break; + } + + return simde__m128d_from_private(b_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_fixupimm_sd(a, b, c, imm8) _mm_fixupimm_sd(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_fixupimm_sd + #define _mm_fixupimm_sd(a, b, c, imm8) simde_mm_fixupimm_sd(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) _mm_mask_fixupimm_sd(a, k, b, c, imm8) +#else + #define simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) simde_mm_mask_mov_pd(a, ((k) | 2), simde_mm_fixupimm_sd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fixupimm_sd + #define _mm_mask_fixupimm_sd(a, k, b, c, imm8) simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) _mm_maskz_fixupimm_sd(k, a, b, c, imm8) +#else + #define simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) simde_mm_maskz_mov_pd(((k) | 2), simde_mm_fixupimm_sd(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fixupimm_sd + #define _mm_maskz_fixupimm_sd(k, a, b, c, imm8) simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FIXUPIMM_H) */ +/* :: End simde/x86/avx512/fixupimm.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fixupimm_round.h :: */ +#if !defined(SIMDE_X86_AVX512_FIXUPIMM_ROUND_H) +#define SIMDE_X86_AVX512_FIXUPIMM_ROUND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) _mm512_fixupimm_round_ps(a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) simde_mm512_fixupimm_ps(a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_fixupimm_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_fixupimm_round_ps_envp; \ + int simde_mm512_fixupimm_round_ps_x = feholdexcept(&simde_mm512_fixupimm_round_ps_envp); \ + simde_mm512_fixupimm_round_ps_r = simde_mm512_fixupimm_ps(a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_fixupimm_round_ps_x == 0)) \ + fesetenv(&simde_mm512_fixupimm_round_ps_envp); \ + } \ + else { \ + simde_mm512_fixupimm_round_ps_r = simde_mm512_fixupimm_ps(a, b, c, imm8); \ + } \ + \ + 
simde_mm512_fixupimm_round_ps_r; \ + })) + #else + #define simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) simde_mm512_fixupimm_ps(a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_fixupimm_round_ps (simde__m512 a, simde__m512 b, simde__m512i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_fixupimm_ps(a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_fixupimm_ps(a, b, c, imm8); + #endif + } + else { + r = simde_mm512_fixupimm_ps(a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fixupimm_round_ps + #define _mm512_fixupimm_round_ps(a, b, c, imm8, sae) simde_mm512_fixupimm_round_ps(a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) _mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_mask_fixupimm_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_mask_fixupimm_round_ps_envp; \ + int simde_mm512_mask_fixupimm_round_ps_x = feholdexcept(&simde_mm512_mask_fixupimm_round_ps_envp); \ + simde_mm512_mask_fixupimm_round_ps_r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_mask_fixupimm_round_ps_x == 0)) \ + fesetenv(&simde_mm512_mask_fixupimm_round_ps_envp); \ + } \ + else { \ + simde_mm512_mask_fixupimm_round_ps_r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); \ + } \ + \ + simde_mm512_mask_fixupimm_round_ps_r; \ + })) + #else + #define simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_mask_fixupimm_round_ps (simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); + #endif + } + else { + r = simde_mm512_mask_fixupimm_ps(a, k, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_fixupimm_round_ps + #define _mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_round_ps(a, k, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) _mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) 
SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_maskz_fixupimm_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_maskz_fixupimm_round_ps_envp; \ + int simde_mm512_maskz_fixupimm_round_ps_x = feholdexcept(&simde_mm512_maskz_fixupimm_round_ps_envp); \ + simde_mm512_maskz_fixupimm_round_ps_r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_maskz_fixupimm_round_ps_x == 0)) \ + fesetenv(&simde_mm512_maskz_fixupimm_round_ps_envp); \ + } \ + else { \ + simde_mm512_maskz_fixupimm_round_ps_r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); \ + } \ + \ + simde_mm512_maskz_fixupimm_round_ps_r; \ + })) + #else + #define simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_maskz_fixupimm_round_ps (simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); + #endif + } + else { + r = simde_mm512_maskz_fixupimm_ps(k, a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_fixupimm_round_ps + #define _mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_round_ps(k, a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) _mm512_fixupimm_round_pd(a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) simde_mm512_fixupimm_pd(a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_fixupimm_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_fixupimm_round_pd_envp; \ + int simde_mm512_fixupimm_round_pd_x = feholdexcept(&simde_mm512_fixupimm_round_pd_envp); \ + simde_mm512_fixupimm_round_pd_r = simde_mm512_fixupimm_pd(a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_fixupimm_round_pd_x == 0)) \ + fesetenv(&simde_mm512_fixupimm_round_pd_envp); \ + } \ + else { \ + simde_mm512_fixupimm_round_pd_r = simde_mm512_fixupimm_pd(a, b, c, imm8); \ + } \ + \ + simde_mm512_fixupimm_round_pd_r; \ + })) + #else + #define simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) simde_mm512_fixupimm_pd(a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_fixupimm_round_pd (simde__m512d a, simde__m512d b, simde__m512i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_fixupimm_pd(a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_fixupimm_pd(a, b, c, imm8); + #endif + } + else { + r = simde_mm512_fixupimm_pd(a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fixupimm_round_pd + #define _mm512_fixupimm_round_pd(a, b, 
c, imm8, sae) simde_mm512_fixupimm_round_pd(a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) _mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_mask_fixupimm_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_mask_fixupimm_round_pd_envp; \ + int simde_mm512_mask_fixupimm_round_pd_x = feholdexcept(&simde_mm512_mask_fixupimm_round_pd_envp); \ + simde_mm512_mask_fixupimm_round_pd_r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_mask_fixupimm_round_pd_x == 0)) \ + fesetenv(&simde_mm512_mask_fixupimm_round_pd_envp); \ + } \ + else { \ + simde_mm512_mask_fixupimm_round_pd_r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); \ + } \ + \ + simde_mm512_mask_fixupimm_round_pd_r; \ + })) + #else + #define simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_mask_fixupimm_round_pd (simde__m512d a, simde__mmask8 k, simde__m512d b, simde__m512i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); + #endif + } + else { + r = simde_mm512_mask_fixupimm_pd(a, k, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_fixupimm_round_pd + #define _mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) simde_mm512_mask_fixupimm_round_pd(a, k, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) _mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_maskz_fixupimm_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_maskz_fixupimm_round_pd_envp; \ + int simde_mm512_maskz_fixupimm_round_pd_x = feholdexcept(&simde_mm512_maskz_fixupimm_round_pd_envp); \ + simde_mm512_maskz_fixupimm_round_pd_r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_maskz_fixupimm_round_pd_x == 0)) \ + fesetenv(&simde_mm512_maskz_fixupimm_round_pd_envp); \ + } \ + else { \ + simde_mm512_maskz_fixupimm_round_pd_r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); \ + } \ + \ + simde_mm512_maskz_fixupimm_round_pd_r; \ + })) + #else + #define simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_maskz_fixupimm_round_pd 
(simde__mmask8 k, simde__m512d a, simde__m512d b, simde__m512i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); + #endif + } + else { + r = simde_mm512_maskz_fixupimm_pd(k, a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_fixupimm_round_pd + #define _mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) simde_mm512_maskz_fixupimm_round_pd(k, a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) _mm_fixupimm_round_ss(a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) simde_mm_fixupimm_ss(a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_fixupimm_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_fixupimm_round_ss_envp; \ + int simde_mm_fixupimm_round_ss_x = feholdexcept(&simde_mm_fixupimm_round_ss_envp); \ + simde_mm_fixupimm_round_ss_r = simde_mm_fixupimm_ss(a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm_fixupimm_round_ss_x == 0)) \ + fesetenv(&simde_mm_fixupimm_round_ss_envp); \ + } \ + else { \ + simde_mm_fixupimm_round_ss_r = simde_mm_fixupimm_ss(a, b, c, imm8); \ + } \ + \ + simde_mm_fixupimm_round_ss_r; \ + })) + #else + #define simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) simde_mm_fixupimm_ss(a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_fixupimm_round_ss (simde__m128 a, simde__m128 b, simde__m128i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_fixupimm_ss(a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_fixupimm_ss(a, b, c, imm8); + #endif + } + else { + r = simde_mm_fixupimm_ss(a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_fixupimm_round_ss + #define _mm_fixupimm_round_ss(a, b, c, imm8, sae) simde_mm_fixupimm_round_ss(a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) _mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_mask_fixupimm_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_mask_fixupimm_round_ss_envp; \ + int simde_mm_mask_fixupimm_round_ss_x = feholdexcept(&simde_mm_mask_fixupimm_round_ss_envp); \ + simde_mm_mask_fixupimm_round_ss_r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm_mask_fixupimm_round_ss_x == 0)) \ + 
fesetenv(&simde_mm_mask_fixupimm_round_ss_envp); \ + } \ + else { \ + simde_mm_mask_fixupimm_round_ss_r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); \ + } \ + \ + simde_mm_mask_fixupimm_round_ss_r; \ + })) + #else + #define simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_ss(a, k, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_mask_fixupimm_round_ss (simde__m128 a, simde__mmask8 k, simde__m128 b, simde__m128i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); + #endif + } + else { + r = simde_mm_mask_fixupimm_ss(a, k, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fixupimm_round_ss + #define _mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_round_ss(a, k, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) _mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_maskz_fixupimm_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_maskz_fixupimm_round_ss_envp; \ + int simde_mm_maskz_fixupimm_round_ss_x = feholdexcept(&simde_mm_maskz_fixupimm_round_ss_envp); \ + simde_mm_maskz_fixupimm_round_ss_r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm_maskz_fixupimm_round_ss_x == 0)) \ + fesetenv(&simde_mm_maskz_fixupimm_round_ss_envp); \ + } \ + else { \ + simde_mm_maskz_fixupimm_round_ss_r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); \ + } \ + \ + simde_mm_maskz_fixupimm_round_ss_r; \ + })) + #else + #define simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_maskz_fixupimm_round_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, simde__m128i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); + #endif + } + else { + r = simde_mm_maskz_fixupimm_ss(k, a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fixupimm_round_ss + #define _mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_round_ss(k, a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) _mm_fixupimm_round_sd(a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) simde_mm_fixupimm_sd(a, b, c, imm8) +#elif 
defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_fixupimm_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_fixupimm_round_sd_envp; \ + int simde_mm_fixupimm_round_sd_x = feholdexcept(&simde_mm_fixupimm_round_sd_envp); \ + simde_mm_fixupimm_round_sd_r = simde_mm_fixupimm_sd(a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm_fixupimm_round_sd_x == 0)) \ + fesetenv(&simde_mm_fixupimm_round_sd_envp); \ + } \ + else { \ + simde_mm_fixupimm_round_sd_r = simde_mm_fixupimm_sd(a, b, c, imm8); \ + } \ + \ + simde_mm_fixupimm_round_sd_r; \ + })) + #else + #define simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) simde_mm_fixupimm_sd(a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_fixupimm_round_sd (simde__m128d a, simde__m128d b, simde__m128i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_fixupimm_sd(a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_fixupimm_sd(a, b, c, imm8); + #endif + } + else { + r = simde_mm_fixupimm_sd(a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_fixupimm_round_sd + #define _mm_fixupimm_round_sd(a, b, c, imm8, sae) simde_mm_fixupimm_round_sd(a, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) _mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_mask_fixupimm_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_mask_fixupimm_round_sd_envp; \ + int simde_mm_mask_fixupimm_round_sd_x = feholdexcept(&simde_mm_mask_fixupimm_round_sd_envp); \ + simde_mm_mask_fixupimm_round_sd_r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm_mask_fixupimm_round_sd_x == 0)) \ + fesetenv(&simde_mm_mask_fixupimm_round_sd_envp); \ + } \ + else { \ + simde_mm_mask_fixupimm_round_sd_r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); \ + } \ + \ + simde_mm_mask_fixupimm_round_sd_r; \ + })) + #else + #define simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) simde_mm_mask_fixupimm_sd(a, k, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_mask_fixupimm_round_sd (simde__m128d a, simde__mmask8 k, simde__m128d b, simde__m128i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); + #endif + } + else { + r = simde_mm_mask_fixupimm_sd(a, k, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fixupimm_round_sd + #define _mm_mask_fixupimm_round_sd(a, k, 
b, c, imm8, sae) simde_mm_mask_fixupimm_round_sd(a, k, b, c, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) _mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_maskz_fixupimm_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_maskz_fixupimm_round_sd_envp; \ + int simde_mm_maskz_fixupimm_round_sd_x = feholdexcept(&simde_mm_maskz_fixupimm_round_sd_envp); \ + simde_mm_maskz_fixupimm_round_sd_r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); \ + if (HEDLEY_LIKELY(simde_mm_maskz_fixupimm_round_sd_x == 0)) \ + fesetenv(&simde_mm_maskz_fixupimm_round_sd_envp); \ + } \ + else { \ + simde_mm_maskz_fixupimm_round_sd_r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); \ + } \ + \ + simde_mm_maskz_fixupimm_round_sd_r; \ + })) + #else + #define simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_maskz_fixupimm_round_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, simde__m128i c, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); + #endif + } + else { + r = simde_mm_maskz_fixupimm_sd(k, a, b, c, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fixupimm_round_sd + #define _mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) simde_mm_maskz_fixupimm_round_sd(k, a, b, c, imm8, sae) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FIXUPIMM_ROUND_H) */ +/* :: End simde/x86/avx512/fixupimm_round.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fmadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_FMADD_H) +#define SIMDE_X86_AVX512_FMADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/fma.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2019 Evan Nemerson + */ + +#if !defined(SIMDE_X86_FMA_H) +#define SIMDE_X86_FMA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmadd_pd(a, b, c); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmaq_f64(c_.neon_f64, b_.neon_f64, a_.neon_f64); + #elif defined(simde_math_fma) && (defined(__FP_FAST_FMA) || defined(FP_FAST_FMA)) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fma(a_.f64[i], b_.f64[i], c_.f64[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_pd + #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmadd_pd(a, b, c); + #else + return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmadd_pd + #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmadd_ps(a, b, c); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); + #elif defined(simde_math_fmaf) && (defined(__FP_FAST_FMAF) || defined(FP_FAST_FMAF)) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_ps + #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmadd_ps(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + 
simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_fmadd_ps(a_.m128[i], b_.m128[i], c_.m128[i]); + } + + return simde__m256_from_private(r_); + #else + return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmadd_ps + #define _mm256_fmadd_ps(a, b, c) simde_mm256_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmadd_sd(a, b, c); + #else + return simde_mm_add_sd(simde_mm_mul_sd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_sd + #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmadd_ss(a, b, c); + #else + return simde_mm_add_ss(simde_mm_mul_ss(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_ss + #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmaddsub_pd(a, b, c); + #else + return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmaddsub_pd + #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmaddsub_pd(a, b, c); + #else + return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmaddsub_pd + #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmaddsub_ps(a, b, c); + #else + return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmaddsub_ps + #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmaddsub_ps(a, b, c); + #else + return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmaddsub_ps + #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsub_pd(a, b, c); + #else + return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_pd + #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmsub_pd 
(simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsub_pd(a, b, c); + #else + return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsub_pd + #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsub_ps(a, b, c); + #else + return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_ps + #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsub_ps(a, b, c); + #else + return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsub_ps + #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmsub_sd(a, b, c); + #else + return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_sd + #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmsub_ss(a, b, c); + #else + return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_ss + #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsubadd_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; + r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsubadd_pd + #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsubadd_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; + r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsubadd_pd + #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsubadd_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; + r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsubadd_ps + #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsubadd_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; + r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsubadd_ps + #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmadd_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmsq_f64(c_.neon_f64, a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_pd + #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmadd_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmadd_pd + #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmadd_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_ps + #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmadd_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmadd_ps + #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmadd_sd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + r_ = a_; + r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_sd + #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmadd_ss(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + r_ = a_; + r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_ss + #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmsub_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_pd + #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmsub_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_fnmsub_pd + #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmsub_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_ps + #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmsub_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmsub_ps + #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmsub_sd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + r_ = a_; + r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_sd + #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmsub_ss(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + r_ = simde__m128_to_private(a); + r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_ss + #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_FMA_H) */ +/* :: End simde/x86/fma.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmadd_ps(a, b, c); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b), + c_ = simde__m512_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_fmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = (a_.f32 * b_.f32) + c_.f32; + #else + SIMDE_VECTORIZE + for (size_t 
i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmadd_ps + #define _mm512_fmadd_ps(a, b, c) simde_mm512_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_fmadd_ps(simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_fmadd_ps(a, k, b, c); + #else + return simde_mm512_mask_mov_ps(a, k, simde_mm512_fmadd_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_fmadd_ps + #define _mm512_mask_fmadd_ps(a, k, b, c) simde_mm512_mask_fmadd_ps(a, k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_fmadd_ps(simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_fmadd_ps(k, a, b, c); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_fmadd_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_fmadd_ps + #define _mm512_maskz_fmadd_ps(k, a, b, c) simde_mm512_maskz_fmadd_ps(k, a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmadd_pd(a, b, c); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b), + c_ = simde__m512d_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_fmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = (a_.f64 * b_.f64) + c_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmadd_pd + #define _mm512_fmadd_pd(a, b, c) simde_mm512_fmadd_pd(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FMADD_H) */ +/* :: End simde/x86/avx512/fmadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fmsub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 kitegi + */ + +#if !defined(SIMDE_X86_AVX512_FMSUB_H) +#define SIMDE_X86_AVX512_FMSUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask3_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c, simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask3_fmsub_pd(a, b, c, k); + #else + return simde_mm256_mask_mov_pd(c, k, simde_mm256_fmsub_pd(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask3_fmsub_pd + #define _mm256_mask3_fmsub_pd(a, b, c, k) _mm256_mask3_fmsub_pd(a, b, c, k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_fmsub_pd (simde__m256d a, simde__mmask8 k, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_fmsub_pd(a, k, b, c); + #else + return simde_mm256_mask_mov_pd(a, k, simde_mm256_fmsub_pd(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_fmsub_pd + #define _mm256_mask_fmsub_pd(a, k, b, c) _mm256_mask_fmsub_pd(a, k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_fmsub_pd (simde__mmask8 k, simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_fmsub_pd(k, a, b, c); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_fmsub_pd(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_fmsub_pd + #define _mm256_maskz_fmsub_pd(k, a, b, c) _mm256_maskz_fmsub_pd(k, a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask3_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c, simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask3_fmsub_pd(a, b, c, k); + #else + return simde_mm_mask_mov_pd(c, k, simde_mm_fmsub_pd(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask3_fmsub_pd + #define _mm_mask3_fmsub_pd(a, b, c, k) _mm_mask3_fmsub_pd(a, b, c, k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_fmsub_pd (simde__m128d a, simde__mmask8 k, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_fmsub_pd(a, k, b, c); + #else + return simde_mm_mask_mov_pd(a, k, simde_mm_fmsub_pd(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fmsub_pd + #define _mm_mask_fmsub_pd(a, k, b, c) _mm_mask_fmsub_pd(a, k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_fmsub_pd (simde__mmask8 k, simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_fmsub_pd(k, a, b, c); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_fmsub_pd(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef 
_mm_maskz_fmsub_pd + #define _mm_maskz_fmsub_pd(k, a, b, c) _mm_maskz_fmsub_pd(k, a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask3_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c, simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask3_fmsub_ps(a, b, c, k); + #else + return simde_mm256_mask_mov_ps(c, k, simde_mm256_fmsub_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask3_fmsub_ps + #define _mm256_mask3_fmsub_ps(a, b, c, k) _mm256_mask3_fmsub_ps(a, b, c, k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_fmsub_ps (simde__m256 a, simde__mmask8 k, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_fmsub_ps(a, k, b, c); + #else + return simde_mm256_mask_mov_ps(a, k, simde_mm256_fmsub_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_fmsub_ps + #define _mm256_mask_fmsub_ps(a, k, b, c) _mm256_mask_fmsub_ps(a, k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_fmsub_ps (simde__mmask8 k, simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_fmsub_ps(k, a, b, c); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_fmsub_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_fmsub_ps + #define _mm256_maskz_fmsub_ps(k, a, b, c) _mm256_maskz_fmsub_ps(k, a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask3_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c, simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask3_fmsub_ps(a, b, c, k); + #else + return simde_mm_mask_mov_ps(c, k, simde_mm_fmsub_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask3_fmsub_ps + #define _mm_mask3_fmsub_ps(a, b, c, k) _mm_mask3_fmsub_ps(a, b, c, k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_fmsub_ps (simde__m128 a, simde__mmask8 k, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_fmsub_ps(a, k, b, c); + #else + return simde_mm_mask_mov_ps(a, k, simde_mm_fmsub_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_fmsub_ps + #define _mm_mask_fmsub_ps(a, k, b, c) _mm_mask_fmsub_ps(a, k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_fmsub_ps (simde__mmask8 k, simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_fmsub_ps(k, a, b, c); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_fmsub_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_fmsub_ps + #define _mm_maskz_fmsub_ps(k, a, b, c) _mm_maskz_fmsub_ps(k, a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fmsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmsub_ps(a, b, c); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b), + c_ = simde__m512_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_fmsub_ps(a_.m256[i], b_.m256[i], c_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = (a_.f32 * b_.f32) - c_.f32; + #else + SIMDE_VECTORIZE + for 
(size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmsub_ps + #define _mm512_fmsub_ps(a, b, c) simde_mm512_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fmsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmsub_pd(a, b, c); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b), + c_ = simde__m512d_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_fmsub_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = (a_.f64 * b_.f64) - c_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmsub_pd + #define _mm512_fmsub_pd(a, b, c) simde_mm512_fmsub_pd(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FMSUB_H) */ +/* :: End simde/x86/avx512/fmsub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fnmadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 kitegi + */ + +#if !defined(SIMDE_X86_AVX512_FNMADD_H) +#define SIMDE_X86_AVX512_FNMADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fnmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fnmadd_ps(a, b, c); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b), + c_ = simde__m512_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_fnmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = -(a_.f32 * b_.f32) + c_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fnmadd_ps + #define _mm512_fnmadd_ps(a, b, c) simde_mm512_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fnmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fnmadd_pd(a, b, c); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b), + c_ = simde__m512d_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_fnmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = -(a_.f64 * b_.f64) + c_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fnmadd_pd + #define _mm512_fnmadd_pd(a, b, c) simde_mm512_fnmadd_pd(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FNMADD_H) */ +/* :: End simde/x86/avx512/fnmadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fnmsub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 kitegi + */ + +#if !defined(SIMDE_X86_AVX512_FNMSUB_H) +#define SIMDE_X86_AVX512_FNMSUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fnmsub_ps (simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fnmsub_ps(a, b, c); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b), + c_ = simde__m512_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_fnmsub_ps(a_.m256[i], b_.m256[i], c_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = -(a_.f32 * b_.f32) - c_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fnmsub_ps + #define _mm512_fnmsub_ps(a, b, c) simde_mm512_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fnmsub_pd (simde__m512d a, simde__m512d b, simde__m512d c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fnmsub_pd(a, b, c); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b), + c_ = simde__m512d_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_fnmsub_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = -(a_.f64 * b_.f64) - c_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fnmsub_pd + #define _mm512_fnmsub_pd(a, b, c) simde_mm512_fnmsub_pd(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FNMSUB_H) */ +/* :: End simde/x86/avx512/fnmsub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/insert.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights 
to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_INSERT_H) +#define SIMDE_X86_AVX512_INSERT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_insertf32x4 (simde__m512 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + simde__m512 r; + switch(imm8) { + case 0: r = _mm512_insertf32x4(a, b, 0); break; + case 1: r = _mm512_insertf32x4(a, b, 1); break; + case 2: r = _mm512_insertf32x4(a, b, 2); break; + case 3: r = _mm512_insertf32x4(a, b, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_mm512_setzero_ps(); break; + } + return r; + #else + simde__m512_private a_ = simde__m512_to_private(a); + + a_.m128[imm8 & 3] = b; + + return simde__m512_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf32x4 + #define _mm512_insertf32x4(a, b, imm8) simde_mm512_insertf32x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_insertf32x4 (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512 r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_mask_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_mask_mov_ps(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf32x4 + #define _mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_insertf32x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_insertf32x4 (simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512 r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_maskz_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_maskz_mov_ps(k, r); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf32x4 + #define _mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_insertf32x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_insertf64x4 (simde__m512d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + a_.m256d[imm8 & 1] = b; + + return simde__m512d_from_private(a_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_insertf64x4(a, b, imm8) _mm512_insertf64x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf64x4 + #define _mm512_insertf64x4(a, b, imm8) simde_mm512_insertf64x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_insertf64x4 (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_mask_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_mask_mov_pd(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf64x4 + #define _mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_insertf64x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_insertf64x4 (simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_maskz_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_maskz_mov_pd(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf64x4 + #define _mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_insertf64x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti32x4 (simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m128i[imm8 & 3] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_inserti32x4(a, b, imm8) _mm512_inserti32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti32x4 + #define _mm512_inserti32x4(a, b, imm8) simde_mm512_inserti32x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti32x4 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_mask_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_mask_mov_epi32(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_mask_inserti32x4 + #define _mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_inserti32x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti32x4 (simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_maskz_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi32(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti32x4 + #define _mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_inserti32x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti64x4 (simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m256i[imm8 & 1] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_inserti64x4(a, b, imm8) _mm512_inserti64x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti64x4 + #define _mm512_inserti64x4(a, b, imm8) simde_mm512_inserti64x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti64x4 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_mask_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_mask_mov_epi64(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti64x4 + #define _mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_inserti64x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti64x4 (simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_maskz_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi64(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti64x4 + #define _mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_inserti64x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_insertf32x8 (simde__m512 a, simde__m256 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512_private a_ = simde__m512_to_private(a); + + a_.m256[imm8 & 1] = b; + + return simde__m512_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_insertf32x8(a, b, imm8) _mm512_insertf32x8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf32x8 + #define _mm512_insertf32x8(a, b, 
imm8) simde_mm512_insertf32x8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_insertf32x8(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512 r; + SIMDE_CONSTIFY_2_(_mm512_mask_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); + return r; + #else + simde__m512 r; + SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_mask_mov_ps(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf32x8 + #define _mm512_mask_insertf32x8(src, k, a, b, imm8) simde_mm512_mask_insertf32x8(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_insertf32x8(simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512 r; + SIMDE_CONSTIFY_2_(_mm512_maskz_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); + return r; + #else + simde__m512 r; + SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_maskz_mov_ps(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf32x8 + #define _mm512_maskz_insertf32x8(k, a, b, imm8) simde_mm512_maskz_insertf32x8(k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_insertf64x2 (simde__m512d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + a_.m128d[imm8 & 3] = b; + + return simde__m512d_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_insertf64x2(a, b, imm8) _mm512_insertf64x2(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf64x2 + #define _mm512_insertf64x2(a, b, imm8) simde_mm512_insertf64x2(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_insertf64x2(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512d r; + SIMDE_CONSTIFY_4_(_mm512_mask_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); + return r; + #else + simde__m512d r; + SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_mask_mov_pd(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf64x2 + #define _mm512_mask_insertf64x2(src, k, a, b, imm8) simde_mm512_mask_insertf64x2(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_insertf64x2(simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512d r; + SIMDE_CONSTIFY_4_(_mm512_maskz_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); + return r; + #else + simde__m512d r; + SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_maskz_mov_pd(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf64x2 + #define _mm512_maskz_insertf64x2(k, a, b, imm8) simde_mm512_maskz_insertf64x2(k, a, b, imms8) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti32x8 (simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m256i[imm8 & 1] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_inserti32x8(a, b, imm8) _mm512_inserti32x8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti32x8 + #define _mm512_inserti32x8(a, b, imm8) simde_mm512_inserti32x8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti32x8(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_2_(_mm512_mask_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, src, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); + return simde_mm512_mask_mov_epi32(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti32x8 + #define _mm512_mask_inserti32x8(src, k, a, b, imm8) simde_mm512_mask_inserti32x8(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti32x8(simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_2_(_mm512_maskz_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi32(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti32x8 + #define _mm512_maskz_inserti32x8(k, a, b, imm8) simde_mm512_maskz_inserti32x8(k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti64x2 (simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m128i[imm8 & 3] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_inserti64x2(a, b, imm8) _mm512_inserti64x2(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti64x2 + #define _mm512_inserti64x2(a, b, imm8) simde_mm512_inserti64x2(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti64x2(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_4_(_mm512_mask_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_mask_mov_epi64(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti64x2 + #define _mm512_mask_inserti64x2(src, k, a, b, imm8) simde_mm512_mask_inserti64x2(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti64x2(simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { + #if 
defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_4_(_mm512_maskz_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi64(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti64x2 + #define _mm512_maskz_inserti64x2(k, a, b, imm8) simde_mm512_maskz_inserti64x2(k, a, b, imms8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_INSERT_H) */ +/* :: End simde/x86/avx512/insert.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/kshift.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_KSHIFT_H) +#define SIMDE_X86_AVX512_KSHIFT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kshiftli_mask16 (simde__mmask16 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a << count) : 0); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask16(a, count) _kshiftli_mask16(a, count) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask16 + #define _kshiftli_mask16(a, count) simde_kshiftli_mask16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kshiftli_mask32 (simde__mmask32 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 31) ? 
(a << count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask32(a, count) _kshiftli_mask32(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask32 + #define _kshiftli_mask32(a, count) simde_kshiftli_mask32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kshiftli_mask64 (simde__mmask64 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 63) ? (a << count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask64(a, count) _kshiftli_mask64(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask64 + #define _kshiftli_mask64(a, count) simde_kshiftli_mask64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kshiftli_mask8 (simde__mmask8 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a << count) : 0); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask8(a, count) _kshiftli_mask8(a, count) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask8 + #define _kshiftli_mask8(a, count) simde_kshiftli_mask8(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kshiftri_mask16 (simde__mmask16 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a >> count) : 0); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask16(a, count) _kshiftri_mask16(a, count) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask16 + #define _kshiftri_mask16(a, count) simde_kshiftri_mask16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kshiftri_mask32 (simde__mmask32 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 31) ? (a >> count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask32(a, count) _kshiftri_mask32(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask32 + #define _kshiftri_mask32(a, count) simde_kshiftri_mask32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kshiftri_mask64 (simde__mmask64 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 63) ? 
(a >> count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask64(a, count) _kshiftri_mask64(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask64 + #define _kshiftri_mask64(a, count) simde_kshiftri_mask64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kshiftri_mask8 (simde__mmask8 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a >> count) : 0); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask8(a, count) _kshiftri_mask8(a, count) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask8 + #define _kshiftri_mask8(a, count) simde_kshiftri_mask8(a, count) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_KSHIFT_H) */ +/* :: End simde/x86/avx512/kshift.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/knot.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Michael R. 
Crusoe + */ + +#if !defined(SIMDE_X86_AVX512_KNOT_H) +#define SIMDE_X86_AVX512_KNOT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_knot_mask8 (simde__mmask8 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _knot_mask8(a); + #else + return ~a; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _knot_mask8 + #define _knot_mask8(a) simde_knot_mask8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_knot_mask16 (simde__mmask16 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _knot_mask16(a); + #else + return ~a; + #endif +} +#define simde_mm512_knot(a) simde_knot_mask16(a) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _knot_mask16 + #undef _mm512_knot + #define _knot_mask16(a) simde_knot_mask16(a) + #define _mm512_knot(a) simde_knot_mask16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_knot_mask32 (simde__mmask32 a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _knot_mask32(a); + #else + return ~a; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _knot_mask32 + #define _knot_mask32(a) simde_knot_mask32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_knot_mask64 (simde__mmask64 a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _knot_mask64(a); + #else + return ~a; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _knot_mask64 + #define _knot_mask64(a) simde_knot_mask64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_KNOT_H) */ +/* :: End simde/x86/avx512/knot.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/kxor.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Michael R. Crusoe + */ + +#if !defined(SIMDE_X86_AVX512_KXOR_H) +#define SIMDE_X86_AVX512_KXOR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kxor_mask8 (simde__mmask8 a, simde__mmask8 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _kxor_mask8(a, b); + #else + return a^b; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kxor_mask8 + #define _kxor_mask8(a, b) simde_kxor_mask8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kxor_mask16 (simde__mmask16 a, simde__mmask16 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _kxor_mask16(a, b); + #else + return a^b; + #endif +} +#define simde_mm512_kxor(a, b) simde_kxor_mask16(a, b) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kxor_mask16 + #undef _mm512_kxor + #define _kxor_mask16(a, b) simde_kxor_mask16(a, b) + #define _mm512_kxor(a, b) simde_kxor_mask16(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kxor_mask32 (simde__mmask32 a, simde__mmask32 b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _kxor_mask32(a, b); + #else + return a^b; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kxor_mask32 + #define _kxor_mask32(a, b) simde_kxor_mask32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kxor_mask64 (simde__mmask64 a, simde__mmask64 b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) \ + && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + return _kxor_mask64(a, b); + #else + return a^b; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kxor_mask64 + #define _kxor_mask64(a, b) simde_kxor_mask64(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_KXOR_H) */ +/* :: End simde/x86/avx512/kxor.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/loadu.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: 
+ * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_LOADU_H) +#define SIMDE_X86_AVX512_LOADU_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_loadu_ps (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + #if defined(SIMDE_BUG_CLANG_REV_298042) + return _mm512_loadu_ps(SIMDE_ALIGN_CAST(const float *, mem_addr)); + #else + return _mm512_loadu_ps(mem_addr); + #endif + #else + simde__m512 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_loadu_ps + #define _mm512_loadu_ps(a) simde_mm512_loadu_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_loadu_pd (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + #if defined(SIMDE_BUG_CLANG_REV_298042) + return _mm512_loadu_pd(SIMDE_ALIGN_CAST(const double *, mem_addr)); + #else + return _mm512_loadu_pd(mem_addr); + #endif + #else + simde__m512d r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_loadu_pd + #define _mm512_loadu_pd(a) simde_mm512_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_loadu_si512 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_loadu_si512(HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); + #else + simde__m512i r; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm512_loadu_si512_s { + __typeof__(r) v; + } __attribute__((__packed__, __may_alias__)); + r = HEDLEY_REINTERPRET_CAST(const struct simde_mm512_loadu_si512_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #else + simde_memcpy(&r, mem_addr, sizeof(r)); + #endif + + return r; + #endif +} +#define simde_mm512_loadu_epi8(mem_addr) simde_mm512_loadu_si512(mem_addr) +#define simde_mm512_loadu_epi16(mem_addr) simde_mm512_loadu_si512(mem_addr) +#define simde_mm512_loadu_epi32(mem_addr) simde_mm512_loadu_si512(mem_addr) +#define simde_mm512_loadu_epi64(mem_addr) simde_mm512_loadu_si512(mem_addr) +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_loadu_epi8 + #undef _mm512_loadu_epi16 + #define _mm512_loadu_epi8(a) simde_mm512_loadu_si512(a) + #define _mm512_loadu_epi16(a) simde_mm512_loadu_si512(a) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_loadu_epi32 + #undef _mm512_loadu_epi64 + #undef _mm512_loadu_si512 + #define _mm512_loadu_si512(a) simde_mm512_loadu_si512(a) + #define _mm512_loadu_epi32(a) simde_mm512_loadu_si512(a) + #define _mm512_loadu_epi64(a) simde_mm512_loadu_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i 
+simde_mm256_maskz_loadu_epi16 (simde__mmask16 k, void const * mem_addr) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_loadu_epi16(k, HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_loadu_epi16(mem_addr)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_loadu_epi16 + #define _mm256_maskz_loadu_epi16(k, mem_addr) simde_mm256_maskz_loadu_epi16(k, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_loadu_epi16 (simde__m512i src, simde__mmask32 k, void const * mem_addr) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_loadu_epi16(src, k, HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_loadu_epi16(mem_addr)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_loadu_epi16 + #define _mm512_mask_loadu_epi16(src, k, mem_addr) simde_mm512_mask_loadu_epi16(src, k, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_loadu_ps (simde__mmask16 k, void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_loadu_ps(k, HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_loadu_ps(mem_addr)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_loadu_ps + #define _mm256_maskz_loadu_ps(k, mem_addr) simde_mm256_maskz_loadu_ps(k, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_loadu_pd (simde__mmask8 k, void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_loadu_pd(k, HEDLEY_REINTERPRET_CAST(void const*, mem_addr)); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_loadu_pd(mem_addr)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_loadu_pd + #define _mm256_maskz_loadu_pd(k, mem_addr) simde_mm256_maskz_loadu_pd(k, mem_addr) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_LOADU_H) */ +/* :: End simde/x86/avx512/loadu.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/lzcnt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_LZCNT_H) +#define SIMDE_X86_AVX512_LZCNT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if HEDLEY_MSVC_VERSION_CHECK(14,0,0) +#include +#pragma intrinsic(_BitScanReverse) + #if defined(_M_AMD64) || defined(_M_ARM64) + #pragma intrinsic(_BitScanReverse64) + #endif +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if \ + ( HEDLEY_HAS_BUILTIN(__builtin_clz) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) ) && \ + defined(__INT_MAX__) && defined(__LONG_MAX__) && defined(__LONG_LONG_MAX__) && \ + defined(__INT32_MAX__) && defined(__INT64_MAX__) + #if __INT_MAX__ == __INT32_MAX__ + #define simde_x_clz32(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) + #elif __LONG_MAX__ == __INT32_MAX__ + #define simde_x_clz32(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) + #elif __LONG_LONG_MAX__ == __INT32_MAX__ + #define simde_x_clz32(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) + #endif + + #if __INT_MAX__ == __INT64_MAX__ + #define simde_x_clz64(v) __builtin_clz(HEDLEY_STATIC_CAST(unsigned int, (v))) + #elif __LONG_MAX__ == __INT64_MAX__ + #define simde_x_clz64(v) __builtin_clzl(HEDLEY_STATIC_CAST(unsigned long, (v))) + #elif __LONG_LONG_MAX__ == __INT64_MAX__ + #define simde_x_clz64(v) __builtin_clzll(HEDLEY_STATIC_CAST(unsigned long long, (v))) + #endif +#elif HEDLEY_MSVC_VERSION_CHECK(14,0,0) + static int simde_x_clz32(uint32_t x) { + unsigned long r; + _BitScanReverse(&r, x); + return 31 - HEDLEY_STATIC_CAST(int, r); + } + #define simde_x_clz32 simde_x_clz32 + + static int simde_x_clz64(uint64_t x) { + unsigned long r; + + #if defined(_M_AMD64) || defined(_M_ARM64) + _BitScanReverse64(&r, x); + return 63 - HEDLEY_STATIC_CAST(int, r); + #else + uint32_t high = HEDLEY_STATIC_CAST(uint32_t, x >> 32); + if (high != 0) + return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, high)); + else + return _BitScanReverse(&r, HEDLEY_STATIC_CAST(unsigned long, x & ~UINT32_C(0))) + 32; + #endif + } + #define simde_x_clz64 simde_x_clz64 +#endif + +#if !defined(simde_x_clz32) || !defined(simde_x_clz64) + static uint8_t simde_x_avx512cd_lz_lookup(const uint8_t value) { + static const uint8_t lut[256] = { + 7, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + return 
lut[value]; + }; + + #if !defined(simde_x_clz32) + static int simde_x_clz32(uint32_t x) { + size_t s = sizeof(x) * 8; + uint32_t r; + + while ((s -= 8) != 0) { + r = x >> s; + if (r != 0) + return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + + (((sizeof(x) - 1) * 8) - s); + } + + if (x == 0) + return (int) ((sizeof(x) * 8) - 1); + else + return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + + ((sizeof(x) - 1) * 8); + } + #endif + + #if !defined(simde_x_clz64) + static int simde_x_clz64(uint64_t x) { + size_t s = sizeof(x) * 8; + uint64_t r; + + while ((s -= 8) != 0) { + r = x >> s; + if (r != 0) + return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, r)) + + (((sizeof(x) - 1) * 8) - s); + } + + if (x == 0) + return (int) ((sizeof(x) * 8) - 1); + else + return simde_x_avx512cd_lz_lookup(HEDLEY_STATIC_CAST(uint8_t, x)) + + ((sizeof(x) - 1) * 8); + } + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lzcnt_epi32(simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_lzcnt_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/58827596/501126 */ + a = _mm_andnot_si128(_mm_srli_epi32(a, 8), a); + a = _mm_castps_si128(_mm_cvtepi32_ps(a)); + a = _mm_srli_epi32(a, 23); + a = _mm_subs_epu16(_mm_set1_epi32(158), a); + a = _mm_min_epi16(a, _mm_set1_epi32(32)); + return a; + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_cntlz(a_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (HEDLEY_UNLIKELY(a_.i32[i] == 0) ? HEDLEY_STATIC_CAST(int32_t, sizeof(int32_t) * CHAR_BIT) : HEDLEY_STATIC_CAST(int32_t, simde_x_clz32(HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])))); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES) + #undef _mm_lzcnt_epi32 + #define _mm_lzcnt_epi32(a) simde_mm_lzcnt_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_lzcnt_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_mask_lzcnt_epi32(src, k, a); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_lzcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_lzcnt_epi32 + #define _mm_mask_lzcnt_epi32(src, k, a) simde_mm_mask_lzcnt_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_lzcnt_epi32(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512CD_NATIVE) + return _mm_maskz_lzcnt_epi32(k, a); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_lzcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_lzcnt_epi32 + #define _mm_maskz_lzcnt_epi32(k, a) simde_mm_maskz_lzcnt_epi32(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_LZCNT_H) */ +/* :: End simde/x86/avx512/lzcnt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/madd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of 
this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Ashleigh Newman-Jones + */ + +#if !defined(SIMDE_X86_AVX512_MADD_H) +#define SIMDE_X86_AVX512_MADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_madd_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_madd_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_madd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_madd_epi16 + #define _mm_mask_madd_epi16(src, k, a, b) simde_mm_mask_madd_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_madd_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_madd_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_madd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_madd_epi16 + #define _mm_maskz_madd_epi16(src, k, a, b) simde_mm_maskz_madd_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_madd_epi16 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_madd_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_madd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_madd_epi16 + #define _mm256_mask_madd_epi16(src, k, a, b) simde_mm256_mask_madd_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_madd_epi16 (simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_madd_epi16(k, a, b); + #else + return 
simde_mm256_maskz_mov_epi32(k, simde_mm256_madd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_madd_epi16 + #define _mm256_maskz_madd_epi16(src, k, a, b) simde_mm256_maskz_madd_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_madd_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_madd_epi16(a, b); + #else + simde__m512i_private r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if 0 && SIMDE_NATURAL_VECTOR_SIZE_LE(256) || defined(SIMDE_BUG_CLANG_BAD_MADD) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_madd_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = + (HEDLEY_STATIC_CAST(int32_t, a_.i16[ i ]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[ i ])) + + (HEDLEY_STATIC_CAST(int32_t, a_.i16[i + 1]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i + 1])); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_madd_epi16 + #define _mm512_madd_epi16(a, b) simde_mm512_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_madd_epi16 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_madd_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_madd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_madd_epi16 + #define _mm512_mask_madd_epi16(src, k, a, b) simde_mm512_mask_madd_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_madd_epi16 (simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_madd_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_madd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_madd_epi16 + #define _mm512_maskz_madd_epi16(src, k, a, b) simde_mm512_maskz_madd_epi16(src, k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MADD_H) */ +/* :: End simde/x86/avx512/madd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/maddubs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Ashleigh Newman-Jones + */ + +#if !defined(SIMDE_X86_AVX512_MADDUBS_H) +#define SIMDE_X86_AVX512_MADDUBS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_maddubs_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_maddubs_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_maddubs_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_maddubs_epi16 + #define _mm_mask_maddubs_epi16(a, b) simde_mm_mask_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_maddubs_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE ) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_maddubs_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_maddubs_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_maddubs_epi16 + #define _mm_maskz_maddubs_epi16(a, b) simde_mm_maskz_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_maddubs_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_maddubs_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_maddubs_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_maddubs_epi16 + #define _mm256_mask_maddubs_epi16(a, b) simde_mm256_mask_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_maddubs_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_maddubs_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_maddubs_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_maddubs_epi16 + #define _mm256_maskz_maddubs_epi16(a, b) simde_mm256_maskz_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maddubs_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maddubs_epi16(a, b); + #else + simde__m512i_private r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) || defined(SIMDE_BUG_CLANG_BAD_MADD) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / 
sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_maddubs_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maddubs_epi16 + #define _mm512_maddubs_epi16(a, b) simde_mm512_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_maddubs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_maddubs_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_maddubs_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_maddubs_epi16 + #define _mm512_mask_maddubs_epi16(a, b) simde_mm512_mask_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_maddubs_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_maddubs_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_maddubs_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_maddubs_epi16 + #define _mm512_maskz_maddubs_epi16(a, b) simde_mm512_maskz_maddubs_epi16(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MADDUBS_H) */ +/* :: End simde/x86/avx512/maddubs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/max.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MAX_H) +#define SIMDE_X86_AVX512_MAX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_max_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? a_.i8[i] : b_.i8[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +# define _mm512_max_epi8(a, b) simde_mm512_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_max_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epi8 + #define _mm512_mask_max_epi8(src, k, a, b) simde_mm512_mask_max_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_max_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epi8 + #define _mm512_maskz_max_epi8(k, a, b) simde_mm512_maskz_max_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_max_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_max_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_epu8 + #define _mm512_max_epu8(a, b) simde_mm512_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_max_epu8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_max_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epu8 + #define _mm512_mask_max_epu8(src, k, a, b) simde_mm512_mask_max_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_max_epu8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_max_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epu8 + #define _mm512_maskz_max_epu8(k, a, b) simde_mm512_maskz_max_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_max_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +# define _mm512_max_epi16(a, b) simde_mm512_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_max_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epi16 + #define _mm512_mask_max_epi16(src, k, a, b) simde_mm512_mask_max_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_max_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epi16 + #define _mm512_maskz_max_epi16(k, a, b) simde_mm512_maskz_max_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_max_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_max_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_epu16 + #define _mm512_max_epu16(a, b) simde_mm512_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_max_epu16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_max_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epu16 + #define _mm512_mask_max_epu16(src, k, a, b) simde_mm512_mask_max_epu16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_max_epu16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_max_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epu16 + #define _mm512_maskz_max_epu16(k, a, b) simde_mm512_maskz_max_epu16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_max_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_max_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_max_epi32(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_epi32 + #define _mm512_max_epi32(a, b) simde_mm512_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_max_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epi32 + #define _mm512_mask_max_epi32(src, k, a, b) simde_mm512_mask_max_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_max_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epi32 + #define _mm512_maskz_max_epi32(k, a, b) simde_mm512_maskz_max_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_max_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_max_epu32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_max_epu32(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_epu32 + #define _mm512_max_epu32(a, b) simde_mm512_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_max_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_max_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epu32 + #define _mm512_mask_max_epu32(src, k, a, b) simde_mm512_mask_max_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_max_epu32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_max_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epu32 + #define _mm512_maskz_max_epu32(k, a, b) simde_mm512_maskz_max_epu32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_max_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] > b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_epi64 + #define _mm512_max_epi64(a, b) simde_mm512_max_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_max_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epi64 + #define _mm512_mask_max_epi64(src, k, a, b) simde_mm512_mask_max_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_max_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epi64 + #define _mm512_maskz_max_epi64(k, a, b) simde_mm512_maskz_max_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_max_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_max_epu64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? a_.u64[i] : b_.u64[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_epu64 + #define _mm512_max_epu64(a, b) simde_mm512_max_epu64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_max_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_max_epu64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_max_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_epu64 + #define _mm512_mask_max_epu64(src, k, a, b) simde_mm512_mask_max_epu64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_max_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_max_epu64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_max_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_epu64 + #define _mm512_maskz_max_epu64(k, a, b) simde_mm512_maskz_max_epu64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_max_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_max_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256[0] = simde_mm256_max_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_max_ps(a_.m256[1], b_.m256[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] > b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_ps + #define _mm512_max_ps(a, b) simde_mm512_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_max_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_max_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_max_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_ps + #define _mm512_mask_max_ps(src, k, a, b) simde_mm512_mask_max_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_max_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_max_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_max_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_ps + #define _mm512_maskz_max_ps(k, a, b) simde_mm512_maskz_max_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_max_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_max_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] > b_.f64[i] ? a_.f64[i] : b_.f64[i]; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_max_pd + #define _mm512_max_pd(a, b) simde_mm512_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_max_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_max_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_max_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_max_pd + #define _mm512_mask_max_pd(src, k, a, b) simde_mm512_mask_max_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_max_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_max_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_max_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_max_pd + #define _mm512_maskz_max_pd(k, a, b) simde_mm512_maskz_max_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MAX_H) */ +/* :: End simde/x86/avx512/max.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/min.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * 
included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MIN_H) +#define SIMDE_X86_AVX512_MIN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_min_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? a_.i8[i] : b_.i8[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +# define _mm512_min_epi8(a, b) simde_mm512_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_min_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epi8 + #define _mm512_mask_min_epi8(src, k, a, b) simde_mm512_mask_min_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_min_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epi8 + #define _mm512_maskz_min_epi8(k, a, b) simde_mm512_maskz_min_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_min_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_min_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_epu8 + #define _mm512_min_epu8(a, b) simde_mm512_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_min_epu8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_min_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epu8 + #define _mm512_mask_min_epu8(src, k, a, b) simde_mm512_mask_min_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_min_epu8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_min_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epu8 + #define _mm512_maskz_min_epu8(k, a, b) simde_mm512_maskz_min_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_min_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) +# define _mm512_min_epi16(a, b) simde_mm512_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_min_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epi16 + #define _mm512_mask_min_epi16(src, k, a, b) simde_mm512_mask_min_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_min_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epi16 + #define _mm512_maskz_min_epi16(k, a, b) simde_mm512_maskz_min_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_min_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_min_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? 
a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_epu16 + #define _mm512_min_epu16(a, b) simde_mm512_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epu16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_min_epu16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_min_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epu16 + #define _mm512_mask_min_epu16(src, k, a, b) simde_mm512_mask_min_epu16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epu16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_min_epu16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_min_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epu16 + #define _mm512_maskz_min_epu16(k, a, b) simde_mm512_maskz_min_epu16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_min_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_min_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_min_epi32(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_epi32 + #define _mm512_min_epi32(a, b) simde_mm512_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_min_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epi32 + #define _mm512_mask_min_epi32(src, k, a, b) simde_mm512_mask_min_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i + simde_mm512_maskz_min_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_min_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epi32 + #define _mm512_maskz_min_epi32(k, a, b) simde_mm512_maskz_min_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_min_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_min_epu32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_min_epu32(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_epu32 + #define _mm512_min_epu32(a, b) simde_mm512_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_min_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_min_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epu32 + #define _mm512_mask_min_epu32(src, k, a, b) simde_mm512_mask_min_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epu32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_min_epu32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_min_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epu32 + #define _mm512_maskz_min_epu32(k, a, b) simde_mm512_maskz_min_epu32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_min_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] < b_.i64[i] ? 
a_.i64[i] : b_.i64[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_epi64 + #define _mm512_min_epi64(a, b) simde_mm512_min_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_min_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epi64 + #define _mm512_mask_min_epi64(src, k, a, b) simde_mm512_mask_min_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_min_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epi64 + #define _mm512_maskz_min_epi64(k, a, b) simde_mm512_maskz_min_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_min_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_min_epu64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? a_.u64[i] : b_.u64[i]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_epu64 + #define _mm512_min_epu64(a, b) simde_mm512_min_epu64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_min_epu64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_min_epu64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_min_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_epu64 + #define _mm512_mask_min_epu64(src, k, a, b) simde_mm512_mask_min_epu64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_min_epu64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_min_epu64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_min_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_epu64 + #define _mm512_maskz_min_epu64(k, a, b) simde_mm512_maskz_min_epu64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_min_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_min_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256[0] = simde_mm256_min_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_min_ps(a_.m256[1], b_.m256[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] < b_.f32[i] ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_ps + #define _mm512_min_ps(a, b) simde_mm512_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_min_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_min_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_min_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_ps + #define _mm512_mask_min_ps(src, k, a, b) simde_mm512_mask_min_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_min_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_min_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_min_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_ps + #define _mm512_maskz_min_ps(k, a, b) simde_mm512_maskz_min_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_min_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_min_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] < b_.f64[i] ? a_.f64[i] : b_.f64[i]; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_min_pd + #define _mm512_min_pd(a, b) simde_mm512_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_min_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_min_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_min_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_min_pd + #define _mm512_mask_min_pd(src, k, a, b) simde_mm512_mask_min_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_min_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_min_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_min_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_min_pd + #define _mm512_maskz_min_pd(k, a, b) simde_mm512_maskz_min_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MIN_H) */ +/* :: End simde/x86/avx512/min.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mul.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the 
rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MUL_H) +#define SIMDE_X86_AVX512_MUL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mul_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_ps + #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_ps + #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_ps + #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mul_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_pd + #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_pd + #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_pd + #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + simde__m512i_private x; + __typeof__(r_.i64) ta, tb; + + /* Get even numbered 32-bit values */ + x.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + /* Cast to 64 bits */ + SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].i32); + SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].i32); + r_.i64 = ta * tb; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]); + } + #endif + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_epi32 + #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_epi32 + #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_epi32 + #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + simde__m512i_private x; + 
__typeof__(r_.u64) ta, tb; + + x.u32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].u32); + SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].u32); + r_.u64 = ta * tb; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_epu32 + #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_epu32 + #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_epu32(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_epu32 + #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MUL_H) */ +/* :: End simde/x86/avx512/mul.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mulhi.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MULHI_H) +#define SIMDE_X86_AVX512_MULHI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mulhi_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mulhi_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mulhi_epi16 + #define _mm512_mulhi_epi16(a, b) simde_mm512_mulhi_epi16(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MULHI_H) */ +/* :: End simde/x86/avx512/mulhi.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mulhrs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MULHRS_H) +#define SIMDE_X86_AVX512_MULHRS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mulhrs_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mulhrs_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mulhrs_epi16 + #define _mm512_mulhrs_epi16(a, b) simde_mm512_mulhrs_epi16(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MULHRS_H) */ +/* :: End simde/x86/avx512/mulhrs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mullo.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MULLO_H) +#define SIMDE_X86_AVX512_MULLO_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mullo_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mullo_epi16(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mullo_epi16 + #define _mm512_mullo_epi16(a, b) simde_mm512_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mullo_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mullo_epi32(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mullo_epi32 + #define _mm512_mullo_epi32(a, b) simde_mm512_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mullo_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mullo_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_mullo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mullo_epi32 + #define _mm512_mask_mullo_epi32(src, k, a, b) simde_mm512_mask_mullo_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mullo_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mullo_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_mullo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mullo_epi32 + #define _mm512_maskz_mullo_epi32(k, a, b) simde_mm512_maskz_mullo_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mullo_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mullo_epi64(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] * b_.i64[i]); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mullo_epi64 + #define _mm512_mullo_epi64(a, b) simde_mm512_mullo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mullo_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if 
defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_mullo_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mullo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mullo_epi64 + #define _mm512_mask_mullo_epi64(src, k, a, b) simde_mm512_mask_mullo_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mullo_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_mullo_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_mullo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mullo_epi64 + #define _mm512_maskz_mullo_epi64(k, a, b) simde_mm512_maskz_mullo_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MULLO_H) */ +/* :: End simde/x86/avx512/mullo.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/multishift.h :: */ +#if !defined(SIMDE_X86_AVX512_MULTISHIFT_H) +#define SIMDE_X86_AVX512_MULTISHIFT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_multishift_epi64_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_multishift_epi64_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u8) / sizeof(r_.u8[0]) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.u64[i / 8] >> (a_.u8[i] & 63)) | (b_.u64[i / 8] << (64 - (a_.u8[i] & 63)))); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_multishift_epi64_epi8 + #define _mm_multishift_epi64_epi8(a, b) simde_mm_multishift_epi64_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_multishift_epi64_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_multishift_epi64_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_multishift_epi64_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_multishift_epi64_epi8 + #define _mm_mask_multishift_epi64_epi8(src, k, a, b) simde_mm_mask_multishift_epi64_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_multishift_epi64_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_multishift_epi64_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_multishift_epi64_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_multishift_epi64_epi8 + #define _mm_maskz_multishift_epi64_epi8(src, k, a, b) 
simde_mm_maskz_multishift_epi64_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_multishift_epi64_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_multishift_epi64_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u8) / sizeof(r_.u8[0]) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.u64[i / 8] >> (a_.u8[i] & 63)) | (b_.u64[i / 8] << (64 - (a_.u8[i] & 63)))); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_multishift_epi64_epi8 + #define _mm256_multishift_epi64_epi8(a, b) simde_mm256_multishift_epi64_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_multishift_epi64_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_multishift_epi64_epi8(src, k, a, b); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_multishift_epi64_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_multishift_epi64_epi8 + #define _mm256_mask_multishift_epi64_epi8(src, k, a, b) simde_mm256_mask_multishift_epi64_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_multishift_epi64_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_multishift_epi64_epi8(k, a, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_multishift_epi64_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_multishift_epi64_epi8 + #define _mm256_maskz_multishift_epi64_epi8(src, k, a, b) simde_mm256_maskz_multishift_epi64_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_multishift_epi64_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_multishift_epi64_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u8) / sizeof(r_.u8[0]) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (b_.u64[i / 8] >> (a_.u8[i] & 63)) | (b_.u64[i / 8] << (64 - (a_.u8[i] & 63)))); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_multishift_epi64_epi8 + #define _mm512_multishift_epi64_epi8(a, b) simde_mm512_multishift_epi64_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_multishift_epi64_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask_multishift_epi64_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_multishift_epi64_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_multishift_epi64_epi8 + #define _mm512_mask_multishift_epi64_epi8(src, k, a, b) 
simde_mm512_mask_multishift_epi64_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_multishift_epi64_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_maskz_multishift_epi64_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_multishift_epi64_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_multishift_epi64_epi8 + #define _mm512_maskz_multishift_epi64_epi8(src, k, a, b) simde_mm512_maskz_multishift_epi64_epi8(src, k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MULTISHIFT_H) */ +/* :: End simde/x86/avx512/multishift.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/negate.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_NEGATE_H) +#define SIMDE_X86_AVX512_NEGATE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_negate_ps(simde__m512 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return simde_mm512_xor_ps(a,_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_negate_pd(simde__m512d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return simde_mm512_xor_pd(a, _mm512_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_NEGATE_H) */ +/* :: End simde/x86/avx512/negate.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/or.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_OR_H) +#define SIMDE_X86_AVX512_OR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_or_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_or_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256[0] = simde_mm256_or_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_or_ps(a_.m256[1], b_.m256[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_ps + #define _mm512_or_ps(a, b) simde_mm512_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_or_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_or_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_or_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_ps + #define _mm512_mask_or_ps(src, k, a, b) simde_mm512_mask_or_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_or_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_or_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_or_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_ps + #define _mm512_maskz_or_ps(k, a, b) simde_mm512_maskz_or_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_or_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_or_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_or_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_or_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_pd + #define _mm512_or_pd(a, b) simde_mm512_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_or_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_or_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_or_pd(a, b)); + #endif +} 
+#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_pd + #define _mm512_mask_or_pd(src, k, a, b) simde_mm512_mask_or_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_or_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_or_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_or_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_pd + #define _mm512_maskz_or_pd(k, a, b) simde_mm512_maskz_or_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 | b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] | b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_epi32 + #define _mm512_or_epi32(a, b) simde_mm512_or_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_or_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_or_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_or_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_epi32 + #define _mm512_mask_or_epi32(src, k, v2, v3) simde_mm512_mask_or_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_or_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_or_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_or_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_epi32 + #define _mm512_maskz_or_epi32(k, a, b) simde_mm512_maskz_or_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_or_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_epi64 + #define _mm512_or_epi64(a, b) simde_mm512_or_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_or_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_or_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_or_epi64(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_epi64 + #define _mm512_mask_or_epi64(src, k, a, b) simde_mm512_mask_or_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_or_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_or_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_or_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_epi64 + #define _mm512_maskz_or_epi64(k, a, b) simde_mm512_maskz_or_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_si512 + #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_OR_H) */ +/* :: End simde/x86/avx512/or.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/packs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_PACKS_H) +#define SIMDE_X86_AVX512_PACKS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_packs_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_packs_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_packs_epi16(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_packs_epi16(a_.m256i[1], b_.m256i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < octet_point ; i++) { + r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + r_.i8[i + octet_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); + r_.i8[quarter_point + i] = (a_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[octet_point + i])); + r_.i8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[octet_point + i])); + r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); + r_.i8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); + r_.i8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + octet_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + octet_point + i])); + r_.i8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + octet_point + i] < INT8_MIN) ? 
INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + octet_point + i])); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_packs_epi16 + #define _mm512_packs_epi16(a, b) simde_mm512_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_packs_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_packs_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_packs_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_packs_epi32(a_.m256i[1], b_.m256i[1]); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < octet_point ; i++) { + r_.i16[i] = (a_.i32[i] > INT16_MAX) ? INT16_MAX : ((a_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[i])); + r_.i16[i + octet_point] = (b_.i32[i] > INT16_MAX) ? INT16_MAX : ((b_.i32[i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[i])); + r_.i16[quarter_point + i] = (a_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[octet_point + i])); + r_.i16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[octet_point + i])); + r_.i16[halfway_point + i] = (a_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point +i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + i])); + r_.i16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point +i])); + r_.i16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((a_.i32[quarter_point + octet_point + i] < INT16_MIN) ? INT16_MIN : HEDLEY_STATIC_CAST(int16_t, a_.i32[quarter_point + octet_point + i])); + r_.i16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > INT16_MAX) ? INT16_MAX : ((b_.i32[quarter_point + octet_point + i] < INT16_MIN) ? 
INT16_MIN : HEDLEY_STATIC_CAST(int16_t, b_.i32[quarter_point + octet_point + i])); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_packs_epi32 + #define _mm512_packs_epi32(a, b) simde_mm512_packs_epi32(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_PACKS_H) */ +/* :: End simde/x86/avx512/packs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/packus.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_PACKUS_H) +#define SIMDE_X86_AVX512_PACKUS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_packus_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_packus_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_packus_epi16(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_packus_epi16(a_.m256i[1], b_.m256i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + const size_t octet_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 8; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < octet_point ; i++) { + r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); + r_.u8[i + octet_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); + r_.u8[quarter_point + i] = (a_.i16[octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[octet_point + i])); + r_.u8[quarter_point + i + octet_point] = (b_.i16[octet_point + i] > UINT8_MAX) ? 
UINT8_MAX : ((b_.i16[octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[octet_point + i])); + r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); + r_.u8[halfway_point + i + octet_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); + r_.u8[halfway_point + quarter_point + i] = (a_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + octet_point + i])); + r_.u8[halfway_point + quarter_point + i + octet_point] = (b_.i16[quarter_point + octet_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + octet_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + octet_point + i])); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_packus_epi16 + #define _mm512_packus_epi16(a, b) simde_mm512_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_packus_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_packus_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_packus_epi32(a_.m256i[i], b_.m256i[i]); + } + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + const size_t octet_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < octet_point ; i++) { + r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); + r_.u16[i + octet_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); + r_.u16[quarter_point + i] = (a_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[octet_point + i])); + r_.u16[quarter_point + i + octet_point] = (b_.i32[octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[octet_point + i])); + r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point +i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); + r_.u16[halfway_point + i + octet_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point +i])); + r_.u16[halfway_point + quarter_point + i] = (a_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + octet_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + octet_point + i])); + r_.u16[halfway_point + quarter_point + i + octet_point] = (b_.i32[quarter_point + octet_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + octet_point + i] < 0) ? 
UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + octet_point + i])); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_packus_epi32 + #define _mm512_packus_epi32(a, b) simde_mm512_packus_epi32(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_PACKUS_H) */ +/* :: End simde/x86/avx512/packus.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/permutexvar.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_PERMUTEXVAR_H) +#define SIMDE_X86_AVX512_PERMUTEXVAR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/slli.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SLLI_H) +#define SIMDE_X86_AVX512_SLLI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_slli_epi16 (simde__m512i a, const unsigned int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_16_(_mm512_slli_epi16, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi16(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + if(imm8 < 16) + r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8))); + else + return simde_mm512_setzero_si512(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (imm8 < 16) ? HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)) : 0; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_slli_epi16 + #define _mm512_slli_epi16(a, imm8) simde_mm512_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_slli_epi32 (simde__m512i a, unsigned int imm8) { + /* I guess the restriction was added in 6.4, back-ported to 5.5, then + * removed (fixed) in 7? */ + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_32_(_mm512_slli_epi32, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi32(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are + * used. In this case we should do "imm8 &= 0xff". However in + * practice all bits are used. 
*/ + if (imm8 > 31) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_slli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_slli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_slli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_slli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_slli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_slli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << imm8; + } + #endif + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_slli_epi32 + #define _mm512_slli_epi32(a, imm8) simde_mm512_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_slli_epi64 (simde__m512i a, unsigned int imm8) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_64_(_mm512_slli_epi64, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi64(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are + * used. In this case we should do "imm8 &= 0xff". However in + * practice all bits are used. 
*/ + if (imm8 > 63) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_slli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_slli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_slli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_slli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_slli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_slli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) + r_.u64 = a_.u64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << imm8; + } + #endif + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_slli_epi64 + #define _mm512_slli_epi64(a, imm8) simde_mm512_slli_epi64(a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SLLI_H) */ +/* :: End simde/x86/avx512/slli.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/srli.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRLI_H) +#define SIMDE_X86_AVX512_SRLI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srli_epi16 (simde__m512i a, const unsigned int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_16_(_mm512_srli_epi16, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi16(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_srli_epi16(a, imm8) _mm512_srli_epi16(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_srli_epi16 + #define _mm512_srli_epi16(a, imm8) simde_mm512_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_32_(_mm512_srli_epi32, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi32(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #else + if (imm8 > 31) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srli_epi32 + #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, 
imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_64_(_mm512_srli_epi64, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi64(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #else + /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are + * used. In this case we should do "imm8 &= 0xff" here. However in + * practice all bits are used. */ + if (imm8 > 63) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) + r_.u64 = a_.u64 >> imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srli_epi64 + #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRLI_H) */ +/* :: End simde/x86/avx512/srli.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/test.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + * 2020 Christopher Moore + * 2021 Andrew Rodriguez + */ + +#if !defined(SIMDE_X86_AVX512_TEST_H) +#define SIMDE_X86_AVX512_TEST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_test_epi32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_test_epi32_mask(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_test_epi32_mask +#define _mm256_test_epi32_mask(a, b) simde_mm256_test_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_test_epi32_mask (simde__mmask8 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_test_epi32_mask(k1, a, b); + #else + return simde_mm256_test_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_test_epi32_mask + #define _mm256_mask_test_epi32_mask(k1, a, b) simde_mm256_mask_test_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_test_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_test_epi16_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask32 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, !!(a_.i16[i] & b_.i16[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi16_mask + #define _mm512_test_epi16_mask(a, b) simde_mm512_test_epi16_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_test_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_test_epi32_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask16 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi32_mask +#define _mm512_test_epi32_mask(a, b) simde_mm512_test_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_test_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_test_epi64_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask8, !!(a_.i64[i] & b_.i64[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi64_mask + #define 
_mm512_test_epi64_mask(a, b) simde_mm512_test_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_test_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_test_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(uint64_t, !!(a_.i8[i] & b_.i8[i])) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi8_mask + #define _mm512_test_epi8_mask(a, b) simde_mm512_test_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_test_epi16_mask (simde__mmask32 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_test_epi16_mask(k1, a, b); + #else + return simde_mm512_test_epi16_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi16_mask + #define _mm512_mask_test_epi16_mask(k1, a, b) simde_mm512_mask_test_epi16_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_test_epi32_mask(k1, a, b); + #else + return simde_mm512_test_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi32_mask + #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_test_epi64_mask(k1, a, b); + #else + return simde_mm512_test_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi64_mask + #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_test_epi8_mask (simde__mmask64 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_test_epi8_mask(k1, a, b); + #else + return simde_mm512_test_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi8_mask + #define _mm512_mask_test_epi8_mask(k1, a, b) simde_mm512_mask_test_epi8_mask(k1, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TEST_H) */ +/* :: End simde/x86/avx512/test.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutexvar_epi16 (simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutexvar_epi16(idx, a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + simde__m128i mask16 = simde_mm_set1_epi16(0x0007); + simde__m128i shift16 = simde_mm_set1_epi16(0x0202); + simde__m128i byte_index16 = simde_mm_set1_epi16(0x0100); + simde__m128i index16 = simde_mm_and_si128(idx, mask16); + index16 = simde_mm_mullo_epi16(index16, shift16); + index16 = simde_mm_add_epi16(index16, byte_index16); + return simde_mm_shuffle_epi8(a, 
index16); + #else + simde__m128i_private + idx_ = simde__m128i_to_private(idx), + a_ = simde__m128i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint16x8_t mask16 = vdupq_n_u16(0x0007); + uint16x8_t byte_index16 = vdupq_n_u16(0x0100); + uint16x8_t index16 = vandq_u16(idx_.neon_u16, mask16); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, byte_index16); + r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, vreinterpretq_u8_u16(index16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; + index16 = vec_and(idx_.altivec_u16, vec_splat_u16(7)); + index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); + r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t mask16 = wasm_i16x8_splat(0x0007); + const v128_t shift16 = wasm_i16x8_splat(0x0202); + const v128_t byte_index16 = wasm_i16x8_splat(0x0100); + v128_t index16 = wasm_v128_and(idx_.wasm_v128, mask16); + index16 = wasm_i16x8_mul(index16, shift16); + index16 = wasm_i16x8_add(index16, byte_index16); + r_.wasm_v128 = wasm_i8x16_swizzle(a_.wasm_v128, index16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[idx_.i16[i] & 0x07]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutexvar_epi16 + #define _mm_permutexvar_epi16(idx, a) simde_mm_permutexvar_epi16(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutexvar_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutexvar_epi16(src, k, idx, a); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutexvar_epi16 + #define _mm_mask_permutexvar_epi16(src, k, idx, a) simde_mm_mask_permutexvar_epi16(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutexvar_epi16 (simde__mmask8 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutexvar_epi16(k, idx, a); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutexvar_epi16 + #define _mm_maskz_permutexvar_epi16(k, idx, a) simde_mm_maskz_permutexvar_epi16(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutexvar_epi8 (simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutexvar_epi8(idx, a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + simde__m128i mask = simde_mm_set1_epi8(0x0F); + simde__m128i index = simde_mm_and_si128(idx, mask); + return simde_mm_shuffle_epi8(a, index); + #else + simde__m128i_private + idx_ = simde__m128i_to_private(idx), + a_ = simde__m128i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + 
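+      /* vqtbl1q_u8 performs a full 16-byte table lookup, so masking each index
+       * down to 0..15 is the only preparation this NEON path needs. */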
uint8x16_t mask = vdupq_n_u8(0x0F); + uint8x16_t index = vandq_u8(idx_.neon_u8, mask); + r_.neon_u8 = vqtbl1q_u8(a_.neon_u8, index); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_perm(a_.altivec_u8, a_.altivec_u8, idx_.altivec_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t mask = wasm_i8x16_splat(0x0F); + v128_t index = wasm_v128_and(idx_.wasm_v128, mask); + r_.wasm_v128 = wasm_i8x16_swizzle(a_.wasm_v128, index); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[idx_.i8[i] & 0x0F]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutexvar_epi8 + #define _mm_permutexvar_epi8(idx, a) simde_mm_permutexvar_epi8(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutexvar_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutexvar_epi8(src, k, idx, a); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutexvar_epi8 + #define _mm_mask_permutexvar_epi8(src, k, idx, a) simde_mm_mask_permutexvar_epi8(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutexvar_epi8 (simde__mmask16 k, simde__m128i idx, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutexvar_epi8(k, idx, a); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutexvar_epi8 + #define _mm_maskz_permutexvar_epi8(k, idx, a) simde_mm_maskz_permutexvar_epi8(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi16 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi16(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i mask16 = simde_mm256_set1_epi16(0x001F); + simde__m256i shift16 = simde_mm256_set1_epi16(0x0202); + simde__m256i byte_index16 = simde_mm256_set1_epi16(0x0100); + simde__m256i index16 = simde_mm256_and_si256(idx, mask16); + index16 = simde_mm256_mullo_epi16(index16, shift16); + simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + simde__m256i select = simde_mm256_slli_epi64(index16, 3); + index16 = simde_mm256_add_epi16(index16, byte_index16); + lo = simde_mm256_shuffle_epi8(lo, index16); + hi = simde_mm256_shuffle_epi8(hi, index16); + return simde_mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8 } }; + uint16x8_t mask16 = vdupq_n_u16(0x000F); + uint16x8_t byte_index16 = vdupq_n_u16(0x0100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / 
sizeof(r_.m128i_private[0])) ; i++) { + uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, byte_index16); + r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u16(index16)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; + mask16 = vec_splat_u16(0x000F); + shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); + byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); + index16 = vec_mladd(index16, shift16, byte_index16); + r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, + a_.m128i_private[1].altivec_u8, + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, index16, r, t; + const v128_t mask16 = wasm_i16x8_splat(0x000F); + const v128_t shift16 = wasm_i16x8_splat(0x0202); + const v128_t byte_index16 = wasm_i16x8_splat(0x0100); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index16 = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask16); + index16 = wasm_i16x8_mul(index16, shift16); + index = wasm_i16x8_add(index16, byte_index16); + r = wasm_i8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a1, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[idx_.i16[i] & 0x0F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi16 + #define _mm256_permutexvar_epi16(idx, a) simde_mm256_permutexvar_epi16(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi16(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi16 + #define _mm256_mask_permutexvar_epi16(src, k, idx, a) simde_mm256_mask_permutexvar_epi16(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi16 (simde__mmask16 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi16(k, idx, a); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi16 + #define _mm256_maskz_permutexvar_epi16(k, idx, a) simde_mm256_maskz_permutexvar_epi16(k, idx, a) 
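+/* Usage sketch (illustrative only; idx and a stand for arbitrary vectors): the
+ * maskz form zeroes every result lane whose mask bit is clear instead of
+ * permuting it, e.g.
+ *
+ *   simde__m256i r = simde_mm256_maskz_permutexvar_epi16(UINT16_C(0x00FF), idx, a);
+ *
+ * keeps the permuted low eight 16-bit lanes and clears lanes 8..15. */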
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi32 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi32(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8 } }; + uint32x4_t mask32 = vdupq_n_u32(0x00000007); + uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, byte_index32); + r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vreinterpretq_u8_u32(index32)); + } + #else + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 0x07]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi32 + #define _mm256_permutexvar_epi32(idx, a) simde_mm256_permutexvar_epi32(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi32(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_permutexvar_epi32(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi32 + #define _mm256_mask_permutexvar_epi32(src, k, idx, a) simde_mm256_mask_permutexvar_epi32(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi32 (simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi32(k, idx, a); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutexvar_epi32(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi32 + #define _mm256_maskz_permutexvar_epi32(k, idx, a) simde_mm256_maskz_permutexvar_epi32(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi64 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi64(idx, a); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[idx_.i64[i] & 3]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi64 + #define _mm256_permutexvar_epi64(idx, a) 
simde_mm256_permutexvar_epi64(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi64(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_permutexvar_epi64(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi64 + #define _mm256_mask_permutexvar_epi64(src, k, idx, a) simde_mm256_mask_permutexvar_epi64(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi64(k, idx, a); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutexvar_epi64(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi64 + #define _mm256_maskz_permutexvar_epi64(k, idx, a) simde_mm256_maskz_permutexvar_epi64(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutexvar_epi8 (simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_epi8(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i mask = simde_mm256_set1_epi8(0x0F); + simde__m256i lo = simde_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + simde__m256i hi = simde_mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + simde__m256i index = simde_mm256_and_si256(idx, mask); + simde__m256i select = simde_mm256_slli_epi64(idx, 3); + lo = simde_mm256_shuffle_epi8(lo, index); + hi = simde_mm256_shuffle_epi8(hi, index); + return simde_mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + idx_ = simde__m256i_to_private(idx), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x2_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8 } }; + uint8x16_t mask = vdupq_n_u8(0x1F); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].neon_u8 = vqtbl2q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_u8 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i8x16_splat(0x1F); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + r = wasm_i8x16_swizzle(a0, index); + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a1, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < 
(sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[idx_.i8[i] & 0x1F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_epi8 + #define _mm256_permutexvar_epi8(idx, a) simde_mm256_permutexvar_epi8(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutexvar_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_epi8(src, k, idx, a); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_epi8 + #define _mm256_mask_permutexvar_epi8(src, k, idx, a) simde_mm256_mask_permutexvar_epi8(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutexvar_epi8 (simde__mmask32 k, simde__m256i idx, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_epi8(k, idx, a); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_epi8 + #define _mm256_maskz_permutexvar_epi8(k, idx, a) simde_mm256_maskz_permutexvar_epi8(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutexvar_pd (simde__m256i idx, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_pd(idx, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_permutexvar_epi64(idx, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_pd + #define _mm256_permutexvar_pd(idx, a) simde_mm256_permutexvar_pd(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_permutexvar_pd (simde__m256d src, simde__mmask8 k, simde__m256i idx, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_pd(src, k, idx, a); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_permutexvar_pd(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_pd + #define _mm256_mask_permutexvar_pd(src, k, idx, a) simde_mm256_mask_permutexvar_pd(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_permutexvar_pd (simde__mmask8 k, simde__m256i idx, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_pd(k, idx, a); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_permutexvar_pd(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_pd + #define _mm256_maskz_permutexvar_pd(k, idx, a) simde_mm256_maskz_permutexvar_pd(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutexvar_ps (simde__m256i idx, simde__m256 a) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutexvar_ps(idx, a); + #elif defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_permutevar8x32_ps(a, idx); + #else + return simde_mm256_castsi256_ps(simde_mm256_permutexvar_epi32(idx, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutexvar_ps + #define _mm256_permutexvar_ps(idx, a) simde_mm256_permutexvar_ps(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_permutexvar_ps (simde__m256 src, simde__mmask8 k, simde__m256i idx, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutexvar_ps(src, k, idx, a); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_permutexvar_ps(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutexvar_ps + #define _mm256_mask_permutexvar_ps(src, k, idx, a) simde_mm256_mask_permutexvar_ps(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_permutexvar_ps (simde__mmask8 k, simde__m256i idx, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutexvar_ps(k, idx, a); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_permutexvar_ps(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutexvar_ps + #define _mm256_maskz_permutexvar_ps(k, idx, a) simde_mm256_maskz_permutexvar_ps(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutexvar_epi16 (simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_permutexvar_epi16(idx, a); + #else + simde__m512i_private + idx_ = simde__m512i_to_private(idx), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i t0, t1, index, select, a01, a23; + simde__m256i mask = simde_mm256_set1_epi16(0x001F); + simde__m256i shift = simde_mm256_set1_epi16(0x0202); + simde__m256i byte_index = simde_mm256_set1_epi16(0x0100); + simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); + simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); + simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); + simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + index = simde_mm256_and_si256(index, mask); + index = simde_mm256_mullo_epi16(index, shift); + index = simde_mm256_add_epi16(index, byte_index); + t0 = simde_mm256_shuffle_epi8(a0, index); + t1 = simde_mm256_shuffle_epi8(a1, index); + select = simde_mm256_slli_epi64(index, 3); + a01 = simde_mm256_blendv_epi8(t0, t1, select); + t0 = simde_mm256_shuffle_epi8(a2, index); + t1 = simde_mm256_shuffle_epi8(a3, index); + a23 = simde_mm256_blendv_epi8(t0, t1, select); + select = simde_mm256_slli_epi64(index, 2); + r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8, + a_.m128i_private[2].neon_u8, + a_.m128i_private[3].neon_u8 } }; + uint16x8_t mask16 = vdupq_n_u16(0x001F); + 
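+      /* Each 16-bit index i is expanded below into the byte pair (2*i, 2*i + 1):
+       * multiplying by 0x0202 replicates 2*i into both bytes and adding 0x0100
+       * turns the high byte into 2*i + 1, so the byte-wise vqtbl4q_u8 lookup
+       * fetches both halves of source lane i. */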
uint16x8_t byte_index16 = vdupq_n_u16(0x0100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint16x8_t index16 = vandq_u16(idx_.m128i_private[i].neon_u16, mask16); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, byte_index16); + r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u16(index16)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16, mask16, shift16, byte_index16; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; + mask16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x001F)); + shift16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)); + byte_index16 = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100)); + test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index16 = vec_and(idx_.m128i_private[i].altivec_u16, mask16); + index16 = vec_mladd(index16, shift16, byte_index16); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); + r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); + r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); + r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i16x8_splat(0x001F); + const v128_t shift = wasm_i16x8_splat(0x0202); + const v128_t byte_index = wasm_i16x8_splat(0x0100); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + const v128_t a2 = a_.m128i_private[2].wasm_v128; + const v128_t a3 = a_.m128i_private[3].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + index = wasm_i16x8_mul(index, shift); + index = wasm_i16x8_add(index, byte_index); + r = wasm_i8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a1, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a2, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a3, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[idx_.i16[i] & 0x1F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_epi16 + #define _mm512_permutexvar_epi16(idx, a) simde_mm512_permutexvar_epi16(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutexvar_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_permutexvar_epi16(src, k, idx, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_epi16 + #define _mm512_mask_permutexvar_epi16(src, k, idx, a) simde_mm512_mask_permutexvar_epi16(src, k, idx, a) +#endif + 
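+/* Illustrative sketch (the helper below is hypothetical, not part of this
+ * header): result lane i of simde_mm512_permutexvar_epi16() is a's lane
+ * (idx lane i mod 32), as in the portable loop above, so reversing all 32
+ * lanes looks like
+ *
+ *   simde__m512i reverse_lanes_epi16(simde__m512i a) {   // hypothetical helper
+ *     const simde__m512i idx = simde_mm512_set_epi16(    // lane i holds 31 - i
+ *          0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ *         16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
+ *     return simde_mm512_permutexvar_epi16(idx, a);      // lane i <- a's lane 31 - i
+ *   }
+ *
+ * The mask/maskz variants only change how unselected lanes are filled:
+ * from src, or with zero. */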
+SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutexvar_epi16 (simde__mmask32 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_permutexvar_epi16(k, idx, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutexvar_epi16(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_epi16 + #define _mm512_maskz_permutexvar_epi16(k, idx, a) simde_mm512_maskz_permutexvar_epi16(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutexvar_epi32 (simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutexvar_epi32(idx, a); + #else + simde__m512i_private + idx_ = simde__m512i_to_private(idx), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i index, r0, r1, select; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + r0 = simde_mm256_permutevar8x32_epi32(a_.m256i[0], index); + r1 = simde_mm256_permutevar8x32_epi32(a_.m256i[1], index); + select = simde_mm256_slli_epi32(index, 28); + r_.m256i[i] = simde_mm256_castps_si256(simde_mm256_blendv_ps(simde_mm256_castsi256_ps(r0), + simde_mm256_castsi256_ps(r1), + simde_mm256_castsi256_ps(select))); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8, + a_.m128i_private[2].neon_u8, + a_.m128i_private[3].neon_u8 } }; + uint32x4_t mask32 = vdupq_n_u32(0x0000000F); + uint32x4_t byte_index32 = vdupq_n_u32(0x03020100); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + uint32x4_t index32 = vandq_u32(idx_.m128i_private[i].neon_u32, mask32); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, byte_index32); + r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vreinterpretq_u8_u32(index32)); + } + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) index32, mask32, byte_index32, temp32, sixteen; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) zero, shift; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) index, test, r01, r23; + mask32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x0000000F)); + byte_index32 = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100)); + zero = vec_splat_u16(0); + shift = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)); + sixteen = vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16)); + test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index32 = vec_and(idx_.m128i_private[i].altivec_u32, mask32); + + /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ + temp32 = vec_sl(index32, sixteen); + index32 = vec_add(index32, temp32); + index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), + shift, + zero)); + + index32 = vec_add(index32, byte_index32); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); + r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, index); + r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, index); + 
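+        /* Index bit 5 (the 0x20 test) distinguishes the two 256-bit halves of a:
+         * vec_sel takes bytes from r23 (upper half) where it is set and from
+         * r01 (lower half) otherwise. */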
r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(index, test), test)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i32x4_splat(0x0000000F); + const v128_t shift = wasm_i32x4_splat(0x04040404); + const v128_t byte_index = wasm_i32x4_splat(0x03020100); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + const v128_t a2 = a_.m128i_private[2].wasm_v128; + const v128_t a3 = a_.m128i_private[3].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + index = wasm_i32x4_mul(index, shift); + index = wasm_i32x4_add(index, byte_index); + r = wasm_i8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a1, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a2, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a3, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 0x0F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_epi32 + #define _mm512_permutexvar_epi32(idx, a) simde_mm512_permutexvar_epi32(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutexvar_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutexvar_epi32(src, k, idx, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_permutexvar_epi32(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_epi32 + #define _mm512_mask_permutexvar_epi32(src, k, idx, a) simde_mm512_mask_permutexvar_epi32(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutexvar_epi32 (simde__mmask16 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutexvar_epi32(k, idx, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutexvar_epi32(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_epi32 + #define _mm512_maskz_permutexvar_epi32(k, idx, a) simde_mm512_maskz_permutexvar_epi32(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutexvar_epi64 (simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutexvar_epi64(idx, a); + #else + simde__m512i_private + idx_ = simde__m512i_to_private(idx), + a_ = simde__m512i_to_private(a), + r_; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[idx_.i64[i] & 7]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_epi64 + #define _mm512_permutexvar_epi64(idx, a) simde_mm512_permutexvar_epi64(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutexvar_epi64 (simde__m512i src, 
simde__mmask8 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutexvar_epi64(src, k, idx, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_permutexvar_epi64(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_epi64 + #define _mm512_mask_permutexvar_epi64(src, k, idx, a) simde_mm512_mask_permutexvar_epi64(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutexvar_epi64 (simde__mmask8 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutexvar_epi64(k, idx, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutexvar_epi64(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_epi64 + #define _mm512_maskz_permutexvar_epi64(k, idx, a) simde_mm512_maskz_permutexvar_epi64(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutexvar_epi8 (simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_permutexvar_epi8(idx, a); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + simde__m512i hilo, hi, lo, hi2, lo2, idx2; + simde__m512i ones = simde_mm512_set1_epi8(1); + simde__m512i low_bytes = simde_mm512_set1_epi16(0x00FF); + + idx2 = simde_mm512_srli_epi16(idx, 1); + hilo = simde_mm512_permutexvar_epi16(idx2, a); + simde__mmask64 mask = simde_mm512_test_epi8_mask(idx, ones); + lo = simde_mm512_and_si512(hilo, low_bytes); + hi = simde_mm512_srli_epi16(hilo, 8); + + idx2 = simde_mm512_srli_epi16(idx, 9); + hilo = simde_mm512_permutexvar_epi16(idx2, a); + lo2 = simde_mm512_slli_epi16(hilo, 8); + hi2 = simde_mm512_andnot_si512(low_bytes, hilo); + + lo = simde_mm512_or_si512(lo, lo2); + hi = simde_mm512_or_si512(hi, hi2); + + return simde_mm512_mask_blend_epi8(mask, lo, hi); + #else + simde__m512i_private + idx_ = simde__m512i_to_private(idx), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i t0, t1, index, select, a01, a23; + simde__m256i mask = simde_mm256_set1_epi8(0x3F); + simde__m256i a0 = simde_mm256_broadcastsi128_si256(a_.m128i[0]); + simde__m256i a1 = simde_mm256_broadcastsi128_si256(a_.m128i[1]); + simde__m256i a2 = simde_mm256_broadcastsi128_si256(a_.m128i[2]); + simde__m256i a3 = simde_mm256_broadcastsi128_si256(a_.m128i[3]); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + index = simde_mm256_and_si256(index, mask); + select = simde_mm256_slli_epi64(index, 3); + t0 = simde_mm256_shuffle_epi8(a0, index); + t1 = simde_mm256_shuffle_epi8(a1, index); + a01 = simde_mm256_blendv_epi8(t0, t1, select); + t0 = simde_mm256_shuffle_epi8(a2, index); + t1 = simde_mm256_shuffle_epi8(a3, index); + a23 = simde_mm256_blendv_epi8(t0, t1, select); + select = simde_mm256_slli_epi64(index, 2); + r_.m256i[i] = simde_mm256_blendv_epi8(a01, a23, select); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16x4_t table = { { a_.m128i_private[0].neon_u8, + a_.m128i_private[1].neon_u8, + a_.m128i_private[2].neon_u8, + a_.m128i_private[3].neon_u8 } }; + uint8x16_t mask = vdupq_n_u8(0x3F); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].neon_u8 = vqtbl4q_u8(table, vandq_u8(idx_.m128i_private[i].neon_u8, mask)); + } + 
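+    /* Note: vqtbl4q_u8 above indexes all 64 bytes of the four-register table at
+     * once, which is why a single lookup with the index masked to 0..63 is
+     * enough for each 128-bit chunk of the result. */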
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) test, r01, r23; + test = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 0x20)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r01 = vec_perm(a_.m128i_private[0].altivec_u8, a_.m128i_private[1].altivec_u8, idx_.m128i_private[i].altivec_u8); + r23 = vec_perm(a_.m128i_private[2].altivec_u8, a_.m128i_private[3].altivec_u8, idx_.m128i_private[i].altivec_u8); + r_.m128i_private[i].altivec_u8 = vec_sel(r01, r23, vec_cmpeq(vec_and(idx_.m128i_private[i].altivec_u8, test), test)); + } + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t index, r, t; + const v128_t mask = wasm_i8x16_splat(0x3F); + const v128_t sixteen = wasm_i8x16_splat(16); + const v128_t a0 = a_.m128i_private[0].wasm_v128; + const v128_t a1 = a_.m128i_private[1].wasm_v128; + const v128_t a2 = a_.m128i_private[2].wasm_v128; + const v128_t a3 = a_.m128i_private[3].wasm_v128; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + index = wasm_v128_and(idx_.m128i_private[i].wasm_v128, mask); + r = wasm_i8x16_swizzle(a0, index); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a1, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a2, index); + r = wasm_v128_or(r, t); + + index = wasm_i8x16_sub(index, sixteen); + t = wasm_i8x16_swizzle(a3, index); + r_.m128i_private[i].wasm_v128 = wasm_v128_or(r, t); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[idx_.i8[i] & 0x3F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_epi8 + #define _mm512_permutexvar_epi8(idx, a) simde_mm512_permutexvar_epi8(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutexvar_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask_permutexvar_epi8(src, k, idx, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_epi8 + #define _mm512_mask_permutexvar_epi8(src, k, idx, a) simde_mm512_mask_permutexvar_epi8(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutexvar_epi8 (simde__mmask64 k, simde__m512i idx, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_maskz_permutexvar_epi8(k, idx, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutexvar_epi8(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_epi8 + #define _mm512_maskz_permutexvar_epi8(k, idx, a) simde_mm512_maskz_permutexvar_epi8(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_permutexvar_pd (simde__m512i idx, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutexvar_pd(idx, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_permutexvar_epi64(idx, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_pd + #define _mm512_permutexvar_pd(idx, a) simde_mm512_permutexvar_pd(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d 
+simde_mm512_mask_permutexvar_pd (simde__m512d src, simde__mmask8 k, simde__m512i idx, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutexvar_pd(src, k, idx, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_permutexvar_pd(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_pd + #define _mm512_mask_permutexvar_pd(src, k, idx, a) simde_mm512_mask_permutexvar_pd(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_permutexvar_pd (simde__mmask8 k, simde__m512i idx, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutexvar_pd(k, idx, a); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_permutexvar_pd(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_pd + #define _mm512_maskz_permutexvar_pd(k, idx, a) simde_mm512_maskz_permutexvar_pd(k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_permutexvar_ps (simde__m512i idx, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutexvar_ps(idx, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_permutexvar_epi32(idx, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutexvar_ps + #define _mm512_permutexvar_ps(idx, a) simde_mm512_permutexvar_ps(idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_permutexvar_ps (simde__m512 src, simde__mmask16 k, simde__m512i idx, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutexvar_ps(src, k, idx, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_permutexvar_ps(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutexvar_ps + #define _mm512_mask_permutexvar_ps(src, k, idx, a) simde_mm512_mask_permutexvar_ps(src, k, idx, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_permutexvar_ps (simde__mmask16 k, simde__m512i idx, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutexvar_ps(k, idx, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_permutexvar_ps(idx, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutexvar_ps + #define _mm512_maskz_permutexvar_ps(k, idx, a) simde_mm512_maskz_permutexvar_ps(k, idx, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_PERMUTEXVAR_H) */ +/* :: End simde/x86/avx512/permutexvar.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/permutex2var.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) +#define SIMDE_X86_AVX512_PERMUTEX2VAR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* The following generic code avoids many, nearly identical, repetitions of fairly complex code. + * If the compiler optimizes well, in particular extracting invariant code from loops + * and simplifying code involving constants passed as arguments, it should not be + * significantly slower than specific code. + * Note that when the original vector contains few elements, these implementations + * may not be faster than portable code. 
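+ * (In the helpers below, log2_index_size of 0, 1 or 2 selects 8-, 16- or 32-bit
+ * indices, and log2_data_length of 0, 1 or 2 selects one, two or four 128-bit
+ * registers per operand, i.e. the 128-, 256- and 512-bit forms.)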
+ */ +#if defined(SIMDE_X86_SSSE3_NATIVE) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_X_PERMUTEX2VAR_USE_GENERIC +#endif + +#if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_permutex2var128 (const simde__m128i *a, const simde__m128i idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { + const int idx_mask = (1 << (5 - log2_index_size + log2_data_length)) - 1; + + #if defined(SIMDE_X86_SSE3_NATIVE) + __m128i ra, rb, t, test, select, index; + const __m128i sixteen = _mm_set1_epi8(16); + + /* Avoid the mullo intrinsics which have high latency (and the 32-bit one requires SSE4.1) */ + switch (log2_index_size) { + default: /* Avoid uninitialized variable warning/error */ + case 0: + index = _mm_and_si128(idx, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index = _mm_and_si128(idx, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index = _mm_slli_epi32(index, 1); + t = _mm_slli_epi32(index, 8); + index = _mm_or_si128(index, t); + index = _mm_add_epi16(index, _mm_set1_epi16(0x0100)); + break; + case 2: + index = _mm_and_si128(idx, _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index = _mm_slli_epi32(index, 2); + t = _mm_slli_epi32(index, 8); + index = _mm_or_si128(index, t); + t = _mm_slli_epi32(index, 16); + index = _mm_or_si128(index, t); + index = _mm_add_epi32(index, _mm_set1_epi32(0x03020100)); + break; + } + + test = index; + index = _mm_and_si128(index, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (4 + log2_data_length)) - 1))); + test = _mm_cmpgt_epi8(test, index); + + ra = _mm_shuffle_epi8(a[0], index); + rb = _mm_shuffle_epi8(b[0], index); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + select = _mm_cmplt_epi8(index, sixteen); + index = _mm_sub_epi8(index, sixteen); + ra = _mm_blendv_epi8(_mm_shuffle_epi8(a[i], index), ra, select); + rb = _mm_blendv_epi8(_mm_shuffle_epi8(b[i], index), rb, select); + } + + return _mm_blendv_epi8(ra, rb, test); + #else + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + select = _mm_cmplt_epi8(index, sixteen); + index = _mm_sub_epi8(index, sixteen); + ra = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(a[i], index)), _mm_and_si128(select, ra)); + rb = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(b[i], index)), _mm_and_si128(select, rb)); + } + + return _mm_or_si128(_mm_andnot_si128(test, ra), _mm_and_si128(test, rb)); + #endif + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16_t index, r; + uint16x8_t index16; + uint32x4_t index32; + uint8x16x2_t table2_a, table2_b; + uint8x16x4_t table4_a, table4_b; + + switch (log2_index_size) { + case 0: + index = vandq_u8(simde__m128i_to_neon_u8(idx), vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); + break; + case 1: + index16 = vandq_u16(simde__m128i_to_neon_u16(idx), vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, vdupq_n_u16(0x0100)); + index = vreinterpretq_u8_u16(index16); + break; + case 2: + index32 = vandq_u32(simde__m128i_to_neon_u32(idx), vdupq_n_u32(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, vdupq_n_u32(0x03020100)); + index = vreinterpretq_u8_u32(index32); + break; + } + + uint8x16_t mask = 
vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1)); + + switch (log2_data_length) { + case 0: + r = vqtbx1q_u8(vqtbl1q_u8(simde__m128i_to_neon_u8(b[0]), vandq_u8(index, mask)), simde__m128i_to_neon_u8(a[0]), index); + break; + case 1: + table2_a.val[0] = simde__m128i_to_neon_u8(a[0]); + table2_a.val[1] = simde__m128i_to_neon_u8(a[1]); + table2_b.val[0] = simde__m128i_to_neon_u8(b[0]); + table2_b.val[1] = simde__m128i_to_neon_u8(b[1]); + r = vqtbx2q_u8(vqtbl2q_u8(table2_b, vandq_u8(index, mask)), table2_a, index); + break; + case 2: + table4_a.val[0] = simde__m128i_to_neon_u8(a[0]); + table4_a.val[1] = simde__m128i_to_neon_u8(a[1]); + table4_a.val[2] = simde__m128i_to_neon_u8(a[2]); + table4_a.val[3] = simde__m128i_to_neon_u8(a[3]); + table4_b.val[0] = simde__m128i_to_neon_u8(b[0]); + table4_b.val[1] = simde__m128i_to_neon_u8(b[1]); + table4_b.val[2] = simde__m128i_to_neon_u8(b[2]); + table4_b.val[3] = simde__m128i_to_neon_u8(b[3]); + r = vqtbx4q_u8(vqtbl4q_u8(table4_b, vandq_u8(index, mask)), table4_a, index); + break; + } + + return simde__m128i_from_neon_u8(r); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r, ra, rb, t, index, s, thirty_two = vec_splats(HEDLEY_STATIC_CAST(uint8_t, 32)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) temp32, index32; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) select, test; + + switch (log2_index_size) { + default: /* Avoid uninitialized variable warning/error */ + case 0: + index = vec_and(simde__m128i_to_altivec_u8(idx), vec_splats(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); + break; + case 1: + index16 = simde__m128i_to_altivec_u16(idx); + index16 = vec_and(index16, vec_splats(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); + index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); + break; + case 2: + index32 = simde__m128i_to_altivec_u32(idx); + index32 = vec_and(index32, vec_splats(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); + + /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ + temp32 = vec_sl(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))); + index32 = vec_add(index32, temp32); + index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)), + vec_splat_u16(0))); + + index32 = vec_add(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100))); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); + break; + } + + if (log2_data_length == 0) { + r = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(b[0]), HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index)); + } + else { + s = index; + index = vec_and(index, vec_splats(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1))); + test = vec_cmpgt(s, index); + + ra = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(a[1]), index); + rb = vec_perm(simde__m128i_to_altivec_u8(b[0]), simde__m128i_to_altivec_u8(b[1]), index); + + SIMDE_VECTORIZE + for (int i = 2 ; i < (1 << log2_data_length) ; i += 2) { + select = vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed 
char), index), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), thirty_two)); + index = vec_sub(index, thirty_two); + t = vec_perm(simde__m128i_to_altivec_u8(a[i]), simde__m128i_to_altivec_u8(a[i + 1]), index); + ra = vec_sel(t, ra, select); + t = vec_perm(simde__m128i_to_altivec_u8(b[i]), simde__m128i_to_altivec_u8(b[i + 1]), index); + rb = vec_sel(t, rb, select); + } + + r = vec_sel(ra, rb, test); + } + + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sixteen = wasm_i8x16_splat(16); + + v128_t index = simde__m128i_to_wasm_v128(idx); + + switch (log2_index_size) { + case 0: + index = wasm_v128_and(index, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index = wasm_v128_and(index, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index = wasm_i16x8_mul(index, wasm_i16x8_splat(0x0202)); + index = wasm_i16x8_add(index, wasm_i16x8_splat(0x0100)); + break; + case 2: + index = wasm_v128_and(index, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index = wasm_i32x4_mul(index, wasm_i32x4_splat(0x04040404)); + index = wasm_i32x4_add(index, wasm_i32x4_splat(0x03020100)); + break; + } + + v128_t r = wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(a[0]), index); + + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + index = wasm_i8x16_sub(index, sixteen); + r = wasm_v128_or(r, wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(a[i]), index)); + } + + SIMDE_VECTORIZE + for (int i = 0 ; i < (1 << log2_data_length) ; i++) { + index = wasm_i8x16_sub(index, sixteen); + r = wasm_v128_or(r, wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(b[i]), index)); + } + + return simde__m128i_from_wasm_v128(r); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_permutex2var (simde__m128i *r, const simde__m128i *a, const simde__m128i *idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { + SIMDE_VECTORIZE + for (int i = 0 ; i < (1 << log2_data_length) ; i++) { + r[i] = simde_x_permutex2var128(a, idx[i], b, log2_index_size, log2_data_length); + } +} +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi16(a, idx, b); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 1, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 8) ? 
b_ : a_).i16[idx_.i16[i] & 7]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi16 + #define _mm_permutex2var_epi16(a, idx, b) simde_mm_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi16 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm_mask_mov_epi16(a, k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi16 +#define _mm_mask_permutex2var_epi16(a, k, idx, b) simde_mm_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm_mask_mov_epi16(idx, k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi16 +#define _mm_mask2_permutex2var_epi16(a, idx, k, b) simde_mm_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi16 +#define _mm_maskz_permutex2var_epi16(k, a, idx, b) simde_mm_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi32(a, idx, b); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) /* This may not be faster than the portable version */ + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 2, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 4) ? 
b_ : a_).i32[idx_.i32[i] & 3]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi32 + #define _mm_permutex2var_epi32(a, idx, b) simde_mm_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi32 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm_mask_mov_epi32(a, k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi32 +#define _mm_mask_permutex2var_epi32(a, k, idx, b) simde_mm_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm_mask_mov_epi32(idx, k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi32 +#define _mm_mask2_permutex2var_epi32(a, idx, k, b) simde_mm_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi32 +#define _mm_maskz_permutex2var_epi32(k, a, idx, b) simde_mm_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi64(a, idx, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 2) ? 
b_ : a_).i64[idx_.i64[i] & 1]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi64 + #define _mm_permutex2var_epi64(a, idx, b) simde_mm_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi64 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm_mask_mov_epi64(a, k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi64 +#define _mm_mask_permutex2var_epi64(a, k, idx, b) simde_mm_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm_mask_mov_epi64(idx, k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi64 +#define _mm_mask2_permutex2var_epi64(a, idx, k, b) simde_mm_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi64 +#define _mm_maskz_permutex2var_epi64(k, a, idx, b) simde_mm_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtepi32_epi8(_mm512_permutex2var_epi32(_mm512_cvtepu8_epi32(a), _mm512_cvtepu8_epi32(idx), _mm512_cvtepu8_epi32(b))); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 0, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x10) ? 
b_ : a_).i8[idx_.i8[i] & 0x0F]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi8 + #define _mm_permutex2var_epi8(a, idx, b) simde_mm_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi8 (simde__m128i a, simde__mmask16 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm_mask_mov_epi8(a, k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi8 +#define _mm_mask_permutex2var_epi8(a, k, idx, b) simde_mm_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__mmask16 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm_mask_mov_epi8(idx, k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi8 +#define _mm_mask2_permutex2var_epi8(a, idx, k, b) simde_mm_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi8 +#define _mm_maskz_permutex2var_epi8(k, a, idx, b) simde_mm_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_pd(a, idx, b); + #else + return simde_mm_castsi128_pd(simde_mm_permutex2var_epi64(simde_mm_castpd_si128(a), idx, simde_mm_castpd_si128(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_pd + #define _mm_permutex2var_pd(a, idx, b) simde_mm_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_permutex2var_pd (simde__m128d a, simde__mmask8 k, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm_mask_mov_pd(a, k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_pd +#define _mm_mask_permutex2var_pd(a, k, idx, b) simde_mm_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask2_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__mmask8 k, 
simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm_mask_mov_pd(simde_mm_castsi128_pd(idx), k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_pd +#define _mm_mask2_permutex2var_pd(a, idx, k, b) simde_mm_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_permutex2var_pd (simde__mmask8 k, simde__m128d a, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_pd +#define _mm_maskz_permutex2var_pd(k, a, idx, b) simde_mm_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_ps(a, idx, b); + #else + return simde_mm_castsi128_ps(simde_mm_permutex2var_epi32(simde_mm_castps_si128(a), idx, simde_mm_castps_si128(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_ps + #define _mm_permutex2var_ps(a, idx, b) simde_mm_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_permutex2var_ps (simde__m128 a, simde__mmask8 k, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm_mask_mov_ps(a, k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_ps +#define _mm_mask_permutex2var_ps(a, k, idx, b) simde_mm_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask2_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__mmask8 k, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm_mask_mov_ps(simde_mm_castsi128_ps(idx), k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_ps +#define _mm_mask2_permutex2var_ps(a, idx, k, b) simde_mm_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_permutex2var_ps (simde__mmask8 k, simde__m128 a, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_ps +#define _mm_maskz_permutex2var_ps(k, a, idx, b) simde_mm_maskz_permutex2var_ps(k, a, idx, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi16(a, idx, b); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i hilo, hilo2, hi, lo, idx2, ta, tb, select; + const __m256i ones = _mm256_set1_epi16(1); + + idx2 = _mm256_srli_epi32(idx, 1); + + ta = _mm256_permutevar8x32_epi32(a, idx2); + tb = _mm256_permutevar8x32_epi32(b, idx2); + select = _mm256_slli_epi32(idx2, 28); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + idx2 = _mm256_srli_epi32(idx2, 16); + + ta = _mm256_permutevar8x32_epi32(a, idx2); + tb = _mm256_permutevar8x32_epi32(b, idx2); + select = _mm256_slli_epi32(idx2, 28); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + + lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo, 0x55); + hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo, 16), 0x55); + + select = _mm256_cmpeq_epi16(_mm256_and_si256(idx, ones), ones); + return _mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 0x10) ? b_ : a_).i16[idx_.i16[i] & 0x0F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi16 + #define _mm256_permutex2var_epi16(a, idx, b) simde_mm256_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi16 (simde__m256i a, simde__mmask16 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi16(a, k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi16 +#define _mm256_mask_permutex2var_epi16(a, k, idx, b) simde_mm256_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__mmask16 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi16(idx, k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi16 +#define _mm256_mask2_permutex2var_epi16(a, idx, k, b) simde_mm256_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return 
_mm256_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi16 +#define _mm256_maskz_permutex2var_epi16(k, a, idx, b) simde_mm256_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi32(a, idx, b); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i ta, tb, select; + ta = _mm256_permutevar8x32_epi32(a, idx); + tb = _mm256_permutevar8x32_epi32(b, idx); + select = _mm256_slli_epi32(idx, 28); + return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 8) ? b_ : a_).i32[idx_.i32[i] & 7]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi32 + #define _mm256_permutex2var_epi32(a, idx, b) simde_mm256_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi32 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi32(a, k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi32 +#define _mm256_mask_permutex2var_epi32(a, k, idx, b) simde_mm256_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi32(idx, k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi32 +#define _mm256_mask2_permutex2var_epi32(a, idx, k, b) simde_mm256_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_maskz_permutex2var_epi32 +#define _mm256_maskz_permutex2var_epi32(k, a, idx, b) simde_mm256_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi64(a, idx, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 4) ? b_ : a_).i64[idx_.i64[i] & 3]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi64 + #define _mm256_permutex2var_epi64(a, idx, b) simde_mm256_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi64 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi64(a, k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi64 +#define _mm256_mask_permutex2var_epi64(a, k, idx, b) simde_mm256_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi64(idx, k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi64 +#define _mm256_mask2_permutex2var_epi64(a, idx, k, b) simde_mm256_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi64 +#define _mm256_maskz_permutex2var_epi64(k, a, idx, b) simde_mm256_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cvtepi16_epi8(_mm512_permutex2var_epi16(_mm512_cvtepu8_epi16(a), _mm512_cvtepu8_epi16(idx), _mm512_cvtepu8_epi16(b))); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t0, t1, index, select0x10, select0x20, a01, b01; + const __m256i mask = _mm256_set1_epi8(0x3F); + const __m256i a0 = 
_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a1 = _mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b0 = _mm256_permute4x64_epi64(b, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b1 = _mm256_permute4x64_epi64(b, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + + index = _mm256_and_si256(idx, mask); + t0 = _mm256_shuffle_epi8(a0, index); + t1 = _mm256_shuffle_epi8(a1, index); + select0x10 = _mm256_slli_epi64(index, 3); + a01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(b0, index); + t1 = _mm256_shuffle_epi8(b1, index); + b01 = _mm256_blendv_epi8(t0, t1, select0x10); + select0x20 = _mm256_slli_epi64(index, 2); + return _mm256_blendv_epi8(a01, b01, select0x20); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x20) ? b_ : a_).i8[idx_.i8[i] & 0x1F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi8 + #define _mm256_permutex2var_epi8(a, idx, b) simde_mm256_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi8 (simde__m256i a, simde__mmask32 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi8(a, k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi8 +#define _mm256_mask_permutex2var_epi8(a, k, idx, b) simde_mm256_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__mmask32 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi8(idx, k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi8 +#define _mm256_mask2_permutex2var_epi8(a, idx, k, b) simde_mm256_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi8 +#define _mm256_maskz_permutex2var_epi8(k, a, idx, b) simde_mm256_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutex2var_pd 
(simde__m256d a, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_pd(a, idx, b); + #else + return simde_mm256_castsi256_pd(simde_mm256_permutex2var_epi64(simde_mm256_castpd_si256(a), idx, simde_mm256_castpd_si256(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_pd + #define _mm256_permutex2var_pd(a, idx, b) simde_mm256_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_permutex2var_pd (simde__m256d a, simde__mmask8 k, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm256_mask_mov_pd(a, k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_pd +#define _mm256_mask_permutex2var_pd(a, k, idx, b) simde_mm256_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask2_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__mmask8 k, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm256_mask_mov_pd(simde_mm256_castsi256_pd(idx), k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_pd +#define _mm256_mask2_permutex2var_pd(a, idx, k, b) simde_mm256_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_permutex2var_pd (simde__mmask8 k, simde__m256d a, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_pd +#define _mm256_maskz_permutex2var_pd(k, a, idx, b) simde_mm256_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_ps(a, idx, b); + #else + return simde_mm256_castsi256_ps(simde_mm256_permutex2var_epi32(simde_mm256_castps_si256(a), idx, simde_mm256_castps_si256(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_ps + #define _mm256_permutex2var_ps(a, idx, b) simde_mm256_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_permutex2var_ps (simde__m256 a, simde__mmask8 k, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm256_mask_mov_ps(a, k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_ps +#define _mm256_mask_permutex2var_ps(a, k, idx, b) simde_mm256_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask2_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__mmask8 k, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm256_mask_mov_ps(simde_mm256_castsi256_ps(idx), k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_ps +#define _mm256_mask2_permutex2var_ps(a, idx, k, b) simde_mm256_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_permutex2var_ps (simde__mmask8 k, simde__m256 a, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_ps +#define _mm256_maskz_permutex2var_ps(k, a, idx, b) simde_mm256_maskz_permutex2var_ps(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_permutex2var_epi16(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i hilo, hilo1, hilo2, hi, lo, idx1, idx2, ta, tb, select; + const __m256i ones = _mm256_set1_epi16(1); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + idx1 = idx_.m256i[i]; + idx2 = _mm256_srli_epi32(idx1, 1); + + select = _mm256_slli_epi32(idx2, 27); + ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); + hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + select = _mm256_add_epi32(select, select); + hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), + _mm256_castsi256_ps(hilo1), + _mm256_castsi256_ps(select))); + + idx2 = _mm256_srli_epi32(idx2, 16); + + select = _mm256_slli_epi32(idx2, 27); + ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + select = _mm256_add_epi32(select, select); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), + 
_mm256_castsi256_ps(hilo2), + _mm256_castsi256_ps(select))); + + lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo1, 0x55); + hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo1, 16), 0x55); + + select = _mm256_cmpeq_epi16(_mm256_and_si256(idx1, ones), ones); + r_.m256i[i] = _mm256_blendv_epi8(lo, hi, select); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 0x20) ? b_ : a_).i16[idx_.i16[i] & 0x1F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi16 + #define _mm512_permutex2var_epi16(a, idx, b) simde_mm512_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi16 (simde__m512i a, simde__mmask32 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi16(a, k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi16 +#define _mm512_mask_permutex2var_epi16(a, k, idx, b) simde_mm512_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__mmask32 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi16(idx, k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi16 +#define _mm512_mask2_permutex2var_epi16(a, idx, k, b) simde_mm512_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi16 +#define _mm512_maskz_permutex2var_epi16(k, a, idx, b) simde_mm512_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_epi32(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i index, t0, t1, a01, b01, select; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + t0 = _mm256_permutevar8x32_epi32(a_.m256i[0], index); + t1 = _mm256_permutevar8x32_epi32(a_.m256i[1], index); + select = _mm256_slli_epi32(index, 28); + a01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), + _mm256_castsi256_ps(t1), + _mm256_castsi256_ps(select))); + t0 = _mm256_permutevar8x32_epi32(b_.m256i[0], index); + t1 = 
_mm256_permutevar8x32_epi32(b_.m256i[1], index); + b01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), + _mm256_castsi256_ps(t1), + _mm256_castsi256_ps(select))); + select = _mm256_slli_epi32(index, 27); + r_.m256i[i] = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a01), + _mm256_castsi256_ps(b01), + _mm256_castsi256_ps(select))); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 0x10) ? b_ : a_).i32[idx_.i32[i] & 0x0F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi32 + #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi32 +#define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi32 +#define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi32 +#define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_epi64(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 8) ? 
b_ : a_).i64[idx_.i64[i] & 7]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi64 + #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi64 +#define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi64 +#define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi64 +#define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m512i hilo, hi, lo, hi2, lo2, idx2; + const __m512i ones = _mm512_set1_epi8(1); + const __m512i low_bytes = _mm512_set1_epi16(0x00FF); + + idx2 = _mm512_srli_epi16(idx, 1); + hilo = _mm512_permutex2var_epi16(a, idx2, b); + __mmask64 mask = _mm512_test_epi8_mask(idx, ones); + lo = _mm512_and_si512(hilo, low_bytes); + hi = _mm512_srli_epi16(hilo, 8); + + idx2 = _mm512_srli_epi16(idx, 9); + hilo = _mm512_permutex2var_epi16(a, idx2, b); + lo2 = _mm512_slli_epi16(hilo, 8); + hi2 = _mm512_andnot_si512(low_bytes, hilo); + + lo = _mm512_or_si512(lo, lo2); + hi = _mm512_or_si512(hi, hi2); + + return _mm512_mask_blend_epi8(mask, lo, hi); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i t0, t1, index, select0x10, select0x20, select0x40, t01, t23, a0123, b0123; + const __m256i mask = _mm256_set1_epi8(0x7F); + const __m256i a0 = _mm256_permute4x64_epi64(a_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a1 = _mm256_permute4x64_epi64(a_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i a2 = _mm256_permute4x64_epi64(a_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a3 = 
_mm256_permute4x64_epi64(a_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b0 = _mm256_permute4x64_epi64(b_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b1 = _mm256_permute4x64_epi64(b_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b2 = _mm256_permute4x64_epi64(b_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b3 = _mm256_permute4x64_epi64(b_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = _mm256_and_si256(idx_.m256i[i], mask); + t0 = _mm256_shuffle_epi8(a0, index); + t1 = _mm256_shuffle_epi8(a1, index); + select0x10 = _mm256_slli_epi64(index, 3); + t01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(a2, index); + t1 = _mm256_shuffle_epi8(a3, index); + t23 = _mm256_blendv_epi8(t0, t1, select0x10); + select0x20 = _mm256_slli_epi64(index, 2); + a0123 = _mm256_blendv_epi8(t01, t23, select0x20); + t0 = _mm256_shuffle_epi8(b0, index); + t1 = _mm256_shuffle_epi8(b1, index); + t01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(b2, index); + t1 = _mm256_shuffle_epi8(b3, index); + t23 = _mm256_blendv_epi8(t0, t1, select0x10); + b0123 = _mm256_blendv_epi8(t01, t23, select0x20); + select0x40 = _mm256_slli_epi64(index, 1); + r_.m256i[i] = _mm256_blendv_epi8(a0123, b0123, select0x40); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x40) ? b_ : a_).i8[idx_.i8[i] & 0x3F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi8 + #define _mm512_permutex2var_epi8(a, idx, b) simde_mm512_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi8 (simde__m512i a, simde__mmask64 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi8(a, k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi8 +#define _mm512_mask_permutex2var_epi8(a, k, idx, b) simde_mm512_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__mmask64 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi8(idx, k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi8 +#define _mm512_mask2_permutex2var_epi8(a, idx, k, b) simde_mm512_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_maskz_permutex2var_epi8 +#define _mm512_maskz_permutex2var_epi8(k, a, idx, b) simde_mm512_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__m512d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_permutex2var_pd(a, idx, b); + #else + return simde_mm512_castsi512_pd(simde_mm512_permutex2var_epi64(simde_mm512_castpd_si512(a), idx, simde_mm512_castpd_si512(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_pd + #define _mm512_permutex2var_pd(a, idx, b) simde_mm512_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_pd +#define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_pd +#define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_pd +#define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_ps(a, idx, b); + #else + return simde_mm512_castsi512_ps(simde_mm512_permutex2var_epi32(simde_mm512_castps_si512(a), idx, simde_mm512_castps_si512(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_ps + #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_ps +#define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_ps +#define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_ps +#define _mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) */ +/* :: End simde/x86/avx512/permutex2var.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/popcnt.h :: */ +#if !defined(SIMDE_X86_AVX512_POPCNT_H) +#define SIMDE_X86_AVX512_POPCNT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_popcnt_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_popcnt_epi8(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcntq_s8(a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_popcnt(a_.wasm_v128); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + const __m128i low_nibble_set = _mm_set1_epi8(0x0f); + const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); + const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); + const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + + r_.n = + _mm_add_epi8( + _mm_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm_shuffle_epi8( + lut, + _mm_srli_epi16( + high_nibble_of_input, + 4 + ) + ) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* v -= ((v >> 1) & UINT8_C(0x55)); */ + r_.n = + _mm_sub_epi8( + a_.n, + _mm_and_si128( + _mm_srli_epi16(a_.n, 1), + _mm_set1_epi8(0x55) + ) + ); + + /* v = (v & 0x33) + ((v >> 2) & 0x33); */ + r_.n = + _mm_add_epi8( + _mm_and_si128( + r_.n, + _mm_set1_epi8(0x33) + ), + _mm_and_si128( + _mm_srli_epi16(r_.n, 2), + _mm_set1_epi8(0x33) + ) + ); + + /* v = (v + (v >> 4)) & 0xf */ + r_.n = + _mm_and_si128( + _mm_add_epi8( + r_.n, + _mm_srli_epi16(r_.n, 4) + ), + _mm_set1_epi8(0x0f) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), a_.altivec_i8))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u8 -= ((a_.u8 >> 1) & 0x55); + a_.u8 = ((a_.u8 & 0x33) + ((a_.u8 >> 2) & 0x33)); + a_.u8 = (a_.u8 + (a_.u8 >> 4)) & 15; + r_.u8 = a_.u8 >> ((sizeof(uint8_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); + v -= ((v >> 1) & 0x55); + v = (v & 0x33) + ((v >> 2) & 0x33); + v = (v + (v >> 4)) & 0xf; + r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_popcnt_epi8 + #define _mm_popcnt_epi8(a) simde_mm_popcnt_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_popcnt_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_popcnt_epi8(src, k, a); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_popcnt_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_popcnt_epi8 + #define _mm_mask_popcnt_epi8(src, k, a) simde_mm_mask_popcnt_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_popcnt_epi8 (simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_popcnt_epi8(k, a); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_popcnt_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_popcnt_epi8 + #define _mm_maskz_popcnt_epi8(k, a) simde_mm_maskz_popcnt_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_popcnt_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_popcnt_epi16(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vpaddlq_s8(vcntq_s8(a_.neon_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extadd_pairwise_i8x16(wasm_i8x16_popcnt(a_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), a_.altivec_u16))); + #elif defined(SIMDE_X86_XOP_NATIVE) + const __m128i low_nibble_set = _mm_set1_epi8(0x0f); + const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); + const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); + const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + + r_.n = + _mm_haddw_epi8( + _mm_add_epi8( + _mm_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm_shuffle_epi8( + lut, + _mm_srli_epi16(high_nibble_of_input, 4) + ) + ) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.n = + _mm_sub_epi16( + a_.n, + _mm_and_si128( + _mm_srli_epi16(a_.n, 1), + _mm_set1_epi16(0x5555) + ) + ); + + r_.n = + _mm_add_epi16( + _mm_and_si128( + r_.n, + _mm_set1_epi16(0x3333) + ), + _mm_and_si128( + _mm_srli_epi16(r_.n, 2), + _mm_set1_epi16(0x3333) + ) + ); + + r_.n = + _mm_and_si128( + _mm_add_epi16( + r_.n, + _mm_srli_epi16(r_.n, 4) + ), + _mm_set1_epi16(0x0f0f) + ); + + r_.n = + _mm_srli_epi16( + _mm_mullo_epi16( + r_.n, + _mm_set1_epi16(0x0101) + ), + (sizeof(uint16_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u16 -= ((a_.u16 >> 1) & UINT16_C(0x5555)); + a_.u16 = ((a_.u16 & UINT16_C(0x3333)) + 
((a_.u16 >> 2) & UINT16_C(0x3333))); + a_.u16 = (a_.u16 + (a_.u16 >> 4)) & UINT16_C(0x0f0f); + r_.u16 = (a_.u16 * UINT16_C(0x0101)) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + uint16_t v = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i]); + v -= ((v >> 1) & UINT16_C(0x5555)); + v = ((v & UINT16_C(0x3333)) + ((v >> 2) & UINT16_C(0x3333))); + v = (v + (v >> 4)) & UINT16_C(0x0f0f); + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (v * UINT16_C(0x0101))) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_popcnt_epi16 + #define _mm_popcnt_epi16(a) simde_mm_popcnt_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_popcnt_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_popcnt_epi16(src, k, a); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_popcnt_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_popcnt_epi16 + #define _mm_mask_popcnt_epi16(src, k, a) simde_mm_mask_popcnt_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_popcnt_epi16 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_popcnt_epi16(k, a); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_popcnt_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_popcnt_epi16 + #define _mm_maskz_popcnt_epi16(k, a) simde_mm_maskz_popcnt_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_popcnt_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_popcnt_epi32(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vpaddlq_s16(vpaddlq_s8(vcntq_s8(a_.neon_i8))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), a_.altivec_u32))); + #elif defined(SIMDE_X86_XOP_NATIVE) + const __m128i low_nibble_set = _mm_set1_epi8(0x0f); + const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); + const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); + const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + + r_.n = + _mm_haddd_epi8( + _mm_add_epi8( + _mm_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm_shuffle_epi8( + lut, + _mm_srli_epi16(high_nibble_of_input, 4) + ) + ) + ); + #elif defined(SIMDE_X86_SSE4_1_NATIVE) + r_.n = + _mm_sub_epi32( + a_.n, + _mm_and_si128( + _mm_srli_epi32(a_.n, 1), + _mm_set1_epi32(0x55555555) + ) + ); + + r_.n = + _mm_add_epi32( + _mm_and_si128( + r_.n, + _mm_set1_epi32(0x33333333) + ), + _mm_and_si128( + _mm_srli_epi32(r_.n, 2), + _mm_set1_epi32(0x33333333) + ) + ); + + r_.n = + _mm_and_si128( + _mm_add_epi32( + r_.n, + _mm_srli_epi32(r_.n, 4) + ), + _mm_set1_epi32(0x0f0f0f0f) + ); + + r_.n = + _mm_srli_epi32( + 
_mm_mullo_epi32( + r_.n, + _mm_set1_epi32(0x01010101) + ), + (sizeof(uint32_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u32 -= ((a_.u32 >> 1) & UINT32_C(0x55555555)); + a_.u32 = ((a_.u32 & UINT32_C(0x33333333)) + ((a_.u32 >> 2) & UINT32_C(0x33333333))); + a_.u32 = (a_.u32 + (a_.u32 >> 4)) & UINT32_C(0x0f0f0f0f); + r_.u32 = (a_.u32 * UINT32_C(0x01010101)) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + uint32_t v = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i]); + v -= ((v >> 1) & UINT32_C(0x55555555)); + v = ((v & UINT32_C(0x33333333)) + ((v >> 2) & UINT32_C(0x33333333))); + v = (v + (v >> 4)) & UINT32_C(0x0f0f0f0f); + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (v * UINT32_C(0x01010101))) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_popcnt_epi32 + #define _mm_popcnt_epi32(a) simde_mm_popcnt_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_popcnt_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_popcnt_epi32(src, k, a); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_popcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_popcnt_epi32 + #define _mm_mask_popcnt_epi32(src, k, a) simde_mm_mask_popcnt_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_popcnt_epi32 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_popcnt_epi32(k, a); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_popcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_popcnt_epi32 + #define _mm_maskz_popcnt_epi32(k, a) simde_mm_maskz_popcnt_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_popcnt_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_popcnt_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(vcntq_s8(a_.neon_i8)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_popcnt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), a_.altivec_u64))); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + const __m128i low_nibble_set = _mm_set1_epi8(0x0f); + const __m128i high_nibble_of_input = _mm_andnot_si128(low_nibble_set, a_.n); + const __m128i low_nibble_of_input = _mm_and_si128(low_nibble_set, a_.n); + const __m128i lut = _mm_set_epi8(4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0); + + r_.n = + _mm_sad_epu8( + _mm_add_epi8( + _mm_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm_shuffle_epi8( + lut, + _mm_srli_epi16(high_nibble_of_input, 4) + ) + ), + _mm_setzero_si128() + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.n = + _mm_sub_epi8( + a_.n, + _mm_and_si128( + _mm_srli_epi16(a_.n, 1), + _mm_set1_epi8(0x55) + 
) + ); + + r_.n = + _mm_add_epi8( + _mm_and_si128( + r_.n, + _mm_set1_epi8(0x33) + ), + _mm_and_si128( + _mm_srli_epi16(r_.n, 2), + _mm_set1_epi8(0x33) + ) + ); + + r_.n = + _mm_and_si128( + _mm_add_epi8( + r_.n, + _mm_srli_epi16(r_.n, 4) + ), + _mm_set1_epi8(0x0f) + ); + + r_.n = + _mm_sad_epu8( + r_.n, + _mm_setzero_si128() + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u64 -= ((a_.u64 >> 1) & UINT64_C(0x5555555555555555)); + a_.u64 = ((a_.u64 & UINT64_C(0x3333333333333333)) + ((a_.u64 >> 2) & UINT64_C(0x3333333333333333))); + a_.u64 = (a_.u64 + (a_.u64 >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + r_.u64 = (a_.u64 * UINT64_C(0x0101010101010101)) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + uint64_t v = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i]); + v -= ((v >> 1) & UINT64_C(0x5555555555555555)); + v = ((v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333))); + v = (v + (v >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, (v * UINT64_C(0x0101010101010101))) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_popcnt_epi64 + #define _mm_popcnt_epi64(a) simde_mm_popcnt_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_popcnt_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_popcnt_epi64(src, k, a); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_popcnt_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_popcnt_epi64 + #define _mm_mask_popcnt_epi64(src, k, a) simde_mm_mask_popcnt_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_popcnt_epi64 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_popcnt_epi64(k, a); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_popcnt_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_popcnt_epi64 + #define _mm_maskz_popcnt_epi64(k, a) simde_mm_maskz_popcnt_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_popcnt_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_popcnt_epi8(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi8(a_.m128i[i]); + } + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i low_nibble_set = _mm256_set1_epi8(0x0f); + const __m256i high_nibble_of_input = _mm256_andnot_si256(low_nibble_set, a_.n); + const __m256i low_nibble_of_input = _mm256_and_si256(low_nibble_set, a_.n); + const __m256i lut = + _mm256_set_epi8( + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 + ); + + r_.n = + _mm256_add_epi8( + _mm256_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm256_shuffle_epi8( + lut, + _mm256_srli_epi16( + 
high_nibble_of_input, + 4 + ) + ) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u8 -= ((a_.u8 >> 1) & 0x55); + a_.u8 = ((a_.u8 & 0x33) + ((a_.u8 >> 2) & 0x33)); + a_.u8 = (a_.u8 + (a_.u8 >> 4)) & 15; + r_.u8 = a_.u8 >> ((sizeof(uint8_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); + v -= ((v >> 1) & 0x55); + v = (v & 0x33) + ((v >> 2) & 0x33); + v = (v + (v >> 4)) & 0xf; + r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_popcnt_epi8 + #define _mm256_popcnt_epi8(a) simde_mm256_popcnt_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_popcnt_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_popcnt_epi8(src, k, a); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_popcnt_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_popcnt_epi8 + #define _mm256_mask_popcnt_epi8(src, k, a) simde_mm256_mask_popcnt_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_popcnt_epi8 (simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_popcnt_epi8(k, a); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_popcnt_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_popcnt_epi8 + #define _mm256_maskz_popcnt_epi8(k, a) simde_mm256_maskz_popcnt_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_popcnt_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_popcnt_epi16(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi16(a_.m128i[i]); + } + #elif defined(SIMDE_X86_AVX2_NATIVE) + r_.n = + _mm256_sub_epi16( + a_.n, + _mm256_and_si256( + _mm256_srli_epi16(a_.n, 1), + _mm256_set1_epi16(0x5555) + ) + ); + + r_.n = + _mm256_add_epi16( + _mm256_and_si256( + r_.n, + _mm256_set1_epi16(0x3333) + ), + _mm256_and_si256( + _mm256_srli_epi16(r_.n, 2), + _mm256_set1_epi16(0x3333) + ) + ); + + r_.n = + _mm256_and_si256( + _mm256_add_epi16( + r_.n, + _mm256_srli_epi16(r_.n, 4) + ), + _mm256_set1_epi16(0x0f0f) + ); + + r_.n = + _mm256_srli_epi16( + _mm256_mullo_epi16( + r_.n, + _mm256_set1_epi16(0x0101) + ), + (sizeof(uint16_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u16 -= ((a_.u16 >> 1) & UINT16_C(0x5555)); + a_.u16 = ((a_.u16 & UINT16_C(0x3333)) + ((a_.u16 >> 2) & UINT16_C(0x3333))); + a_.u16 = (a_.u16 + (a_.u16 >> 4)) & UINT16_C(0x0f0f); + r_.u16 = (a_.u16 * UINT16_C(0x0101)) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + uint16_t v = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i]); + v -= ((v >> 1) & UINT16_C(0x5555)); + v = ((v & 
UINT16_C(0x3333)) + ((v >> 2) & UINT16_C(0x3333))); + v = (v + (v >> 4)) & UINT16_C(0x0f0f); + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (v * UINT16_C(0x0101))) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_popcnt_epi16 + #define _mm256_popcnt_epi16(a) simde_mm256_popcnt_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_popcnt_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_popcnt_epi16(src, k, a); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_popcnt_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_popcnt_epi16 + #define _mm256_mask_popcnt_epi16(src, k, a) simde_mm256_mask_popcnt_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_popcnt_epi16 (simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_popcnt_epi16(k, a); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_popcnt_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_popcnt_epi16 + #define _mm256_maskz_popcnt_epi16(k, a) simde_mm256_maskz_popcnt_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_popcnt_epi32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_popcnt_epi32(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi32(a_.m128i[i]); + } + #elif defined(SIMDE_X86_AVX2_NATIVE) + r_.n = + _mm256_sub_epi32( + a_.n, + _mm256_and_si256( + _mm256_srli_epi32(a_.n, 1), + _mm256_set1_epi32(0x55555555) + ) + ); + + r_.n = + _mm256_add_epi32( + _mm256_and_si256( + r_.n, + _mm256_set1_epi32(0x33333333) + ), + _mm256_and_si256( + _mm256_srli_epi32(r_.n, 2), + _mm256_set1_epi32(0x33333333) + ) + ); + + r_.n = + _mm256_and_si256( + _mm256_add_epi32( + r_.n, + _mm256_srli_epi32(r_.n, 4) + ), + _mm256_set1_epi32(0x0f0f0f0f) + ); + + r_.n = + _mm256_srli_epi32( + _mm256_mullo_epi32( + r_.n, + _mm256_set1_epi32(0x01010101) + ), + (sizeof(uint32_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u32 -= ((a_.u32 >> 1) & UINT32_C(0x55555555)); + a_.u32 = ((a_.u32 & UINT32_C(0x33333333)) + ((a_.u32 >> 2) & UINT32_C(0x33333333))); + a_.u32 = (a_.u32 + (a_.u32 >> 4)) & UINT32_C(0x0f0f0f0f); + r_.u32 = (a_.u32 * UINT32_C(0x01010101)) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + uint32_t v = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i]); + v -= ((v >> 1) & UINT32_C(0x55555555)); + v = ((v & UINT32_C(0x33333333)) + ((v >> 2) & UINT32_C(0x33333333))); + v = (v + (v >> 4)) & UINT32_C(0x0f0f0f0f); + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (v * UINT32_C(0x01010101))) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_popcnt_epi32 + #define _mm256_popcnt_epi32(a) simde_mm256_popcnt_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_popcnt_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_popcnt_epi32(src, k, a); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_popcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_popcnt_epi32 + #define _mm256_mask_popcnt_epi32(src, k, a) simde_mm256_mask_popcnt_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_popcnt_epi32 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_popcnt_epi32(k, a); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_popcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_popcnt_epi32 + #define _mm256_maskz_popcnt_epi32(k, a) simde_mm256_maskz_popcnt_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_popcnt_epi64 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_popcnt_epi64(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < sizeof(r_.m128i) / sizeof(r_.m128i[0]) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi64(a_.m128i[i]); + } + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i low_nibble_set = _mm256_set1_epi8(0x0f); + const __m256i high_nibble_of_input = _mm256_andnot_si256(low_nibble_set, a_.n); + const __m256i low_nibble_of_input = _mm256_and_si256(low_nibble_set, a_.n); + const __m256i lut = + _mm256_set_epi8( + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 + ); + + r_.n = + _mm256_sad_epu8( + _mm256_add_epi8( + _mm256_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm256_shuffle_epi8( + lut, + _mm256_srli_epi16(high_nibble_of_input, 4) + ) + ), + _mm256_setzero_si256() + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u64 -= ((a_.u64 >> 1) & UINT64_C(0x5555555555555555)); + a_.u64 = ((a_.u64 & UINT64_C(0x3333333333333333)) + ((a_.u64 >> 2) & UINT64_C(0x3333333333333333))); + a_.u64 = (a_.u64 + (a_.u64 >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + r_.u64 = (a_.u64 * UINT64_C(0x0101010101010101)) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + uint64_t v = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i]); + v -= ((v >> 1) & UINT64_C(0x5555555555555555)); + v = ((v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333))); + v = (v + (v >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, (v * UINT64_C(0x0101010101010101))) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_popcnt_epi64 + #define _mm256_popcnt_epi64(a) simde_mm256_popcnt_epi64(a) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_popcnt_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_popcnt_epi64(src, k, a); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_popcnt_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_popcnt_epi64 + #define _mm256_mask_popcnt_epi64(src, k, a) simde_mm256_mask_popcnt_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_popcnt_epi64 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_popcnt_epi64(k, a); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_popcnt_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_popcnt_epi64 + #define _mm256_maskz_popcnt_epi64(k, a) simde_mm256_maskz_popcnt_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_popcnt_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_popcnt_epi8(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi8(a_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_popcnt_epi8(a_.m256i[i]); + } + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + const __m512i low_nibble_set = _mm512_set1_epi8(0x0f); + const __m512i high_nibble_of_input = _mm512_andnot_si512(low_nibble_set, a_.n); + const __m512i low_nibble_of_input = _mm512_and_si512(low_nibble_set, a_.n); + const __m512i lut = + simde_mm512_set_epi8( + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 + ); + + r_.n = + _mm512_add_epi8( + _mm512_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm512_shuffle_epi8( + lut, + _mm512_srli_epi16( + high_nibble_of_input, + 4 + ) + ) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u8 -= ((a_.u8 >> 1) & 0x55); + a_.u8 = ((a_.u8 & 0x33) + ((a_.u8 >> 2) & 0x33)); + a_.u8 = (a_.u8 + (a_.u8 >> 4)) & 15; + r_.u8 = a_.u8 >> ((sizeof(uint8_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + uint8_t v = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i]); + v -= ((v >> 1) & 0x55); + v = (v & 0x33) + ((v >> 2) & 0x33); + v = (v + (v >> 4)) & 0xf; + r_.u8[i] = v >> (sizeof(uint8_t) - 1) * CHAR_BIT; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_popcnt_epi8 + #define _mm512_popcnt_epi8(a) simde_mm512_popcnt_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_popcnt_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_mask_popcnt_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_popcnt_epi8(a)); + #endif +} +#if 
defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_popcnt_epi8 + #define _mm512_mask_popcnt_epi8(src, k, a) simde_mm512_mask_popcnt_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_popcnt_epi8 (simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_maskz_popcnt_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_popcnt_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_popcnt_epi8 + #define _mm512_maskz_popcnt_epi8(k, a) simde_mm512_maskz_popcnt_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_popcnt_epi16 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_popcnt_epi16(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi16(a_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_popcnt_epi16(a_.m256i[i]); + } + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + r_.n = + _mm512_sub_epi16( + a_.n, + _mm512_and_si512( + _mm512_srli_epi16(a_.n, 1), + _mm512_set1_epi16(0x5555) + ) + ); + + r_.n = + _mm512_add_epi16( + _mm512_and_si512( + r_.n, + _mm512_set1_epi16(0x3333) + ), + _mm512_and_si512( + _mm512_srli_epi16(r_.n, 2), + _mm512_set1_epi16(0x3333) + ) + ); + + r_.n = + _mm512_and_si512( + _mm512_add_epi16( + r_.n, + _mm512_srli_epi16(r_.n, 4) + ), + _mm512_set1_epi16(0x0f0f) + ); + + r_.n = + _mm512_srli_epi16( + _mm512_mullo_epi16( + r_.n, + _mm512_set1_epi16(0x0101) + ), + (sizeof(uint16_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u16 -= ((a_.u16 >> 1) & UINT16_C(0x5555)); + a_.u16 = ((a_.u16 & UINT16_C(0x3333)) + ((a_.u16 >> 2) & UINT16_C(0x3333))); + a_.u16 = (a_.u16 + (a_.u16 >> 4)) & UINT16_C(0x0f0f); + r_.u16 = (a_.u16 * UINT16_C(0x0101)) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + uint16_t v = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i]); + v -= ((v >> 1) & UINT16_C(0x5555)); + v = ((v & UINT16_C(0x3333)) + ((v >> 2) & UINT16_C(0x3333))); + v = (v + (v >> 4)) & UINT16_C(0x0f0f); + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (v * UINT16_C(0x0101))) >> ((sizeof(uint16_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_popcnt_epi16 + #define _mm512_popcnt_epi16(a) simde_mm512_popcnt_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_popcnt_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_mask_popcnt_epi16(src, k, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_popcnt_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_popcnt_epi16 + #define _mm512_mask_popcnt_epi16(src, k, a) simde_mm512_mask_popcnt_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_popcnt_epi16 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BITALG_NATIVE) + return _mm512_maskz_popcnt_epi16(k, a); + #else + return 
simde_mm512_maskz_mov_epi16(k, simde_mm512_popcnt_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_popcnt_epi16 + #define _mm512_maskz_popcnt_epi16(k, a) simde_mm512_maskz_popcnt_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_popcnt_epi32 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + return _mm512_popcnt_epi32(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi32(a_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_popcnt_epi32(a_.m256i[i]); + } + #elif defined(SIMDE_X86_AVX512F_NATIVE) + r_.n = + _mm512_sub_epi32( + a_.n, + _mm512_and_si512( + _mm512_srli_epi32(a_.n, 1), + _mm512_set1_epi32(0x55555555) + ) + ); + + r_.n = + _mm512_add_epi32( + _mm512_and_si512( + r_.n, + _mm512_set1_epi32(0x33333333) + ), + _mm512_and_si512( + _mm512_srli_epi32(r_.n, 2), + _mm512_set1_epi32(0x33333333) + ) + ); + + r_.n = + _mm512_and_si512( + _mm512_add_epi32( + r_.n, + _mm512_srli_epi32(r_.n, 4) + ), + _mm512_set1_epi32(0x0f0f0f0f) + ); + + r_.n = + _mm512_srli_epi32( + _mm512_mullo_epi32( + r_.n, + _mm512_set1_epi32(0x01010101) + ), + (sizeof(uint32_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u32 -= ((a_.u32 >> 1) & UINT32_C(0x55555555)); + a_.u32 = ((a_.u32 & UINT32_C(0x33333333)) + ((a_.u32 >> 2) & UINT32_C(0x33333333))); + a_.u32 = (a_.u32 + (a_.u32 >> 4)) & UINT32_C(0x0f0f0f0f); + r_.u32 = (a_.u32 * UINT32_C(0x01010101)) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + uint32_t v = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i]); + v -= ((v >> 1) & UINT32_C(0x55555555)); + v = ((v & UINT32_C(0x33333333)) + ((v >> 2) & UINT32_C(0x33333333))); + v = (v + (v >> 4)) & UINT32_C(0x0f0f0f0f); + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (v * UINT32_C(0x01010101))) >> ((sizeof(uint32_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_popcnt_epi32 + #define _mm512_popcnt_epi32(a) simde_mm512_popcnt_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_popcnt_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + return _mm512_mask_popcnt_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_popcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_popcnt_epi32 + #define _mm512_mask_popcnt_epi32(src, k, a) simde_mm512_mask_popcnt_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_popcnt_epi32 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + return _mm512_maskz_popcnt_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_popcnt_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_popcnt_epi32 + #define _mm512_maskz_popcnt_epi32(k, a) simde_mm512_maskz_popcnt_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_popcnt_epi64 (simde__m512i a) { + #if 
defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + return _mm512_popcnt_epi64(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_popcnt_epi64(a_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < sizeof(r_.m256i) / sizeof(r_.m256i[0]) ; i++) { + r_.m256i[i] = simde_mm256_popcnt_epi64(a_.m256i[i]); + } + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + const __m512i low_nibble_set = _mm512_set1_epi8(0x0f); + const __m512i high_nibble_of_input = _mm512_andnot_si512(low_nibble_set, a_.n); + const __m512i low_nibble_of_input = _mm512_and_si512(low_nibble_set, a_.n); + const __m512i lut = + simde_mm512_set_epi8( + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, + 4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0 + ); + + r_.n = + _mm512_sad_epu8( + _mm512_add_epi8( + _mm512_shuffle_epi8( + lut, + low_nibble_of_input + ), + _mm512_shuffle_epi8( + lut, + _mm512_srli_epi16(high_nibble_of_input, 4) + ) + ), + _mm512_setzero_si512() + ); + #elif defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + r_.n = + _mm512_sub_epi64( + a_.n, + _mm512_and_si512( + _mm512_srli_epi64(a_.n, 1), + _mm512_set1_epi64(0x5555555555555555) + ) + ); + + r_.n = + _mm512_add_epi64( + _mm512_and_si512( + r_.n, + _mm512_set1_epi64(0x3333333333333333) + ), + _mm512_and_si512( + _mm512_srli_epi64(r_.n, 2), + _mm512_set1_epi64(0x3333333333333333) + ) + ); + + r_.n = + _mm512_and_si512( + _mm512_add_epi64( + r_.n, + _mm512_srli_epi64(r_.n, 4) + ), + _mm512_set1_epi64(0x0f0f0f0f0f0f0f0f) + ); + + r_.n = + _mm512_srli_epi64( + _mm512_mullo_epi64( + r_.n, + _mm512_set1_epi64(0x0101010101010101) + ), + (sizeof(uint64_t) - 1) * CHAR_BIT + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + a_.u64 -= ((a_.u64 >> 1) & UINT64_C(0x5555555555555555)); + a_.u64 = ((a_.u64 & UINT64_C(0x3333333333333333)) + ((a_.u64 >> 2) & UINT64_C(0x3333333333333333))); + a_.u64 = (a_.u64 + (a_.u64 >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + r_.u64 = (a_.u64 * UINT64_C(0x0101010101010101)) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + uint64_t v = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i]); + v -= ((v >> 1) & UINT64_C(0x5555555555555555)); + v = ((v & UINT64_C(0x3333333333333333)) + ((v >> 2) & UINT64_C(0x3333333333333333))); + v = (v + (v >> 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, (v * UINT64_C(0x0101010101010101))) >> ((sizeof(uint64_t) - 1) * CHAR_BIT); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_popcnt_epi64 + #define _mm512_popcnt_epi64(a) simde_mm512_popcnt_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_popcnt_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + return _mm512_mask_popcnt_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_popcnt_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_popcnt_epi64 + #define _mm512_mask_popcnt_epi64(src, k, a) simde_mm512_mask_popcnt_epi64(src, k, a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_popcnt_epi64 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + return _mm512_maskz_popcnt_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_popcnt_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_popcnt_epi64 + #define _mm512_maskz_popcnt_epi64(k, a) simde_mm512_maskz_popcnt_epi64(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_POPCNT_H) */ +/* :: End simde/x86/avx512/popcnt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/range.h :: */ +#if !defined(SIMDE_X86_AVX512_RANGE_H) +#define SIMDE_X86_AVX512_RANGE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_range_ps (simde__m128 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128 r; + + switch (imm8 & 3) { + case 0: + r = simde_mm_min_ps(a, b); + break; + case 1: + r = simde_mm_max_ps(a, b); + break; + case 2: + r = simde_x_mm_select_ps(b, a, simde_mm_cmple_ps(simde_x_mm_abs_ps(a), simde_x_mm_abs_ps(b))); + break; + case 3: + r = simde_x_mm_select_ps(b, a, simde_mm_cmpge_ps(simde_x_mm_abs_ps(a), simde_x_mm_abs_ps(b))); + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r = simde_x_mm_copysign_ps(r, a); + break; + case 8: + r = simde_mm_andnot_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); + break; + case 12: + r = simde_mm_or_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); + break; + default: + break; + } + + return r; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_range_ps(a, b, imm8) _mm_range_ps((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_range_ps + #define _mm_range_ps(a, b, imm8) simde_mm_range_ps(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_range_ps(src, k, a, b, imm8) _mm_mask_range_ps(src, k, a, b, imm8) +#else + #define simde_mm_mask_range_ps(src, k, a, b, imm8) simde_mm_mask_mov_ps(src, k, simde_mm_range_ps(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_range_ps + #define _mm_mask_range_ps(src, k, a, b, imm8) simde_mm_mask_range_ps(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_range_ps(k, a, b, imm8) _mm_maskz_range_ps(k, a, b, imm8) +#else + #define simde_mm_maskz_range_ps(k, a, b, imm8) simde_mm_maskz_mov_ps(k, simde_mm_range_ps(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_range_ps + #define _mm_maskz_range_ps(k, a, b, imm8) simde_mm_maskz_range_ps(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_range_ps (simde__m256 a, simde__m256 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256 r; + + switch (imm8 & 3) { + case 0: + r = simde_mm256_min_ps(a, b); + break; + case 1: + r = simde_mm256_max_ps(a, b); + break; + case 2: + r = simde_x_mm256_select_ps(b, a, simde_mm256_cmp_ps(simde_x_mm256_abs_ps(a), simde_x_mm256_abs_ps(b), SIMDE_CMP_LE_OQ)); + break; + case 3: + r = simde_x_mm256_select_ps(b, a, simde_mm256_cmp_ps(simde_x_mm256_abs_ps(a), simde_x_mm256_abs_ps(b), SIMDE_CMP_GE_OQ)); + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r = simde_x_mm256_copysign_ps(r, a); + break; + case 8: + r = simde_mm256_andnot_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); + break; + case 12: + r = simde_mm256_or_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); + break; + default: + break; + } + + return r; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_range_ps(a, b, imm8) _mm256_range_ps((a), (b), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_range_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_range_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_range_ps_a_ = simde__m256_to_private(a), \ + simde_mm256_range_ps_b_ = simde__m256_to_private(b); \ + \ + for (size_t simde_mm256_range_ps_i = 0 ; simde_mm256_range_ps_i < (sizeof(simde_mm256_range_ps_r_.m128) / sizeof(simde_mm256_range_ps_r_.m128[0])) ; simde_mm256_range_ps_i++) { \ + simde_mm256_range_ps_r_.m128[simde_mm256_range_ps_i] = simde_mm_range_ps(simde_mm256_range_ps_a_.m128[simde_mm256_range_ps_i], simde_mm256_range_ps_b_.m128[simde_mm256_range_ps_i], imm8); \ + } \ + \ + simde__m256_from_private(simde_mm256_range_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_range_ps + #define _mm256_range_ps(a, b, imm8) simde_mm256_range_ps(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_range_ps(src, k, a, b, imm8) _mm256_mask_range_ps(src, k, a, b, imm8) +#else + #define simde_mm256_mask_range_ps(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_range_ps(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_range_ps + #define _mm256_mask_range_ps(src, k, a, b, imm8) simde_mm256_mask_range_ps(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_range_ps(k, a, b, imm8) _mm256_maskz_range_ps(k, a, b, imm8) +#else + #define 
simde_mm256_maskz_range_ps(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_range_ps(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_range_ps + #define _mm256_maskz_range_ps(k, a, b, imm8) simde_mm256_maskz_range_ps(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_range_ps (simde__m512 a, simde__m512 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m512 r; + + switch (imm8 & 3) { + case 0: + r = simde_mm512_min_ps(a, b); + break; + case 1: + r = simde_mm512_max_ps(a, b); + break; + case 2: + r = simde_mm512_mask_mov_ps(b, simde_mm512_cmp_ps_mask(simde_mm512_abs_ps(a), simde_mm512_abs_ps(b), SIMDE_CMP_LE_OS), a); + break; + case 3: + r = simde_mm512_mask_mov_ps(a, simde_mm512_cmp_ps_mask(simde_mm512_abs_ps(b), simde_mm512_abs_ps(a), SIMDE_CMP_GE_OS), b); + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r = simde_x_mm512_copysign_ps(r, a); + break; + case 8: + r = simde_mm512_andnot_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); + break; + case 12: + r = simde_mm512_or_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0)), r); + break; + default: + break; + } + + return r; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm512_range_ps(a, b, imm8) _mm512_range_ps((a), (b), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_range_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_range_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_range_ps_a_ = simde__m512_to_private(a), \ + simde_mm512_range_ps_b_ = simde__m512_to_private(b); \ + \ + for (size_t simde_mm512_range_ps_i = 0 ; simde_mm512_range_ps_i < (sizeof(simde_mm512_range_ps_r_.m128) / sizeof(simde_mm512_range_ps_r_.m128[0])) ; simde_mm512_range_ps_i++) { \ + simde_mm512_range_ps_r_.m128[simde_mm512_range_ps_i] = simde_mm_range_ps(simde_mm512_range_ps_a_.m128[simde_mm512_range_ps_i], simde_mm512_range_ps_b_.m128[simde_mm512_range_ps_i], imm8); \ + } \ + \ + simde__m512_from_private(simde_mm512_range_ps_r_); \ + })) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_range_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_range_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_range_ps_a_ = simde__m512_to_private(a), \ + simde_mm512_range_ps_b_ = simde__m512_to_private(b); \ + \ + for (size_t simde_mm512_range_ps_i = 0 ; simde_mm512_range_ps_i < (sizeof(simde_mm512_range_ps_r_.m256) / sizeof(simde_mm512_range_ps_r_.m256[0])) ; simde_mm512_range_ps_i++) { \ + simde_mm512_range_ps_r_.m256[simde_mm512_range_ps_i] = simde_mm256_range_ps(simde_mm512_range_ps_a_.m256[simde_mm512_range_ps_i], simde_mm512_range_ps_b_.m256[simde_mm512_range_ps_i], imm8); \ + } \ + \ + simde__m512_from_private(simde_mm512_range_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_range_ps + #define _mm512_range_ps(a, b, imm8) simde_mm512_range_ps(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_range_ps(src, k, a, b, imm8) _mm512_mask_range_ps(src, k, a, b, imm8) +#else + #define simde_mm512_mask_range_ps(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_range_ps(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_mask_range_ps + #define _mm512_mask_range_ps(src, k, a, b, imm8) simde_mm512_mask_range_ps(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_range_ps(k, a, b, imm8) _mm512_maskz_range_ps(k, a, b, imm8) +#else + #define simde_mm512_maskz_range_ps(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_range_ps(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_range_ps + #define _mm512_maskz_range_ps(k, a, b, imm8) simde_mm512_maskz_range_ps(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_range_pd (simde__m128d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128d r; + + switch (imm8 & 3) { + case 0: + r = simde_mm_min_pd(a, b); + break; + case 1: + r = simde_mm_max_pd(a, b); + break; + case 2: + r = simde_x_mm_select_pd(b, a, simde_mm_cmple_pd(simde_x_mm_abs_pd(a), simde_x_mm_abs_pd(b))); + break; + case 3: + r = simde_x_mm_select_pd(b, a, simde_mm_cmpge_pd(simde_x_mm_abs_pd(a), simde_x_mm_abs_pd(b))); + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r = simde_x_mm_copysign_pd(r, a); + break; + case 8: + r = simde_mm_andnot_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); + break; + case 12: + r = simde_mm_or_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); + break; + default: + break; + } + + return r; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_range_pd(a, b, imm8) _mm_range_pd((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_range_pd + #define _mm_range_pd(a, b, imm8) simde_mm_range_pd(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_range_pd(src, k, a, b, imm8) _mm_mask_range_pd(src, k, a, b, imm8) +#else + #define simde_mm_mask_range_pd(src, k, a, b, imm8) simde_mm_mask_mov_pd(src, k, simde_mm_range_pd(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_range_pd + #define _mm_mask_range_pd(src, k, a, b, imm8) simde_mm_mask_range_pd(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_range_pd(k, a, b, imm8) _mm_maskz_range_pd(k, a, b, imm8) +#else + #define simde_mm_maskz_range_pd(k, a, b, imm8) simde_mm_maskz_mov_pd(k, simde_mm_range_pd(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_range_pd + #define _mm_maskz_range_pd(k, a, b, imm8) simde_mm_maskz_range_pd(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_range_pd (simde__m256d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d r; + + switch (imm8 & 3) { + case 0: + r = simde_mm256_min_pd(a, b); + break; + case 1: + r = simde_mm256_max_pd(a, b); + break; + case 2: + r = simde_x_mm256_select_pd(b, a, simde_mm256_cmp_pd(simde_x_mm256_abs_pd(a), simde_x_mm256_abs_pd(b), SIMDE_CMP_LE_OQ)); + break; + case 3: + r = simde_x_mm256_select_pd(b, a, simde_mm256_cmp_pd(simde_x_mm256_abs_pd(a), simde_x_mm256_abs_pd(b), SIMDE_CMP_GE_OQ)); + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r = simde_x_mm256_copysign_pd(r, a); + break; + case 8: + r = 
simde_mm256_andnot_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); + break; + case 12: + r = simde_mm256_or_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); + break; + default: + break; + } + + return r; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_range_pd(a, b, imm8) _mm256_range_pd((a), (b), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_range_pd(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_range_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_range_pd_a_ = simde__m256d_to_private(a), \ + simde_mm256_range_pd_b_ = simde__m256d_to_private(b); \ + \ + for (size_t simde_mm256_range_pd_i = 0 ; simde_mm256_range_pd_i < (sizeof(simde_mm256_range_pd_r_.m128d) / sizeof(simde_mm256_range_pd_r_.m128d[0])) ; simde_mm256_range_pd_i++) { \ + simde_mm256_range_pd_r_.m128d[simde_mm256_range_pd_i] = simde_mm_range_pd(simde_mm256_range_pd_a_.m128d[simde_mm256_range_pd_i], simde_mm256_range_pd_b_.m128d[simde_mm256_range_pd_i], imm8); \ + } \ + \ + simde__m256d_from_private(simde_mm256_range_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_range_pd + #define _mm256_range_pd(a, b, imm8) simde_mm256_range_pd(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_range_pd(src, k, a, b, imm8) _mm256_mask_range_pd(src, k, a, b, imm8) +#else + #define simde_mm256_mask_range_pd(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_range_pd(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_range_pd + #define _mm256_mask_range_pd(src, k, a, b, imm8) simde_mm256_mask_range_pd(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_range_pd(k, a, b, imm8) _mm256_maskz_range_pd(k, a, b, imm8) +#else + #define simde_mm256_maskz_range_pd(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_range_pd(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_range_pd + #define _mm256_maskz_range_pd(k, a, b, imm8) simde_mm256_maskz_range_pd(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_range_pd (simde__m512d a, simde__m512d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m512d r; + + switch (imm8 & 3) { + case 0: + r = simde_mm512_min_pd(a, b); + break; + case 1: + r = simde_mm512_max_pd(a, b); + break; + case 2: + r = simde_mm512_mask_mov_pd(b, simde_mm512_cmp_pd_mask(simde_mm512_abs_pd(a), simde_mm512_abs_pd(b), SIMDE_CMP_LE_OS), a); + break; + case 3: + r = simde_mm512_mask_mov_pd(a, simde_mm512_cmp_pd_mask(simde_mm512_abs_pd(b), simde_mm512_abs_pd(a), SIMDE_CMP_GE_OS), b); + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r = simde_x_mm512_copysign_pd(r, a); + break; + case 8: + r = simde_mm512_andnot_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); + break; + case 12: + r = simde_mm512_or_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.0)), r); + break; + default: + break; + } + + return r; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm512_range_pd(a, b, imm8) 
_mm512_range_pd((a), (b), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_range_pd(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_range_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_range_pd_a_ = simde__m512d_to_private(a), \ + simde_mm512_range_pd_b_ = simde__m512d_to_private(b); \ + \ + for (size_t simde_mm512_range_pd_i = 0 ; simde_mm512_range_pd_i < (sizeof(simde_mm512_range_pd_r_.m128d) / sizeof(simde_mm512_range_pd_r_.m128d[0])) ; simde_mm512_range_pd_i++) { \ + simde_mm512_range_pd_r_.m128d[simde_mm512_range_pd_i] = simde_mm_range_pd(simde_mm512_range_pd_a_.m128d[simde_mm512_range_pd_i], simde_mm512_range_pd_b_.m128d[simde_mm512_range_pd_i], imm8); \ + } \ + \ + simde__m512d_from_private(simde_mm512_range_pd_r_); \ + })) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_range_pd(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_range_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_range_pd_a_ = simde__m512d_to_private(a), \ + simde_mm512_range_pd_b_ = simde__m512d_to_private(b); \ + \ + for (size_t simde_mm512_range_pd_i = 0 ; simde_mm512_range_pd_i < (sizeof(simde_mm512_range_pd_r_.m256d) / sizeof(simde_mm512_range_pd_r_.m256d[0])) ; simde_mm512_range_pd_i++) { \ + simde_mm512_range_pd_r_.m256d[simde_mm512_range_pd_i] = simde_mm256_range_pd(simde_mm512_range_pd_a_.m256d[simde_mm512_range_pd_i], simde_mm512_range_pd_b_.m256d[simde_mm512_range_pd_i], imm8); \ + } \ + \ + simde__m512d_from_private(simde_mm512_range_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_range_pd + #define _mm512_range_pd(a, b, imm8) simde_mm512_range_pd(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_range_pd(src, k, a, b, imm8) _mm512_mask_range_pd(src, k, a, b, imm8) +#else + #define simde_mm512_mask_range_pd(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_range_pd(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_range_pd + #define _mm512_mask_range_pd(src, k, a, b, imm8) simde_mm512_mask_range_pd(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_range_pd(k, a, b, imm8) _mm512_maskz_range_pd(k, a, b, imm8) +#else + #define simde_mm512_maskz_range_pd(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_range_pd(a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_range_pd + #define _mm512_maskz_range_pd(k, a, b, imm8) simde_mm512_maskz_range_pd(k, a, b, imm8) +#endif + +#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_x_mm_range_ss(a, b, imm8) simde_mm_move_ss(a, simde_mm_range_ps(a, b, imm8)) +#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + #define simde_x_mm_range_ss(a, b, imm8) simde_mm_move_ss(a, simde_mm_range_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b), imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_x_mm_range_ss (simde__m128 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + simde_float32 abs_a = simde_uint32_as_float32(a_.u32[0] & UINT32_C(2147483647)); + simde_float32 abs_b = simde_uint32_as_float32(b_.u32[0] & 
UINT32_C(2147483647)); + + switch (imm8 & 3) { + case 0: + r_ = simde__m128_to_private(simde_mm_min_ss(a, b)); + break; + case 1: + r_ = simde__m128_to_private(simde_mm_max_ss(a, b)); + break; + case 2: + r_.f32[0] = abs_a <= abs_b ? a_.f32[0] : b_.f32[0]; + break; + case 3: + r_.f32[0] = abs_b >= abs_a ? b_.f32[0] : a_.f32[0]; + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r_.f32[0] = simde_uint32_as_float32((a_.u32[0] & UINT32_C(2147483648)) ^ (r_.u32[0] & UINT32_C(2147483647))); + break; + case 8: + r_.f32[0] = simde_uint32_as_float32(r_.u32[0] & UINT32_C(2147483647)); + break; + case 12: + r_.f32[0] = simde_uint32_as_float32(r_.u32[0] | UINT32_C(2147483648)); + break; + default: + break; + } + + return simde__m128_from_private(r_); + } +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_mask_range_ss(src, k, a, b, imm8) _mm_mask_range_ss(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_mask_range_ss(src, k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128_private \ + simde_mm_mask_range_ss_r_ = simde__m128_to_private(a), \ + simde_mm_mask_range_ss_src_ = simde__m128_to_private(src); \ + \ + if (k & 1) \ + simde_mm_mask_range_ss_r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); \ + else \ + simde_mm_mask_range_ss_r_.f32[0] = simde_mm_mask_range_ss_src_.f32[0]; \ + \ + simde__m128_from_private(simde_mm_mask_range_ss_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_mask_range_ss (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + src_ = simde__m128_to_private(src); + + if (k & 1) + r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); + else + r_.f32[0] = src_.f32[0]; + + return simde__m128_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_range_ss + #define _mm_mask_range_ss(src, k, a, b, imm8) simde_mm_mask_range_ss(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_maskz_range_ss(k, a, b, imm8) _mm_maskz_range_ss(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_maskz_range_ss(k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128_private simde_mm_maskz_range_ss_r_ = simde__m128_to_private(a); \ + \ + if (k & 1) \ + simde_mm_maskz_range_ss_r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); \ + else \ + simde_mm_maskz_range_ss_r_.f32[0] = SIMDE_FLOAT32_C(0.0); \ + \ + simde__m128_from_private(simde_mm_maskz_range_ss_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_maskz_range_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private r_ = simde__m128_to_private(a); + + if (k & 1) + r_ = simde__m128_to_private(simde_x_mm_range_ss(a, b, imm8)); + else + r_.f32[0] = SIMDE_FLOAT32_C(0.0); + + return simde__m128_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_range_ss + #define _mm_maskz_range_ss(k, a, b, imm8) simde_mm_mask_range_ss(k, a, b, imm8) +#endif + +#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_x_mm_range_sd(a, b, imm8) simde_mm_move_sd(a, simde_mm_range_pd(a, b, imm8)) +#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + #define simde_x_mm_range_sd(a, b, imm8) simde_mm_move_sd(a, simde_mm_range_pd(simde_x_mm_broadcastlow_pd(a), 
simde_x_mm_broadcastlow_pd(b), imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_x_mm_range_sd (simde__m128d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + simde_float64 abs_a = simde_uint64_as_float64(a_.u64[0] & UINT64_C(9223372036854775807)); + simde_float64 abs_b = simde_uint64_as_float64(b_.u64[0] & UINT64_C(9223372036854775807)); + + switch (imm8 & 3) { + case 0: + r_ = simde__m128d_to_private(simde_mm_min_sd(a, b)); + break; + case 1: + r_ = simde__m128d_to_private(simde_mm_max_sd(a, b)); + break; + case 2: + r_.f64[0] = abs_a <= abs_b ? a_.f64[0] : b_.f64[0]; + break; + case 3: + r_.f64[0] = abs_b >= abs_a ? b_.f64[0] : a_.f64[0]; + break; + default: + break; + } + + switch (imm8 & 12) { + case 0: + r_.f64[0] = simde_uint64_as_float64((a_.u64[0] & UINT64_C(9223372036854775808)) ^ (r_.u64[0] & UINT64_C(9223372036854775807))); + break; + case 8: + r_.f64[0] = simde_uint64_as_float64(r_.u64[0] & UINT64_C(9223372036854775807)); + break; + case 12: + r_.f64[0] = simde_uint64_as_float64(r_.u64[0] | UINT64_C(9223372036854775808)); + break; + default: + break; + } + + return simde__m128d_from_private(r_); + } +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_mask_range_sd(src, k, a, b, imm8) _mm_mask_range_sd(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_mask_range_sd(src, k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d_private \ + simde_mm_mask_range_sd_r_ = simde__m128d_to_private(a), \ + simde_mm_mask_range_sd_src_ = simde__m128d_to_private(src); \ + \ + if (k & 1) \ + simde_mm_mask_range_sd_r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); \ + else \ + simde_mm_mask_range_sd_r_.f64[0] = simde_mm_mask_range_sd_src_.f64[0]; \ + \ + simde__m128d_from_private(simde_mm_mask_range_sd_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_mask_range_sd (simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + src_ = simde__m128d_to_private(src); + + if (k & 1) + r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); + else + r_.f64[0] = src_.f64[0]; + + return simde__m128d_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_range_sd + #define _mm_mask_range_sd(src, k, a, b, imm8) simde_mm_mask_range_sd(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_maskz_range_sd(k, a, b, imm8) _mm_maskz_range_sd(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_maskz_range_sd(k, a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d_private simde_mm_maskz_range_sd_r_ = simde__m128d_to_private(a); \ + \ + if (k & 1) \ + simde_mm_maskz_range_sd_r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); \ + else \ + simde_mm_maskz_range_sd_r_.f64[0] = SIMDE_FLOAT64_C(0.0); \ + \ + simde__m128d_from_private(simde_mm_maskz_range_sd_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_maskz_range_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128d_private r_ = simde__m128d_to_private(a); + + if (k & 1) + r_ = simde__m128d_to_private(simde_x_mm_range_sd(a, b, imm8)); + else + r_.f64[0] = SIMDE_FLOAT64_C(0.0); + + 
return simde__m128d_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_range_sd + #define _mm_maskz_range_sd(k, a, b, imm8) simde_mm_mask_range_sd(k, a, b, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_RANGE_H) */ +/* :: End simde/x86/avx512/range.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/range_round.h :: */ +#if !defined(SIMDE_X86_AVX512_RANGE_ROUND_H) +#define SIMDE_X86_AVX512_RANGE_ROUND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_range_round_ps(a, b, imm8, sae) _mm512_range_round_ps(a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_range_round_ps(a, b, imm8, sae) simde_mm512_range_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_range_round_ps(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_range_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_range_round_ps_envp; \ + int simde_mm512_range_round_ps_x = feholdexcept(&simde_mm512_range_round_ps_envp); \ + simde_mm512_range_round_ps_r = simde_mm512_range_ps(a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_range_round_ps_x == 0)) \ + fesetenv(&simde_mm512_range_round_ps_envp); \ + } \ + else { \ + simde_mm512_range_round_ps_r = simde_mm512_range_ps(a, b, imm8); \ + } \ + \ + simde_mm512_range_round_ps_r; \ + })) + #else + #define simde_mm512_range_round_ps(a, b, imm8, sae) simde_mm512_range_ps(a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_range_round_ps (simde__m512 a, simde__m512 b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_range_ps(a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_range_ps(a, b, imm8); + #endif + } + else { + r = simde_mm512_range_ps(a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_range_round_ps + #define _mm512_range_round_ps(a, b, imm8, sae) simde_mm512_range_round_ps(a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) _mm512_mask_range_round_ps(src, k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) simde_mm512_mask_range_ps(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_mask_range_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_mask_range_round_ps_envp; \ + int simde_mm512_mask_range_round_ps_x = feholdexcept(&simde_mm512_mask_range_round_ps_envp); \ + simde_mm512_mask_range_round_ps_r = simde_mm512_mask_range_ps(src, k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_mask_range_round_ps_x == 0)) \ + 
fesetenv(&simde_mm512_mask_range_round_ps_envp); \ + } \ + else { \ + simde_mm512_mask_range_round_ps_r = simde_mm512_mask_range_ps(src, k, a, b, imm8); \ + } \ + \ + simde_mm512_mask_range_round_ps_r; \ + })) + #else + #define simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) simde_mm512_mask_range_ps(src, k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_mask_range_round_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_mask_range_ps(src, k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_mask_range_ps(src, k, a, b, imm8); + #endif + } + else { + r = simde_mm512_mask_range_ps(src, k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_range_round_ps + #define _mm512_mask_range_round_ps(src, k, a, b, imm8) simde_mm512_mask_range_round_ps(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) _mm512_maskz_range_round_ps(k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) simde_mm512_maskz_range_ps(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_maskz_range_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_maskz_range_round_ps_envp; \ + int simde_mm512_maskz_range_round_ps_x = feholdexcept(&simde_mm512_maskz_range_round_ps_envp); \ + simde_mm512_maskz_range_round_ps_r = simde_mm512_maskz_range_ps(k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_maskz_range_round_ps_x == 0)) \ + fesetenv(&simde_mm512_maskz_range_round_ps_envp); \ + } \ + else { \ + simde_mm512_maskz_range_round_ps_r = simde_mm512_maskz_range_ps(k, a, b, imm8); \ + } \ + \ + simde_mm512_maskz_range_round_ps_r; \ + })) + #else + #define simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) simde_mm512_maskz_range_ps(k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_maskz_range_round_ps (simde__mmask16 k, simde__m512 a, simde__m512 b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_maskz_range_ps(k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_maskz_range_ps(k, a, b, imm8); + #endif + } + else { + r = simde_mm512_maskz_range_ps(k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_range_round_ps + #define _mm512_maskz_range_round_ps(k, a, b, imm8) simde_mm512_maskz_range_round_ps(k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_range_round_pd(a, b, imm8, sae) _mm512_range_round_pd(a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_range_round_pd(a, b, imm8, sae) simde_mm512_range_pd(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define 
simde_mm512_range_round_pd(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_range_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_range_round_pd_envp; \ + int simde_mm512_range_round_pd_x = feholdexcept(&simde_mm512_range_round_pd_envp); \ + simde_mm512_range_round_pd_r = simde_mm512_range_pd(a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_range_round_pd_x == 0)) \ + fesetenv(&simde_mm512_range_round_pd_envp); \ + } \ + else { \ + simde_mm512_range_round_pd_r = simde_mm512_range_pd(a, b, imm8); \ + } \ + \ + simde_mm512_range_round_pd_r; \ + })) + #else + #define simde_mm512_range_round_pd(a, b, imm8, sae) simde_mm512_range_pd(a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_range_round_pd (simde__m512d a, simde__m512d b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_range_pd(a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_range_pd(a, b, imm8); + #endif + } + else { + r = simde_mm512_range_pd(a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_range_round_pd + #define _mm512_range_round_pd(a, b, imm8, sae) simde_mm512_range_round_pd(a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) _mm512_mask_range_round_pd(src, k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) simde_mm512_mask_range_pd(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_mask_range_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_mask_range_round_pd_envp; \ + int simde_mm512_mask_range_round_pd_x = feholdexcept(&simde_mm512_mask_range_round_pd_envp); \ + simde_mm512_mask_range_round_pd_r = simde_mm512_mask_range_pd(src, k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_mask_range_round_pd_x == 0)) \ + fesetenv(&simde_mm512_mask_range_round_pd_envp); \ + } \ + else { \ + simde_mm512_mask_range_round_pd_r = simde_mm512_mask_range_pd(src, k, a, b, imm8); \ + } \ + \ + simde_mm512_mask_range_round_pd_r; \ + })) + #else + #define simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) simde_mm512_mask_range_pd(src, k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_mask_range_round_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_mask_range_pd(src, k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_mask_range_pd(src, k, a, b, imm8); + #endif + } + else { + r = simde_mm512_mask_range_pd(src, k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_range_round_pd + #define _mm512_mask_range_round_pd(src, k, a, b, imm8) simde_mm512_mask_range_round_pd(src, k, a, b, imm8) +#endif + +#if 
defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) _mm512_maskz_range_round_pd(k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) simde_mm512_maskz_range_pd(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_maskz_range_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_maskz_range_round_pd_envp; \ + int simde_mm512_maskz_range_round_pd_x = feholdexcept(&simde_mm512_maskz_range_round_pd_envp); \ + simde_mm512_maskz_range_round_pd_r = simde_mm512_maskz_range_pd(k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_maskz_range_round_pd_x == 0)) \ + fesetenv(&simde_mm512_maskz_range_round_pd_envp); \ + } \ + else { \ + simde_mm512_maskz_range_round_pd_r = simde_mm512_maskz_range_pd(k, a, b, imm8); \ + } \ + \ + simde_mm512_maskz_range_round_pd_r; \ + })) + #else + #define simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) simde_mm512_maskz_range_pd(k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_maskz_range_round_pd (simde__mmask8 k, simde__m512d a, simde__m512d b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_maskz_range_pd(k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_maskz_range_pd(k, a, b, imm8); + #endif + } + else { + r = simde_mm512_maskz_range_pd(k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_range_round_pd + #define _mm512_maskz_range_round_pd(k, a, b, imm8) simde_mm512_maskz_range_round_pd(k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_range_round_ss(a, b, imm8, sae) _mm_range_round_ss(a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_range_round_ss(a, b, imm8, sae) simde_x_mm_range_ss(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_range_round_ss(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_range_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_range_round_ss_envp; \ + int simde_mm_range_round_ss_x = feholdexcept(&simde_mm_range_round_ss_envp); \ + simde_mm_range_round_ss_r = simde_x_mm_range_ss(a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_range_round_ss_x == 0)) \ + fesetenv(&simde_mm_range_round_ss_envp); \ + } \ + else { \ + simde_mm_range_round_ss_r = simde_x_mm_range_ss(a, b, imm8); \ + } \ + \ + simde_mm_range_round_ss_r; \ + })) + #else + #define simde_mm_range_round_ss(a, b, imm8, sae) simde_x_mm_range_ss(a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_range_round_ss (simde__m128 a, simde__m128 b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_x_mm_range_ss(a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_x_mm_range_ss(a, b, imm8); + #endif + } + else { + r = simde_x_mm_range_ss(a, b, imm8); + } + + return r; + 
} +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_range_round_ss + #define _mm_range_round_ss(a, b, imm8, sae) simde_mm_range_round_ss(a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) _mm_mask_range_round_ss(src, k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_range_ss(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_mask_range_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_mask_range_round_ss_envp; \ + int simde_mm_mask_range_round_ss_x = feholdexcept(&simde_mm_mask_range_round_ss_envp); \ + simde_mm_mask_range_round_ss_r = simde_mm_mask_range_ss(src, k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_mask_range_round_ss_x == 0)) \ + fesetenv(&simde_mm_mask_range_round_ss_envp); \ + } \ + else { \ + simde_mm_mask_range_round_ss_r = simde_mm_mask_range_ss(src, k, a, b, imm8); \ + } \ + \ + simde_mm_mask_range_round_ss_r; \ + })) + #else + #define simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_range_ss(src, k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_mask_range_round_ss (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_mask_range_ss(src, k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_mask_range_ss(src, k, a, b, imm8); + #endif + } + else { + r = simde_mm_mask_range_ss(src, k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_range_round_ss + #define _mm_mask_range_round_ss(src, k, a, b, imm8) simde_mm_mask_range_round_ss(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) _mm_maskz_range_round_ss(k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) simde_mm_maskz_range_ss(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_maskz_range_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_maskz_range_round_ss_envp; \ + int simde_mm_maskz_range_round_ss_x = feholdexcept(&simde_mm_maskz_range_round_ss_envp); \ + simde_mm_maskz_range_round_ss_r = simde_mm_maskz_range_ss(k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_maskz_range_round_ss_x == 0)) \ + fesetenv(&simde_mm_maskz_range_round_ss_envp); \ + } \ + else { \ + simde_mm_maskz_range_round_ss_r = simde_mm_maskz_range_ss(k, a, b, imm8); \ + } \ + \ + simde_mm_maskz_range_round_ss_r; \ + })) + #else + #define simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) simde_mm_maskz_range_ss(k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_maskz_range_round_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if 
(sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_maskz_range_ss(k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_maskz_range_ss(k, a, b, imm8); + #endif + } + else { + r = simde_mm_maskz_range_ss(k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_range_round_ss + #define _mm_maskz_range_round_ss(k, a, b, imm8) simde_mm_maskz_range_round_ss(k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_range_round_sd(a, b, imm8, sae) _mm_range_round_sd(a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_range_round_sd(a, b, imm8, sae) simde_x_mm_range_sd(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_range_round_sd(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_range_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_range_round_sd_envp; \ + int simde_mm_range_round_sd_x = feholdexcept(&simde_mm_range_round_sd_envp); \ + simde_mm_range_round_sd_r = simde_x_mm_range_sd(a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_range_round_sd_x == 0)) \ + fesetenv(&simde_mm_range_round_sd_envp); \ + } \ + else { \ + simde_mm_range_round_sd_r = simde_x_mm_range_sd(a, b, imm8); \ + } \ + \ + simde_mm_range_round_sd_r; \ + })) + #else + #define simde_mm_range_round_sd(a, b, imm8, sae) simde_x_mm_range_sd(a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_range_round_sd (simde__m128d a, simde__m128d b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_x_mm_range_sd(a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_x_mm_range_sd(a, b, imm8); + #endif + } + else { + r = simde_x_mm_range_sd(a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_range_round_sd + #define _mm_range_round_sd(a, b, imm8, sae) simde_mm_range_round_sd(a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) _mm_mask_range_round_sd(src, k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_range_sd(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_mask_range_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_mask_range_round_sd_envp; \ + int simde_mm_mask_range_round_sd_x = feholdexcept(&simde_mm_mask_range_round_sd_envp); \ + simde_mm_mask_range_round_sd_r = simde_mm_mask_range_sd(src, k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_mask_range_round_sd_x == 0)) \ + fesetenv(&simde_mm_mask_range_round_sd_envp); \ + } \ + else { \ + simde_mm_mask_range_round_sd_r = simde_mm_mask_range_sd(src, k, a, b, imm8); \ + } \ + \ + simde_mm_mask_range_round_sd_r; \ + })) + #else + #define simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_range_sd(src, k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_mask_range_round_sd (simde__m128d src, 
simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_mask_range_sd(src, k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_mask_range_sd(src, k, a, b, imm8); + #endif + } + else { + r = simde_mm_mask_range_sd(src, k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_range_round_sd + #define _mm_mask_range_round_sd(src, k, a, b, imm8) simde_mm_mask_range_round_sd(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) _mm_maskz_range_round_sd(k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) simde_mm_maskz_range_sd(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_maskz_range_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_maskz_range_round_sd_envp; \ + int simde_mm_maskz_range_round_sd_x = feholdexcept(&simde_mm_maskz_range_round_sd_envp); \ + simde_mm_maskz_range_round_sd_r = simde_mm_maskz_range_sd(k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_maskz_range_round_sd_x == 0)) \ + fesetenv(&simde_mm_maskz_range_round_sd_envp); \ + } \ + else { \ + simde_mm_maskz_range_round_sd_r = simde_mm_maskz_range_sd(k, a, b, imm8); \ + } \ + \ + simde_mm_maskz_range_round_sd_r; \ + })) + #else + #define simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) simde_mm_maskz_range_sd(k, a, b, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_maskz_range_round_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, int imm8, int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_maskz_range_sd(k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_maskz_range_sd(k, a, b, imm8); + #endif + } + else { + r = simde_mm_maskz_range_sd(k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_range_round_sd + #define _mm_maskz_range_round_sd(k, a, b, imm8) simde_mm_maskz_range_round_sd(k, a, b, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_RANGE_ROUND_H) */ +/* :: End simde/x86/avx512/range_round.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/rol.h :: */ +#if !defined(SIMDE_X86_AVX512_ROL_H) +#define SIMDE_X86_AVX512_ROL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED 
FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_rol_epi32(a, imm8) _mm_rol_epi32(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128i + simde_mm_rol_epi32 (simde__m128i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_rl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + r_.u32 = (a_.u32 << (imm8 & 31)) | (a_.u32 >> (32 - (imm8 & 31))); + break; + } + #else + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] << (imm8 & 31)) | (a_.u32[i] >> (32 - (imm8 & 31))); + } + break; + } + #endif + + return simde__m128i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_rol_epi32 + #define _mm_rol_epi32(a, imm8) simde_mm_rol_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_rol_epi32(src, k, a, imm8) _mm_mask_rol_epi32(src, k, a, imm8) +#else + #define simde_mm_mask_rol_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm_rol_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_rol_epi32 + #define _mm_mask_rol_epi32(src, k, a, imm8) simde_mm_mask_rol_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_rol_epi32(k, a, imm8) _mm_maskz_rol_epi32(k, a, imm8) +#else + #define simde_mm_maskz_rol_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm_rol_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_rol_epi32 + #define _mm_maskz_rol_epi32(src, k, a, imm8) simde_mm_maskz_rol_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_rol_epi32(a, imm8) _mm256_rol_epi32(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256i + simde_mm256_rol_epi32 (simde__m256i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + r_.u32 = (a_.u32 << (imm8 & 31)) | (a_.u32 >> (32 - (imm8 & 31))); + break; + } + #else + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] << (imm8 & 31)) | (a_.u32[i] >> (32 - (imm8 & 31))); + } + break; + } + #endif + + return simde__m256i_from_private(r_); + } +#endif +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_rol_epi32 + #define _mm256_rol_epi32(a, imm8) simde_mm256_rol_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_rol_epi32(src, k, a, imm8) _mm256_mask_rol_epi32(src, k, a, imm8) +#else + #define simde_mm256_mask_rol_epi32(src, k, a, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_rol_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_rol_epi32 + #define _mm256_mask_rol_epi32(src, k, a, imm8) simde_mm256_mask_rol_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_rol_epi32(k, a, imm8) _mm256_maskz_rol_epi32(k, a, imm8) +#else + #define simde_mm256_maskz_rol_epi32(k, a, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_rol_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_rol_epi32 + #define _mm256_maskz_rol_epi32(k, a, imm8) simde_mm256_maskz_rol_epi32(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_rol_epi32(a, imm8) _mm512_rol_epi32(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512i + simde_mm512_rol_epi32 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + r_.u32 = (a_.u32 << (imm8 & 31)) | (a_.u32 >> (32 - (imm8 & 31))); + break; + } + #else + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] << (imm8 & 31)) | (a_.u32[i] >> (32 - (imm8 & 31))); + } + break; + } + #endif + + return simde__m512i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_rol_epi32 + #define _mm512_rol_epi32(a, imm8) simde_mm512_rol_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_rol_epi32(src, k, a, imm8) _mm512_mask_rol_epi32(src, k, a, imm8) +#else + #define simde_mm512_mask_rol_epi32(src, k, a, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_rol_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rol_epi32 + #define _mm512_mask_rol_epi32(src, k, a, imm8) simde_mm512_mask_rol_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_rol_epi32(k, a, imm8) _mm512_maskz_rol_epi32(k, a, imm8) +#else + #define simde_mm512_maskz_rol_epi32(k, a, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_rol_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_rol_epi32 + #define _mm512_maskz_rol_epi32(k, a, imm8) simde_mm512_maskz_rol_epi32(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_rol_epi64(a, imm8) 
_mm_rol_epi64(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128i + simde_mm_rol_epi64 (simde__m128i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_rl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, imm8))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + r_.u64 = (a_.u64 << (imm8 & 63)) | (a_.u64 >> (64 - (imm8 & 63))); + break; + } + #else + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] << (imm8 & 63)) | (a_.u64[i] >> (64 - (imm8 & 63))); + } + break; + } + #endif + + return simde__m128i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_rol_epi64 + #define _mm_rol_epi64(a, imm8) simde_mm_rol_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_rol_epi64(src, k, a, imm8) _mm_mask_rol_epi64(src, k, a, imm8) +#else + #define simde_mm_mask_rol_epi64(src, k, a, imm8) simde_mm_mask_mov_epi64(src, k, simde_mm_rol_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_rol_epi64 + #define _mm_mask_rol_epi64(src, k, a, imm8) simde_mm_mask_rol_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_rol_epi64(k, a, imm8) _mm_maskz_rol_epi64(k, a, imm8) +#else + #define simde_mm_maskz_rol_epi64(k, a, imm8) simde_mm_maskz_mov_epi64(k, simde_mm_rol_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_rol_epi64 + #define _mm_maskz_rol_epi64(k, a, imm8) simde_mm_maskz_rol_epi64(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_rol_epi64(a, imm8) _mm256_rol_epi64(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256i + simde_mm256_rol_epi64 (simde__m256i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + r_.u64 = (a_.u64 << (imm8 & 63)) | (a_.u64 >> (64 - (imm8 & 63))); + break; + } + #else + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] << (imm8 & 63)) | (a_.u64[i] >> (64 - (imm8 & 63))); + } + break; + } + #endif + + return simde__m256i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_rol_epi64 + #define _mm256_rol_epi64(a, imm8) simde_mm256_rol_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + 
#define simde_mm256_mask_rol_epi64(src, k, a, imm8) _mm256_mask_rol_epi64(src, k, a, imm8) +#else + #define simde_mm256_mask_rol_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_rol_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_rol_epi64 + #define _mm256_mask_rol_epi64(src, k, a, imm8) simde_mm256_mask_rol_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_rol_epi64(k, a, imm8) _mm256_maskz_rol_epi64(k, a, imm8) +#else + #define simde_mm256_maskz_rol_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_rol_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_rol_epi64 + #define _mm256_maskz_rol_epi64(k, a, imm8) simde_mm256_maskz_rol_epi64(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_rol_epi64(a, imm8) _mm512_rol_epi64(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512i + simde_mm512_rol_epi64 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + r_.u64 = (a_.u64 << (imm8 & 63)) | (a_.u64 >> (64 - (imm8 & 63))); + break; + } + #else + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] << (imm8 & 63)) | (a_.u64[i] >> (64 - (imm8 & 63))); + } + break; + } + #endif + + return simde__m512i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_rol_epi64 + #define _mm512_rol_epi64(a, imm8) simde_mm512_rol_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_rol_epi64(src, k, a, imm8) _mm512_mask_rol_epi64(src, k, a, imm8) +#else + #define simde_mm512_mask_rol_epi64(src, k, a, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_rol_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rol_epi64 + #define _mm512_mask_rol_epi64(src, k, a, imm8) simde_mm512_mask_rol_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_rol_epi64(k, a, imm8) _mm512_maskz_rol_epi64(k, a, imm8) +#else + #define simde_mm512_maskz_rol_epi64(k, a, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_rol_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_rol_epi64 + #define _mm512_maskz_rol_epi64(k, a, imm8) simde_mm512_maskz_rol_epi64(k, a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ROL_H) */ +/* :: End simde/x86/avx512/rol.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/rolv.h :: */ +#if !defined(SIMDE_X86_AVX512_ROLV_H) +#define SIMDE_X86_AVX512_ROLV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* 
e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/srlv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRLV_H) +#define SIMDE_X86_AVX512_SRLV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_srlv_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (b_.u16 < 16)) & (a_.u16 >> b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (b_.u16[i] < 16) ? 
(a_.u16[i] >> b_.u16[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi16 + #define _mm_srlv_epi16(a, b) simde_mm_srlv_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_srlv_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_srlv_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_srlv_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_srlv_epi16 + #define _mm_mask_srlv_epi16(src, k, a, b) simde_mm_mask_srlv_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_srlv_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_srlv_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_srlv_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_srlv_epi16 + #define _mm_maskz_srlv_epi16(k, a, b) simde_mm_maskz_srlv_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_srlv_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_srlv_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_srlv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_srlv_epi32 + #define _mm_mask_srlv_epi32(src, k, a, b) simde_mm_mask_srlv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_srlv_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_srlv_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_srlv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_srlv_epi32 + #define _mm_maskz_srlv_epi32(k, a, b) simde_mm_maskz_srlv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_srlv_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_srlv_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_srlv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_srlv_epi64 + #define _mm_mask_srlv_epi64(src, k, a, b) simde_mm_mask_srlv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_srlv_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_srlv_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_srlv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_srlv_epi64 + #define _mm_maskz_srlv_epi64(k, a, b) simde_mm_maskz_srlv_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_srlv_epi16(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / 
sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_srlv_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (b_.u16 < 16)) & (a_.u16 >> b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (b_.u16[i] < 16) ? (a_.u16[i] >> b_.u16[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi16 + #define _mm256_srlv_epi16(a, b) simde_mm256_srlv_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srlv_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_srlv_epi16(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_srlv_epi16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (b_.u16 < 16)) & (a_.u16 >> b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (b_.u16[i] < 16) ? (a_.u16[i] >> b_.u16[i]) : 0; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_srlv_epi16 + #define _mm512_srlv_epi16(a, b) simde_mm512_srlv_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srlv_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_srlv_epi32(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_srlv_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srlv_epi32 + #define _mm512_srlv_epi32(a, b) simde_mm512_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srlv_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_srlv_epi64(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_srlv_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srlv_epi64 + #define _mm512_srlv_epi64(a, b) simde_mm512_srlv_epi64(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRLV_H) */ +/* :: End simde/x86/avx512/srlv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sllv.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SLLV_H) +#define SIMDE_X86_AVX512_SLLV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sllv_epi16 (simde__m512i a, simde__m512i b) { + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (b_.u16 < 16)) & (a_.u16 << b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (b_.u16[i] < 16) ? HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << b_.u16[i])) : 0; + } + #endif + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_sllv_epi16(a, b) _mm512_sllv_epi16(a, b) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sllv_epi16 + #define _mm512_sllv_epi16(a, b) simde_mm512_sllv_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sllv_epi32 (simde__m512i a, simde__m512i b) { + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << b_.u32[i])) : 0; + } + #endif + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_sllv_epi32(a, b) _mm512_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sllv_epi32 + #define _mm512_sllv_epi32(a, b) simde_mm512_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sllv_epi64 (simde__m512i a, simde__m512i b) { + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? HEDLEY_STATIC_CAST(uint64_t, (a_.u64[i] << b_.u64[i])) : 0; + } + #endif + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_sllv_epi64(a, b) _mm512_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sllv_epi64 + #define _mm512_sllv_epi64(a, b) simde_mm512_sllv_epi64(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SLLV_H) */ +/* :: End simde/x86/avx512/sllv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SUB_H) +#define SIMDE_X86_AVX512_SUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sub_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi8(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi8 + #define _mm512_sub_epi8(a, b) simde_mm512_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sub_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_sub_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_sub_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_epi8 + #define _mm512_mask_sub_epi8(src, k, a, b) simde_mm512_mask_sub_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sub_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_sub_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_sub_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_epi8 + #define _mm512_maskz_sub_epi8(k, a, b) simde_mm512_maskz_sub_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sub_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi16(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi16 + #define _mm512_sub_epi16(a, b) simde_mm512_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi32 + #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_epi32 + #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_epi32 + #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi64 + #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_epi64 + #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_epi64 + #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sub_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_ps + #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_ps + #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_ps + #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sub_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_pd + #define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_pd + #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_pd + #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SUB_H) */ +/* :: End simde/x86/avx512/sub.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rolv_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_rolv_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32 = vec_rl(a_.altivec_u32, b_.altivec_u32); + + return simde__m128i_from_private(r_); + #else + HEDLEY_STATIC_CAST(void, r_); + HEDLEY_STATIC_CAST(void, a_); + HEDLEY_STATIC_CAST(void, b_); + + simde__m128i + count1 = simde_mm_and_si128(b, simde_mm_set1_epi32(31)), + count2 = 
simde_mm_sub_epi32(simde_mm_set1_epi32(32), count1); + + return simde_mm_or_si128(simde_mm_sllv_epi32(a, count1), simde_mm_srlv_epi32(a, count2)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_rolv_epi32 + #define _mm_rolv_epi32(a, b) simde_mm_rolv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_rolv_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_rolv_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_rolv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_rolv_epi32 + #define _mm_mask_rolv_epi32(src, k, a, b) simde_mm_mask_rolv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_rolv_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_rolv_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_rolv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_rolv_epi32 + #define _mm_maskz_rolv_epi32(k, a, b) simde_mm_maskz_rolv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rolv_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_rolv_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_u32 = vec_rl(a_.m128i_private[i].altivec_u32, b_.m128i_private[i].altivec_u32); + } + + return simde__m256i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_rolv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_rolv_epi32(a_.m128i[1], b_.m128i[1]); + + return simde__m256i_from_private(r_); + #else + HEDLEY_STATIC_CAST(void, r_); + HEDLEY_STATIC_CAST(void, a_); + HEDLEY_STATIC_CAST(void, b_); + + simde__m256i + count1 = simde_mm256_and_si256(b, simde_mm256_set1_epi32(31)), + count2 = simde_mm256_sub_epi32(simde_mm256_set1_epi32(32), count1); + + return simde_mm256_or_si256(simde_mm256_sllv_epi32(a, count1), simde_mm256_srlv_epi32(a, count2)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_rolv_epi32 + #define _mm256_rolv_epi32(a, b) simde_mm256_rolv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_rolv_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_rolv_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_rolv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_rolv_epi32 + #define _mm256_mask_rolv_epi32(src, k, a, b) simde_mm256_mask_rolv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i 
+simde_mm256_maskz_rolv_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_rolv_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_rolv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_rolv_epi32 + #define _mm256_maskz_rolv_epi32(k, a, b) simde_mm256_maskz_rolv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rolv_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rolv_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_u32 = vec_rl(a_.m128i_private[i].altivec_u32, b_.m128i_private[i].altivec_u32); + } + + return simde__m512i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_rolv_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_rolv_epi32(a_.m256i[1], b_.m256i[1]); + + return simde__m512i_from_private(r_); + #else + HEDLEY_STATIC_CAST(void, r_); + HEDLEY_STATIC_CAST(void, a_); + HEDLEY_STATIC_CAST(void, b_); + + simde__m512i + count1 = simde_mm512_and_si512(b, simde_mm512_set1_epi32(31)), + count2 = simde_mm512_sub_epi32(simde_mm512_set1_epi32(32), count1); + + return simde_mm512_or_si512(simde_mm512_sllv_epi32(a, count1), simde_mm512_srlv_epi32(a, count2)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_rolv_epi32 + #define _mm512_rolv_epi32(a, b) simde_mm512_rolv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rolv_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rolv_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rolv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rolv_epi32 + #define _mm512_mask_rolv_epi32(src, k, a, b) simde_mm512_mask_rolv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_rolv_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_rolv_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_rolv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_rolv_epi32 + #define _mm512_maskz_rolv_epi32(k, a, b) simde_mm512_maskz_rolv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rolv_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_rolv_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = vec_rl(a_.altivec_u64, b_.altivec_u64); + + return simde__m128i_from_private(r_); + #else + HEDLEY_STATIC_CAST(void, r_); + HEDLEY_STATIC_CAST(void, a_); + HEDLEY_STATIC_CAST(void, b_); + + simde__m128i + count1 = simde_mm_and_si128(b, simde_mm_set1_epi64x(63)), + count2 = simde_mm_sub_epi64(simde_mm_set1_epi64x(64), count1); + + return 
simde_mm_or_si128(simde_mm_sllv_epi64(a, count1), simde_mm_srlv_epi64(a, count2)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_rolv_epi64 + #define _mm_rolv_epi64(a, b) simde_mm_rolv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_rolv_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_rolv_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_rolv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_rolv_epi64 + #define _mm_mask_rolv_epi64(src, k, a, b) simde_mm_mask_rolv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_rolv_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_rolv_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_rolv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_rolv_epi64 + #define _mm_maskz_rolv_epi64(k, a, b) simde_mm_maskz_rolv_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rolv_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_rolv_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_u64 = vec_rl(a_.m128i_private[i].altivec_u64, b_.m128i_private[i].altivec_u64); + } + + return simde__m256i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_rolv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_rolv_epi64(a_.m128i[1], b_.m128i[1]); + + return simde__m256i_from_private(r_); + #else + HEDLEY_STATIC_CAST(void, r_); + HEDLEY_STATIC_CAST(void, a_); + HEDLEY_STATIC_CAST(void, b_); + + simde__m256i + count1 = simde_mm256_and_si256(b, simde_mm256_set1_epi64x(63)), + count2 = simde_mm256_sub_epi64(simde_mm256_set1_epi64x(64), count1); + + return simde_mm256_or_si256(simde_mm256_sllv_epi64(a, count1), simde_mm256_srlv_epi64(a, count2)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_rolv_epi64 + #define _mm256_rolv_epi64(a, b) simde_mm256_rolv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_rolv_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_rolv_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_rolv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_rolv_epi64 + #define _mm256_mask_rolv_epi64(src, k, a, b) simde_mm256_mask_rolv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_rolv_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i 
b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_rolv_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_rolv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_rolv_epi64 + #define _mm256_maskz_rolv_epi64(k, a, b) simde_mm256_maskz_rolv_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rolv_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rolv_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_u64 = vec_rl(a_.m128i_private[i].altivec_u64, b_.m128i_private[i].altivec_u64); + } + + return simde__m512i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_rolv_epi64(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_rolv_epi64(a_.m256i[1], b_.m256i[1]); + + return simde__m512i_from_private(r_); + #else + HEDLEY_STATIC_CAST(void, r_); + HEDLEY_STATIC_CAST(void, a_); + HEDLEY_STATIC_CAST(void, b_); + + simde__m512i + count1 = simde_mm512_and_si512(b, simde_mm512_set1_epi64(63)), + count2 = simde_mm512_sub_epi64(simde_mm512_set1_epi64(64), count1); + + return simde_mm512_or_si512(simde_mm512_sllv_epi64(a, count1), simde_mm512_srlv_epi64(a, count2)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_rolv_epi64 + #define _mm512_rolv_epi64(a, b) simde_mm512_rolv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rolv_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rolv_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_rolv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rolv_epi64 + #define _mm512_mask_rolv_epi64(src, k, a, b) simde_mm512_mask_rolv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_rolv_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_rolv_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_rolv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_rolv_epi64 + #define _mm512_maskz_rolv_epi64(k, a, b) simde_mm512_maskz_rolv_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ROLV_H) */ +/* :: End simde/x86/avx512/rolv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/ror.h :: */ +#if !defined(SIMDE_X86_AVX512_ROR_H) +#define SIMDE_X86_AVX512_ROR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* 
e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_ror_epi32(a, imm8) _mm_ror_epi32(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128i + simde_mm_ror_epi32 (simde__m128i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_rl(a_.altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - imm8))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + r_.u32 = (a_.u32 >> (imm8 & 31)) | (a_.u32 << (32 - (imm8 & 31))); + break; + } + #else + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >> (imm8 & 31)) | (a_.u32[i] << (32 - (imm8 & 31))); + } + break; + } + #endif + + return simde__m128i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_ror_epi32 + #define _mm_ror_epi32(a, imm8) simde_mm_ror_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_ror_epi32(src, k, a, imm8) _mm_mask_ror_epi32(src, k, a, imm8) +#else + #define simde_mm_mask_ror_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm_ror_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_ror_epi32 + #define _mm_mask_ror_epi32(src, k, a, imm8) simde_mm_mask_ror_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_ror_epi32(k, a, imm8) _mm_maskz_ror_epi32(k, a, imm8) +#else + #define simde_mm_maskz_ror_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm_ror_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_ror_epi32 + #define _mm_maskz_ror_epi32(src, k, a, imm8) simde_mm_maskz_ror_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_ror_epi32(a, imm8) _mm256_ror_epi32(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256i + simde_mm256_ror_epi32 (simde__m256i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + r_.u32 = (a_.u32 >> (imm8 & 31)) | (a_.u32 << (32 - (imm8 & 31))); + break; + } + #else + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >> (imm8 & 31)) | (a_.u32[i] << (32 - (imm8 & 31))); + } + break; + } + 
#endif + + return simde__m256i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_ror_epi32 + #define _mm256_ror_epi32(a, imm8) simde_mm256_ror_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_ror_epi32(src, k, a, imm8) _mm256_mask_ror_epi32(src, k, a, imm8) +#else + #define simde_mm256_mask_ror_epi32(src, k, a, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_ror_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_ror_epi32 + #define _mm256_mask_ror_epi32(src, k, a, imm8) simde_mm256_mask_ror_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_ror_epi32(k, a, imm8) _mm256_maskz_ror_epi32(k, a, imm8) +#else + #define simde_mm256_maskz_ror_epi32(k, a, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_ror_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_ror_epi32 + #define _mm256_maskz_ror_epi32(k, a, imm8) simde_mm256_maskz_ror_epi32(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_ror_epi32(a, imm8) _mm512_ror_epi32(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512i + simde_mm512_ror_epi32 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32 - imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + r_.u32 = (a_.u32 >> (imm8 & 31)) | (a_.u32 << (32 - (imm8 & 31))); + break; + } + #else + switch (imm8 & 31) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >> (imm8 & 31)) | (a_.u32[i] << (32 - (imm8 & 31))); + } + break; + } + #endif + + return simde__m512i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_ror_epi32 + #define _mm512_ror_epi32(a, imm8) simde_mm512_ror_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_ror_epi32(src, k, a, imm8) _mm512_mask_ror_epi32(src, k, a, imm8) +#else + #define simde_mm512_mask_ror_epi32(src, k, a, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_ror_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ror_epi32 + #define _mm512_mask_ror_epi32(src, k, a, imm8) simde_mm512_mask_ror_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_ror_epi32(k, a, imm8) _mm512_maskz_ror_epi32(k, a, imm8) +#else + #define simde_mm512_maskz_ror_epi32(k, a, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_ror_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_ror_epi32 + #define _mm512_maskz_ror_epi32(k, a, imm8) simde_mm512_maskz_ror_epi32(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_ror_epi64(a, imm8) _mm_ror_epi64(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128i + simde_mm_ror_epi64 (simde__m128i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_rl(a_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - imm8))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + r_.u64 = (a_.u64 >> (imm8 & 63)) | (a_.u64 << (64 - (imm8 & 63))); + break; + } + #else + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >> (imm8 & 63)) | (a_.u64[i] << (64 - (imm8 & 63))); + } + break; + } + #endif + + return simde__m128i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_ror_epi64 + #define _mm_ror_epi64(a, imm8) simde_mm_ror_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_ror_epi64(src, k, a, imm8) _mm_mask_ror_epi64(src, k, a, imm8) +#else + #define simde_mm_mask_ror_epi64(src, k, a, imm8) simde_mm_mask_mov_epi64(src, k, simde_mm_ror_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_ror_epi64 + #define _mm_mask_ror_epi64(src, k, a, imm8) simde_mm_mask_ror_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_ror_epi64(k, a, imm8) _mm_maskz_ror_epi64(k, a, imm8) +#else + #define simde_mm_maskz_ror_epi64(k, a, imm8) simde_mm_maskz_mov_epi64(k, simde_mm_ror_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_ror_epi64 + #define _mm_maskz_ror_epi64(k, a, imm8) simde_mm_maskz_ror_epi64(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_ror_epi64(a, imm8) _mm256_ror_epi64(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256i + simde_mm256_ror_epi64 (simde__m256i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + r_.u64 = (a_.u64 >> (imm8 & 63)) | (a_.u64 << (64 - (imm8 & 63))); + break; + } + #else + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >> (imm8 & 63)) | (a_.u64[i] << (64 - (imm8 & 63))); + } + break; + } + #endif + + return simde__m256i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_ror_epi64 + #define _mm256_ror_epi64(a, imm8) simde_mm256_ror_epi64(a, imm8) 
+#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_ror_epi64(src, k, a, imm8) _mm256_mask_ror_epi64(src, k, a, imm8) +#else + #define simde_mm256_mask_ror_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_ror_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_ror_epi64 + #define _mm256_mask_ror_epi64(src, k, a, imm8) simde_mm256_mask_ror_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_ror_epi64(k, a, imm8) _mm256_maskz_ror_epi64(k, a, imm8) +#else + #define simde_mm256_maskz_ror_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_ror_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_ror_epi64 + #define _mm256_maskz_ror_epi64(k, a, imm8) simde_mm256_maskz_ror_epi64(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_ror_epi64(a, imm8) _mm512_ror_epi64(a, imm8) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512i + simde_mm512_ror_epi64 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64 - imm8))); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + r_.u64 = (a_.u64 >> (imm8 & 63)) | (a_.u64 << (64 - (imm8 & 63))); + break; + } + #else + switch (imm8 & 63) { + case 0: + r_ = a_; + break; + default: + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >> (imm8 & 63)) | (a_.u64[i] << (64 - (imm8 & 63))); + } + break; + } + #endif + + return simde__m512i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_ror_epi64 + #define _mm512_ror_epi64(a, imm8) simde_mm512_ror_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_ror_epi64(src, k, a, imm8) _mm512_mask_ror_epi64(src, k, a, imm8) +#else + #define simde_mm512_mask_ror_epi64(src, k, a, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_ror_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ror_epi64 + #define _mm512_mask_ror_epi64(src, k, a, imm8) simde_mm512_mask_ror_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_ror_epi64(k, a, imm8) _mm512_maskz_ror_epi64(k, a, imm8) +#else + #define simde_mm512_maskz_ror_epi64(k, a, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_ror_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_ror_epi64 + #define _mm512_maskz_ror_epi64(k, a, imm8) simde_mm512_maskz_ror_epi64(k, a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ROR_H) */ +/* :: End simde/x86/avx512/ror.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/rorv.h :: */ +#if !defined(SIMDE_X86_AVX512_RORV_H) 
+#define SIMDE_X86_AVX512_RORV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rorv_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_rorv_epi32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + r_.altivec_i32 = vec_rl(a_.altivec_i32, vec_sub(vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)), b_.altivec_u32)); + return simde__m128i_from_private(r_); + #else + simde__m128i + count1 = simde_mm_and_si128(b, simde_mm_set1_epi32(31)), + count2 = simde_mm_sub_epi32(simde_mm_set1_epi32(32), count1); + return simde_mm_or_si128(simde_mm_srlv_epi32(a, count1), simde_mm_sllv_epi32(a, count2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_rorv_epi32 + #define _mm_rorv_epi32(a, b) simde_mm_rorv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_rorv_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_rorv_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_rorv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_rorv_epi32 + #define _mm_mask_rorv_epi32(src, k, a, b) simde_mm_mask_rorv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_rorv_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_rorv_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_rorv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_rorv_epi32 + #define _mm_maskz_rorv_epi32(k, a, b) simde_mm_maskz_rorv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rorv_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_rorv_epi32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + 
r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_sub(vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)), b_.m128i_private[i].altivec_u32)); + } + + return simde__m256i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i[0] = simde_mm_rorv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_rorv_epi32(a_.m128i[1], b_.m128i[1]); + + return simde__m256i_from_private(r_); + #else + simde__m256i + count1 = simde_mm256_and_si256(b, simde_mm256_set1_epi32(31)), + count2 = simde_mm256_sub_epi32(simde_mm256_set1_epi32(32), count1); + return simde_mm256_or_si256(simde_mm256_srlv_epi32(a, count1), simde_mm256_sllv_epi32(a, count2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_rorv_epi32 + #define _mm256_rorv_epi32(a, b) simde_mm256_rorv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_rorv_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_rorv_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_rorv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_rorv_epi32 + #define _mm256_mask_rorv_epi32(src, k, a, b) simde_mm256_mask_rorv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_rorv_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_rorv_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_rorv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_rorv_epi32 + #define _mm256_maskz_rorv_epi32(k, a, b) simde_mm256_maskz_rorv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rorv_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rorv_epi32(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i32 = vec_rl(a_.m128i_private[i].altivec_i32, vec_sub(vec_splats(HEDLEY_STATIC_CAST(unsigned int, 32)), b_.m128i_private[i].altivec_u32)); + } + + return simde__m512i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + r_.m256i[0] = simde_mm256_rorv_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_rorv_epi32(a_.m256i[1], b_.m256i[1]); + + return simde__m512i_from_private(r_); + #else + simde__m512i + count1 = simde_mm512_and_si512(b, simde_mm512_set1_epi32(31)), + count2 = simde_mm512_sub_epi32(simde_mm512_set1_epi32(32), count1); + return simde_mm512_or_si512(simde_mm512_srlv_epi32(a, count1), simde_mm512_sllv_epi32(a, count2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_rorv_epi32 + #define _mm512_rorv_epi32(a, b) simde_mm512_rorv_epi32(a, b) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rorv_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rorv_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rorv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rorv_epi32 + #define _mm512_mask_rorv_epi32(src, k, a, b) simde_mm512_mask_rorv_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_rorv_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_rorv_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_rorv_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_rorv_epi32 + #define _mm512_maskz_rorv_epi32(k, a, b) simde_mm512_maskz_rorv_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rorv_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_rorv_epi64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + r_.altivec_i64 = vec_rl(a_.altivec_i64, vec_sub(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)), b_.altivec_u64)); + return simde__m128i_from_private(r_); + #else + simde__m128i + count1 = simde_mm_and_si128(b, simde_mm_set1_epi64x(63)), + count2 = simde_mm_sub_epi64(simde_mm_set1_epi64x(64), count1); + return simde_mm_or_si128(simde_mm_srlv_epi64(a, count1), simde_mm_sllv_epi64(a, count2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_rorv_epi64 + #define _mm_rorv_epi64(a, b) simde_mm_rorv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_rorv_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_rorv_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_rorv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_rorv_epi64 + #define _mm_mask_rorv_epi64(src, k, a, b) simde_mm_mask_rorv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_rorv_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_rorv_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_rorv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_rorv_epi64 + #define _mm_maskz_rorv_epi64(k, a, b) simde_mm_maskz_rorv_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rorv_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_rorv_epi64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + 
r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_sub(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)), b_.m128i_private[i].altivec_u64)); + } + + return simde__m256i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i[0] = simde_mm_rorv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_rorv_epi64(a_.m128i[1], b_.m128i[1]); + + return simde__m256i_from_private(r_); + #else + simde__m256i + count1 = simde_mm256_and_si256(b, simde_mm256_set1_epi64x(63)), + count2 = simde_mm256_sub_epi64(simde_mm256_set1_epi64x(64), count1); + return simde_mm256_or_si256(simde_mm256_srlv_epi64(a, count1), simde_mm256_sllv_epi64(a, count2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_rorv_epi64 + #define _mm256_rorv_epi64(a, b) simde_mm256_rorv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_rorv_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_rorv_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_rorv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_rorv_epi64 + #define _mm256_mask_rorv_epi64(src, k, a, b) simde_mm256_mask_rorv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_rorv_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_rorv_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_rorv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_rorv_epi64 + #define _mm256_maskz_rorv_epi64(k, a, b) simde_mm256_maskz_rorv_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rorv_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rorv_epi64(a, b); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; i++) { + r_.m128i_private[i].altivec_i64 = vec_rl(a_.m128i_private[i].altivec_i64, vec_sub(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 64)), b_.m128i_private[i].altivec_u64)); + } + + return simde__m512i_from_private(r_); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + r_.m256i[0] = simde_mm256_rorv_epi64(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_rorv_epi64(a_.m256i[1], b_.m256i[1]); + + return simde__m512i_from_private(r_); + #else + simde__m512i + count1 = simde_mm512_and_si512(b, simde_mm512_set1_epi64(63)), + count2 = simde_mm512_sub_epi64(simde_mm512_set1_epi64(64), count1); + return simde_mm512_or_si512(simde_mm512_srlv_epi64(a, count1), simde_mm512_sllv_epi64(a, count2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_rorv_epi64 + #define _mm512_rorv_epi64(a, b) simde_mm512_rorv_epi64(a, b) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rorv_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rorv_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_rorv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rorv_epi64 + #define _mm512_mask_rorv_epi64(src, k, a, b) simde_mm512_mask_rorv_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_rorv_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_rorv_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_rorv_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_rorv_epi64 + #define _mm512_maskz_rorv_epi64(k, a, b) simde_mm512_maskz_rorv_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_RORV_H) */ +/* :: End simde/x86/avx512/rorv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/round.h :: */ +#if !defined(SIMDE_X86_AVX512_ROUND_H) +#define SIMDE_X86_AVX512_ROUND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_x_mm512_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_x_mm512_round_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_x_mm512_round_ps_a_ = simde__m512_to_private(a); \ + \ + for (size_t simde_x_mm512_round_ps_i = 0 ; simde_x_mm512_round_ps_i < (sizeof(simde_x_mm512_round_ps_r_.m256) / sizeof(simde_x_mm512_round_ps_r_.m256[0])) ; simde_x_mm512_round_ps_i++) { \ + simde_x_mm512_round_ps_r_.m256[simde_x_mm512_round_ps_i] = simde_mm256_round_ps(simde_x_mm512_round_ps_a_.m256[simde_x_mm512_round_ps_i], rounding); \ + } \ + \ + simde__m512_from_private(simde_x_mm512_round_ps_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_x_mm512_round_ps (simde__m512 a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.m128_private[i].altivec_f32)); + } + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vrndiq_f32(a_.m128_private[i].neon_f32); + } + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.m128_private[i].altivec_f32)); + } + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vrndnq_f32(a_.m128_private[i].neon_f32); + } + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.m128_private[i].altivec_f32)); + } + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vrndmq_f32(a_.m128_private[i].neon_f32); + } + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.m128_private[i].altivec_f32)); + } + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vrndpq_f32(a_.m128_private[i].neon_f32); + } + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + 
HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.m128_private[i].altivec_f32)); + } + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vrndq_f32(a_.m128_private[i].neon_f32); + } + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_ps()); + } + + return simde__m512_from_private(r_); + } +#endif + +#if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_x_mm512_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_x_mm512_round_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_x_mm512_round_pd_a_ = simde__m512d_to_private(a); \ + \ + for (size_t simde_x_mm512_round_pd_i = 0 ; simde_x_mm512_round_pd_i < (sizeof(simde_x_mm512_round_pd_r_.m256d) / sizeof(simde_x_mm512_round_pd_r_.m256d[0])) ; simde_x_mm512_round_pd_i++) { \ + simde_x_mm512_round_pd_r_.m256d[simde_x_mm512_round_pd_i] = simde_mm256_round_pd(simde_x_mm512_round_pd_a_.m256d[simde_x_mm512_round_pd_i], rounding); \ + } \ + \ + simde__m512d_from_private(simde_x_mm512_round_pd_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_x_mm512_round_pd (simde__m512d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. 
*/ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.m128d_private[i].altivec_f64)); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vrndiq_f64(a_.m128d_private[i].neon_f64); + } + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.m128d_private[i].altivec_f64)); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vrndaq_f64(a_.m128d_private[i].neon_f64); + } + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_floor(a_.m128d_private[i].altivec_f64)); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vrndmq_f64(a_.m128d_private[i].neon_f64); + } + #elif defined(simde_math_floor) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.m128d_private[i].altivec_f64)); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vrndpq_f64(a_.m128d_private[i].neon_f64); + } + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); + 
#endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.m128d_private[i].altivec_f64)); + } + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vrndq_f64(a_.m128d_private[i].neon_f64); + } + #elif defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm512_setzero_pd()); + } + + return simde__m512d_from_private(r_); + } +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ROUND_H) */ +/* :: End simde/x86/avx512/round.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/roundscale.h :: */ +#if !defined(SIMDE_X86_AVX512_ROUNDSCALE_H) +#define SIMDE_X86_AVX512_ROUNDSCALE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_roundscale_ps(a, imm8) _mm_roundscale_ps((a), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_roundscale_ps_internal_ (simde__m128 result, simde__m128 a, int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m128 r, clear_sign; + + clear_sign = simde_mm_andnot_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.0)), result); + r = simde_x_mm_select_ps(result, a, simde_mm_cmpeq_ps(clear_sign, simde_mm_set1_ps(SIMDE_MATH_INFINITYF))); + + return r; + } + #define simde_mm_roundscale_ps(a, imm8) \ + simde_mm_roundscale_ps_internal_( \ + simde_mm_mul_ps( \ + simde_mm_round_ps( \ + simde_mm_mul_ps( \ + a, \ + simde_mm_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ + ), \ + (a), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_roundscale_ps + #define _mm_roundscale_ps(a, imm8) simde_mm_roundscale_ps(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_roundscale_ps(src, k, a, imm8) _mm_mask_roundscale_ps(src, k, a, imm8) +#else + #define simde_mm_mask_roundscale_ps(src, k, a, imm8) simde_mm_mask_mov_ps(src, k, simde_mm_roundscale_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_roundscale_ps + #define _mm_mask_roundscale_ps(src, k, a, imm8) simde_mm_mask_roundscale_ps(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_roundscale_ps(k, a, imm8) _mm_maskz_roundscale_ps(k, a, imm8) +#else + #define simde_mm_maskz_roundscale_ps(k, a, imm8) simde_mm_maskz_mov_ps(k, simde_mm_roundscale_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_roundscale_ps + #define _mm_maskz_roundscale_ps(k, a, imm8) simde_mm_maskz_roundscale_ps(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm256_roundscale_ps(a, imm8) _mm256_roundscale_ps((a), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_roundscale_ps(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_roundscale_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_roundscale_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_roundscale_ps_i = 0 ; simde_mm256_roundscale_ps_i < (sizeof(simde_mm256_roundscale_ps_r_.m128) / sizeof(simde_mm256_roundscale_ps_r_.m128[0])) ; simde_mm256_roundscale_ps_i++) { \ + simde_mm256_roundscale_ps_r_.m128[simde_mm256_roundscale_ps_i] = simde_mm_roundscale_ps(simde_mm256_roundscale_ps_a_.m128[simde_mm256_roundscale_ps_i], imm8); \ + } \ + \ + simde__m256_from_private(simde_mm256_roundscale_ps_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256 + simde_mm256_roundscale_ps_internal_ (simde__m256 result, simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m256 r, clear_sign; + + clear_sign = simde_mm256_andnot_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0)), result); + r = simde_x_mm256_select_ps(result, a, simde_mm256_castsi256_ps(simde_mm256_cmpeq_epi32(simde_mm256_castps_si256(clear_sign), simde_mm256_castps_si256(simde_mm256_set1_ps(SIMDE_MATH_INFINITYF))))); + + return r; + } + #define simde_mm256_roundscale_ps(a, imm8) \ + simde_mm256_roundscale_ps_internal_( \ + simde_mm256_mul_ps( \ + simde_mm256_round_ps( \ + simde_mm256_mul_ps( \ + a, \ + simde_mm256_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm256_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ + ), \ + (a), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_roundscale_ps + #define _mm256_roundscale_ps(a, imm8) simde_mm256_roundscale_ps(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_roundscale_ps(src, k, a, imm8) _mm256_mask_roundscale_ps(src, k, a, imm8) +#else + #define simde_mm256_mask_roundscale_ps(src, k, a, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_roundscale_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_roundscale_ps + #define _mm256_mask_roundscale_ps(src, k, a, imm8) simde_mm256_mask_roundscale_ps(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_roundscale_ps(k, a, imm8) _mm256_maskz_roundscale_ps(k, a, imm8) +#else + #define 
simde_mm256_maskz_roundscale_ps(k, a, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_roundscale_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_roundscale_ps + #define _mm256_maskz_roundscale_ps(k, a, imm8) simde_mm256_maskz_roundscale_ps(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_roundscale_ps(a, imm8) _mm512_roundscale_ps((a), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_roundscale_ps(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_roundscale_ps_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_roundscale_ps_a_ = simde__m512_to_private(a); \ + \ + for (size_t simde_mm512_roundscale_ps_i = 0 ; simde_mm512_roundscale_ps_i < (sizeof(simde_mm512_roundscale_ps_r_.m256) / sizeof(simde_mm512_roundscale_ps_r_.m256[0])) ; simde_mm512_roundscale_ps_i++) { \ + simde_mm512_roundscale_ps_r_.m256[simde_mm512_roundscale_ps_i] = simde_mm256_roundscale_ps(simde_mm512_roundscale_ps_a_.m256[simde_mm512_roundscale_ps_i], imm8); \ + } \ + \ + simde__m512_from_private(simde_mm512_roundscale_ps_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_roundscale_ps_internal_ (simde__m512 result, simde__m512 a, int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m512 r, clear_sign; + + clear_sign = simde_mm512_andnot_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0)), result); + r = simde_mm512_mask_mov_ps(result, simde_mm512_cmpeq_epi32_mask(simde_mm512_castps_si512(clear_sign), simde_mm512_castps_si512(simde_mm512_set1_ps(SIMDE_MATH_INFINITYF))), a); + + return r; + } + #define simde_mm512_roundscale_ps(a, imm8) \ + simde_mm512_roundscale_ps_internal_( \ + simde_mm512_mul_ps( \ + simde_x_mm512_round_ps( \ + simde_mm512_mul_ps( \ + a, \ + simde_mm512_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm512_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ + ), \ + (a), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_roundscale_ps + #define _mm512_roundscale_ps(a, imm8) simde_mm512_roundscale_ps(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_roundscale_ps(src, k, a, imm8) _mm512_mask_roundscale_ps(src, k, a, imm8) +#else + #define simde_mm512_mask_roundscale_ps(src, k, a, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_roundscale_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_roundscale_ps + #define _mm512_mask_roundscale_ps(src, k, a, imm8) simde_mm512_mask_roundscale_ps(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_roundscale_ps(k, a, imm8) _mm512_maskz_roundscale_ps(k, a, imm8) +#else + #define simde_mm512_maskz_roundscale_ps(k, a, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_roundscale_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_roundscale_ps + #define _mm512_maskz_roundscale_ps(k, a, imm8) simde_mm512_maskz_roundscale_ps(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_roundscale_pd(a, imm8) _mm_roundscale_pd((a), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_roundscale_pd_internal_ (simde__m128d result, simde__m128d a, int imm8) + 
SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m128d r, clear_sign; + + clear_sign = simde_mm_andnot_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.0)), result); + r = simde_x_mm_select_pd(result, a, simde_mm_cmpeq_pd(clear_sign, simde_mm_set1_pd(SIMDE_MATH_INFINITY))); + + return r; + } + #define simde_mm_roundscale_pd(a, imm8) \ + simde_mm_roundscale_pd_internal_( \ + simde_mm_mul_pd( \ + simde_mm_round_pd( \ + simde_mm_mul_pd( \ + a, \ + simde_mm_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ + ), \ + (a), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_roundscale_pd + #define _mm_roundscale_pd(a, imm8) simde_mm_roundscale_pd(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_roundscale_pd(src, k, a, imm8) _mm_mask_roundscale_pd(src, k, a, imm8) +#else + #define simde_mm_mask_roundscale_pd(src, k, a, imm8) simde_mm_mask_mov_pd(src, k, simde_mm_roundscale_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_roundscale_pd + #define _mm_mask_roundscale_pd(src, k, a, imm8) simde_mm_mask_roundscale_pd(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_roundscale_pd(k, a, imm8) _mm_maskz_roundscale_pd(k, a, imm8) +#else + #define simde_mm_maskz_roundscale_pd(k, a, imm8) simde_mm_maskz_mov_pd(k, simde_mm_roundscale_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_roundscale_pd + #define _mm_maskz_roundscale_pd(k, a, imm8) simde_mm_maskz_roundscale_pd(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm256_roundscale_pd(a, imm8) _mm256_roundscale_pd((a), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_roundscale_pd(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_roundscale_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_roundscale_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_roundscale_pd_i = 0 ; simde_mm256_roundscale_pd_i < (sizeof(simde_mm256_roundscale_pd_r_.m128d) / sizeof(simde_mm256_roundscale_pd_r_.m128d[0])) ; simde_mm256_roundscale_pd_i++) { \ + simde_mm256_roundscale_pd_r_.m128d[simde_mm256_roundscale_pd_i] = simde_mm_roundscale_pd(simde_mm256_roundscale_pd_a_.m128d[simde_mm256_roundscale_pd_i], imm8); \ + } \ + \ + simde__m256d_from_private(simde_mm256_roundscale_pd_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m256d + simde_mm256_roundscale_pd_internal_ (simde__m256d result, simde__m256d a, int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m256d r, clear_sign; + + clear_sign = simde_mm256_andnot_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.0)), result); + r = simde_x_mm256_select_pd(result, a, simde_mm256_castsi256_pd(simde_mm256_cmpeq_epi64(simde_mm256_castpd_si256(clear_sign), simde_mm256_castpd_si256(simde_mm256_set1_pd(SIMDE_MATH_INFINITY))))); + + return r; + } + #define simde_mm256_roundscale_pd(a, imm8) \ + simde_mm256_roundscale_pd_internal_( \ + simde_mm256_mul_pd( \ + simde_mm256_round_pd( 
\ + simde_mm256_mul_pd( \ + a, \ + simde_mm256_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm256_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ + ), \ + (a), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_roundscale_pd + #define _mm256_roundscale_pd(a, imm8) simde_mm256_roundscale_pd(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_roundscale_pd(src, k, a, imm8) _mm256_mask_roundscale_pd(src, k, a, imm8) +#else + #define simde_mm256_mask_roundscale_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_roundscale_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_roundscale_pd + #define _mm256_mask_roundscale_pd(src, k, a, imm8) simde_mm256_mask_roundscale_pd(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_roundscale_pd(k, a, imm8) _mm256_maskz_roundscale_pd(k, a, imm8) +#else + #define simde_mm256_maskz_roundscale_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_roundscale_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_roundscale_pd + #define _mm256_maskz_roundscale_pd(k, a, imm8) simde_mm256_maskz_roundscale_pd(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_roundscale_pd(a, imm8) _mm512_roundscale_pd((a), (imm8)) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_roundscale_pd(a, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_roundscale_pd_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_roundscale_pd_a_ = simde__m512d_to_private(a); \ + \ + for (size_t simde_mm512_roundscale_pd_i = 0 ; simde_mm512_roundscale_pd_i < (sizeof(simde_mm512_roundscale_pd_r_.m256d) / sizeof(simde_mm512_roundscale_pd_r_.m256d[0])) ; simde_mm512_roundscale_pd_i++) { \ + simde_mm512_roundscale_pd_r_.m256d[simde_mm512_roundscale_pd_i] = simde_mm256_roundscale_pd(simde_mm512_roundscale_pd_a_.m256d[simde_mm512_roundscale_pd_i], imm8); \ + } \ + \ + simde__m512d_from_private(simde_mm512_roundscale_pd_r_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_roundscale_pd_internal_ (simde__m512d result, simde__m512d a, int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m512d r, clear_sign; + + clear_sign = simde_mm512_andnot_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.0)), result); + r = simde_mm512_mask_mov_pd(result, simde_mm512_cmpeq_epi64_mask(simde_mm512_castpd_si512(clear_sign), simde_mm512_castpd_si512(simde_mm512_set1_pd(SIMDE_MATH_INFINITY))), a); + + return r; + } + #define simde_mm512_roundscale_pd(a, imm8) \ + simde_mm512_roundscale_pd_internal_( \ + simde_mm512_mul_pd( \ + simde_x_mm512_round_pd( \ + simde_mm512_mul_pd( \ + a, \ + simde_mm512_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm512_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ + ), \ + (a), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_roundscale_pd + #define _mm512_roundscale_pd(a, imm8) simde_mm512_roundscale_pd(a, imm8) +#endif + +#if 
defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_roundscale_pd(src, k, a, imm8) _mm512_mask_roundscale_pd(src, k, a, imm8) +#else + #define simde_mm512_mask_roundscale_pd(src, k, a, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_roundscale_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_roundscale_pd + #define _mm512_mask_roundscale_pd(src, k, a, imm8) simde_mm512_mask_roundscale_pd(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_roundscale_pd(k, a, imm8) _mm512_maskz_roundscale_pd(k, a, imm8) +#else + #define simde_mm512_maskz_roundscale_pd(k, a, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_roundscale_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_roundscale_pd + #define _mm512_maskz_roundscale_pd(k, a, imm8) simde_mm512_maskz_roundscale_pd(k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_roundscale_ss(a, b, imm8) _mm_roundscale_ss((a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_roundscale_ss_internal_ (simde__m128 result, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m128_private + r_ = simde__m128_to_private(result), + b_ = simde__m128_to_private(b); + + if(simde_math_isinff(r_.f32[0])) + r_.f32[0] = b_.f32[0]; + + return simde__m128_from_private(r_); + } + #define simde_mm_roundscale_ss(a, b, imm8) \ + simde_mm_roundscale_ss_internal_( \ + simde_mm_mul_ss( \ + simde_mm_round_ss( \ + a, \ + simde_mm_mul_ss( \ + b, \ + simde_mm_set1_ps(simde_math_exp2f(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm_set1_ps(simde_math_exp2f(-((imm8 >> 4) & 15))) \ + ), \ + (b), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_roundscale_ss + #define _mm_roundscale_ss(a, b, imm8) simde_mm_roundscale_ss(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_mask_roundscale_ss(src, k, a, b, imm8) _mm_mask_roundscale_ss((src), (k), (a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_mask_roundscale_ss_internal_ (simde__m128 a, simde__m128 b, simde__mmask8 k) { + simde__m128 r; + + if(k & 1) + r = a; + else + r = b; + + return r; + } + #define simde_mm_mask_roundscale_ss(src, k, a, b, imm8) \ + simde_mm_mask_roundscale_ss_internal_( \ + simde_mm_roundscale_ss( \ + a, \ + b, \ + imm8 \ + ), \ + simde_mm_move_ss( \ + (a), \ + (src) \ + ), \ + (k) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_roundscale_ss + #define _mm_mask_roundscale_ss(src, k, a, b, imm8) simde_mm_mask_roundscale_ss(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_maskz_roundscale_ss(k, a, b, imm8) _mm_maskz_roundscale_ss((k), (a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_maskz_roundscale_ss_internal_ (simde__m128 a, simde__m128 b, simde__mmask8 k) { + simde__m128 r; + + if(k & 1) + r = a; + else + r = b; + + return r; + } + #define simde_mm_maskz_roundscale_ss(k, a, b, imm8) \ + simde_mm_maskz_roundscale_ss_internal_( \ + simde_mm_roundscale_ss( \ + a, \ + b, \ + imm8 \ + ), \ + simde_mm_move_ss( \ + (a), \ + simde_mm_setzero_ps() \ + ), \ + (k) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_roundscale_ss + #define 
_mm_maskz_roundscale_ss(k, a, b, imm8) simde_mm_maskz_roundscale_ss(k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_roundscale_sd(a, b, imm8) _mm_roundscale_sd((a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_roundscale_sd_internal_ (simde__m128d result, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + HEDLEY_STATIC_CAST(void, imm8); + + simde__m128d_private + r_ = simde__m128d_to_private(result), + b_ = simde__m128d_to_private(b); + + if(simde_math_isinf(r_.f64[0])) + r_.f64[0] = b_.f64[0]; + + return simde__m128d_from_private(r_); + } + #define simde_mm_roundscale_sd(a, b, imm8) \ + simde_mm_roundscale_sd_internal_( \ + simde_mm_mul_sd( \ + simde_mm_round_sd( \ + a, \ + simde_mm_mul_sd( \ + b, \ + simde_mm_set1_pd(simde_math_exp2(((imm8 >> 4) & 15)))), \ + ((imm8) & 15) \ + ), \ + simde_mm_set1_pd(simde_math_exp2(-((imm8 >> 4) & 15))) \ + ), \ + (b), \ + (imm8) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_roundscale_sd + #define _mm_roundscale_sd(a, b, imm8) simde_mm_roundscale_sd(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_mask_roundscale_sd(src, k, a, b, imm8) _mm_mask_roundscale_sd((src), (k), (a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_mask_roundscale_sd_internal_ (simde__m128d a, simde__m128d b, simde__mmask8 k) { + simde__m128d r; + + if(k & 1) + r = a; + else + r = b; + + return r; + } + #define simde_mm_mask_roundscale_sd(src, k, a, b, imm8) \ + simde_mm_mask_roundscale_sd_internal_( \ + simde_mm_roundscale_sd( \ + a, \ + b, \ + imm8 \ + ), \ + simde_mm_move_sd( \ + (a), \ + (src) \ + ), \ + (k) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_roundscale_sd + #define _mm_mask_roundscale_sd(src, k, a, b, imm8) simde_mm_mask_roundscale_sd(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_maskz_roundscale_sd(k, a, b, imm8) _mm_maskz_roundscale_sd((k), (a), (b), (imm8)) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_maskz_roundscale_sd_internal_ (simde__m128d a, simde__m128d b, simde__mmask8 k) { + simde__m128d r; + + if(k & 1) + r = a; + else + r = b; + + return r; + } + #define simde_mm_maskz_roundscale_sd(k, a, b, imm8) \ + simde_mm_maskz_roundscale_sd_internal_( \ + simde_mm_roundscale_sd( \ + a, \ + b, \ + imm8 \ + ), \ + simde_mm_move_sd( \ + (a), \ + simde_mm_setzero_pd() \ + ), \ + (k) \ + ) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_roundscale_sd + #define _mm_maskz_roundscale_sd(k, a, b, imm8) simde_mm_maskz_roundscale_sd(k, a, b, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ROUNDSCALE_H) */ +/* :: End simde/x86/avx512/roundscale.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/roundscale_round.h :: */ +#if !defined(SIMDE_X86_AVX512_ROUNDSCALE_ROUND_H) +#define SIMDE_X86_AVX512_ROUNDSCALE_ROUND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(HEDLEY_MSVC_VERSION) +#pragma warning( push ) +#pragma 
warning( disable : 4244 ) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_roundscale_round_ps(a, imm8, sae) _mm512_roundscale_round_ps(a, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_roundscale_round_ps(a, imm8, sae) simde_mm512_roundscale_ps(a, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_roundscale_round_ps(a,imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_roundscale_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_roundscale_round_ps_envp; \ + int simde_mm512_roundscale_round_ps_x = feholdexcept(&simde_mm512_roundscale_round_ps_envp); \ + simde_mm512_roundscale_round_ps_r = simde_mm512_roundscale_ps(a, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_roundscale_round_ps_x == 0)) \ + fesetenv(&simde_mm512_roundscale_round_ps_envp); \ + } \ + else { \ + simde_mm512_roundscale_round_ps_r = simde_mm512_roundscale_ps(a, imm8); \ + } \ + \ + simde_mm512_roundscale_round_ps_r; \ + })) + #else + #define simde_mm512_roundscale_round_ps(a, imm8, sae) simde_mm512_roundscale_ps(a, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_roundscale_round_ps (simde__m512 a, int imm8, int sae) + SIMDE_REQUIRE_RANGE(imm8, 0, 15) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_roundscale_ps(a, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_roundscale_ps(a, imm8); + #endif + } + else { + r = simde_mm512_roundscale_ps(a, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_roundscale_round_ps + #define _mm512_roundscale_round_ps(a, imm8, sae) simde_mm512_roundscale_round_ps(a, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) _mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_mask_roundscale_ps(src, k, a, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_mask_roundscale_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_mask_roundscale_round_ps_envp; \ + int simde_mm512_mask_roundscale_round_ps_x = feholdexcept(&simde_mm512_mask_roundscale_round_ps_envp); \ + simde_mm512_mask_roundscale_round_ps_r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_mask_roundscale_round_ps_x == 0)) \ + fesetenv(&simde_mm512_mask_roundscale_round_ps_envp); \ + } \ + else { \ + simde_mm512_mask_roundscale_round_ps_r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); \ + } \ + \ + simde_mm512_mask_roundscale_round_ps_r; \ + })) + #else + #define simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_mask_roundscale_ps(src, k, a, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_mask_roundscale_round_ps (simde__m512 src, simde__mmask8 k, simde__m512 a, int imm8, int sae) + SIMDE_REQUIRE_RANGE(imm8, 0, 15) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); + 
if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); + #endif + } + else { + r = simde_mm512_mask_roundscale_ps(src, k, a, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_roundscale_round_ps + #define _mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_mask_roundscale_round_ps(src, k, a, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) _mm512_maskz_roundscale_round_ps(k, a, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) simde_mm512_maskz_roundscale_ps(k, a, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512 simde_mm512_maskz_roundscale_round_ps_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_maskz_roundscale_round_ps_envp; \ + int simde_mm512_maskz_roundscale_round_ps_x = feholdexcept(&simde_mm512_maskz_roundscale_round_ps_envp); \ + simde_mm512_maskz_roundscale_round_ps_r = simde_mm512_maskz_roundscale_ps(k, a, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_maskz_roundscale_round_ps_x == 0)) \ + fesetenv(&simde_mm512_maskz_roundscale_round_ps_envp); \ + } \ + else { \ + simde_mm512_maskz_roundscale_round_ps_r = simde_mm512_maskz_roundscale_ps(k, a, imm8); \ + } \ + \ + simde_mm512_maskz_roundscale_round_ps_r; \ + })) + #else + #define simde_mm512_maskz_roundscale_round_ps(src, k, a, imm8, sae) simde_mm512_maskz_roundscale_ps(k, a, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_maskz_roundscale_round_ps (simde__mmask8 k, simde__m512 a, int imm8, int sae) + SIMDE_REQUIRE_RANGE(imm8, 0, 15) { + simde__m512 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_maskz_roundscale_ps(k, a, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_maskz_roundscale_ps(k, a, imm8); + #endif + } + else { + r = simde_mm512_maskz_roundscale_ps(k, a, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_roundscale_round_ps + #define _mm512_maskz_roundscale_round_ps(k, a, imm8, sae) simde_mm512_maskz_roundscale_round_ps(k, a, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_roundscale_round_pd(a, imm8, sae) _mm512_roundscale_round_pd(a, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_roundscale_round_pd(a, imm8, sae) simde_mm512_roundscale_pd(a, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_roundscale_round_pd(a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_roundscale_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_roundscale_round_pd_envp; \ + int simde_mm512_roundscale_round_pd_x = feholdexcept(&simde_mm512_roundscale_round_pd_envp); \ + simde_mm512_roundscale_round_pd_r = simde_mm512_roundscale_pd(a, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_roundscale_round_pd_x == 0)) \ + fesetenv(&simde_mm512_roundscale_round_pd_envp); \ + } \ + else { \ + simde_mm512_roundscale_round_pd_r = simde_mm512_roundscale_pd(a, imm8); \ + } \ + \ + simde_mm512_roundscale_round_pd_r; \ + })) + #else + #define 
simde_mm512_roundscale_round_pd(a, imm8, sae) simde_mm512_roundscale_pd(a, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_roundscale_round_pd (simde__m512d a, int imm8, int sae) + SIMDE_REQUIRE_RANGE(imm8, 0, 15) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_roundscale_pd(a, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_roundscale_pd(a, imm8); + #endif + } + else { + r = simde_mm512_roundscale_pd(a, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_roundscale_round_pd + #define _mm512_roundscale_round_pd(a, imm8, sae) simde_mm512_roundscale_round_pd(a, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) _mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) simde_mm512_mask_roundscale_pd(src, k, a, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_mask_roundscale_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm512_mask_roundscale_round_pd_envp; \ + int simde_mm512_mask_roundscale_round_pd_x = feholdexcept(&simde_mm512_mask_roundscale_round_pd_envp); \ + simde_mm512_mask_roundscale_round_pd_r = simde_mm512_mask_roundscale_pd(src, k, a, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_mask_roundscale_round_pd_x == 0)) \ + fesetenv(&simde_mm512_mask_roundscale_round_pd_envp); \ + } \ + else { \ + simde_mm512_mask_roundscale_round_pd_r = simde_mm512_mask_roundscale_pd(src, k, a, imm8); \ + } \ + \ + simde_mm512_mask_roundscale_round_pd_r; \ + })) + #else + #define simde_mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) simde_mm512_mask_roundscale_pd(src, k, a, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_mask_roundscale_round_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, int imm8, int sae) + SIMDE_REQUIRE_RANGE(imm8, 0, 15) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_mask_roundscale_pd(src, k, a, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_mask_roundscale_pd(src, k, a, imm8); + #endif + } + else { + r = simde_mm512_mask_roundscale_pd(src, k, a, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_roundscale_round_pd + #define _mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) simde_mm512_mask_roundscale_round_pd(src, k, a, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm512_maskz_roundscale_round_pd(k, a, imm8, sae) _mm512_maskz_roundscale_round_pd(k, a, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm512_maskz_roundscale_round_pd(k, a, imm8, sae) simde_mm512_maskz_roundscale_pd(k, a, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm512_maskz_roundscale_round_pd(k, a, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d simde_mm512_maskz_roundscale_round_pd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + 
fenv_t simde_mm512_maskz_roundscale_round_pd_envp; \ + int simde_mm512_maskz_roundscale_round_pd_x = feholdexcept(&simde_mm512_maskz_roundscale_round_pd_envp); \ + simde_mm512_maskz_roundscale_round_pd_r = simde_mm512_maskz_roundscale_pd(k, a, imm8); \ + if (HEDLEY_LIKELY(simde_mm512_maskz_roundscale_round_pd_x == 0)) \ + fesetenv(&simde_mm512_maskz_roundscale_round_pd_envp); \ + } \ + else { \ + simde_mm512_maskz_roundscale_round_pd_r = simde_mm512_maskz_roundscale_pd(k, a, imm8); \ + } \ + \ + simde_mm512_maskz_roundscale_round_pd_r; \ + })) + #else + #define simde_mm512_maskz_roundscale_round_pd(src, k, a, imm8, sae) simde_mm512_maskz_roundscale_pd(k, a, imm8) + #endif +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512d + simde_mm512_maskz_roundscale_round_pd (simde__mmask8 k, simde__m512d a, int imm8, int sae) + SIMDE_REQUIRE_RANGE(imm8, 0, 15) { + simde__m512d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm512_maskz_roundscale_pd(k, a, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm512_maskz_roundscale_pd(k, a, imm8); + #endif + } + else { + r = simde_mm512_maskz_roundscale_pd(k, a, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_roundscale_round_pd + #define _mm512_maskz_roundscale_round_pd(k, a, imm8, sae) simde_mm512_maskz_roundscale_round_pd(k, a, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_roundscale_round_ss(a, b, imm8, sae) _mm_roundscale_round_ss(a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_roundscale_round_ss(a, b, imm8, sae) simde_mm_roundscale_ss(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_roundscale_round_ss(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_roundscale_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_roundscale_round_ss_envp; \ + int simde_mm_roundscale_round_ss_x = feholdexcept(&simde_mm_roundscale_round_ss_envp); \ + simde_mm_roundscale_round_ss_r = simde_mm_roundscale_ss(a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_roundscale_round_ss_x == 0)) \ + fesetenv(&simde_mm_roundscale_round_ss_envp); \ + } \ + else { \ + simde_mm_roundscale_round_ss_r = simde_mm_roundscale_ss(a, b, imm8); \ + } \ + \ + simde_mm_roundscale_round_ss_r; \ + })) + #else + #define simde_mm_roundscale_round_ss(a, b, imm8, sae) simde_mm_roundscale_ss(a, b, imm8) + #endif +#elif !(defined(HEDLEY_MSVC_VERSION) && defined(SIMDE_X86_AVX_NATIVE)) + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_roundscale_round_ss (simde__m128 a, simde__m128 b, const int imm8, const int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_roundscale_ss(a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_roundscale_ss(a, b, imm8); + #endif + } + else { + r = simde_mm_roundscale_ss(a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_roundscale_round_ss + #define _mm_roundscale_round_ss(a, b, imm8, sae) simde_mm_roundscale_round_ss(a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) 
_mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_roundscale_ss(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_mask_roundscale_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_mask_roundscale_round_ss_envp; \ + int simde_mm_mask_roundscale_round_ss_x = feholdexcept(&simde_mm_mask_roundscale_round_ss_envp); \ + simde_mm_mask_roundscale_round_ss_r = simde_mm_mask_roundscale_ss(src, k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_mask_roundscale_round_ss_x == 0)) \ + fesetenv(&simde_mm_mask_roundscale_round_ss_envp); \ + } \ + else { \ + simde_mm_mask_roundscale_round_ss_r = simde_mm_mask_roundscale_ss(src, k, a, b, imm8); \ + } \ + \ + simde_mm_mask_roundscale_round_ss_r; \ + })) + #else + #define simde_mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_roundscale_ss(src, k, a, b, imm8) + #endif +#elif !(defined(HEDLEY_MSVC_VERSION) && defined(SIMDE_X86_AVX_NATIVE)) + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_mask_roundscale_round_ss (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b, const int imm8, const int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_mask_roundscale_ss(src, k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_mask_roundscale_ss(src, k, a, b, imm8); + #endif + } + else { + r = simde_mm_mask_roundscale_ss(src, k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_roundscale_round_ss + #define _mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_roundscale_round_ss(src, k, a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) _mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) simde_mm_maskz_roundscale_ss(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128 simde_mm_maskz_roundscale_round_ss_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_maskz_roundscale_round_ss_envp; \ + int simde_mm_maskz_roundscale_round_ss_x = feholdexcept(&simde_mm_maskz_roundscale_round_ss_envp); \ + simde_mm_maskz_roundscale_round_ss_r = simde_mm_maskz_roundscale_ss(k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_maskz_roundscale_round_ss_x == 0)) \ + fesetenv(&simde_mm_maskz_roundscale_round_ss_envp); \ + } \ + else { \ + simde_mm_maskz_roundscale_round_ss_r = simde_mm_maskz_roundscale_ss(k, a, b, imm8); \ + } \ + \ + simde_mm_maskz_roundscale_round_ss_r; \ + })) + #else + #define simde_mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) simde_mm_maskz_roundscale_ss(k, a, b, imm8) + #endif +#elif !(defined(HEDLEY_MSVC_VERSION) && defined(SIMDE_X86_AVX_NATIVE)) + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde_mm_maskz_roundscale_round_ss (simde__mmask8 k, simde__m128 a, simde__m128 b, const int 
imm8, const int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128 r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_maskz_roundscale_ss(k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_maskz_roundscale_ss(k, a, b, imm8); + #endif + } + else { + r = simde_mm_maskz_roundscale_ss(k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_roundscale_round_ss + #define _mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) simde_mm_maskz_roundscale_round_ss(k, a, b, imm8, sae) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +#pragma warning( pop ) +#endif + + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_roundscale_round_sd(a, b, imm8, sae) _mm_roundscale_round_sd(a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_roundscale_round_sd(a, b, imm8, sae) simde_mm_roundscale_sd(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_roundscale_round_sd(a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_roundscale_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_roundscale_round_sd_envp; \ + int simde_mm_roundscale_round_sd_x = feholdexcept(&simde_mm_roundscale_round_sd_envp); \ + simde_mm_roundscale_round_sd_r = simde_mm_roundscale_sd(a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_roundscale_round_sd_x == 0)) \ + fesetenv(&simde_mm_roundscale_round_sd_envp); \ + } \ + else { \ + simde_mm_roundscale_round_sd_r = simde_mm_roundscale_sd(a, b, imm8); \ + } \ + \ + simde_mm_roundscale_round_sd_r; \ + })) + #else + #define simde_mm_roundscale_round_sd(a, b, imm8, sae) simde_mm_roundscale_sd(a, b, imm8) + #endif +#elif !(defined(HEDLEY_MSVC_VERSION) && defined(SIMDE_X86_AVX_NATIVE)) + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_roundscale_round_sd (simde__m128d a, simde__m128d b, const int imm8, const int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_roundscale_sd(a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_roundscale_sd(a, b, imm8); + #endif + } + else { + r = simde_mm_roundscale_sd(a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_roundscale_round_sd + #define _mm_roundscale_round_sd(a, b, imm8, sae) simde_mm_roundscale_round_sd(a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) _mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_roundscale_sd(src, k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_mask_roundscale_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_mask_roundscale_round_sd_envp; \ + int simde_mm_mask_roundscale_round_sd_x = feholdexcept(&simde_mm_mask_roundscale_round_sd_envp); \ + simde_mm_mask_roundscale_round_sd_r = 
simde_mm_mask_roundscale_sd(src, k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_mask_roundscale_round_sd_x == 0)) \ + fesetenv(&simde_mm_mask_roundscale_round_sd_envp); \ + } \ + else { \ + simde_mm_mask_roundscale_round_sd_r = simde_mm_mask_roundscale_sd(src, k, a, b, imm8); \ + } \ + \ + simde_mm_mask_roundscale_round_sd_r; \ + })) + #else + #define simde_mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_roundscale_sd(src, k, a, b, imm8) + #endif +#elif !(defined(HEDLEY_MSVC_VERSION) && defined(SIMDE_X86_AVX_NATIVE)) + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_mask_roundscale_round_sd (simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b, const int imm8, const int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_mask_roundscale_sd(src, k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_mask_roundscale_sd(src, k, a, b, imm8); + #endif + } + else { + r = simde_mm_mask_roundscale_sd(src, k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_roundscale_round_sd + #define _mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_roundscale_round_sd(src, k, a, b, imm8, sae) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_92035) + #define simde_mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) _mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) +#elif defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) simde_mm_maskz_roundscale_sd(k, a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) + #if defined(SIMDE_HAVE_FENV_H) + #define simde_mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) SIMDE_STATEMENT_EXPR_(({ \ + simde__m128d simde_mm_maskz_roundscale_round_sd_r; \ + \ + if (sae & SIMDE_MM_FROUND_NO_EXC) { \ + fenv_t simde_mm_maskz_roundscale_round_sd_envp; \ + int simde_mm_maskz_roundscale_round_sd_x = feholdexcept(&simde_mm_maskz_roundscale_round_sd_envp); \ + simde_mm_maskz_roundscale_round_sd_r = simde_mm_maskz_roundscale_sd(k, a, b, imm8); \ + if (HEDLEY_LIKELY(simde_mm_maskz_roundscale_round_sd_x == 0)) \ + fesetenv(&simde_mm_maskz_roundscale_round_sd_envp); \ + } \ + else { \ + simde_mm_maskz_roundscale_round_sd_r = simde_mm_maskz_roundscale_sd(k, a, b, imm8); \ + } \ + \ + simde_mm_maskz_roundscale_round_sd_r; \ + })) + #else + #define simde_mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) simde_mm_maskz_roundscale_sd(k, a, b, imm8) + #endif +#elif !(defined(HEDLEY_MSVC_VERSION) && defined(SIMDE_X86_AVX_NATIVE)) + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde_mm_maskz_roundscale_round_sd (simde__mmask8 k, simde__m128d a, simde__m128d b, const int imm8, const int sae) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) + SIMDE_REQUIRE_CONSTANT(sae) { + simde__m128d r; + + if (sae & SIMDE_MM_FROUND_NO_EXC) { + #if defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = simde_mm_maskz_roundscale_sd(k, a, b, imm8); + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = simde_mm_maskz_roundscale_sd(k, a, b, imm8); + #endif + } + else { + r = simde_mm_maskz_roundscale_sd(k, a, b, imm8); + } + + return r; + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_roundscale_round_sd + #define _mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) 
simde_mm_maskz_roundscale_round_sd(k, a, b, imm8, sae) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ROUNDSCALE_ROUND_H) */ +/* :: End simde/x86/avx512/roundscale_round.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sad.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SAD_H) +#define SIMDE_X86_AVX512_SAD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sad_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sad_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sad_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 8) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sad_epu8 + #define _mm512_sad_epu8(a, b) simde_mm512_sad_epu8(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SAD_H) */ +/* :: End simde/x86/avx512/sad.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/scalef.h :: */ +#if !defined(SIMDE_X86_AVX512_SCALEF_H) +#define SIMDE_X86_AVX512_SCALEF_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/svml.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_SVML_H) +#define SIMDE_X86_SVML_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/xorsign.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +/* This is a SIMDe extension which is not part of AVX-512. It exists + * because a lot of numerical methods in SIMDe have algoriths which do + * something like: + * + * float sgn = input < 0 ? -1 : 1; + * ... + * return res * sgn; + * + * Which can be replaced with a much more efficient call to xorsign: + * + * return simde_x_mm512_xorsign_ps(res, input); + * + * While this was originally intended for use in SIMDe, please feel + * free to use it in your code. 
+ */ + +#if !defined(SIMDE_X86_AVX512_XORSIGN_H) +#define SIMDE_X86_AVX512_XORSIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_xorsign_ps(simde__m512 dest, simde__m512 src) { + return simde_mm512_xor_ps(simde_mm512_and_ps(simde_mm512_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_xorsign_pd(simde__m512d dest, simde__m512d src) { + return simde_mm512_xor_pd(simde_mm512_and_pd(simde_mm512_set1_pd(-0.0), src), dest); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_XORSIGN_H) */ +/* :: End simde/x86/avx512/xorsign.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sqrt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SQRT_H) +#define SIMDE_X86_AVX512_SQRT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sqrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sqrt_ps(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256[0] = simde_mm256_sqrt_ps(a_.m256[0]); + r_.m256[1] = simde_mm256_sqrt_ps(a_.m256[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) +# define _mm512_sqrt_ps(a) simde_mm512_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sqrt_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sqrt_ps + #define _mm512_mask_sqrt_ps(src, k, a) simde_mm512_mask_sqrt_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sqrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sqrt_pd(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256d[0] = simde_mm256_sqrt_pd(a_.m256d[0]); + r_.m256d[1] = simde_mm256_sqrt_pd(a_.m256d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) +# define _mm512_sqrt_pd(a) simde_mm512_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sqrt_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sqrt_pd + #define _mm512_mask_sqrt_pd(src, k, a) simde_mm512_mask_sqrt_pd(src, k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SQRT_H) */ +/* :: End simde/x86/avx512/sqrt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-complex.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies 
+ * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +/* Support for complex math. + * + * We try to avoid inculding (in C++ mode) since it pulls in + * a *lot* of code. Unfortunately this only works for GNU modes (i.e., + * -std=gnu++14 not -std=c++14) unless you pass -fext-numeric-literals, + * but there is no way (AFAICT) to detect that flag so we have to rely + * on __STRICT_ANSI__ to instead detect GNU mode. + * + * This header is separate from simde-math.h since there is a good + * chance it will pull in , and most of the time we don't need + * complex math (on x86 only SVML uses it). */ + +#if !defined(SIMDE_COMPLEX_H) +#define SIMDE_COMPLEX_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if ( \ + HEDLEY_HAS_BUILTIN(__builtin_creal) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) \ + ) && (!defined(__cplusplus) && !defined(__STRICT_ANSI__)) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ + typedef __complex__ float simde_cfloat32; + typedef __complex__ double simde_cfloat64; + HEDLEY_DIAGNOSTIC_POP + #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j)) + #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj)) + + #if !defined(simde_math_creal) + #define simde_math_crealf(z) __builtin_crealf(z) + #endif + #if !defined(simde_math_crealf) + #define simde_math_creal(z) __builtin_creal(z) + #endif + #if !defined(simde_math_cimag) + #define simde_math_cimagf(z) __builtin_cimagf(z) + #endif + #if !defined(simde_math_cimagf) + #define simde_math_cimag(z) __builtin_cimag(z) + #endif + #if !defined(simde_math_cexp) + #define simde_math_cexp(z) __builtin_cexp(z) + #endif + #if !defined(simde_math_cexpf) + #define simde_math_cexpf(z) __builtin_cexpf(z) + #endif +#elif !defined(__cplusplus) + #include + + #if !defined(HEDLEY_MSVC_VERSION) + typedef float _Complex simde_cfloat32; + typedef double _Complex simde_cfloat64; + #else + typedef _Fcomplex simde_cfloat32; + typedef _Dcomplex simde_cfloat64; + #endif + + #if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64) { (x), (y) }) + #define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32) { (x), (y) }) + #elif defined(CMPLX) && defined(CMPLXF) + #define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y) + #define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y) + #else + #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I) + #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * I) + #endif + + #if !defined(simde_math_creal) + #define simde_math_creal(z) creal(z) + #endif + #if 
!defined(simde_math_crealf) + #define simde_math_crealf(z) crealf(z) + #endif + #if !defined(simde_math_cimag) + #define simde_math_cimag(z) cimag(z) + #endif + #if !defined(simde_math_cimagf) + #define simde_math_cimagf(z) cimagf(z) + #endif + #if !defined(simde_math_cexp) + #define simde_math_cexp(z) cexp(z) + #endif + #if !defined(simde_math_cexpf) + #define simde_math_cexpf(z) cexpf(z) + #endif +#else + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + #pragma warning(disable:4530) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + + typedef std::complex simde_cfloat32; + typedef std::complex simde_cfloat64; + #define SIMDE_MATH_CMPLX(x, y) (std::complex(x, y)) + #define SIMDE_MATH_CMPLXF(x, y) (std::complex(x, y)) + + #if !defined(simde_math_creal) + #define simde_math_creal(z) ((z).real()) + #endif + #if !defined(simde_math_crealf) + #define simde_math_crealf(z) ((z).real()) + #endif + #if !defined(simde_math_cimag) + #define simde_math_cimag(z) ((z).imag()) + #endif + #if !defined(simde_math_cimagf) + #define simde_math_cimagf(z) ((z).imag()) + #endif + #if !defined(simde_math_cexp) + #define simde_math_cexp(z) std::exp(z) + #endif + #if !defined(simde_math_cexpf) + #define simde_math_cexpf(z) std::exp(z) + #endif +#endif + +#endif /* !defined(SIMDE_COMPLEX_H) */ +/* :: End simde/simde-complex.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_acos_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosf4_u10(a); + #else + return Sleef_acosf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acosf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acos_ps + #define _mm_acos_ps(a) simde_mm_acos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_acos_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosd2_u10(a); + #else + return Sleef_acosd2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acos(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acos_pd + #define _mm_acos_pd(a) simde_mm_acos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_acos_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosf8_u10(a); + #else + return Sleef_acosf8_u35(a); + #endif + 
#else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_acos_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acosf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acos_ps + #define _mm256_acos_ps(a) simde_mm256_acos_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_acos_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosd4_u10(a); + #else + return Sleef_acosd4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_acos_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acos(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acos_pd + #define _mm256_acos_pd(a) simde_mm256_acos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_acos_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosf16_u10(a); + #else + return Sleef_acosf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_acos_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acosf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acos_ps + #define _mm512_acos_ps(a) simde_mm512_acos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_acos_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosd8_u10(a); + #else + return Sleef_acosd8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_acos_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acos(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acos_pd + #define _mm512_acos_pd(a) simde_mm512_acos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_acos_ps(simde__m512 src, simde__mmask16 k, 
simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acos_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acos_ps + #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acos_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acos_pd + #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_acosh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_acoshf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acoshf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acosh_ps + #define _mm_acosh_ps(a) simde_mm_acosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_acosh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_acoshd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acosh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acosh_pd + #define _mm_acosh_pd(a) simde_mm_acosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_acosh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_acoshf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acoshf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acosh_ps + #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_acosh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_acoshd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) 
; i++) { + r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acosh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acosh_pd + #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_acosh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_acoshf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acoshf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acosh_ps + #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_acosh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_acoshd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acosh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acosh_pd + #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acosh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acosh_ps + #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acosh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acosh_pd + #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_asin_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asinf4_u10(a); + #else + return Sleef_asinf4_u35(a); + #endif + #else + simde__m128_private + r_, + 
a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asin_ps + #define _mm_asin_ps(a) simde_mm_asin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_asin_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asind2_u10(a); + #else + return Sleef_asind2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asin(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asin_pd + #define _mm_asin_pd(a) simde_mm_asin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_asin_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asinf8_u10(a); + #else + return Sleef_asinf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_asin_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asin_ps + #define _mm256_asin_ps(a) simde_mm256_asin_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_asin_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asind4_u10(a); + #else + return Sleef_asind4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asin(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asin_pd + #define _mm256_asin_pd(a) simde_mm256_asin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_asin_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asinf16_u10(a); + #else + return Sleef_asinf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) 
{ + r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asin_ps + #define _mm512_asin_ps(a) simde_mm512_asin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_asin_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asind8_u10(a); + #else + return Sleef_asind8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asin(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asin_pd + #define _mm512_asin_pd(a) simde_mm512_asin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asin_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asin_ps + #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asin_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asin_pd + #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_asinh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_asinhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asinh_ps + #define _mm_asinh_ps(a) simde_mm_asinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_asinh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_asinhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asinh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asinh_pd + #define _mm_asinh_pd(a) simde_mm_asinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_asinh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_asinhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asinh_ps + #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_asinh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_asinhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asinh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asinh_pd + #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_asinh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_asinhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asinh_ps + #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_asinh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_asinhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asinh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_asinh_pd + #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asinh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asinh_ps + #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asinh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asinh_pd + #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_atan_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atanf4_u10(a); + #else + return Sleef_atanf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan_ps + #define _mm_atan_ps(a) simde_mm_atan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_atan_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atand2_u10(a); + #else + return Sleef_atand2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan_pd + #define _mm_atan_pd(a) simde_mm_atan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_atan_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atanf8_u10(a); + #else + return Sleef_atanf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_atan_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan_ps + #define _mm256_atan_ps(a) simde_mm256_atan_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d 
+simde_mm256_atan_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atand4_u10(a); + #else + return Sleef_atand4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan_pd + #define _mm256_atan_pd(a) simde_mm256_atan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_atan_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atanf16_u10(a); + #else + return Sleef_atanf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan_ps + #define _mm512_atan_ps(a) simde_mm512_atan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_atan_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atand8_u10(a); + #else + return Sleef_atand8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan_pd + #define _mm512_atan_pd(a) simde_mm512_atan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan_ps + #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan_pd(src, k, a); + #else 
+ return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan_pd + #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_atan2_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan2_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2f4_u10(a, b); + #else + return Sleef_atan2f4_u35(a, b); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan2_ps + #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_atan2_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan2_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2d2_u10(a, b); + #else + return Sleef_atan2d2_u35(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan2_pd + #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_atan2_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan2_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2f8_u10(a, b); + #else + return Sleef_atan2f8_u35(a, b); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan2_ps + #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_atan2_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan2_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2d4_u10(a, b); + #else + return Sleef_atan2d4_u35(a, b); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / 
sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan2_pd + #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_atan2_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan2_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2f16_u10(a, b); + #else + return Sleef_atan2f16_u35(a, b); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan2_ps + #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_atan2_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan2_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2d8_u10(a, b); + #else + return Sleef_atan2d8_u35(a, b); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan2_pd + #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan2_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan2_ps + #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan2_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan2_pd + #define _mm512_mask_atan2_pd(src, k, 
a, b) simde_mm512_mask_atan2_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_atanh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_atanhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atanh_ps + #define _mm_atanh_ps(a) simde_mm_atanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_atanh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_atanhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atanh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atanh_pd + #define _mm_atanh_pd(a) simde_mm_atanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_atanh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_atanhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atanh_ps + #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_atanh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_atanhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atanh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atanh_pd + #define _mm256_atanh_pd(a) simde_mm256_atanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_atanh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_atanhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + 
for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atanh_ps + #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_atanh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_atanhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atanh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atanh_pd + #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atanh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atanh_ps + #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atanh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atanh_pd + #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cbrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cbrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_cbrtf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cbrtf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cbrt_ps + #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cbrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cbrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_cbrtd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cbrt(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cbrt_pd + #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cbrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cbrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_cbrtf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cbrtf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cbrt_ps + #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cbrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cbrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_cbrtd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cbrt(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cbrt_pd + #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cbrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cbrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_cbrtf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cbrtf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cbrt_ps + #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cbrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cbrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_cbrtd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cbrt(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cbrt_pd + #define 
_mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cbrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cbrt_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cbrt_ps + #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cbrt_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cbrt_pd + #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cexp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cexp_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); + r_.f32[ i ] = simde_math_crealf(val); + r_.f32[i + 1] = simde_math_cimagf(val); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cexp_ps + #define _mm_cexp_ps(a) simde_mm_cexp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cexp_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cexp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); + r_.f32[ i ] = simde_math_crealf(val); + r_.f32[i + 1] = simde_math_cimagf(val); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cexp_ps + #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cos_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf4_u10(a); + #else + return Sleef_cosf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cos_ps + #define _mm_cos_ps(a) simde_mm_cos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cos_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd2_u10(a); + #else + return Sleef_cosd2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = 
simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cos_pd + #define _mm_cos_pd(a) simde_mm_cos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cos_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf8_u10(a); + #else + return Sleef_cosf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cos_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cos_ps + #define _mm256_cos_ps(a) simde_mm256_cos_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cos_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd4_u10(a); + #else + return Sleef_cosd4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cos_pd + #define _mm256_cos_pd(a) simde_mm256_cos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cos_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf16_u10(a); + #else + return Sleef_cosf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cos_ps + #define _mm512_cos_ps(a) simde_mm512_cos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cos_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd8_u10(a); + #else + return Sleef_cosd8_u35(a); + #endif + #else + 
simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cos_pd + #define _mm512_cos_pd(a) simde_mm512_cos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cos_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cos_ps + #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cos_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cos_pd + #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deg2rad_ps(simde__m128 a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PI_OVER_180F); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; + r_.f32 = a_.f32 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_deg2radf(a_.f32[i]); + } + + #endif + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deg2rad_pd(simde__m128d a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI_OVER_180); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; + r_.f64 = a_.f64 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_deg2rad(a_.f64[i]); + } + + #endif + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deg2rad_ps(simde__m256 a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) + return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F)); + #else + simde__m256_private + r_, 
+ a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f32) tmp = { + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F + }; + r_.f32 = a_.f32 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_deg2radf(a_.f32[i]); + } + + #endif + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deg2rad_pd(simde__m256d a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) + return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180)); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; + r_.f64 = a_.f64 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_deg2rad(a_.f64[i]); + } + + #endif + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_deg2rad_ps(simde__m512 a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) + return simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F)); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f32) tmp = { + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F + }; + r_.f32 = a_.f32 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_deg2radf(a_.f32[i]); + } + + #endif + return simde__m512_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_deg2rad_pd(simde__m512d a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) + return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180)); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = 
simde_x_mm256_deg2rad_pd(a_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f64) tmp = { + SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, + SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 + }; + r_.f64 = a_.f64 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_deg2rad(a_.f64[i]); + } + + #endif + return simde__m512d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cosd_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosd_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a)); + #else + return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a)); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosd_ps + #define _mm_cosd_ps(a) simde_mm_cosd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cosd_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosd_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a)); + #else + return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a)); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosd_pd + #define _mm_cosd_pd(a) simde_mm_cosd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cosd_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosd_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a)); + #else + return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a)); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosd_ps + #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cosd_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosd_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a)); + #else + return Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a)); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosd_pd + #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cosd_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosd_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a)); + #else + return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a)); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosd_ps + #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cosd_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosd_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a)); + #else + return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a)); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosd_pd + #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosd_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosd_ps + #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return 
_mm512_mask_cosd_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosd_pd + #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cosh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_coshf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_coshf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosh_ps + #define _mm_cosh_ps(a) simde_mm_cosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cosh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_coshd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cosh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosh_pd + #define _mm_cosh_pd(a) simde_mm_cosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cosh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_coshf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_coshf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosh_ps + #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cosh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_coshd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cosh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosh_pd + #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cosh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosh_ps(a); + #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_coshf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_coshf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosh_ps + #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cosh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_coshd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cosh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosh_pd + #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosh_ps + #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosh_pd + #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 / b_.i8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] / b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi8 + #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi16(a, b); + #else + 
simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 / b_.i16; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] / b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi16 + #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 / b_.i32; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] / b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi32 + #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b) + #undef _mm_idiv_epi32 + #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 / b_.i64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] / b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi64 + #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 / b_.u8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu8 + #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 / b_.u16; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu16 + #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 / b_.u32; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu32 + #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b) + #undef _mm_udiv_epi32 + #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 / b_.u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu64 + #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 / b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] / b_.i8[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi8 + #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 / b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] / b_.i16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi16 + #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 / b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] / b_.i32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi32 + #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b) + #undef _mm256_idiv_epi32 + #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 / b_.i64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] / b_.i64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi64 + #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 / b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu8 + #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b)) +#endif 
+ +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 / b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu16 + #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 / b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu32 + #define _mm256_div_epu32(a, b) simde_mm256_div_epu32(a, b) + #undef _mm256_udiv_epi32 + #define _mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 / b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu64 + #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 / b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + 
for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] / b_.i8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi8 + #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 / b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] / b_.i16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi16 + #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 / b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] / b_.i32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi32 + #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_epi32 + #define _mm512_mask_div_epi32(src, k, a, b) simde_mm512_mask_div_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 / b_.i64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] / b_.i64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi64 + #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 / b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu8 + #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 / b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu16 + #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 / b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu32 + #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_epu32 + #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu64 
(simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 / b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu64 + #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erf_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erf_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erff4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erff(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erf_ps + #define _mm_erf_ps(a) simde_mm_erf_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erf_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erf_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erfd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erf(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erf_pd + #define _mm_erf_pd(a) simde_mm_erf_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erf_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erf_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erff8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_erf_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erff(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erf_ps + #define _mm256_erf_ps(a) simde_mm256_erf_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erf_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erf_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erfd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = 
simde_mm_erf_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erf(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erf_pd + #define _mm256_erf_pd(a) simde_mm256_erf_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erf_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erf_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erff16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erff(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erf_ps + #define _mm512_erf_ps(a) simde_mm512_erf_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erf_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erf_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erfd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erf(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erf_pd + #define _mm512_erf_pd(a) simde_mm512_erf_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erf_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erf_ps + #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erf_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erf_pd + #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erfc_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erfcf4_u15(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcf(a_.f32[i]); + } + 
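+    /* erfc(x) is evaluated directly (via Sleef's erfc, whose _u15 suffix
+     * denotes a 1.5 ULP error bound, or the scalar simde_math_erfcf loop
+     * above) rather than as 1 - erf(x), which would lose most significant
+     * digits to cancellation for moderately large positive x. */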
+ return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfc_ps + #define _mm_erfc_ps(a) simde_mm_erfc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erfc_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erfcd2_u15(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfc(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfc_pd + #define _mm_erfc_pd(a) simde_mm_erfc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erfc_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erfcf8_u15(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfc_ps + #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erfc_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erfcd4_u15(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfc(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfc_pd + #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erfc_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erfcf16_u15(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_erfc_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfc_ps + #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erfc_pd (simde__m512d a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erfcd8_u15(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfc(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfc_pd + #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfc_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfc_ps + #define _mm512_mask_erfc_ps(src, k, a) simde_mm512_mask_erfc_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfc_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfc_pd + #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_exp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp_ps + #define _mm_exp_ps(a) simde_mm_exp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_exp_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp_pd + #define _mm_exp_pd(a) simde_mm_exp_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_exp_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + 
r_.m128[i] = simde_mm_exp_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp_ps + #define _mm256_exp_ps(a) simde_mm256_exp_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_exp_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp_pd + #define _mm256_exp_pd(a) simde_mm256_exp_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_exp_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp_ps + #define _mm512_exp_ps(a) simde_mm512_exp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_exp_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp_pd + #define _mm512_exp_pd(a) simde_mm512_exp_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp_ps + #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_exp_pd(simde__m512d src, 
simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp_pd + #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_expm1_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_expm1_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expm1f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expm1f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_expm1_ps + #define _mm_expm1_ps(a) simde_mm_expm1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_expm1_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_expm1_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expm1d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_expm1(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_expm1_pd + #define _mm_expm1_pd(a) simde_mm_expm1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_expm1_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_expm1_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expm1f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expm1f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_expm1_ps + #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_expm1_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_expm1_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expm1d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_expm1(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_expm1_pd + #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_expm1_ps 
(simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_expm1_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expm1f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expm1f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_expm1_ps + #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_expm1_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_expm1_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expm1d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_expm1(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_expm1_pd + #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_expm1_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_expm1_ps + #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_expm1_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_expm1_pd + #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_exp2_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp2f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp2f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp2_ps + #define _mm_exp2_ps(a) simde_mm_exp2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_exp2_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp2_pd(a); + #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp2d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp2(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp2_pd + #define _mm_exp2_pd(a) simde_mm_exp2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_exp2_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp2f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp2f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp2_ps + #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_exp2_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp2d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp2(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp2_pd + #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_exp2_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp2f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp2f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp2_ps + #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_exp2_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp2d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / 
sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp2(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp2_pd + #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp2_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp2_ps + #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp2_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp2_pd + #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_exp10_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp10f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp10f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp10_ps + #define _mm_exp10_ps(a) simde_mm_exp10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_exp10_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp10d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp10(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp10_pd + #define _mm_exp10_pd(a) simde_mm_exp10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_exp10_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp10f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp10f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp10_ps + 
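+  /* With SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES the original SVML name is
+   * first #undef'd and then redefined to the simde_* implementation, so
+   * code written against Intel's _mm*_exp10_ps spelling keeps compiling on
+   * targets without SVML. */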
#define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_exp10_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp10d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp10(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp10_pd + #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_exp10_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp10f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp10f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp10_ps + #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_exp10_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp10d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp10(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp10_pd + #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp10_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp10_ps + #define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp10_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a)); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp10_pd + #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cdfnorm_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorm_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); + const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); + const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741)); + const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); + const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429)); + const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911)); + const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); + + /* simde_math_fabsf(x) / sqrtf(2.0) */ + const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m128 y = simde_mm_mul_ps(a5, t); + y = simde_mm_add_ps(y, a4); + y = simde_mm_mul_ps(y, t); + y = simde_mm_add_ps(y, a3); + y = simde_mm_mul_ps(y, t); + y = simde_mm_add_ps(y, a2); + y = simde_mm_mul_ps(y, t); + y = simde_mm_add_ps(y, a1); + y = simde_mm_mul_ps(y, t); + y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x)))); + y = simde_mm_sub_ps(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ + return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorm_ps + #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cdfnorm_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorm_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); + const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); + const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741)); + const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); + const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429)); + const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.6475911)); + const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); + + /* simde_math_fabs(x) / sqrt(2.0) */ + const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m128d y = simde_mm_mul_pd(a5, t); + y = simde_mm_add_pd(y, a4); + y = simde_mm_mul_pd(y, t); + y = simde_mm_add_pd(y, a3); + y = 
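+    /* Horner evaluation of the Abramowitz & Stegun 7.1.26 polynomial,
+     * ((((a5*t + a4)*t + a3)*t + a2)*t + a1)*t; the underlying erf
+     * approximation has an absolute error of roughly 1.5e-7, so this
+     * fallback is an approximation of Phi rather than a correctly
+     * rounded result. */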
simde_mm_mul_pd(y, t); + y = simde_mm_add_pd(y, a2); + y = simde_mm_mul_pd(y, t); + y = simde_mm_add_pd(y, a1); + y = simde_mm_mul_pd(y, t); + y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x)))); + y = simde_mm_sub_pd(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? -y : y)) */ + return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorm_pd + #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cdfnorm_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorm_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); + const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); + const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741)); + const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); + const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429)); + const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911)); + const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); + + /* simde_math_fabsf(x) / sqrtf(2.0) */ + const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m256 y = simde_mm256_mul_ps(a5, t); + y = simde_mm256_add_ps(y, a4); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_add_ps(y, a3); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_add_ps(y, a2); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_add_ps(y, a1); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x)))); + y = simde_mm256_sub_ps(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a))); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorm_ps + #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cdfnorm_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorm_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); + const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); + const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741)); + const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); + const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429)); + const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.6475911)); + const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); + + /* simde_math_fabs(x) / sqrt(2.0) */ + const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m256d y = simde_mm256_mul_pd(a5, t); + y = simde_mm256_add_pd(y, a4); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_add_pd(y, a3); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_add_pd(y, a2); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_add_pd(y, a1); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x)))); + y = simde_mm256_sub_pd(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorm_pd + #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cdfnorm_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorm_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); + const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); + const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741)); + const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); + const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429)); + const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911)); + const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); + + /* simde_math_fabsf(x) / sqrtf(2.0) */ + const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m512 y = simde_mm512_mul_ps(a5, t); + y = simde_mm512_add_ps(y, a4); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_add_ps(y, a3); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_add_ps(y, a2); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_add_ps(y, a1); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x)))); + y = simde_mm512_sub_ps(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a))); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorm_ps + #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cdfnorm_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorm_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); + const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); + const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741)); + const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); + const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429)); + const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.6475911)); + const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); + + /* simde_math_fabs(x) / sqrt(2.0) */ + const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m512d y = simde_mm512_mul_pd(a5, t); + y = simde_mm512_add_pd(y, a4); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_add_pd(y, a3); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_add_pd(y, a2); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_add_pd(y, a1); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x)))); + y = simde_mm512_sub_pd(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a))); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorm_pd + #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorm_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorm_ps + #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorm_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorm_pd + #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b); + #else + simde__m128i r; + + r = simde_mm_div_epi32(a, b); + *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b)); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_idivrem_epi32 + #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b); + #else + simde__m256i r; + + r = simde_mm256_div_epi32(a, b); + *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b)); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_idivrem_epi32 + #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hypot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_hypot_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotf4_u05(a, b); + #else + return Sleef_hypotf4_u35(a, b); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; 
i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_hypot_ps + #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hypot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_hypot_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotd2_u05(a, b); + #else + return Sleef_hypotd2_u35(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_hypot_pd + #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hypot_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotf8_u05(a, b); + #else + return Sleef_hypotf8_u35(a, b); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_hypot_ps + #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hypot_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotd4_u05(a, b); + #else + return Sleef_hypotd4_u35(a, b); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_hypot_pd + #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_hypot_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotf16_u05(a, b); + #else + return 
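+      /* SLEEF's suffix encodes the guaranteed error bound in ULPs
+       * (u05 ~ 0.5 ULP, u35 ~ 3.5 ULP); SIMDE_ACCURACY_PREFERENCE > 1
+       * selects the tighter but slower u05 variant above. */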
Sleef_hypotf16_u35(a, b); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_hypot_ps + #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_hypot_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotd8_u05(a, b); + #else + return Sleef_hypotd8_u35(a, b); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_hypot_pd + #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_hypot_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_hypot_ps + #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_hypot_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_hypot_pd + #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_invcbrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invcbrt_ps(a); + #else + return simde_mm_rcp_ps(simde_mm_cbrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invcbrt_ps + #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_invcbrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invcbrt_pd(a); + #else + return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invcbrt_pd + #define _mm_invcbrt_pd(a) 
simde_mm_invcbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_invcbrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invcbrt_ps(a); + #else + return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invcbrt_ps + #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_invcbrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invcbrt_pd(a); + #else + return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invcbrt_pd + #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_invsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invsqrt_ps(a); + #else + return simde_mm_rcp_ps(simde_mm_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invsqrt_ps + #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_invsqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invsqrt_pd(a); + #else + return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invsqrt_pd + #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_invsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invsqrt_ps(a); + #else + return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invsqrt_ps + #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_invsqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invsqrt_pd(a); + #else + return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invsqrt_pd + #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_invsqrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_invsqrt_ps(a); + #else + return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_invsqrt_ps + #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_invsqrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_invsqrt_pd(a); + #else + return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_invsqrt_pd + #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_invsqrt_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_invsqrt_ps + #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_invsqrt_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_invsqrt_pd + #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logf4_u10(a); + #else + return Sleef_logf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log_ps + #define _mm_log_ps(a) simde_mm_log_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logd2_u10(a); + #else + return Sleef_logd2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log_pd + #define _mm_log_pd(a) simde_mm_log_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logf8_u10(a); + #else + return Sleef_logf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log_ps + #define _mm256_log_ps(a) simde_mm256_log_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + 
#if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logd4_u10(a); + #else + return Sleef_logd4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log_pd + #define _mm256_log_pd(a) simde_mm256_log_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logf16_u10(a); + #else + return Sleef_logf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log_ps + #define _mm512_log_ps(a) simde_mm512_log_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logd8_u10(a); + #else + return Sleef_logd8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log_pd + #define _mm512_log_pd(a) simde_mm512_log_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log_ps + #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log_pd + #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cdfnorminv_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorminv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128 matched, retval = simde_mm_setzero_ps(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)))); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. */ + } + + { /* else if (a == 0) */ + simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); + mask = simde_mm_andnot_ps(matched, mask); + matched = simde_mm_or_ps(matched, mask); + + simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + { /* else if (a == 1) */ + simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); + mask = simde_mm_andnot_ps(matched, mask); + matched = simde_mm_or_ps(matched, mask); + + simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm_blendv_ps which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425))); + /* else if (a > 0.97575) */ + simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575))); + + simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi); + matched = simde_mm_or_ps(matched, mask); + + /* else */ + simde__m128 mask_el = simde_x_mm_not_ps(matched); + mask = simde_mm_or_ps(mask, mask_el); + + /* r = a - 0.5f */ + simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m128 q = simde_mm_and_ps(mask_lo, a); + q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a))); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm_log_ps(q); + q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0))); + q = simde_mm_sqrt_ps(q); + + /* el: q = r * r */ + q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), 
simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); + { + simde__m128 multiplier; + multiplier = simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0))); + multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0)))); + multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r)); + numerator = simde_mm_mul_ps(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), + simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); + + /* res = numerator / denominator; */ + simde__m128 res = simde_mm_div_ps(numerator, denominator); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + return retval; + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorminv_ps + #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cdfnorminv_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorminv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128d matched, retval = simde_mm_setzero_pd(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)))); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. 
*/ + } + + { /* else if (a == 0) */ + simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); + mask = simde_mm_andnot_pd(matched, mask); + matched = simde_mm_or_pd(matched, mask); + + simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + { /* else if (a == 1) */ + simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); + mask = simde_mm_andnot_pd(matched, mask); + matched = simde_mm_or_pd(matched, mask); + + simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm_blendv_pd which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425))); + /* else if (a > 0.97575) */ + simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575))); + + simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi); + matched = simde_mm_or_pd(matched, mask); + + /* else */ + simde__m128d mask_el = simde_x_mm_not_pd(matched); + mask = simde_mm_or_pd(mask, mask_el); + + /* r = a - 0.5 */ + simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m128d q = simde_mm_and_pd(mask_lo, a); + q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a))); + + /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ + q = simde_mm_log_pd(q); + q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0))); + q = simde_mm_sqrt_pd(q); + + /* el: q = r * r */ + q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el); + + /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ + /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ + /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 
2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); + { + simde__m128d multiplier; + multiplier = simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0))); + multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0)))); + multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r)); + numerator = simde_mm_mul_pd(numerator, multiplier); + } + + /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), + simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); + + /* res = numerator / denominator; */ + simde__m128d res = simde_mm_div_pd(numerator, denominator); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + return retval; + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorminv_pd + #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cdfnorminv_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorminv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256 matched, retval = simde_mm256_setzero_ps(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ)); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. 
*/ + } + + { /* else if (a == 0) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + matched = simde_mm256_or_ps(matched, mask); + + simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + { /* else if (a == 1) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + matched = simde_mm256_or_ps(matched, mask); + + simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_ps which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi); + matched = simde_mm256_or_ps(matched, mask); + + /* else */ + simde__m256 mask_el = simde_x_mm256_not_ps(matched); + mask = simde_mm256_or_ps(mask, mask_el); + + /* r = a - 0.5f */ + simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m256 q = simde_mm256_and_ps(mask_lo, a); + q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a))); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm256_log_ps(q); + q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0))); + q = simde_mm256_sqrt_ps(q); + + /* el: q = r * r */ + q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, 
simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); + { + simde__m256 multiplier; + multiplier = simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0))); + multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0)))); + multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r)); + numerator = simde_mm256_mul_ps(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), + simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0))); + + /* res = numerator / denominator; */ + simde__m256 res = simde_mm256_div_ps(numerator, denominator); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + return retval; + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorminv_ps + #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cdfnorminv_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorminv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256d matched, retval = simde_mm256_setzero_pd(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ)); + + /* We don't actually need to do anything here since we 
initialize + * retval to 0.0. */ + } + + { /* else if (a == 0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + matched = simde_mm256_or_pd(matched, mask); + + simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + { /* else if (a == 1) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + matched = simde_mm256_or_pd(matched, mask); + + simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_pd which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi); + matched = simde_mm256_or_pd(matched, mask); + + /* else */ + simde__m256d mask_el = simde_x_mm256_not_pd(matched); + mask = simde_mm256_or_pd(mask, mask_el); + + /* r = a - 0.5 */ + simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m256d q = simde_mm256_and_pd(mask_lo, a); + q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a))); + + /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ + q = simde_mm256_log_pd(q); + q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0))); + q = simde_mm256_sqrt_pd(q); + + /* el: q = r * r */ + q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el); + + /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ + /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ + /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, 
simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); + { + simde__m256d multiplier; + multiplier = simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0))); + multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0)))); + multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r)); + numerator = simde_mm256_mul_pd(numerator, multiplier); + } + + /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), + simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0))); + + /* res = numerator / denominator; */ + simde__m256d res = simde_mm256_div_pd(numerator, denominator); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + return retval; + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorminv_pd + #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cdfnorminv_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorminv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]); + } + + return simde__m512_from_private(r_); + #else + + simde__m512 retval = simde_mm512_setzero_ps(); + simde__mmask16 matched; + + { /* if (a < 0 
|| a > 1) */ + matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); + matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. */ + } + + { /* else if (a == 0) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)); + } + + { /* else if (a == 1) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF)); + } + + { /* else if (a < 0.02425) */ + simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__mmask16 mask = mask_lo | mask_hi; + matched = matched | mask; + + /* else */ + simde__mmask16 mask_el = ~matched; + + /* r = a - 0.5f */ + simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a); + q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm512_log_ps(q); + q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0))); + q = simde_mm512_sqrt_ps(q); + + /* el: q = r * r */ + q = simde_mm512_mask_mul_ps(q, mask_el, r, r); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)))); + { + simde__m512 multiplier; + multiplier = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)); + multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, 
simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0))); + multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r); + numerator = simde_mm512_mul_ps(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)))); + denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q), + simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0))); + + /* res = numerator / denominator; */ + retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorminv_ps + #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cdfnorminv_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorminv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]); + } + + return simde__m512d_from_private(r_); + #else + + simde__m512d retval = simde_mm512_setzero_pd(); + simde__mmask8 matched; + + { /* if (a < 0 || a > 1) */ + matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); + matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. 
*/ + } + + { /* else if (a == 0) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)); + } + + { /* else if (a == 1) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY)); + } + + { /* else if (a < 0.02425) */ + simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__mmask8 mask = mask_lo | mask_hi; + matched = matched | mask; + + /* else */ + simde__mmask8 mask_el = ~matched; + + /* r = a - 0.5f */ + simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m512d q = a; + q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm512_log_pd(q); + q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0))); + q = simde_mm512_sqrt_pd(q); + + /* el: q = r * r */ + q = simde_mm512_mask_mul_pd(q, mask_el, r, r); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)))); + { + simde__m512d multiplier; + multiplier = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)); + multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0))); + multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r); + numerator = simde_mm512_mul_pd(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + 
c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)))); + denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q), + simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0))); + + /* res = numerator / denominator; */ + retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorminv_pd + #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorminv_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorminv_ps + #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorminv_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorminv_pd + #define _mm512_mask_cdfnorminv_pd(src, k, a) simde_mm512_mask_cdfnorminv_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erfinv_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfinv_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */ + simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); + + simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a))); + + simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147))); + tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); + tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); + + simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); + tt2 = simde_mm_mul_ps(tt2, lnx); + + simde__m128 r = simde_mm_mul_ps(tt1, 
tt1); + r = simde_mm_sub_ps(r, tt2); + r = simde_mm_sqrt_ps(r); + r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r); + r = simde_mm_sqrt_ps(r); + + return simde_x_mm_xorsign_ps(r, a); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfinvf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfinv_ps + #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erfinv_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfinv_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); + + simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a))); + + simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147))); + tt1 = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); + tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); + + simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); + tt2 = simde_mm_mul_pd(tt2, lnx); + + simde__m128d r = simde_mm_mul_pd(tt1, tt1); + r = simde_mm_sub_pd(r, tt2); + r = simde_mm_sqrt_pd(r); + r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r); + r = simde_mm_sqrt_pd(r); + + return simde_x_mm_xorsign_pd(r, a); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfinv(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfinv_pd + #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erfinv_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfinv_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); + simde__m256 sgn = simde_x_mm256_copysign_ps(one, a); + + a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a)); + simde__m256 lnx = simde_mm256_log_ps(a); + + simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147))); + tt1 = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); + tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); + + simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); + tt2 = simde_mm256_mul_ps(tt2, lnx); + + simde__m256 r = simde_mm256_mul_ps(tt1, tt1); + r = simde_mm256_sub_ps(r, tt2); + r = simde_mm256_sqrt_ps(r); + r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r); + r = simde_mm256_sqrt_ps(r); + + return simde_mm256_mul_ps(sgn, r); + #else + simde__m256_private + a_ = simde__m256_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfinvf(a_.f32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfinv_ps + #define _mm256_erfinv_ps(a) 
simde_mm256_erfinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erfinv_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfinv_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); + simde__m256d sgn = simde_x_mm256_copysign_pd(one, a); + + a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a)); + simde__m256d lnx = simde_mm256_log_pd(a); + + simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147))); + tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); + tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); + + simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); + tt2 = simde_mm256_mul_pd(tt2, lnx); + + simde__m256d r = simde_mm256_mul_pd(tt1, tt1); + r = simde_mm256_sub_pd(r, tt2); + r = simde_mm256_sqrt_pd(r); + r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r); + r = simde_mm256_sqrt_pd(r); + + return simde_mm256_mul_pd(sgn, r); + #else + simde__m256d_private + a_ = simde__m256d_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfinv(a_.f64[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfinv_pd + #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erfinv_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfinv_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); + simde__m512 sgn = simde_x_mm512_copysign_ps(one, a); + + a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a)); + simde__m512 lnx = simde_mm512_log_ps(a); + + simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147))); + tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); + tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); + + simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); + tt2 = simde_mm512_mul_ps(tt2, lnx); + + simde__m512 r = simde_mm512_mul_ps(tt1, tt1); + r = simde_mm512_sub_ps(r, tt2); + r = simde_mm512_sqrt_ps(r); + r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r); + r = simde_mm512_sqrt_ps(r); + + return simde_mm512_mul_ps(sgn, r); + #else + simde__m512_private + a_ = simde__m512_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfinvf(a_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfinv_ps + #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erfinv_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfinv_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); + simde__m512d sgn = simde_x_mm512_copysign_pd(one, a); + + a = 
simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a)); + simde__m512d lnx = simde_mm512_log_pd(a); + + simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147))); + tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); + tt1 = simde_mm512_add_pd(tt1, simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); + + simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); + tt2 = simde_mm512_mul_pd(tt2, lnx); + + simde__m512d r = simde_mm512_mul_pd(tt1, tt1); + r = simde_mm512_sub_pd(r, tt2); + r = simde_mm512_sqrt_pd(r); + r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r); + r = simde_mm512_sqrt_pd(r); + + return simde_mm512_mul_pd(sgn, r); + #else + simde__m512d_private + a_ = simde__m512d_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfinv(a_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfinv_pd + #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfinv_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfinv_ps + #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfinv_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfinv_pd + #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erfcinv_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfcinv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128 matched, retval = simde_mm_setzero_ps(); + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); + matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)))); + + if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) { + retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); + } + + if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))); + mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)))); + mask = simde_mm_andnot_ps(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); + t = simde_mm_sqrt_ps(t); + t = 
simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m128 p[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), + simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), + simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910)) + }; + + const simde__m128 q[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]); + numerator = simde_mm_fmadd_ps(numerator, t, p[3]); + numerator = simde_mm_fmadd_ps(numerator, t, p[2]); + numerator = simde_mm_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm_fmadd_ps(denominator, t, q[0]); + + simde__m128 res = simde_mm_div_ps(numerator, denominator); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); + mask = simde_mm_andnot_ps(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); + t = simde_mm_sqrt_ps(t); + t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m128 p[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) + }; + + const simde__m128 q[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]); + numerator = simde_mm_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm_fmadd_ps(denominator, t, q[0]); + + simde__m128 res = simde_mm_div_ps(numerator, denominator); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + + if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); + mask = simde_mm_andnot_ps(matched, mask); + matched = simde_mm_or_ps(matched, mask); + + simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF))); + } + + return retval; + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcinvf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfcinv_ps + #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erfcinv_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfcinv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128d matched, retval = simde_mm_setzero_pd(); + + { /* if (a < 2.0 && a > 0.0625) */ + matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))); + matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)))); + + if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) { + retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); + } + + if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625 && a > 0.0) */ + simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))); + mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)))); + mask = simde_mm_andnot_pd(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); + t = simde_mm_sqrt_pd(t); + t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m128d p[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), + simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), + simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910)) + }; + + const simde__m128d q[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]); + numerator = simde_mm_fmadd_pd(numerator, t, p[3]); + numerator = simde_mm_fmadd_pd(numerator, t, p[2]); + numerator = simde_mm_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm_fmadd_pd(denominator, t, q[0]); + + simde__m128d res = simde_mm_div_pd(numerator, denominator); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + } + + { /* else if (a < 0.0) */ + simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); + mask = simde_mm_andnot_pd(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); + t = simde_mm_sqrt_pd(t); + t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m128d p[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 
0.97302949837000)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000)) + }; + + const simde__m128d q[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m128d numerator = simde_mm_fmadd_pd(p[3], t, p[2]); + numerator = simde_mm_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm_fmadd_pd(denominator, t, q[0]); + + simde__m128d res = simde_mm_div_pd(numerator, denominator); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + + if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0) */ + simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); + mask = simde_mm_andnot_pd(matched, mask); + matched = simde_mm_or_pd(matched, mask); + + simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + { /* else */ + /* (a >= 2.0) */ + retval = simde_mm_or_pd(retval, simde_mm_andnot_pd(matched, simde_mm_set1_pd(-SIMDE_MATH_INFINITY))); + } + + return retval; + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfcinv(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfcinv_pd + #define _mm_erfcinv_pd(a) simde_mm_erfcinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erfcinv_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfcinv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256 matched, retval = simde_mm256_setzero_ps(); + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ); + matched = simde_mm256_and_ps(matched, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ)); + + if (!simde_mm256_testz_ps(matched, matched)) { + retval = simde_mm256_erfinv_ps(simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); + } + + if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_and_ps(mask, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ)); + mask = simde_mm256_andnot_ps(matched, mask); + + if (!simde_mm256_testz_ps(mask, mask)) { + matched = simde_mm256_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a)); + t = simde_mm256_sqrt_ps(t); + t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m256 p[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 
0.680544246825)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.16444156791)) + }; + + const simde__m256 q[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m256 numerator = simde_mm256_fmadd_ps(p[5], t, p[4]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[3]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[2]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm256_fmadd_ps(denominator, t, q[0]); + + simde__m256 res = simde_mm256_div_ps(numerator, denominator); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + + if (!simde_mm256_testz_ps(mask, mask)) { + matched = simde_mm256_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a)); + t = simde_mm256_sqrt_ps(t); + t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m256 p[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000)) + }; + + const simde__m256 q[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m256 numerator = simde_mm256_fmadd_ps(p[3], t, p[2]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm256_fmadd_ps(denominator, t, q[0]); + + simde__m256 res = simde_mm256_div_ps(numerator, denominator); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + + if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + matched = simde_mm256_or_ps(matched, mask); + + simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm256_or_ps(retval, simde_mm256_andnot_ps(matched, simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF))); + } + + return retval; + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_erfcinv_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 
simde_math_erfcinvf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfcinv_ps + #define _mm256_erfcinv_ps(a) simde_mm256_erfcinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erfcinv_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfcinv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256d matched, retval = simde_mm256_setzero_pd(); + + { /* if (a < 2.0 && a > 0.0625) */ + matched = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ); + matched = simde_mm256_and_pd(matched, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ)); + + if (!simde_mm256_testz_pd(matched, matched)) { + retval = simde_mm256_erfinv_pd(simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); + } + + if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625 && a > 0.0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_and_pd(mask, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ)); + mask = simde_mm256_andnot_pd(matched, mask); + + if (!simde_mm256_testz_pd(mask, mask)) { + matched = simde_mm256_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a)); + t = simde_mm256_sqrt_pd(t); + t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m256d p[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.16444156791)) + }; + + const simde__m256d q[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m256d numerator = simde_mm256_fmadd_pd(p[5], t, p[4]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[3]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[2]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm256_fmadd_pd(denominator, t, q[0]); + + simde__m256d res = simde_mm256_div_pd(numerator, denominator); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + } + + { /* else if (a < 0.0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + + if (!simde_mm256_testz_pd(mask, mask)) { + matched = simde_mm256_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a)); + t = simde_mm256_sqrt_pd(t); + t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m256d p[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), + 
simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000)) + }; + + const simde__m256d q[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m256d numerator = simde_mm256_fmadd_pd(p[3], t, p[2]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm256_fmadd_pd(denominator, t, q[0]); + + simde__m256d res = simde_mm256_div_pd(numerator, denominator); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + + if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + matched = simde_mm256_or_pd(matched, mask); + + simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + { /* else */ + /* (a >= 2.0) */ + retval = simde_mm256_or_pd(retval, simde_mm256_andnot_pd(matched, simde_mm256_set1_pd(-SIMDE_MATH_INFINITY))); + } + + return retval; + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_erfcinv_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfcinv(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfcinv_pd + #define _mm256_erfcinv_pd(a) simde_mm256_erfcinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erfcinv_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfcinv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && (!defined(SIMDE_ARCH_ARM) || defined(SIMDE_ARCH_AARCH64)) + /* The results on Arm are *slightly* off, which causes problems for + * the edge cases; for example, if you pass 2.0 sqrt will be called + * with a value of -0.0 instead of 0.0, resulting in a NaN. 
*/ + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_erfcinv_ps(a_.m256[i]); + } + return simde__m512_from_private(r_); + #else + simde__m512 retval = simde_mm512_setzero_ps(); + simde__mmask16 matched; + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ); + matched &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ); + + if (matched != 0) { + retval = simde_mm512_erfinv_ps(simde_mm512_sub_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); + } + + if (matched == 1) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ); + mask &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a)); + t = simde_mm512_sqrt_ps(t); + t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m512 p[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.16444156791)) + }; + + const simde__m512 q[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m512 numerator = simde_mm512_fmadd_ps(p[5], t, p[4]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[3]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[2]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm512_fmadd_ps(denominator, t, q[0]); + + simde__m512 res = simde_mm512_div_ps(numerator, denominator); + + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a)); + t = simde_mm512_sqrt_ps(t); + t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m512 p[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) + }; + + const simde__m512 q[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * 
p[3])) */ + simde__m512 numerator = simde_mm512_fmadd_ps(p[3], t, p[2]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm512_fmadd_ps(denominator, t, q[0]); + + simde__m512 res = simde_mm512_div_ps(numerator, denominator); + + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); + + if (matched == 1) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = ~matched & mask; + matched = matched | mask; + + simde__m512 res = simde_mm512_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(~matched, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF))); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfcinv_ps + #define _mm512_erfcinv_ps(a) simde_mm512_erfcinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erfcinv_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfcinv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_erfcinv_pd(a_.m256d[i]); + } + return simde__m512d_from_private(r_); + #else + simde__m512d retval = simde_mm512_setzero_pd(); + simde__mmask8 matched; + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ); + matched &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ); + + if (matched != 0) { + retval = simde_mm512_erfinv_pd(simde_mm512_sub_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); + } + + if (matched == 1) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ); + mask &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a)); + t = simde_mm512_sqrt_pd(t); + t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m512d p[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.16444156791)) + }; + + const simde__m512d q[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m512d numerator = simde_mm512_fmadd_pd(p[5], t, p[4]); + numerator = 
simde_mm512_fmadd_pd(numerator, t, p[3]); + numerator = simde_mm512_fmadd_pd(numerator, t, p[2]); + numerator = simde_mm512_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm512_fmadd_pd(denominator, t, q[0]); + + simde__m512d res = simde_mm512_div_pd(numerator, denominator); + + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a)); + t = simde_mm512_sqrt_pd(t); + t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m512d p[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000)) + }; + + const simde__m512d q[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m512d numerator = simde_mm512_fmadd_pd(p[3], t, p[2]); + numerator = simde_mm512_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm512_fmadd_pd(denominator, t, q[0]); + + simde__m512d res = simde_mm512_div_pd(numerator, denominator); + + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); + + if (matched == 1) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = ~matched & mask; + matched = matched | mask; + + simde__m512d res = simde_mm512_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(~matched, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY))); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfcinv_pd + #define _mm512_erfcinv_pd(a) simde_mm512_erfcinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erfcinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfcinv_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfcinv_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfcinv_ps + #define _mm512_mask_erfcinv_ps(src, k, a) simde_mm512_mask_erfcinv_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erfcinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfcinv_pd(src, k, a); + #else + 
return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfcinv_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfcinv_pd + #define _mm512_mask_erfcinv_pd(src, k, a) simde_mm512_mask_erfcinv_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_logb_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_logb_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logbf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_logb_ps + #define _mm_logb_ps(a) simde_mm_logb_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_logb_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_logb_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_logb(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_logb_pd + #define _mm_logb_pd(a) simde_mm_logb_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_logb_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_logb_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_logb_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logbf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_logb_ps + #define _mm256_logb_ps(a) simde_mm256_logb_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_logb_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_logb_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_logb_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_logb(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_logb_pd + #define _mm256_logb_pd(a) simde_mm256_logb_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_logb_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_logb_ps(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_logb_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logbf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_logb_ps + #define _mm512_logb_ps(a) simde_mm512_logb_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_logb_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_logb_pd(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_logb_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_logb(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_logb_pd + #define _mm512_logb_pd(a) simde_mm512_logb_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_logb_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_logb_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_logb_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_logb_ps + #define _mm512_mask_logb_ps(src, k, a) simde_mm512_mask_logb_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_logb_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_logb_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_logb_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_logb_pd + #define _mm512_mask_logb_pd(src, k, a) simde_mm512_mask_logb_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log2_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2f4_u35(a); + #else + return Sleef_log2f4_u10(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log2f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log2_ps + #define _mm_log2_ps(a) simde_mm_log2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log2_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2d2_u35(a); + #else + return Sleef_log2d2_u10(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log2(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log2_pd + #define _mm_log2_pd(a) simde_mm_log2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log2_ps (simde__m256 a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2f8_u35(a); + #else + return Sleef_log2f8_u10(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log2_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log2f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log2_ps + #define _mm256_log2_ps(a) simde_mm256_log2_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log2_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2d4_u35(a); + #else + return Sleef_log2d4_u10(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log2_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log2(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log2_pd + #define _mm256_log2_pd(a) simde_mm256_log2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log2_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2f16_u35(a); + #else + return Sleef_log2f16_u10(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log2_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log2f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log2_ps + #define _mm512_log2_ps(a) simde_mm512_log2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log2_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2d8_u35(a); + #else + return Sleef_log2d8_u10(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log2_pd(a_.m256d[i]); 
+ } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log2(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log2_pd + #define _mm512_log2_pd(a) simde_mm512_log2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log2_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log2_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log2_ps + #define _mm512_mask_log2_ps(src, k, a) simde_mm512_mask_log2_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log2_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log2_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log2_pd + #define _mm512_mask_log2_pd(src, k, a) simde_mm512_mask_log2_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log1p_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log1p_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log1pf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log1pf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log1p_ps + #define _mm_log1p_ps(a) simde_mm_log1p_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log1p_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log1p_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log1pd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log1p(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log1p_pd + #define _mm_log1p_pd(a) simde_mm_log1p_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log1p_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log1p_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log1pf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log1p_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log1pf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log1p_ps + #define _mm256_log1p_ps(a) simde_mm256_log1p_ps(a) +#endif + + 
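[Editorial note, not part of the imported header or this patch hunk.] Every SVML-style wrapper in this imported file follows the same shape: return the native _mm* SVML intrinsic when SIMDE_X86_SVML_NATIVE is defined, otherwise use a Sleef vector routine when enabled, otherwise fall back to a scalar libm loop over the lanes. A minimal caller-side sketch of how these wrappers are meant to be used is below; it is illustrative only, and it assumes the amalgamated header is reachable as "simde/x86/svml.h" (the path this patch adds) — the file name and printed value are not taken from the patch itself.

    #include <stdio.h>
    #include "simde/x86/svml.h"

    int main(void) {
      /* Applies log1p lane-wise; resolves to _mm256_log1p_ps, Sleef_log1pf8_u10,
       * or the scalar simde_math_log1pf fallback, depending on the build. */
      simde__m256 x = simde_mm256_set1_ps(0.5f);
      simde__m256 y = simde_mm256_log1p_ps(x);
      float out[8];
      simde_mm256_storeu_ps(out, y);
      printf("log1p(0.5) per lane: %f\n", (double) out[0]); /* ~0.405465 */
      return 0;
    }
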
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log1p_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log1p_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log1pd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log1p_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log1p(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log1p_pd + #define _mm256_log1p_pd(a) simde_mm256_log1p_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log1p_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log1p_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log1pf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log1p_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log1pf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log1p_ps + #define _mm512_log1p_ps(a) simde_mm512_log1p_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log1p_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log1p_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log1pd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log1p_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log1p(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log1p_pd + #define _mm512_log1p_pd(a) simde_mm512_log1p_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log1p_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log1p_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log1p_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log1p_ps + #define _mm512_mask_log1p_ps(src, k, a) simde_mm512_mask_log1p_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log1p_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log1p_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log1p_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log1p_pd + #define 
_mm512_mask_log1p_pd(src, k, a) simde_mm512_mask_log1p_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log10_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log10f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log10f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log10_ps + #define _mm_log10_ps(a) simde_mm_log10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log10_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log10d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log10(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log10_pd + #define _mm_log10_pd(a) simde_mm_log10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log10_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log10f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log10_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log10f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log10_ps + #define _mm256_log10_ps(a) simde_mm256_log10_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log10_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log10d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log10_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log10(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log10_pd + #define _mm256_log10_pd(a) simde_mm256_log10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log10_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log10f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log10_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log10f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log10_ps + #define _mm512_log10_ps(a) simde_mm512_log10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log10_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log10d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log10_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log10(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log10_pd + #define _mm512_log10_pd(a) simde_mm512_log10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log10_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log10_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log10_ps + #define _mm512_mask_log10_ps(src, k, a) simde_mm512_mask_log10_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log10_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log10_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log10_pd + #define _mm512_mask_log10_pd(src, k, a) simde_mm512_mask_log10_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_nearbyint_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_nearbyint_ps(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_nearbyint_ps + #define _mm512_nearbyint_ps(a) simde_mm512_nearbyint_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_nearbyint_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_nearbyint_pd(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_nearbyint_pd + #define 
_mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_nearbyint_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_nearbyint_ps + #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_nearbyint_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_nearbyint_pd + #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_pow_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_pow_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_powf4_u10(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_pow_ps + #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_pow_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_pow_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_powd2_u10(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_pow_pd + #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_pow_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_pow_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_powf8_u10(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_pow_ps + #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_pow_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_pow_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_AVX_NATIVE) + return Sleef_powd4_u10(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_pow_pd + #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_pow_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_pow_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_powf16_u10(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_pow_ps + #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_pow_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_pow_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_powd8_u10(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_pow_pd + #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_pow_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_pow_ps + #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_pow_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_pow_pd + #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_clog_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_clog_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = 
simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1])); + r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_clog_ps + #define _mm_clog_ps(a) simde_mm_clog_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_clog_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_clog_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1])); + r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_clog_ps + #define _mm256_clog_ps(a) simde_mm256_clog_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_csqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_csqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + simde__m128 pow_res= simde_mm_pow_ps(a,simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); + simde__m128_private pow_res_=simde__m128_to_private(pow_res); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); + simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); + + r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_csqrt_ps + #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_csqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_csqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + simde__m256 pow_res= simde_mm256_pow_ps(a,simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))); + simde__m256_private pow_res_=simde__m256_to_private(pow_res); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); + simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); + + r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_csqrt_ps + #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i8 = a_.i8 % b_.i8; + 
#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi8 + #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i16 = a_.i16 % b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi16 + #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i32 = a_.i32 % b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi32 + #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b) + #undef _mm_irem_epi32 + #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi64 + #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u8 = a_.u8 % b_.u8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu8 + #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu16 (simde__m128i a, 
simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u16 = a_.u16 % b_.u16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu16 + #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u32 = a_.u32 % b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu32 + #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b) + #undef _mm_urem_epi32 + #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u64 = a_.u64 % b_.u64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu64 + #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i8 = a_.i8 % b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi8 + #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = 
simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i16 = a_.i16 % b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi16 + #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i32 = a_.i32 % b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi32 + #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b) + #undef _mm256_irem_epi32 + #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi64 + #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u8 = a_.u8 % b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif + #endif + + return 
simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu8 + #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u16 = a_.u16 % b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu16 + #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u32 = a_.u32 % b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu32 + #define _mm256_rem_epu32(a, b) simde_mm256_rem_epu32(a, b) + #undef _mm256_urem_epi32 + #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u64 = a_.u64 % b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu64 + #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); 
+ + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i8 = a_.i8 % b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi8 + #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i16 = a_.i16 % b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi16 + #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i32 = a_.i32 % b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi32 + #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rem_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rem_epi32 + #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else 
+ #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi64 + #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u8 = a_.u8 % b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu8 + #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u16 = a_.u16 % b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu16 + #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u32 = a_.u32 % b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu32 + #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rem_epu32(simde__m512i src, 
simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rem_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rem_epu32 + #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u64 = a_.u64 % b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu64 + #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_recip_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_recip_ps(a); + #else + return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_recip_ps + #define _mm512_recip_ps(a) simde_mm512_recip_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_recip_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_recip_pd(a); + #else + return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_recip_pd + #define _mm512_recip_pd(a) simde_mm512_recip_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_recip_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_recip_ps + #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_recip_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_recip_pd + #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_rint_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rint_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_rintf16(a); + #else + 
simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_rintf(a_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rint_ps + #define _mm512_rint_ps(a) simde_mm512_rint_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_rint_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rint_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_rintd8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_rint(a_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rint_pd + #define _mm512_rint_pd(a) simde_mm512_rint_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rint_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rint_ps + #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rint_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rint_pd + #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sin_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf4_u10(a); + #else + return Sleef_sinf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sin_ps + #define _mm_sin_ps(a) simde_mm_sin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sin_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind2_u10(a); + #else + return Sleef_sind2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sin_pd + #define _mm_sin_pd(a) simde_mm_sin_pd(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sin_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf8_u10(a); + #else + return Sleef_sinf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_sin_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sin_ps + #define _mm256_sin_ps(a) simde_mm256_sin_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sin_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind4_u10(a); + #else + return Sleef_sind4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sin_pd + #define _mm256_sin_pd(a) simde_mm256_sin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sin_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf16_u10(a); + #else + return Sleef_sinf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sin_ps + #define _mm512_sin_ps(a) simde_mm512_sin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sin_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind8_u10(a); + #else + return Sleef_sind8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = 
simde_math_sin(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sin_pd + #define _mm512_sin_pd(a) simde_mm512_sin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sin_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sin_ps + #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sin_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sin_pd + #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + Sleef___m128_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosf4_u10(a); + #else + temp = Sleef_sincosf4_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m128 r; + + r = simde_mm_sin_ps(a); + *mem_addr = simde_mm_cos_ps(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sincos_ps + #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + Sleef___m128d_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosd2_u10(a); + #else + temp = Sleef_sincosd2_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m128d r; + + r = simde_mm_sin_pd(a); + *mem_addr = simde_mm_cos_pd(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sincos_pd + #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + Sleef___m256_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosf8_u10(a); + #else + temp = Sleef_sincosf8_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m256 r; + + r = simde_mm256_sin_ps(a); + *mem_addr = simde_mm256_cos_ps(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sincos_ps + #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + Sleef___m256d_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosd4_u10(a); + #else + temp = Sleef_sincosd4_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m256d r; + + r = simde_mm256_sin_pd(a); + *mem_addr = simde_mm256_cos_pd(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sincos_pd + #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + Sleef___m512_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosf16_u10(a); + #else + temp = Sleef_sincosf16_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m512 r; + + r = simde_mm512_sin_ps(a); + *mem_addr = simde_mm512_cos_ps(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sincos_ps + #define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + Sleef___m512d_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosd8_u10(a); + #else + temp = Sleef_sincosd8_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m512d r; + + r = simde_mm512_sin_pd(a); + *mem_addr = simde_mm512_cos_pd(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sincos_pd + #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a); + #else + simde__m512 cos_res, sin_res; + sin_res = simde_mm512_sincos_ps(&cos_res, a); + *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res); + return simde_mm512_mask_mov_ps(sin_src, k, sin_res); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sincos_ps + #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a); + #else + simde__m512d cos_res, sin_res; + sin_res = simde_mm512_sincos_pd(&cos_res, a); + *mem_addr = 
simde_mm512_mask_mov_pd(cos_src, k, cos_res); + return simde_mm512_mask_mov_pd(sin_src, k, sin_res); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sincos_pd + #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sind_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sind_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a)); + #else + return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a)); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sind_ps + #define _mm_sind_ps(a) simde_mm_sind_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sind_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sind_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a)); + #else + return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a)); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sind_pd + #define _mm_sind_pd(a) simde_mm_sind_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sind_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sind_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a)); + #else + return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a)); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_sind_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sind_ps + #define _mm256_sind_ps(a) simde_mm256_sind_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sind_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sind_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a)); + #else + return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a)); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / 
sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sind_pd + #define _mm256_sind_pd(a) simde_mm256_sind_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sind_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sind_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a)); + #else + return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a)); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sind_ps + #define _mm512_sind_ps(a) simde_mm512_sind_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sind_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sind_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a)); + #else + return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a)); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sind_pd + #define _mm512_sind_pd(a) simde_mm512_sind_pd(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sind_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sind_ps + #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sind_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sind_pd + #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sinh_ps (simde__m128 a) { + 
#if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sinhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sinh_ps + #define _mm_sinh_ps(a) simde_mm_sinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sinh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sinhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sinh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sinh_pd + #define _mm_sinh_pd(a) simde_mm_sinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sinh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sinhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sinh_ps + #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sinh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sinhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sinh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sinh_pd + #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sinh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sinhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 
0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sinh_ps + #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sinh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sinhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sinh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sinh_pd + #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sinh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sinh_ps + #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sinh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sinh_pd + #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_ceil_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_ceil_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_ceilf4(a); + #else + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_ceil_ps + #define _mm_svml_ceil_ps(a) simde_mm_svml_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_ceil_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_ceil_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_ceild2(a); + #else + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_ceil_pd + #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_ceil_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_ceil_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_ceilf8(a); + #else + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_ceil_ps + #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_ceil_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_ceil_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_ceild4(a); + #else + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_ceil_pd + #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_ceil_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_ceil_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_ceilf16(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_ceil_ps + #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_ceil_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_ceil_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_ceild8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_ceil_pd + #define _mm512_ceil_pd(a) simde_mm512_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_ceil_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ceil_ps + #define _mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_ceil_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ceil_pd + #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_floor_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return 
_mm_svml_floor_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_floorf4(a); + #else + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_floor_ps + #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_floor_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_floor_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_floord2(a); + #else + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_floor_pd + #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_floor_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_floor_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_floorf8(a); + #else + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_floor_ps + #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_floor_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_floor_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_floord4(a); + #else + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_floor_pd + #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_floor_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_floor_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_floorf16(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_floor_ps + #define _mm512_floor_ps(a) simde_mm512_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_floor_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_floor_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_floord8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_floor_pd + #define _mm512_floor_pd(a) simde_mm512_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_floor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_floor_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_floor_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_floor_ps + #define _mm512_mask_floor_ps(src, k, a) simde_mm512_mask_floor_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_floor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_floor_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_floor_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_floor_pd + #define _mm512_mask_floor_pd(src, k, a) simde_mm512_mask_floor_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_round_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_round_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_roundf4(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_round_ps + #define _mm_svml_round_ps(a) simde_mm_svml_round_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_round_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_round_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_roundd2(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_round_pd + #define _mm_svml_round_pd(a) simde_mm_svml_round_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_round_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_round_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_roundf8(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_svml_round_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_round_ps + #define _mm256_svml_round_ps(a) simde_mm256_svml_round_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_round_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_round_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_roundd4(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_svml_round_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_round_pd + #define _mm256_svml_round_pd(a) simde_mm256_svml_round_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_svml_round_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_svml_round_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_roundd8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_svml_round_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_svml_round_pd + #define _mm512_svml_round_pd(a) simde_mm512_svml_round_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_svml_round_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_svml_round_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_svml_round_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_svml_round_pd + #define _mm512_mask_svml_round_pd(src, k, a) simde_mm512_mask_svml_round_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_sqrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sqrtf4(a); + #else + return simde_mm_sqrt_ps(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_sqrt_ps + #define _mm_svml_sqrt_ps(a) simde_mm_svml_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_sqrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sqrtd2(a); + #else + return simde_mm_sqrt_pd(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_sqrt_pd + #define _mm_svml_sqrt_pd(a) simde_mm_svml_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_sqrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sqrtf8(a); + #else + return simde_mm256_sqrt_ps(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + 
#undef _mm256_svml_sqrt_ps + #define _mm256_svml_sqrt_ps(a) simde_mm256_svml_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_sqrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sqrtd4(a); + #else + return simde_mm256_sqrt_pd(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_sqrt_pd + #define _mm256_svml_sqrt_pd(a) simde_mm256_svml_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_svml_sqrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_svml_sqrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sqrtf16(a); + #else + return simde_mm512_sqrt_ps(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_svml_sqrt_ps + #define _mm512_svml_sqrt_ps(a) simde_mm512_svml_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_svml_sqrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_svml_sqrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sqrtd8(a); + #else + return simde_mm512_sqrt_pd(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_svml_sqrt_pd + #define _mm512_svml_sqrt_pd(a) simde_mm512_svml_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_tan_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf4_u10(a); + #else + return Sleef_tanf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tan_ps + #define _mm_tan_ps(a) simde_mm_tan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_tan_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand2_u10(a); + #else + return Sleef_tand2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tan_pd + #define _mm_tan_pd(a) simde_mm_tan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_tan_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf8_u10(a); + #else + return Sleef_tanf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for 
(size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_tan_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tan_ps + #define _mm256_tan_ps(a) simde_mm256_tan_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_tan_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand4_u10(a); + #else + return Sleef_tand4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_tan_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tan_pd + #define _mm256_tan_pd(a) simde_mm256_tan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_tan_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf16_u10(a); + #else + return Sleef_tanf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_tan_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tan_ps + #define _mm512_tan_ps(a) simde_mm512_tan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_tan_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand8_u10(a); + #else + return Sleef_tand8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_tan_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tan_pd + #define _mm512_tan_pd(a) simde_mm512_tan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_tan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tan_ps(src, k, a); + #else + return 
simde_mm512_mask_mov_ps(src, k, simde_mm512_tan_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tan_ps + #define _mm512_mask_tan_ps(src, k, a) simde_mm512_mask_tan_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_tan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tan_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_tan_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tan_pd + #define _mm512_mask_tan_pd(src, k, a) simde_mm512_mask_tan_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_tand_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tand_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf4_u10(simde_x_mm_deg2rad_ps(a)); + #else + return Sleef_tanf4_u35(simde_x_mm_deg2rad_ps(a)); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i])); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tand_ps + #define _mm_tand_ps(a) simde_mm_tand_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_tand_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tand_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand2_u10(simde_x_mm_deg2rad_pd(a)); + #else + return Sleef_tand2_u35(simde_x_mm_deg2rad_pd(a)); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i])); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tand_pd + #define _mm_tand_pd(a) simde_mm_tand_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_tand_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tand_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf8_u10(simde_x_mm256_deg2rad_ps(a)); + #else + return Sleef_tanf8_u35(simde_x_mm256_deg2rad_ps(a)); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_tand_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tand_ps + #define _mm256_tand_ps(a) simde_mm256_tand_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_tand_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tand_pd(a); + #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand4_u10(simde_x_mm256_deg2rad_pd(a)); + #else + return Sleef_tand4_u35(simde_x_mm256_deg2rad_pd(a)); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_tand_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tand_pd + #define _mm256_tand_pd(a) simde_mm256_tand_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_tand_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tand_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf16_u10(simde_x_mm512_deg2rad_ps(a)); + #else + return Sleef_tanf16_u35(simde_x_mm512_deg2rad_ps(a)); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_tand_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tand_ps + #define _mm512_tand_ps(a) simde_mm512_tand_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_tand_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tand_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand8_u10(simde_x_mm512_deg2rad_pd(a)); + #else + return Sleef_tand8_u35(simde_x_mm512_deg2rad_pd(a)); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_tand_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tand_pd + #define _mm512_tand_pd(a) simde_mm512_tand_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_tand_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tand_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_tand_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tand_ps + #define _mm512_mask_tand_ps(src, k, a) simde_mm512_mask_tand_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_tand_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tand_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_tand_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tand_pd + #define _mm512_mask_tand_pd(src, k, a) simde_mm512_mask_tand_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_tanh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_tanhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tanh_ps + #define _mm_tanh_ps(a) simde_mm_tanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_tanh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_tanhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tanh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tanh_pd + #define _mm_tanh_pd(a) simde_mm_tanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_tanh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_tanhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_tanh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tanh_ps + #define _mm256_tanh_ps(a) simde_mm256_tanh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_tanh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_tanhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_tanh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tanh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tanh_pd + #define _mm256_tanh_pd(a) simde_mm256_tanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_tanh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return 
_mm512_tanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_tanhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_tanh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tanh_ps + #define _mm512_tanh_ps(a) simde_mm512_tanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_tanh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_tanhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_tanh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tanh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tanh_pd + #define _mm512_tanh_pd(a) simde_mm512_tanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_tanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tanh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_tanh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tanh_ps + #define _mm512_mask_tanh_ps(src, k, a) simde_mm512_mask_tanh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_tanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tanh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_tanh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tanh_pd + #define _mm512_mask_tanh_pd(src, k, a) simde_mm512_mask_tanh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_trunc_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_trunc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_truncf4(a); + #else + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_trunc_ps + #define _mm_trunc_ps(a) simde_mm_trunc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_trunc_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_trunc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_truncd2(a); + #else + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_trunc_pd + #define _mm_trunc_pd(a) simde_mm_trunc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256 +simde_mm256_trunc_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_trunc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_truncf8(a); + #else + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_trunc_ps + #define _mm256_trunc_ps(a) simde_mm256_trunc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_trunc_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_trunc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_truncd4(a); + #else + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_trunc_pd + #define _mm256_trunc_pd(a) simde_mm256_trunc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_trunc_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_trunc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_truncf16(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_trunc_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_trunc_ps + #define _mm512_trunc_ps(a) simde_mm512_trunc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_trunc_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_trunc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_truncd8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_trunc_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_trunc_pd + #define _mm512_trunc_pd(a) simde_mm512_trunc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_trunc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_trunc_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_trunc_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_trunc_ps + #define _mm512_mask_trunc_ps(src, k, a) simde_mm512_mask_trunc_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_trunc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_trunc_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_trunc_pd(a)); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_trunc_pd + #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_udivrem_epi32(mem_addr, a, b); + #else + simde__m128i r; + + r = simde_mm_div_epu32(a, b); + *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b)); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_udivrem_epi32 + #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr),(a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b); + #else + simde__m256i r; + + r = simde_mm256_div_epu32(a, b); + *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b)); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_udivrem_epi32 + #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr),(a), (b)) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SVML_H) */ +/* :: End simde/x86/svml.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_scalef_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_scalef_ps(a, b); + #else + return simde_mm_mul_ps(simde_x_mm_flushsubnormal_ps(a), simde_mm_exp2_ps(simde_mm_floor_ps(simde_x_mm_flushsubnormal_ps(b)))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_scalef_ps + #define _mm_scalef_ps(a, b) simde_mm_scalef_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_scalef_ps (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_scalef_ps(src, k, a, b); + #else + return simde_mm_mask_mov_ps(src, k, simde_mm_scalef_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_scalef_ps + #define _mm_mask_scalef_ps(src, k, a, b) simde_mm_mask_scalef_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_scalef_ps (simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_scalef_ps(k, a, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_scalef_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_scalef_ps + #define _mm_maskz_scalef_ps(k, a, b) simde_mm_maskz_scalef_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_scalef_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_scalef_ps(a, b); + #else + return simde_mm256_mul_ps(simde_x_mm256_flushsubnormal_ps(a), 
simde_mm256_exp2_ps(simde_mm256_floor_ps(simde_x_mm256_flushsubnormal_ps(b)))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_scalef_ps + #define _mm256_scalef_ps(a, b) simde_mm256_scalef_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_scalef_ps (simde__m256 src, simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_scalef_ps(src, k, a, b); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_scalef_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_scalef_ps + #define _mm256_mask_scalef_ps(src, k, a, b) simde_mm256_mask_scalef_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_scalef_ps (simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_scalef_ps(k, a, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_scalef_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_scalef_ps + #define _mm256_maskz_scalef_ps(k, a, b) simde_mm256_maskz_scalef_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_scalef_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_scalef_ps(a, b); + #else + return simde_mm512_mul_ps(simde_x_mm512_flushsubnormal_ps(a), simde_mm512_exp2_ps(simde_mm512_floor_ps(simde_x_mm512_flushsubnormal_ps(b)))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_scalef_ps + #define _mm512_scalef_ps(a, b) simde_mm512_scalef_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_scalef_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_scalef_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_scalef_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_scalef_ps + #define _mm512_mask_scalef_ps(src, k, a, b) simde_mm512_mask_scalef_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_scalef_ps (simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_scalef_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_scalef_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_scalef_ps + #define _mm512_maskz_scalef_ps(k, a, b) simde_mm512_maskz_scalef_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_scalef_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_scalef_pd(a, b); + #else + return simde_mm_mul_pd(simde_x_mm_flushsubnormal_pd(a), simde_mm_exp2_pd(simde_mm_floor_pd(simde_x_mm_flushsubnormal_pd(b)))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_scalef_pd + #define _mm_scalef_pd(a, b) simde_mm_scalef_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_scalef_pd (simde__m128d src, simde__mmask8 k, simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_scalef_pd(src, k, a, b); + #else + return simde_mm_mask_mov_pd(src, k, simde_mm_scalef_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_scalef_pd + #define _mm_mask_scalef_pd(src, k, a, b) simde_mm_mask_scalef_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_scalef_pd (simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_scalef_pd(k, a, b); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_scalef_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_scalef_pd + #define _mm_maskz_scalef_pd(k, a, b) simde_mm_maskz_scalef_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_scalef_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_scalef_pd(a, b); + #else + return simde_mm256_mul_pd(simde_x_mm256_flushsubnormal_pd(a), simde_mm256_exp2_pd(simde_mm256_floor_pd(simde_x_mm256_flushsubnormal_pd(b)))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_scalef_pd + #define _mm256_scalef_pd(a, b) simde_mm256_scalef_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_scalef_pd (simde__m256d src, simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_scalef_pd(src, k, a, b); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_scalef_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_scalef_pd + #define _mm256_mask_scalef_pd(src, k, a, b) simde_mm256_mask_scalef_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_scalef_pd (simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_scalef_pd(k, a, b); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_scalef_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_scalef_pd + #define _mm256_maskz_scalef_pd(k, a, b) simde_mm256_maskz_scalef_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_scalef_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_scalef_pd(a, b); + #else + return simde_mm512_mul_pd(simde_x_mm512_flushsubnormal_pd(a), simde_mm512_exp2_pd(simde_mm512_floor_pd(simde_x_mm512_flushsubnormal_pd(b)))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_scalef_pd + #define _mm512_scalef_pd(a, b) simde_mm512_scalef_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_scalef_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_scalef_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_scalef_pd(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_scalef_pd + #define _mm512_mask_scalef_pd(src, k, a, b) simde_mm512_mask_scalef_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_scalef_pd (simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_scalef_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_scalef_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_scalef_pd + #define _mm512_maskz_scalef_pd(k, a, b) simde_mm512_maskz_scalef_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_scalef_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm_scalef_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + a_.f32[0] = (simde_math_issubnormalf(a_.f32[0]) ? 0 : a_.f32[0]) * simde_math_exp2f(simde_math_floorf((simde_math_issubnormalf(b_.f32[0]) ? 0 : b_.f32[0]))); + + return simde__m128_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_scalef_ss + #define _mm_scalef_ss(a, b) simde_mm_scalef_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_scalef_ss (simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(HEDLEY_GCC_VERSION) + return _mm_mask_scalef_round_ss(src, k, a, b, _MM_FROUND_CUR_DIRECTION); + #else + simde__m128_private + src_ = simde__m128_to_private(src), + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + a_.f32[0] = ((k & 1) ? ((simde_math_issubnormalf(a_.f32[0]) ? 0 : a_.f32[0]) * simde_math_exp2f(simde_math_floorf((simde_math_issubnormalf(b_.f32[0]) ? 0 : b_.f32[0])))) : src_.f32[0]); + + return simde__m128_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_scalef_ss + #define _mm_mask_scalef_ss(src, k, a, b) simde_mm_mask_scalef_ss(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_scalef_ss (simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_GCC_105339) + return _mm_maskz_scalef_ss(k, a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + a_.f32[0] = ((k & 1) ? ((simde_math_issubnormalf(a_.f32[0]) ? 0 : a_.f32[0]) * simde_math_exp2f(simde_math_floorf((simde_math_issubnormalf(b_.f32[0]) ? 0 : b_.f32[0])))) : SIMDE_FLOAT32_C(0.0)); + + return simde__m128_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_scalef_ss + #define _mm_maskz_scalef_ss(k, a, b) simde_mm_maskz_scalef_ss(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_scalef_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm_scalef_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + a_.f64[0] = (simde_math_issubnormal(a_.f64[0]) ? 0 : a_.f64[0]) * simde_math_exp2(simde_math_floor((simde_math_issubnormal(b_.f64[0]) ? 
0 : b_.f64[0]))); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_scalef_sd + #define _mm_scalef_sd(a, b) simde_mm_scalef_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_scalef_sd (simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_GCC_105339) + return _mm_mask_scalef_sd(src, k, a, b); + #else + simde__m128d_private + src_ = simde__m128d_to_private(src), + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + a_.f64[0] = ((k & 1) ? ((simde_math_issubnormal(a_.f64[0]) ? 0 : a_.f64[0]) * simde_math_exp2(simde_math_floor((simde_math_issubnormal(b_.f64[0]) ? 0 : b_.f64[0])))) : src_.f64[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_scalef_sd + #define _mm_mask_scalef_sd(src, k, a, b) simde_mm_mask_scalef_sd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_scalef_sd (simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_GCC_105339) + return _mm_maskz_scalef_sd(k, a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + a_.f64[0] = ((k & 1) ? ((simde_math_issubnormal(a_.f64[0]) ? 0 : a_.f64[0]) * simde_math_exp2(simde_math_floor(simde_math_issubnormal(b_.f64[0]) ? 0 : b_.f64[0]))) : SIMDE_FLOAT64_C(0.0)); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_scalef_sd + #define _mm_maskz_scalef_sd(k, a, b) simde_mm_maskz_scalef_sd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SCALEF_H) */ +/* :: End simde/x86/avx512/scalef.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set4.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SET4_H) +#define SIMDE_X86_AVX512_SET4_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { + simde__m512i_private r_; + + r_.i32[ 0] = a; + r_.i32[ 1] = b; + r_.i32[ 2] = c; + r_.i32[ 3] = d; + r_.i32[ 4] = a; + r_.i32[ 5] = b; + r_.i32[ 6] = c; + r_.i32[ 7] = d; + r_.i32[ 8] = a; + r_.i32[ 9] = b; + r_.i32[10] = c; + r_.i32[11] = d; + r_.i32[12] = a; + r_.i32[13] = b; + r_.i32[14] = c; + r_.i32[15] = d; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set4_epi32 + #define _mm512_set4_epi32(d,c,b,a) simde_mm512_set4_epi32(d,c,b,a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { + simde__m512i_private r_; + + r_.i64[0] = a; + r_.i64[1] = b; + r_.i64[2] = c; + r_.i64[3] = d; + r_.i64[4] = a; + r_.i64[5] = b; + r_.i64[6] = c; + r_.i64[7] = d; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set4_epi64 + #define _mm512_set4_epi64(d,c,b,a) simde_mm512_set4_epi64(d,c,b,a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { + simde__m512_private r_; + + r_.f32[ 0] = a; + r_.f32[ 1] = b; + r_.f32[ 2] = c; + r_.f32[ 3] = d; + r_.f32[ 4] = a; + r_.f32[ 5] = b; + r_.f32[ 6] = c; + r_.f32[ 7] = d; + r_.f32[ 8] = a; + r_.f32[ 9] = b; + r_.f32[10] = c; + r_.f32[11] = d; + r_.f32[12] = a; + r_.f32[13] = b; + r_.f32[14] = c; + r_.f32[15] = d; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set4_ps + #define _mm512_set4_ps(d,c,b,a) simde_mm512_set4_ps(d,c,b,a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { + simde__m512d_private r_; + + r_.f64[0] = a; + r_.f64[1] = b; + r_.f64[2] = c; + r_.f64[3] = d; + r_.f64[4] = a; + r_.f64[5] = b; + r_.f64[6] = c; + r_.f64[7] = d; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set4_pd + #define _mm512_set4_pd(d,c,b,a) simde_mm512_set4_pd(d,c,b,a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET4_H) */ +/* :: End simde/x86/avx512/set4.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setr.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SETR_H) +#define SIMDE_X86_AVX512_SETR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setr_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + simde__m512i_private r_; + + r_.i32[ 0] = e15; + r_.i32[ 1] = e14; + r_.i32[ 2] = e13; + r_.i32[ 3] = e12; + r_.i32[ 4] = e11; + r_.i32[ 5] = e10; + r_.i32[ 6] = e9; + r_.i32[ 7] = e8; + r_.i32[ 8] = e7; + r_.i32[ 9] = e6; + r_.i32[10] = e5; + r_.i32[11] = e4; + r_.i32[12] = e3; + r_.i32[13] = e2; + r_.i32[14] = e1; + r_.i32[15] = e0; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr_epi32 + #define _mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setr_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + simde__m512i_private r_; + + r_.i64[0] = e7; + r_.i64[1] = e6; + r_.i64[2] = e5; + r_.i64[3] = e4; + r_.i64[4] = e3; + r_.i64[5] = e2; + r_.i64[6] = e1; + r_.i64[7] = e0; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr_epi64 + #define _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_setr_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, + simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + simde__m512_private r_; + + r_.f32[ 0] = e15; + r_.f32[ 1] = e14; + r_.f32[ 2] = e13; + r_.f32[ 3] = e12; + r_.f32[ 4] = e11; + r_.f32[ 5] = e10; + r_.f32[ 6] = e9; + r_.f32[ 7] = e8; + r_.f32[ 8] = e7; + r_.f32[ 9] = e6; + r_.f32[10] = e5; + r_.f32[11] = e4; + r_.f32[12] = e3; + r_.f32[13] = e2; + r_.f32[14] = e1; + r_.f32[15] = e0; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr_ps + #define _mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_setr_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 
e1, simde_float64 e0) { + simde__m512d_private r_; + + r_.f64[0] = e7; + r_.f64[1] = e6; + r_.f64[2] = e5; + r_.f64[3] = e4; + r_.f64[4] = e3; + r_.f64[5] = e2; + r_.f64[6] = e1; + r_.f64[7] = e0; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr_pd + #define _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETR_H) */ +/* :: End simde/x86/avx512/setr.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setr4.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SETR4_H) +#define SIMDE_X86_AVX512_SETR4_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setr4_epi32 (int32_t d, int32_t c, int32_t b, int32_t a) { + simde__m512i_private r_; + + r_.i32[ 0] = d; + r_.i32[ 1] = c; + r_.i32[ 2] = b; + r_.i32[ 3] = a; + r_.i32[ 4] = d; + r_.i32[ 5] = c; + r_.i32[ 6] = b; + r_.i32[ 7] = a; + r_.i32[ 8] = d; + r_.i32[ 9] = c; + r_.i32[10] = b; + r_.i32[11] = a; + r_.i32[12] = d; + r_.i32[13] = c; + r_.i32[14] = b; + r_.i32[15] = a; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr4_epi32 + #define _mm512_setr4_epi32(d,c,b,a) simde_mm512_setr4_epi32(d,c,b,a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setr4_epi64 (int64_t d, int64_t c, int64_t b, int64_t a) { + simde__m512i_private r_; + + r_.i64[0] = d; + r_.i64[1] = c; + r_.i64[2] = b; + r_.i64[3] = a; + r_.i64[4] = d; + r_.i64[5] = c; + r_.i64[6] = b; + r_.i64[7] = a; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr4_epi64 + #define _mm512_setr4_epi64(d,c,b,a) simde_mm512_setr4_epi64(d,c,b,a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_setr4_ps (simde_float32 d, simde_float32 c, simde_float32 b, simde_float32 a) { + simde__m512_private r_; + + r_.f32[ 0] = d; + r_.f32[ 1] = c; + r_.f32[ 2] = b; + r_.f32[ 3] = a; + r_.f32[ 4] = d; + r_.f32[ 5] = c; + r_.f32[ 6] = b; + r_.f32[ 7] = a; + r_.f32[ 8] = d; + r_.f32[ 9] = c; + r_.f32[10] = b; + r_.f32[11] = a; + r_.f32[12] = d; + r_.f32[13] = c; + r_.f32[14] = b; + r_.f32[15] = a; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr4_ps + #define _mm512_setr4_ps(d,c,b,a) simde_mm512_setr4_ps(d,c,b,a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_setr4_pd (simde_float64 d, simde_float64 c, simde_float64 b, simde_float64 a) { + simde__m512d_private r_; + + r_.f64[0] = d; + r_.f64[1] = c; + r_.f64[2] = b; + r_.f64[3] = a; + r_.f64[4] = d; + r_.f64[5] = c; + r_.f64[6] = b; + r_.f64[7] = a; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setr4_pd + #define _mm512_setr4_pd(d,c,b,a) simde_mm512_setr4_pd(d,c,b,a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETR4_H) */ +/* :: End simde/x86/avx512/setr4.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/shldv.h :: */ +#if !defined(SIMDE_X86_AVX512_SHLDV_H) +#define SIMDE_X86_AVX512_SHLDV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shldv_epi32(simde__m128i a, simde__m128i b, simde__m128i c) { + #if 
defined(SIMDE_X86_AVX512VBMI2_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_shldv_epi32(a, b, c); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + uint64x2_t + values_lo = vreinterpretq_u64_u32(vzip1q_u32(b_.neon_u32, a_.neon_u32)), + values_hi = vreinterpretq_u64_u32(vzip2q_u32(b_.neon_u32, a_.neon_u32)); + + int32x4_t count = vandq_s32(c_.neon_i32, vdupq_n_s32(31)); + + values_lo = vshlq_u64(values_lo, vmovl_s32(vget_low_s32(count))); + values_hi = vshlq_u64(values_hi, vmovl_high_s32(count)); + + r_.neon_u32 = + vuzp2q_u32( + vreinterpretq_u32_u64(values_lo), + vreinterpretq_u32_u64(values_hi) + ); + #elif defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i + tmp1, + lo = + simde_mm256_castps_si256( + simde_mm256_unpacklo_ps( + simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(b)), + simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(a)) + ) + ), + hi = + simde_mm256_castps_si256( + simde_mm256_unpackhi_ps( + simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(b)), + simde_mm256_castsi256_ps(simde_mm256_castsi128_si256(a)) + ) + ), + tmp2 = + simde_mm256_castpd_si256( + simde_mm256_permute2f128_pd( + simde_mm256_castsi256_pd(lo), + simde_mm256_castsi256_pd(hi), + 32 + ) + ); + + tmp2 = + simde_mm256_sllv_epi64( + tmp2, + simde_mm256_cvtepi32_epi64( + simde_mm_and_si128( + c, + simde_mm_set1_epi32(31) + ) + ) + ); + + tmp1 = + simde_mm256_castpd_si256( + simde_mm256_permute2f128_pd( + simde_mm256_castsi256_pd(tmp2), + simde_mm256_castsi256_pd(tmp2), + 1 + ) + ); + + r_ = + simde__m128i_to_private( + simde_mm256_castsi256_si128( + simde_mm256_castps_si256( + simde_mm256_shuffle_ps( + simde_mm256_castsi256_ps(tmp2), + simde_mm256_castsi256_ps(tmp1), + 221 + ) + ) + ) + ); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde__m128i_private + c_ = simde__m128i_to_private(c), + lo = simde__m128i_to_private(simde_mm_unpacklo_epi32(b, a)), + hi = simde__m128i_to_private(simde_mm_unpackhi_epi32(b, a)); + + size_t halfway = (sizeof(r_.u32) / sizeof(r_.u32[0]) / 2); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway ; i++) { + lo.u64[i] <<= (c_.u32[i] & 31); + hi.u64[i] <<= (c_.u32[halfway + i] & 31); + } + + r_ = + simde__m128i_to_private( + simde_mm_castps_si128( + simde_mm_shuffle_ps( + simde_mm_castsi128_ps(simde__m128i_from_private(lo)), + simde_mm_castsi128_ps(simde__m128i_from_private(hi)), + 221) + ) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_CONVERT_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + simde__m128i_private + c_ = simde__m128i_to_private(c); + simde__m256i_private + a_ = simde__m256i_to_private(simde_mm256_castsi128_si256(a)), + b_ = simde__m256i_to_private(simde_mm256_castsi128_si256(b)), + tmp1, + tmp2; + + tmp1.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp1.u64), SIMDE_SHUFFLE_VECTOR_(32, 32, b_.i32, a_.i32, 0, 8, 1, 9, 2, 10, 3, 11)); + SIMDE_CONVERT_VECTOR_(tmp2.u64, c_.u32); + + tmp1.u64 <<= (tmp2.u64 & 31); + + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, tmp1.m128i_private[0].i32, tmp1.m128i_private[1].i32, 1, 3, 5, 7); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (((HEDLEY_STATIC_CAST(uint64_t, a_.u32[i]) << 32) | 
b_.u32[i]) << (c_.u32[i] & 31)) >> 32); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_shldv_epi32 + #define _mm_shldv_epi32(a, b, c) simde_mm_shldv_epi32(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SHLDV_H) */ +/* :: End simde/x86/avx512/shldv.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sll.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SLL_H) +#define SIMDE_X86_AVX512_SLL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sll_epi16 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sll_epi16(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sll_epi16(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 15) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sll_epi16 + #define _mm512_sll_epi16(a, count) simde_mm512_sll_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sll_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX51BW_NATIVE) + return _mm512_mask_sll_epi16(src, k, a, count); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_sll_epi16(a, count)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sll_epi16 + #define _mm512_mask_sll_epi16(src, k, a, count) simde_mm512_mask_sll_epi16(src, k, a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sll_epi16 (simde__mmask32 k, simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_sll_epi16(k, a, count); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_sll_epi16(a, count)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sll_epi16 + #define _mm512_maskz_sll_epi16(src, k, a, count) simde_mm512_maskz_sll_epi16(src, k, a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sll_epi32 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sll_epi32(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sll_epi32(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 31) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); + #else + SIMDE_VECTORIZE + for (size_t 
i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sll_epi32 + #define _mm512_sll_epi32(a, count) simde_mm512_sll_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sll_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sll_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sll_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sll_epi32 + #define _mm512_mask_sll_epi32(src, k, a, b) simde_mm512_mask_sll_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sll_epi32(simde__mmask16 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sll_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_sll_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sll_epi32 + #define _mm512_maskz_sll_epi32(k, a, b) simde_mm512_maskz_sll_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sll_epi64 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sll_epi64(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sll_epi64(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 63) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sll_epi64 + #define _mm512_sll_epi64(a, count) simde_mm512_sll_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sll_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sll_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sll_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sll_epi64 + #define _mm512_mask_sll_epi64(src, k, a, b) simde_mm512_mask_sll_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sll_epi64(simde__mmask8 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sll_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_sll_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sll_epi64 + #define _mm512_maskz_sll_epi64(k, a, b) simde_mm512_maskz_sll_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SLL_H) */ +/* :: End simde/x86/avx512/sll.h :: 
*/ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sra.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRA_H) +#define SIMDE_X86_AVX512_SRA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sra_epi16 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sra_epi16(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sra_epi16(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sra_epi16 + #define _mm512_sra_epi16(a, count) simde_mm512_sra_epi16(a, count) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRA_H) */ +/* :: End simde/x86/avx512/sra.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/srai.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * 
obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRAI_H) +#define SIMDE_X86_AVX512_SRAI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srai_epi16 (simde__m512i a, const int imm8) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) +# define simde_mm512_srai_epi16(a, imm8) _mm512_srai_epi16(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_srai_epi16 + #define _mm512_srai_epi16(a, imm8) simde_mm512_srai_epi16(a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRAI_H) */ +/* :: End simde/x86/avx512/srai.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/srav.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRAV_H) +#define SIMDE_X86_AVX512_SRAV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srav_epi16 (simde__m512i a, simde__m512i count) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_srav_epi16(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + count_ = simde__m512i_to_private(count); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i16[i]); + if (shift > 16) shift = 15; + r_.i16[i] = a_.i16[i] >> shift; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_srav_epi16 + #define _mm512_srav_epi16(a, count) simde_mm512_srav_epi16(a, count) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRAV_H) */ +/* :: End simde/x86/avx512/srav.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/srl.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRL_H) +#define SIMDE_X86_AVX512_SRL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srl_epi16 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_srl_epi16(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_srl_epi16(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 15) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.i64[0]; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.i64[0]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_srl_epi16 + #define _mm512_srl_epi16(a, count) simde_mm512_srl_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srl_epi32 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_srl_epi32(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_srl_epi32(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 31) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.i64[0]; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.i64[0]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srl_epi32 + #define _mm512_srl_epi32(a, count) simde_mm512_srl_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_srl_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_srl_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_srl_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_srl_epi32 + #define _mm512_mask_srl_epi32(src, k, a, b) simde_mm512_mask_srl_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_srl_epi32(simde__mmask16 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_srl_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_srl_epi32(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_srl_epi32 + #define _mm512_maskz_srl_epi32(k, a, b) simde_mm512_maskz_srl_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srl_epi64 (simde__m512i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_srl_epi64(a, count); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_srl_epi64(a_.m256i[i], count); + } + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + if (HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]) > 63) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count_.i64[0]; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> count_.i64[0]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srl_epi64 + #define _mm512_srl_epi64(a, count) simde_mm512_srl_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_srl_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_srl_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_srl_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_srl_epi64 + #define _mm512_mask_srl_epi64(src, k, a, b) simde_mm512_mask_srl_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_srl_epi64(simde__mmask8 k, simde__m512i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_srl_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_srl_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_srl_epi64 + #define _mm512_maskz_srl_epi64(k, a, b) simde_mm512_maskz_srl_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRL_H) */ +/* :: End simde/x86/avx512/srl.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/store.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_STORE_H) +#define SIMDE_X86_AVX512_STORE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_store_ps (void * mem_addr, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_store_ps + #define _mm512_store_ps(mem_addr, a) simde_mm512_store_ps(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_store_pd (void * mem_addr, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_store_pd + #define _mm512_store_pd(mem_addr, a) simde_mm512_store_pd(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_store_si512 (void * mem_addr, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_store_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), &a, sizeof(a)); + #endif +} +#define simde_mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) +#define simde_mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) +#define simde_mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) +#define simde_mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_store_epi8 + #undef _mm512_store_epi16 + #undef _mm512_store_epi32 + #undef _mm512_store_epi64 + #undef _mm512_store_si512 + #define _mm512_store_si512(mem_addr, a) simde_mm512_store_si512(mem_addr, a) + #define _mm512_store_epi8(mem_addr, a) simde_mm512_store_si512(mem_addr, a) + #define _mm512_store_epi16(mem_addr, a) simde_mm512_store_si512(mem_addr, a) + #define _mm512_store_epi32(mem_addr, a) simde_mm512_store_si512(mem_addr, a) + #define _mm512_store_epi64(mem_addr, a) simde_mm512_store_si512(mem_addr, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_STORE_H) */ +/* :: End simde/x86/avx512/store.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/storeu.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial 
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_STOREU_H) +#define SIMDE_X86_AVX512_STOREU_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#define simde_mm256_storeu_epi8(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#define simde_mm256_storeu_epi16(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#define simde_mm256_storeu_epi32(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#define simde_mm256_storeu_epi64(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_epi8 + #undef _mm256_storeu_epi16 + #define _mm256_storeu_epi8(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) + #define _mm256_storeu_epi16(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_epi32 + #undef _mm256_storeu_epi64 + #define _mm256_storeu_epi32(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) + #define _mm256_storeu_epi64(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_mask_storeu_epi16 (void * mem_addr, simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + _mm256_mask_storeu_epi16(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a); + #else + const simde__m256i zero = simde_mm256_setzero_si256(); + simde_mm256_storeu_epi16(mem_addr, simde_mm256_mask_mov_epi16(zero, k, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_storeu_epi16 + #define _mm256_mask_storeu_epi16(mem_addr, k, a) simde_mm256_mask_storeu_epi16(mem_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_storeu_ps (void * mem_addr, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_storeu_ps + #define _mm512_storeu_ps(mem_addr, a) simde_mm512_storeu_ps(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_storeu_pd (void * mem_addr, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_storeu_pd + #define _mm512_storeu_pd(mem_addr, a) simde_mm512_storeu_pd(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void 
+simde_mm512_storeu_si512 (void * mem_addr, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_storeu_si512(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#define simde_mm512_storeu_epi8(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) +#define simde_mm512_storeu_epi16(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) +#define simde_mm512_storeu_epi32(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) +#define simde_mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_storeu_epi8 + #undef _mm512_storeu_epi16 + #define _mm512_storeu_epi16(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) + #define _mm512_storeu_epi8(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_storeu_epi32 + #undef _mm512_storeu_epi64 + #undef _mm512_storeu_si512 + #define _mm512_storeu_si512(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) + #define _mm512_storeu_epi32(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) + #define _mm512_storeu_epi64(mem_addr, a) simde_mm512_storeu_si512(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_storeu_epi16 (void * mem_addr, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + _mm512_mask_storeu_epi16(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a); + #else + const simde__m512i zero = simde_mm512_setzero_si512(); + simde_mm512_storeu_epi16(mem_addr, simde_mm512_mask_mov_epi16(zero, k, a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_storeu_epi16 + #define _mm512_mask_storeu_epi16(mem_addr, k, a) simde_mm512_mask_storeu_epi16(mem_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_storeu_ps (void * mem_addr, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_mask_storeu_ps(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a); + #else + const simde__m512 zero = simde_mm512_setzero_ps(); + simde_mm512_storeu_ps(mem_addr, simde_mm512_mask_mov_ps(zero, k, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_storeu_ps + #define _mm512_mask_storeu_ps(mem_addr, k, a) simde_mm512_mask_storeu_ps(mem_addr, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm512_mask_storeu_pd (void * mem_addr, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + _mm512_mask_storeu_pd(HEDLEY_REINTERPRET_CAST(void*, mem_addr), k, a); + #else + const simde__m512d zero = simde_mm512_setzero_pd(); + simde_mm512_storeu_pd(mem_addr, simde_mm512_mask_mov_pd(zero, k, a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_storeu_pd + #define _mm512_mask_storeu_pd(mem_addr, k, a) simde_mm512_mask_storeu_pd(mem_addr, k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_STOREU_H) */ +/* :: End simde/x86/avx512/storeu.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/subs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + 
* restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SUBS_H) +#define SIMDE_X86_AVX512_SUBS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_subs_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_subs_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_subs_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_subs_epi8 + #define _mm512_subs_epi8(a, b) simde_mm512_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_subs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_subs_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_subs_epi8 + #define _mm512_mask_subs_epi8(src, k, a, b) simde_mm512_mask_subs_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_subs_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_subs_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_subs_epi8 + #define _mm512_maskz_subs_epi8(k, a, b) simde_mm512_maskz_subs_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_subs_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_subs_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if 
!defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_subs_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_subs_epi16 + #define _mm512_subs_epi16(a, b) simde_mm512_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_subs_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_subs_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_subs_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_subs_epu8 + #define _mm512_subs_epu8(a, b) simde_mm512_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_subs_epu8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_subs_epu8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_subs_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_subs_epu8 + #define _mm512_mask_subs_epu8(src, k, a, b) simde_mm512_mask_subs_epu8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_subs_epu8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_subs_epu8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_subs_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_subs_epu8 + #define _mm512_maskz_subs_epu8(k, a, b) simde_mm512_maskz_subs_epu8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_subs_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_subs_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(HEDLEY_INTEL_VERSION) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_subs_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_subs_epu16 + #define _mm512_subs_epu16(a, b) simde_mm512_subs_epu16(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SUBS_H) */ +/* :: End simde/x86/avx512/subs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/ternarylogic.h :: */ +/* 
SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Kunwar Maheep Singh + * 2021 Christopher Moore + */ + +/* The ternarylogic implementation is based on Wojciech Muła's work at + * https://github.com/WojciechMula/ternary-logic */ + +#if !defined(SIMDE_X86_AVX512_TERNARYLOGIC_H) +#define SIMDE_X86_AVX512_TERNARYLOGIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x00_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + HEDLEY_STATIC_CAST(void, b); + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t c0 = 0; + return c0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x01_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x02_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x03_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = b | a; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x04_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x05_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = c | a; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x06_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + 
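+/* A scalar reference for the simde_x_ternarylogic_0xNN_impl_ helpers that
+ * follow: for every bit position, the AVX-512 ternarylogic operation
+ * returns bit ((a_bit << 2) | (b_bit << 1) | c_bit) of the 8-bit immediate,
+ * so e.g. 0xaa selects c, 0xcc selects b, 0x96 is a ^ b ^ c and 0xe8 is the
+ * bitwise majority; each helper below is a branch-free expansion of one
+ * such truth table.  The function below is an illustrative sketch added for
+ * clarity only -- the name is hypothetical and it is not part of the
+ * generated SIMDE sources.  It simply ORs together the minterms selected by
+ * the immediate. */
+SIMDE_FUNCTION_ATTRIBUTES
+uint_fast32_t
+simde_x_ternarylogic_truth_table_sketch_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c, int imm8) {
+  uint_fast32_t r = 0;
+  for (int i = 0 ; i < 8 ; i++) {
+    if ((imm8 >> i) & 1) {
+      /* Minterm i applies where (a, b, c) matches (bit 2, bit 1, bit 0) of i. */
+      const uint_fast32_t am = (i & 4) ? a : ~a;
+      const uint_fast32_t bm = (i & 2) ? b : ~b;
+      const uint_fast32_t cm = (i & 1) ? c : ~c;
+      r |= am & bm & cm;
+    }
+  }
+  return r;
+}
+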
+SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x07_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = a | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x08_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = t1 & c; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x09_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x0a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = c & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x0b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 | c; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x0c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x0d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = t1 | b; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x0e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b | c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x0f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = ~a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x10_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x11_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = c | b; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x12_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x13_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = b | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x14_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t 
+simde_x_ternarylogic_0x15_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = c | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x16_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & t1; + const uint_fast32_t t3 = ~a; + const uint_fast32_t t4 = b ^ c; + const uint_fast32_t t5 = t3 & t4; + const uint_fast32_t t6 = t2 | t5; + return t6; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x17_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = b & c; + const uint_fast32_t t2 = (a & t0) | (~a & t1); + const uint_fast32_t t3 = ~t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x18_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x19_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = b & c; + const uint_fast32_t t2 = a & t1; + const uint_fast32_t t3 = t0 ^ t2; + const uint_fast32_t t4 = ~t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x1a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ c; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x1b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 | c; + const uint_fast32_t t3 = t0 ^ t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x1c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ b; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x1d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = t1 | b; + const uint_fast32_t t3 = t0 ^ t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x1e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x1f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a & t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x20_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = t1 & c; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x21_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = b | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + 
+SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x22_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = c & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x23_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 | c; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x24_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x25_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = a ^ t2; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x26_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x27_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 | c; + const uint_fast32_t t3 = t0 ^ t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x28_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = c & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x29_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | c; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = b ^ c; + const uint_fast32_t t4 = t2 ^ t3; + const uint_fast32_t t5 = t1 & t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x2a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x2b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c & t1; + const uint_fast32_t t3 = ~c; + const uint_fast32_t t4 = b | a; + const uint_fast32_t t5 = ~t4; + const uint_fast32_t t6 = t3 & t5; + const uint_fast32_t t7 = t2 | t6; + return t7; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x2c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x2d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = b | t0; + const uint_fast32_t t2 = a ^ t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x2e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const 
uint_fast32_t t1 = a & b; + const uint_fast32_t t2 = t0 ^ t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x2f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 & c; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x30_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x31_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = t1 | a; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x32_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a | c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x33_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = ~b; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x34_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ b; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x35_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = t1 | a; + const uint_fast32_t t3 = t0 ^ t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x36_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = b ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x37_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = b & t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x38_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x39_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = b ^ t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x3a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a & t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 & c; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x3b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 & c; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t 
+simde_x_ternarylogic_0x3c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = b ^ a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x3d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = a | c; + const uint_fast32_t t2 = ~t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x3e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t t2 = a ^ b; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x3f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x40_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = t1 & b; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x41_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = c | t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x42_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x43_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = a ^ t2; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x44_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = b & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x45_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 | b; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x46_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x47_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 | b; + const uint_fast32_t t3 = t0 ^ t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x48_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = b & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x49_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | b; + const uint_fast32_t t2 
= ~a; + const uint_fast32_t t3 = b ^ c; + const uint_fast32_t t4 = t2 ^ t3; + const uint_fast32_t t5 = t1 & t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x4a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x4b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | c; + const uint_fast32_t t2 = a ^ t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x4c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x4d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b & t1; + const uint_fast32_t t3 = ~b; + const uint_fast32_t t4 = a | c; + const uint_fast32_t t5 = ~t4; + const uint_fast32_t t6 = t3 & t5; + const uint_fast32_t t7 = t2 | t6; + return t7; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x4e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = c & t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = t2 & b; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x4f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = b & t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x50_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x51_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 | a; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x52_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ c; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x53_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 | a; + const uint_fast32_t t3 = t0 ^ t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x54_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a | b; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x55_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = ~c; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x56_impl_(uint_fast32_t a, 
uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | a; + const uint_fast32_t t1 = c ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x57_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | a; + const uint_fast32_t t1 = c & t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x58_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x59_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = c ^ t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x5a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = c ^ a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x5b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x5c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a & t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 & b; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x5d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 & b; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x5e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = a ^ c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x5f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = c & a; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x60_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x61_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = a ^ c; + const uint_fast32_t t4 = t2 ^ t3; + const uint_fast32_t t5 = t1 & t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x62_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x63_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | c; + const uint_fast32_t t2 = b ^ t1; + return t2; +} + 
+SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x64_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | b; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x65_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | b; + const uint_fast32_t t2 = c ^ t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x66_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = c ^ b; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x67_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a | b; + const uint_fast32_t t2 = ~t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x68_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a & t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = b & c; + const uint_fast32_t t4 = t2 & t3; + const uint_fast32_t t5 = t1 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x69_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x6a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = c ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x6b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); + const uint_fast32_t t2 = a ^ c1; + const uint_fast32_t t3 = b ^ c; + const uint_fast32_t t4 = t2 ^ t3; + const uint_fast32_t t5 = t1 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x6c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = b ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x6d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); + const uint_fast32_t t2 = a ^ c1; + const uint_fast32_t t3 = b ^ c; + const uint_fast32_t t4 = t2 ^ t3; + const uint_fast32_t t5 = t1 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x6e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x6f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x70_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + 
const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x71_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = a & t2; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x72_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = c & t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = t2 & a; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x73_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = a & t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x74_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = b & t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = t2 & a; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x75_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = a & t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x76_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x77_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = c & b; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x78_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = a ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x79_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); + const uint_fast32_t t2 = b ^ c1; + const uint_fast32_t t3 = a ^ c; + const uint_fast32_t t4 = t2 ^ t3; + const uint_fast32_t t5 = t1 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x7a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = a ^ c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x7b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x7c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = a ^ b; + const uint_fast32_t t3 = t1 | t2; + return t3; +} 
+ +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x7d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x7e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x7f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); + const uint_fast32_t t2 = t1 ^ c1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x80_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = a & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x81_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = a ^ t2; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x82_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x83_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 | c; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x84_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x85_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = t2 | b; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x86_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = c ^ t1; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x87_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = a ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x88_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = c & b; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x89_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 | b; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES 
+uint_fast32_t +simde_x_ternarylogic_0x8a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | b; + const uint_fast32_t t2 = c & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x8b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | b; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = t2 | c; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x8c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 | c; + const uint_fast32_t t2 = b & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x8d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 | b; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 | c; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x8e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = t1 & t2; + const uint_fast32_t t4 = t0 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x8f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b & c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x90_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x91_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = t2 | a; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x92_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = c ^ t1; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x93_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = b ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x94_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = b ^ t1; + const uint_fast32_t t3 = t0 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x95_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = c ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x96_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a ^ t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t 
+simde_x_ternarylogic_0x97_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = t1 ^ a; + const uint_fast32_t t3 = b ^ c; + const uint_fast32_t t4 = a ^ t3; + const uint_fast32_t t5 = t2 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x98_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a | b; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x99_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = c ^ b; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x9a_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = t1 ^ c; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x9b_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 & c; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x9c_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = t1 ^ b; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x9d_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 & b; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x9e_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = c ^ t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0x9f_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a & t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = c & a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = t2 | a; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a | t0; + const uint_fast32_t t2 = c & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 | c; + const uint_fast32_t t4 = t1 & 
t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a | b; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = c ^ a; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = t1 ^ c; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = t2 & c; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | b; + const uint_fast32_t t1 = c & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xa9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | a; + const uint_fast32_t t1 = c ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xaa_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + HEDLEY_STATIC_CAST(void, b); + return c; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xab_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xac_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 & b; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xad_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b & c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xae_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = t1 | c; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xaf_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = c | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | c; + const uint_fast32_t t2 = a & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = ~b; + 
const uint_fast32_t t3 = t2 | c; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = b & t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = a | c; + const uint_fast32_t t4 = t2 & t3; + const uint_fast32_t t5 = t1 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a & c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = t1 ^ a; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~b; + const uint_fast32_t t3 = t2 & a; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = c ^ t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = b & t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 & a; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xb9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xba_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = t1 | c; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xbb_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = c | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xbc_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = a ^ b; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xbd_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ b; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xbe_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = 
c | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xbf_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = b & a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = t2 | a; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a | c; + const uint_fast32_t t3 = t1 & t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = ~t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = b & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 | a; + const uint_fast32_t t2 = ~a; + const uint_fast32_t t3 = t2 | b; + const uint_fast32_t t4 = t1 & t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t t2 = t1 ^ b; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = t2 & b; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = b & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xc9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = b ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xca_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 & c; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xcb_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b & c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t 
+simde_x_ternarylogic_0xcc_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + HEDLEY_STATIC_CAST(void, c); + return b; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xcd_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xce_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t t2 = t1 | b; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xcf_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = b | t0; + const uint_fast32_t t2 = a & t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & b; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t t2 = t1 ^ a; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = ~c; + const uint_fast32_t t3 = t2 & a; + const uint_fast32_t t4 = t1 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = b & t0; + const uint_fast32_t t2 = b ^ c; + const uint_fast32_t t3 = ~t2; + const uint_fast32_t t4 = a & t3; + const uint_fast32_t t5 = t1 | t4; + return t5; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a & b; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = b ^ t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = c & t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = c & b; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = t1 & a; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xd9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 
= b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & b; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xda_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = a ^ c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xdb_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a ^ c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xdc_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & a; + const uint_fast32_t t2 = t1 | b; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xdd_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = b | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xde_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = b | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xdf_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a & t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a ^ t0; + const uint_fast32_t t2 = ~t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = ~b; + const uint_fast32_t t2 = t1 & c; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ b; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & c; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = c & a; + const uint_fast32_t t1 = ~c; + const uint_fast32_t t2 = t1 & b; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a & b; + const uint_fast32_t t3 = t1 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & b; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe7_impl_(uint_fast32_t a, 
uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~a; + const uint_fast32_t t2 = t1 ^ c; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = a & t1; + const uint_fast32_t t3 = t0 | t2; + return t3; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xe9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b ^ c; + const uint_fast32_t t2 = t0 ^ t1; + const uint_fast32_t t3 = a & b; + const uint_fast32_t t4 = t2 | t3; + return t4; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xea_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & a; + const uint_fast32_t t1 = c | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xeb_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ a; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = c | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xec_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a & c; + const uint_fast32_t t1 = b | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xed_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = a ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = b | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xee_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + const uint_fast32_t t0 = c | b; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xef_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~a; + const uint_fast32_t t1 = b | c; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf0_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + HEDLEY_STATIC_CAST(void, c); + return a; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf1_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf2_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 & c; + const uint_fast32_t t2 = t1 | a; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf3_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = a | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf4_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = t0 & b; + const uint_fast32_t t2 = t1 | a; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf5_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = ~c; 
+ const uint_fast32_t t1 = a | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf6_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = a | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf7_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf8_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b & c; + const uint_fast32_t t1 = a | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xf9_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b ^ c; + const uint_fast32_t t1 = ~t0; + const uint_fast32_t t2 = a | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xfa_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, b); + const uint_fast32_t t0 = c | a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xfb_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~b; + const uint_fast32_t t1 = t0 | c; + const uint_fast32_t t2 = a | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xfc_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t t0 = b | a; + return t0; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xfd_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = ~c; + const uint_fast32_t t1 = a | b; + const uint_fast32_t t2 = t0 | t1; + return t2; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xfe_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + const uint_fast32_t t0 = b | c; + const uint_fast32_t t1 = a | t0; + return t1; +} + +SIMDE_FUNCTION_ATTRIBUTES +uint_fast32_t +simde_x_ternarylogic_0xff_impl_(uint_fast32_t a, uint_fast32_t b, uint_fast32_t c) { + HEDLEY_STATIC_CAST(void, a); + HEDLEY_STATIC_CAST(void, b); + HEDLEY_STATIC_CAST(void, c); + const uint_fast32_t c1 = ~HEDLEY_STATIC_CAST(uint_fast32_t, 0); + return c1; +} + +#define SIMDE_X_TERNARYLOGIC_CASE(value) \ + case value: \ + SIMDE_VECTORIZE \ + for (size_t i = 0 ; i < (sizeof(r_.u32f) / sizeof(r_.u32f[0])) ; i++) { \ + r_.u32f[i] = HEDLEY_CONCAT3(simde_x_ternarylogic_, value, _impl_)(a_.u32f[i], b_.u32f[i], c_.u32f[i]); \ + } \ + break; + +#define SIMDE_X_TERNARYLOGIC_SWITCH(value) \ + switch(value) { \ + SIMDE_X_TERNARYLOGIC_CASE(0x00) \ + SIMDE_X_TERNARYLOGIC_CASE(0x01) \ + SIMDE_X_TERNARYLOGIC_CASE(0x02) \ + SIMDE_X_TERNARYLOGIC_CASE(0x03) \ + SIMDE_X_TERNARYLOGIC_CASE(0x04) \ + SIMDE_X_TERNARYLOGIC_CASE(0x05) \ + SIMDE_X_TERNARYLOGIC_CASE(0x06) \ + SIMDE_X_TERNARYLOGIC_CASE(0x07) \ + SIMDE_X_TERNARYLOGIC_CASE(0x08) \ + SIMDE_X_TERNARYLOGIC_CASE(0x09) \ + SIMDE_X_TERNARYLOGIC_CASE(0x0a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x0b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x0c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x0d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x0e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x0f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x10) \ + SIMDE_X_TERNARYLOGIC_CASE(0x11) \ + SIMDE_X_TERNARYLOGIC_CASE(0x12) \ + SIMDE_X_TERNARYLOGIC_CASE(0x13) \ + SIMDE_X_TERNARYLOGIC_CASE(0x14) \ + SIMDE_X_TERNARYLOGIC_CASE(0x15) \ + 
SIMDE_X_TERNARYLOGIC_CASE(0x16) \ + SIMDE_X_TERNARYLOGIC_CASE(0x17) \ + SIMDE_X_TERNARYLOGIC_CASE(0x18) \ + SIMDE_X_TERNARYLOGIC_CASE(0x19) \ + SIMDE_X_TERNARYLOGIC_CASE(0x1a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x1b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x1c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x1d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x1e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x1f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x20) \ + SIMDE_X_TERNARYLOGIC_CASE(0x21) \ + SIMDE_X_TERNARYLOGIC_CASE(0x22) \ + SIMDE_X_TERNARYLOGIC_CASE(0x23) \ + SIMDE_X_TERNARYLOGIC_CASE(0x24) \ + SIMDE_X_TERNARYLOGIC_CASE(0x25) \ + SIMDE_X_TERNARYLOGIC_CASE(0x26) \ + SIMDE_X_TERNARYLOGIC_CASE(0x27) \ + SIMDE_X_TERNARYLOGIC_CASE(0x28) \ + SIMDE_X_TERNARYLOGIC_CASE(0x29) \ + SIMDE_X_TERNARYLOGIC_CASE(0x2a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x2b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x2c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x2d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x2e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x2f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x30) \ + SIMDE_X_TERNARYLOGIC_CASE(0x31) \ + SIMDE_X_TERNARYLOGIC_CASE(0x32) \ + SIMDE_X_TERNARYLOGIC_CASE(0x33) \ + SIMDE_X_TERNARYLOGIC_CASE(0x34) \ + SIMDE_X_TERNARYLOGIC_CASE(0x35) \ + SIMDE_X_TERNARYLOGIC_CASE(0x36) \ + SIMDE_X_TERNARYLOGIC_CASE(0x37) \ + SIMDE_X_TERNARYLOGIC_CASE(0x38) \ + SIMDE_X_TERNARYLOGIC_CASE(0x39) \ + SIMDE_X_TERNARYLOGIC_CASE(0x3a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x3b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x3c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x3d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x3e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x3f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x40) \ + SIMDE_X_TERNARYLOGIC_CASE(0x41) \ + SIMDE_X_TERNARYLOGIC_CASE(0x42) \ + SIMDE_X_TERNARYLOGIC_CASE(0x43) \ + SIMDE_X_TERNARYLOGIC_CASE(0x44) \ + SIMDE_X_TERNARYLOGIC_CASE(0x45) \ + SIMDE_X_TERNARYLOGIC_CASE(0x46) \ + SIMDE_X_TERNARYLOGIC_CASE(0x47) \ + SIMDE_X_TERNARYLOGIC_CASE(0x48) \ + SIMDE_X_TERNARYLOGIC_CASE(0x49) \ + SIMDE_X_TERNARYLOGIC_CASE(0x4a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x4b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x4c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x4d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x4e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x4f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x50) \ + SIMDE_X_TERNARYLOGIC_CASE(0x51) \ + SIMDE_X_TERNARYLOGIC_CASE(0x52) \ + SIMDE_X_TERNARYLOGIC_CASE(0x53) \ + SIMDE_X_TERNARYLOGIC_CASE(0x54) \ + SIMDE_X_TERNARYLOGIC_CASE(0x55) \ + SIMDE_X_TERNARYLOGIC_CASE(0x56) \ + SIMDE_X_TERNARYLOGIC_CASE(0x57) \ + SIMDE_X_TERNARYLOGIC_CASE(0x58) \ + SIMDE_X_TERNARYLOGIC_CASE(0x59) \ + SIMDE_X_TERNARYLOGIC_CASE(0x5a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x5b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x5c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x5d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x5e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x5f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x60) \ + SIMDE_X_TERNARYLOGIC_CASE(0x61) \ + SIMDE_X_TERNARYLOGIC_CASE(0x62) \ + SIMDE_X_TERNARYLOGIC_CASE(0x63) \ + SIMDE_X_TERNARYLOGIC_CASE(0x64) \ + SIMDE_X_TERNARYLOGIC_CASE(0x65) \ + SIMDE_X_TERNARYLOGIC_CASE(0x66) \ + SIMDE_X_TERNARYLOGIC_CASE(0x67) \ + SIMDE_X_TERNARYLOGIC_CASE(0x68) \ + SIMDE_X_TERNARYLOGIC_CASE(0x69) \ + SIMDE_X_TERNARYLOGIC_CASE(0x6a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x6b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x6c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x6d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x6e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x6f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x70) \ + SIMDE_X_TERNARYLOGIC_CASE(0x71) \ + SIMDE_X_TERNARYLOGIC_CASE(0x72) \ + SIMDE_X_TERNARYLOGIC_CASE(0x73) \ + SIMDE_X_TERNARYLOGIC_CASE(0x74) \ + SIMDE_X_TERNARYLOGIC_CASE(0x75) \ + SIMDE_X_TERNARYLOGIC_CASE(0x76) \ + SIMDE_X_TERNARYLOGIC_CASE(0x77) \ + 
SIMDE_X_TERNARYLOGIC_CASE(0x78) \ + SIMDE_X_TERNARYLOGIC_CASE(0x79) \ + SIMDE_X_TERNARYLOGIC_CASE(0x7a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x7b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x7c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x7d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x7e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x7f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x80) \ + SIMDE_X_TERNARYLOGIC_CASE(0x81) \ + SIMDE_X_TERNARYLOGIC_CASE(0x82) \ + SIMDE_X_TERNARYLOGIC_CASE(0x83) \ + SIMDE_X_TERNARYLOGIC_CASE(0x84) \ + SIMDE_X_TERNARYLOGIC_CASE(0x85) \ + SIMDE_X_TERNARYLOGIC_CASE(0x86) \ + SIMDE_X_TERNARYLOGIC_CASE(0x87) \ + SIMDE_X_TERNARYLOGIC_CASE(0x88) \ + SIMDE_X_TERNARYLOGIC_CASE(0x89) \ + SIMDE_X_TERNARYLOGIC_CASE(0x8a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x8b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x8c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x8d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x8e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x8f) \ + SIMDE_X_TERNARYLOGIC_CASE(0x90) \ + SIMDE_X_TERNARYLOGIC_CASE(0x91) \ + SIMDE_X_TERNARYLOGIC_CASE(0x92) \ + SIMDE_X_TERNARYLOGIC_CASE(0x93) \ + SIMDE_X_TERNARYLOGIC_CASE(0x94) \ + SIMDE_X_TERNARYLOGIC_CASE(0x95) \ + SIMDE_X_TERNARYLOGIC_CASE(0x96) \ + SIMDE_X_TERNARYLOGIC_CASE(0x97) \ + SIMDE_X_TERNARYLOGIC_CASE(0x98) \ + SIMDE_X_TERNARYLOGIC_CASE(0x99) \ + SIMDE_X_TERNARYLOGIC_CASE(0x9a) \ + SIMDE_X_TERNARYLOGIC_CASE(0x9b) \ + SIMDE_X_TERNARYLOGIC_CASE(0x9c) \ + SIMDE_X_TERNARYLOGIC_CASE(0x9d) \ + SIMDE_X_TERNARYLOGIC_CASE(0x9e) \ + SIMDE_X_TERNARYLOGIC_CASE(0x9f) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa0) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa1) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa2) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa3) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa4) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa5) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa6) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa7) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa8) \ + SIMDE_X_TERNARYLOGIC_CASE(0xa9) \ + SIMDE_X_TERNARYLOGIC_CASE(0xaa) \ + SIMDE_X_TERNARYLOGIC_CASE(0xab) \ + SIMDE_X_TERNARYLOGIC_CASE(0xac) \ + SIMDE_X_TERNARYLOGIC_CASE(0xad) \ + SIMDE_X_TERNARYLOGIC_CASE(0xae) \ + SIMDE_X_TERNARYLOGIC_CASE(0xaf) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb0) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb1) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb2) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb3) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb4) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb5) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb6) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb7) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb8) \ + SIMDE_X_TERNARYLOGIC_CASE(0xb9) \ + SIMDE_X_TERNARYLOGIC_CASE(0xba) \ + SIMDE_X_TERNARYLOGIC_CASE(0xbb) \ + SIMDE_X_TERNARYLOGIC_CASE(0xbc) \ + SIMDE_X_TERNARYLOGIC_CASE(0xbd) \ + SIMDE_X_TERNARYLOGIC_CASE(0xbe) \ + SIMDE_X_TERNARYLOGIC_CASE(0xbf) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc0) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc1) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc2) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc3) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc4) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc5) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc6) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc7) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc8) \ + SIMDE_X_TERNARYLOGIC_CASE(0xc9) \ + SIMDE_X_TERNARYLOGIC_CASE(0xca) \ + SIMDE_X_TERNARYLOGIC_CASE(0xcb) \ + SIMDE_X_TERNARYLOGIC_CASE(0xcc) \ + SIMDE_X_TERNARYLOGIC_CASE(0xcd) \ + SIMDE_X_TERNARYLOGIC_CASE(0xce) \ + SIMDE_X_TERNARYLOGIC_CASE(0xcf) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd0) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd1) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd2) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd3) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd4) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd5) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd6) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd7) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd8) \ + SIMDE_X_TERNARYLOGIC_CASE(0xd9) \ + 
SIMDE_X_TERNARYLOGIC_CASE(0xda) \ + SIMDE_X_TERNARYLOGIC_CASE(0xdb) \ + SIMDE_X_TERNARYLOGIC_CASE(0xdc) \ + SIMDE_X_TERNARYLOGIC_CASE(0xdd) \ + SIMDE_X_TERNARYLOGIC_CASE(0xde) \ + SIMDE_X_TERNARYLOGIC_CASE(0xdf) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe0) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe1) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe2) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe3) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe4) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe5) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe6) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe7) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe8) \ + SIMDE_X_TERNARYLOGIC_CASE(0xe9) \ + SIMDE_X_TERNARYLOGIC_CASE(0xea) \ + SIMDE_X_TERNARYLOGIC_CASE(0xeb) \ + SIMDE_X_TERNARYLOGIC_CASE(0xec) \ + SIMDE_X_TERNARYLOGIC_CASE(0xed) \ + SIMDE_X_TERNARYLOGIC_CASE(0xee) \ + SIMDE_X_TERNARYLOGIC_CASE(0xef) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf0) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf1) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf2) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf3) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf4) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf5) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf6) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf7) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf8) \ + SIMDE_X_TERNARYLOGIC_CASE(0xf9) \ + SIMDE_X_TERNARYLOGIC_CASE(0xfa) \ + SIMDE_X_TERNARYLOGIC_CASE(0xfb) \ + SIMDE_X_TERNARYLOGIC_CASE(0xfc) \ + SIMDE_X_TERNARYLOGIC_CASE(0xfd) \ + SIMDE_X_TERNARYLOGIC_CASE(0xfe) \ + SIMDE_X_TERNARYLOGIC_CASE(0xff) \ + } + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_ternarylogic_epi32(a, b, c, imm8) _mm_ternarylogic_epi32(a, b, c, imm8) +#else + SIMDE_HUGE_FUNCTION_ATTRIBUTES + simde__m128i + simde_mm_ternarylogic_epi32(simde__m128i a, simde__m128i b, simde__m128i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_TERNARYLOGIC_COMPRESSION) + int to_do, mask; + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128i_private t_; + to_do = imm8; + + r_.u64 = a_.u64 ^ a_.u64; + + mask = 0xFF; + if ((to_do & mask) == mask) { + r_.u64 = ~r_.u64; + to_do &= ~mask; + } + + mask = 0xF0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 = a_.u64; + to_do &= ~mask; + } + + mask = 0xCC; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64; + to_do &= ~mask; + } + + mask = 0xAA; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= c_.u64; + to_do &= ~mask; + } + + mask = 0x0F; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~a_.u64; + to_do &= ~mask; + } + + mask = 0x33; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~b_.u64; + to_do &= ~mask; + } + + mask = 0x55; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64; + to_do &= ~mask; + } + + mask = 0x3C; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 ^ b_.u64; + to_do &= ~mask; + } + + mask = 0x5A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 ^ c_.u64; + to_do &= ~mask; + } + + mask = 0x66; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64 ^ c_.u64; + to_do &= ~mask; + } + + mask = 0xA0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x50; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64 & a_.u64; + to_do &= ~mask; + } + + mask = 0x0A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~a_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x88; + if ((to_do & mask) 
&& ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x44; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64 & b_.u64; + to_do &= ~mask; + } + + mask = 0x22; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~b_.u64 & c_.u64; + to_do &= ~mask; + } + + if (to_do & 0xc0) { + t_.u64 = a_.u64 & b_.u64; + if ((to_do & 0xc0) == 0xc0) r_.u64 |= t_.u64; + else if (to_do & 0x80) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x30) { + t_.u64 = ~b_.u64 & a_.u64; + if ((to_do & 0x30) == 0x30) r_.u64 |= t_.u64; + else if (to_do & 0x20) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x0c) { + t_.u64 = ~a_.u64 & b_.u64; + if ((to_do & 0x0c) == 0x0c) r_.u64 |= t_.u64; + else if (to_do & 0x08) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x03) { + t_.u64 = ~(a_.u64 | b_.u64); + if ((to_do & 0x03) == 0x03) r_.u64 |= t_.u64; + else if (to_do & 0x02) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + #else + uint64_t t; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + to_do = imm8; + + mask = 0xFF; + if ((to_do & mask) == mask) { + r_.u64[i] = UINT64_MAX; + to_do &= ~mask; + } + else r_.u64[i] = 0; + + mask = 0xF0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] = a_.u64[i]; + to_do &= ~mask; + } + + mask = 0xCC; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i]; + to_do &= ~mask; + } + + mask = 0xAA; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x0F; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~a_.u64[i]; + to_do &= ~mask; + } + + mask = 0x33; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x55; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x3C; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] ^ b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x5A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] ^ c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x66; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i] ^ c_.u64[i]; + to_do &= ~mask; + } + + mask = 0xA0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x50; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i] & a_.u64[i]; + to_do &= ~mask; + } + + mask = 0x0A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~a_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x88; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x44; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i] & b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x22; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~b_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + if (to_do & 0xc0) { + t = a_.u64[i] & b_.u64[i]; + if ((to_do & 0xc0) == 0xc0) r_.u64[i] |= t; + else if (to_do & 0x80) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x30) { + t = ~b_.u64[i] & a_.u64[i]; + if ((to_do & 0x30) == 0x30) r_.u64[i] |= t; + else if (to_do & 0x20) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] 
|= ~c_.u64[i] & t; + } + + if (to_do & 0x0c) { + t = ~a_.u64[i] & b_.u64[i]; + if ((to_do & 0x0c) == 0x0c) r_.u64[i] |= t; + else if (to_do & 0x08) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x03) { + t = ~(a_.u64[i] | b_.u64[i]); + if ((to_do & 0x03) == 0x03) r_.u64[i] |= t; + else if (to_do & 0x02) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + } + #endif + #else + SIMDE_X_TERNARYLOGIC_SWITCH(imm8 & 255) + #endif + + return simde__m128i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_ternarylogic_epi32 + #define _mm_ternarylogic_epi32(a, b, c, imm8) simde_mm_ternarylogic_epi32(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_mask_ternarylogic_epi32(src, k, a, b, imm8) _mm_mask_ternarylogic_epi32(src, k, a, b, imm8) +#else + #define simde_mm_mask_ternarylogic_epi32(src, k, a, b, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm_ternarylogic_epi32(src, a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_ternarylogic_epi32 + #define _mm_mask_ternarylogic_epi32(src, k, a, b, imm8) simde_mm_mask_ternarylogic_epi32(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm_maskz_ternarylogic_epi32(k, a, b, c, imm8) _mm_maskz_ternarylogic_epi32(k, a, b, c, imm8) +#else + #define simde_mm_maskz_ternarylogic_epi32(k, a, b, c, imm8) simde_mm_maskz_mov_epi32(k, simde_mm_ternarylogic_epi32(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_ternarylogic_epi32 + #define _mm_maskz_ternarylogic_epi32(k, a, b, c, imm8) simde_mm_maskz_ternarylogic_epi32(k, a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm256_ternarylogic_epi32(a, b, c, imm8) _mm256_ternarylogic_epi32(a, b, c, imm8) +#else + SIMDE_HUGE_FUNCTION_ATTRIBUTES + simde__m256i + simde_mm256_ternarylogic_epi32(simde__m256i a, simde__m256i b, simde__m256i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + c_ = simde__m256i_to_private(c); + + #if defined(SIMDE_TERNARYLOGIC_COMPRESSION) + int to_do, mask; + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m256i_private t_; + to_do = imm8; + + r_.u64 = a_.u64 ^ a_.u64; + + mask = 0xFF; + if ((to_do & mask) == mask) { + r_.u64 = ~r_.u64; + to_do &= ~mask; + } + + mask = 0xF0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 = a_.u64; + to_do &= ~mask; + } + + mask = 0xCC; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64; + to_do &= ~mask; + } + + mask = 0xAA; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= c_.u64; + to_do &= ~mask; + } + + mask = 0x0F; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~a_.u64; + to_do &= ~mask; + } + + mask = 0x33; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~b_.u64; + to_do &= ~mask; + } + + mask = 0x55; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64; + to_do &= ~mask; + } + + mask = 0x3C; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 ^ b_.u64; + to_do &= ~mask; + } + + mask = 
0x5A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 ^ c_.u64; + to_do &= ~mask; + } + + mask = 0x66; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64 ^ c_.u64; + to_do &= ~mask; + } + + mask = 0xA0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x50; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64 & a_.u64; + to_do &= ~mask; + } + + mask = 0x0A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~a_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x88; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x44; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64 & b_.u64; + to_do &= ~mask; + } + + mask = 0x22; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~b_.u64 & c_.u64; + to_do &= ~mask; + } + + if (to_do & 0xc0) { + t_.u64 = a_.u64 & b_.u64; + if ((to_do & 0xc0) == 0xc0) r_.u64 |= t_.u64; + else if (to_do & 0x80) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x30) { + t_.u64 = ~b_.u64 & a_.u64; + if ((to_do & 0x30) == 0x30) r_.u64 |= t_.u64; + else if (to_do & 0x20) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x0c) { + t_.u64 = ~a_.u64 & b_.u64; + if ((to_do & 0x0c) == 0x0c) r_.u64 |= t_.u64; + else if (to_do & 0x08) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x03) { + t_.u64 = ~(a_.u64 | b_.u64); + if ((to_do & 0x03) == 0x03) r_.u64 |= t_.u64; + else if (to_do & 0x02) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + #else + uint64_t t; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + to_do = imm8; + + mask = 0xFF; + if ((to_do & mask) == mask) { + r_.u64[i] = UINT64_MAX; + to_do &= ~mask; + } + else r_.u64[i] = 0; + + mask = 0xF0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] = a_.u64[i]; + to_do &= ~mask; + } + + mask = 0xCC; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i]; + to_do &= ~mask; + } + + mask = 0xAA; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x0F; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~a_.u64[i]; + to_do &= ~mask; + } + + mask = 0x33; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x55; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x3C; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] ^ b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x5A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] ^ c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x66; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i] ^ c_.u64[i]; + to_do &= ~mask; + } + + mask = 0xA0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x50; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i] & a_.u64[i]; + to_do &= ~mask; + } + + mask = 0x0A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~a_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x88; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i] & c_.u64[i]; + to_do &= ~mask; 
+ } + + mask = 0x44; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i] & b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x22; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~b_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + if (to_do & 0xc0) { + t = a_.u64[i] & b_.u64[i]; + if ((to_do & 0xc0) == 0xc0) r_.u64[i] |= t; + else if (to_do & 0x80) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x30) { + t = ~b_.u64[i] & a_.u64[i]; + if ((to_do & 0x30) == 0x30) r_.u64[i] |= t; + else if (to_do & 0x20) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x0c) { + t = ~a_.u64[i] & b_.u64[i]; + if ((to_do & 0x0c) == 0x0c) r_.u64[i] |= t; + else if (to_do & 0x08) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x03) { + t = ~(a_.u64[i] | b_.u64[i]); + if ((to_do & 0x03) == 0x03) r_.u64[i] |= t; + else if (to_do & 0x02) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + } + #endif + #else + SIMDE_X_TERNARYLOGIC_SWITCH(imm8 & 255) + #endif + + return simde__m256i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_ternarylogic_epi32 + #define _mm256_ternarylogic_epi32(a, b, c, imm8) simde_mm256_ternarylogic_epi32(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm256_mask_ternarylogic_epi32(src, k, a, b, imm8) _mm256_mask_ternarylogic_epi32(src, k, a, b, imm8) +#else + #define simde_mm256_mask_ternarylogic_epi32(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_ternarylogic_epi32(src, a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_ternarylogic_epi32 + #define _mm256_mask_ternarylogic_epi32(src, k, a, b, imm8) simde_mm256_mask_ternarylogic_epi32(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm256_maskz_ternarylogic_epi32(k, a, b, c, imm8) _mm256_maskz_ternarylogic_epi32(k, a, b, c, imm8) +#else + #define simde_mm256_maskz_ternarylogic_epi32(k, a, b, c, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_ternarylogic_epi32(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_ternarylogic_epi32 + #define _mm256_maskz_ternarylogic_epi32(k, a, b, c, imm8) simde_mm256_maskz_ternarylogic_epi32(k, a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_ternarylogic_epi32(a, b, c, imm8) _mm512_ternarylogic_epi32(a, b, c, imm8) +#else + SIMDE_HUGE_FUNCTION_ATTRIBUTES + simde__m512i + simde_mm512_ternarylogic_epi32(simde__m512i a, simde__m512i b, simde__m512i c, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + c_ = simde__m512i_to_private(c); + + #if defined(SIMDE_TERNARYLOGIC_COMPRESSION) + int to_do, mask; + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private t_; + to_do = imm8; + + r_.u64 = a_.u64 ^ a_.u64; + + mask = 0xFF; + if ((to_do & mask) == mask) { + r_.u64 = ~r_.u64; + to_do &= ~mask; + } + + mask = 0xF0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 = a_.u64; + to_do &= ~mask; + } + + mask = 0xCC; + if ((to_do & mask) && 
((imm8 & mask) == mask)) { + r_.u64 |= b_.u64; + to_do &= ~mask; + } + + mask = 0xAA; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= c_.u64; + to_do &= ~mask; + } + + mask = 0x0F; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~a_.u64; + to_do &= ~mask; + } + + mask = 0x33; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~b_.u64; + to_do &= ~mask; + } + + mask = 0x55; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64; + to_do &= ~mask; + } + + mask = 0x3C; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 ^ b_.u64; + to_do &= ~mask; + } + + mask = 0x5A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 ^ c_.u64; + to_do &= ~mask; + } + + mask = 0x66; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64 ^ c_.u64; + to_do &= ~mask; + } + + mask = 0xA0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= a_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x50; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64 & a_.u64; + to_do &= ~mask; + } + + mask = 0x0A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~a_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x88; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= b_.u64 & c_.u64; + to_do &= ~mask; + } + + mask = 0x44; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~c_.u64 & b_.u64; + to_do &= ~mask; + } + + mask = 0x22; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64 |= ~b_.u64 & c_.u64; + to_do &= ~mask; + } + + if (to_do & 0xc0) { + t_.u64 = a_.u64 & b_.u64; + if ((to_do & 0xc0) == 0xc0) r_.u64 |= t_.u64; + else if (to_do & 0x80) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x30) { + t_.u64 = ~b_.u64 & a_.u64; + if ((to_do & 0x30) == 0x30) r_.u64 |= t_.u64; + else if (to_do & 0x20) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x0c) { + t_.u64 = ~a_.u64 & b_.u64; + if ((to_do & 0x0c) == 0x0c) r_.u64 |= t_.u64; + else if (to_do & 0x08) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + + if (to_do & 0x03) { + t_.u64 = ~(a_.u64 | b_.u64); + if ((to_do & 0x03) == 0x03) r_.u64 |= t_.u64; + else if (to_do & 0x02) r_.u64 |= c_.u64 & t_.u64; + else r_.u64 |= ~c_.u64 & t_.u64; + } + #else + uint64_t t; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + to_do = imm8; + + mask = 0xFF; + if ((to_do & mask) == mask) { + r_.u64[i] = UINT64_MAX; + to_do &= ~mask; + } + else r_.u64[i] = 0; + + mask = 0xF0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] = a_.u64[i]; + to_do &= ~mask; + } + + mask = 0xCC; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i]; + to_do &= ~mask; + } + + mask = 0xAA; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x0F; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~a_.u64[i]; + to_do &= ~mask; + } + + mask = 0x33; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x55; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x3C; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] ^ b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x5A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] ^ c_.u64[i]; + to_do &= ~mask; 
+ } + + mask = 0x66; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i] ^ c_.u64[i]; + to_do &= ~mask; + } + + mask = 0xA0; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= a_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x50; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i] & a_.u64[i]; + to_do &= ~mask; + } + + mask = 0x0A; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~a_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x88; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= b_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + mask = 0x44; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~c_.u64[i] & b_.u64[i]; + to_do &= ~mask; + } + + mask = 0x22; + if ((to_do & mask) && ((imm8 & mask) == mask)) { + r_.u64[i] |= ~b_.u64[i] & c_.u64[i]; + to_do &= ~mask; + } + + if (to_do & 0xc0) { + t = a_.u64[i] & b_.u64[i]; + if ((to_do & 0xc0) == 0xc0) r_.u64[i] |= t; + else if (to_do & 0x80) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x30) { + t = ~b_.u64[i] & a_.u64[i]; + if ((to_do & 0x30) == 0x30) r_.u64[i] |= t; + else if (to_do & 0x20) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x0c) { + t = ~a_.u64[i] & b_.u64[i]; + if ((to_do & 0x0c) == 0x0c) r_.u64[i] |= t; + else if (to_do & 0x08) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + + if (to_do & 0x03) { + t = ~(a_.u64[i] | b_.u64[i]); + if ((to_do & 0x03) == 0x03) r_.u64[i] |= t; + else if (to_do & 0x02) r_.u64[i] |= c_.u64[i] & t; + else r_.u64[i] |= ~c_.u64[i] & t; + } + } + #endif + #else + SIMDE_X_TERNARYLOGIC_SWITCH(imm8 & 255) + #endif + + return simde__m512i_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_ternarylogic_epi32 + #define _mm512_ternarylogic_epi32(a, b, c, imm8) simde_mm512_ternarylogic_epi32(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_ternarylogic_epi32(src, k, a, b, imm8) _mm512_mask_ternarylogic_epi32(src, k, a, b, imm8) +#else + #define simde_mm512_mask_ternarylogic_epi32(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_ternarylogic_epi32(src, a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ternarylogic_epi32 + #define _mm512_mask_ternarylogic_epi32(src, k, a, b, imm8) simde_mm512_mask_ternarylogic_epi32(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_ternarylogic_epi32(k, a, b, c, imm8) _mm512_maskz_ternarylogic_epi32(k, a, b, c, imm8) +#else + #define simde_mm512_maskz_ternarylogic_epi32(k, a, b, c, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_ternarylogic_epi32(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_ternarylogic_epi32 + #define _mm512_maskz_ternarylogic_epi32(k, a, b, c, imm8) simde_mm512_maskz_ternarylogic_epi32(k, a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_ternarylogic_epi64(a, b, c, imm8) _mm_ternarylogic_epi64(a, b, c, imm8) +#else + #define simde_mm_ternarylogic_epi64(a, b, c, imm8) simde_mm_ternarylogic_epi32(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_ternarylogic_epi64 + #define _mm_ternarylogic_epi64(a, b, c, imm8) 
simde_mm_ternarylogic_epi64(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_ternarylogic_epi64(src, k, a, b, imm8) _mm_mask_ternarylogic_epi64(src, k, a, b, imm8) +#else + #define simde_mm_mask_ternarylogic_epi64(src, k, a, b, imm8) simde_mm_mask_mov_epi64(src, k, simde_mm_ternarylogic_epi64(src, a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_ternarylogic_epi64 + #define _mm_mask_ternarylogic_epi64(src, k, a, b, imm8) simde_mm_mask_ternarylogic_epi64(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_ternarylogic_epi64(k, a, b, c, imm8) _mm_maskz_ternarylogic_epi64(k, a, b, c, imm8) +#else + #define simde_mm_maskz_ternarylogic_epi64(k, a, b, c, imm8) simde_mm_maskz_mov_epi64(k, simde_mm_ternarylogic_epi64(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_ternarylogic_epi64 + #define _mm_maskz_ternarylogic_epi64(k, a, b, c, imm8) simde_mm_maskz_ternarylogic_epi64(k, a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_ternarylogic_epi64(a, b, c, imm8) _mm256_ternarylogic_epi64(a, b, c, imm8) +#else + #define simde_mm256_ternarylogic_epi64(a, b, c, imm8) simde_mm256_ternarylogic_epi32(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_ternarylogic_epi64 + #define _mm256_ternarylogic_epi64(a, b, c, imm8) simde_mm256_ternarylogic_epi64(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_ternarylogic_epi64(src, k, a, b, imm8) _mm256_mask_ternarylogic_epi64(src, k, a, b, imm8) +#else + #define simde_mm256_mask_ternarylogic_epi64(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_ternarylogic_epi64(src, a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_ternarylogic_epi64 + #define _mm256_mask_ternarylogic_epi64(src, k, a, b, imm8) simde_mm256_mask_ternarylogic_epi64(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_ternarylogic_epi64(k, a, b, c, imm8) _mm256_maskz_ternarylogic_epi64(k, a, b, c, imm8) +#else + #define simde_mm256_maskz_ternarylogic_epi64(k, a, b, c, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_ternarylogic_epi64(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_ternarylogic_epi64 + #define _mm256_maskz_ternarylogic_epi64(k, a, b, c, imm8) simde_mm256_maskz_ternarylogic_epi64(k, a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_ternarylogic_epi64(a, b, c, imm8) _mm512_ternarylogic_epi64(a, b, c, imm8) +#else + #define simde_mm512_ternarylogic_epi64(a, b, c, imm8) simde_mm512_ternarylogic_epi32(a, b, c, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_ternarylogic_epi64 + #define _mm512_ternarylogic_epi64(a, b, c, imm8) simde_mm512_ternarylogic_epi64(a, b, c, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define 
simde_mm512_mask_ternarylogic_epi64(src, k, a, b, imm8) _mm512_mask_ternarylogic_epi64(src, k, a, b, imm8) +#else + #define simde_mm512_mask_ternarylogic_epi64(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_ternarylogic_epi64(src, a, b, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ternarylogic_epi64 + #define _mm512_mask_ternarylogic_epi64(src, k, a, b, imm8) simde_mm512_mask_ternarylogic_epi64(src, k, a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_ternarylogic_epi64(k, a, b, c, imm8) _mm512_maskz_ternarylogic_epi64(k, a, b, c, imm8) +#else + #define simde_mm512_maskz_ternarylogic_epi64(k, a, b, c, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_ternarylogic_epi64(a, b, c, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_ternarylogic_epi64 + #define _mm512_maskz_ternarylogic_epi64(k, a, b, c, imm8) simde_mm512_maskz_ternarylogic_epi64(k, a, b, c, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TERNARYLOGIC_H) */ +/* :: End simde/x86/avx512/ternarylogic.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/testn.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2021 Andrew Rodriguez + */ + +#if !defined(SIMDE_X86_AVX512_TESTN_H) +#define SIMDE_X86_AVX512_TESTN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_testn_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_testn_epi64_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (!(a_.i64[i] & b_.i64[i])) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_testn_epi64_mask + #define _mm512_testn_epi64_mask(a, b) simde_mm512_testn_epi64_mask(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TESTN_H) */ +/* :: End simde/x86/avx512/testn.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/unpacklo.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_UNPACKLO_H) +#define SIMDE_X86_AVX512_UNPACKLO_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpacklo_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_unpacklo_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, + 0, 64, 1, 65, 2, 66, 3, 67, + 4, 68, 5, 69, 6, 70, 7, 71, + 16, 80, 17, 81, 18, 82, 19, 83, + 20, 84, 21, 85, 22, 86, 23, 87, + 32, 96, 33, 97, 34, 98, 35, 99, + 36, 100, 37, 101, 38, 102, 39, 103, + 48, 112, 49, 113, 50, 114, 51, 115, + 52, 116, 53, 117, 54, 118, 55, 119); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpacklo_epi8(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpacklo_epi8(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpacklo_epi8 + #define _mm512_unpacklo_epi8(a, b) simde_mm512_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpacklo_epi8(simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_unpacklo_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_unpacklo_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpacklo_epi8 + #define _mm512_mask_unpacklo_epi8(src, k, a, b) simde_mm512_mask_unpacklo_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpacklo_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_unpacklo_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_unpacklo_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpacklo_epi8 + #define _mm512_maskz_unpacklo_epi8(k, a, b) simde_mm512_maskz_unpacklo_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpacklo_epi8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpacklo_epi8(src, k, a, b); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_unpacklo_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpacklo_epi8 + #define _mm256_mask_unpacklo_epi8(src, k, a, b) simde_mm256_mask_unpacklo_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpacklo_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if 
defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpacklo_epi8(k, a, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_unpacklo_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpacklo_epi8 + #define _mm256_maskz_unpacklo_epi8(k, a, b) simde_mm256_maskz_unpacklo_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpacklo_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpacklo_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_unpacklo_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpacklo_epi8 + #define _mm_mask_unpacklo_epi8(src, k, a, b) simde_mm_mask_unpacklo_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpacklo_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpacklo_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_unpacklo_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpacklo_epi8 + #define _mm_maskz_unpacklo_epi8(k, a, b) simde_mm_maskz_unpacklo_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpacklo_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_unpacklo_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, + 0, 32, 1, 33, 2, 34, 3, 35, 8, 40, 9, 41, 10, 42, 11, 43, + 16, 48, 17, 49, 18, 50, 19, 51, 24, 56, 25, 57, 26, 58, 27, 59); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpacklo_epi16(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpacklo_epi16(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpacklo_epi16 + #define _mm512_unpacklo_epi16(a, b) simde_mm512_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpacklo_epi16(simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_unpacklo_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_unpacklo_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpacklo_epi16 + #define _mm512_mask_unpacklo_epi16(src, k, a, b) simde_mm512_mask_unpacklo_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpacklo_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_unpacklo_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, 
simde_mm512_unpacklo_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpacklo_epi16 + #define _mm512_maskz_unpacklo_epi16(k, a, b) simde_mm512_maskz_unpacklo_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpacklo_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpacklo_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_unpacklo_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpacklo_epi16 + #define _mm256_mask_unpacklo_epi16(src, k, a, b) simde_mm256_mask_unpacklo_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpacklo_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpacklo_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_unpacklo_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpacklo_epi16 + #define _mm256_maskz_unpacklo_epi16(k, a, b) simde_mm256_maskz_unpacklo_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpacklo_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpacklo_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_unpacklo_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpacklo_epi16 + #define _mm_mask_unpacklo_epi16(src, k, a, b) simde_mm_mask_unpacklo_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpacklo_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpacklo_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_unpacklo_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpacklo_epi16 + #define _mm_maskz_unpacklo_epi16(k, a, b) simde_mm_maskz_unpacklo_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpacklo_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpacklo_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, + 0, 16, 1, 17, 4, 20, 5, 21, + 8, 24, 9, 25, 12, 28, 13, 29); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpacklo_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpacklo_epi32(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpacklo_epi32 + #define _mm512_unpacklo_epi32(a, b) simde_mm512_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpacklo_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpacklo_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_unpacklo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpacklo_epi32 + #define _mm512_mask_unpacklo_epi32(src, k, a, b) simde_mm512_mask_unpacklo_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpacklo_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpacklo_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_unpacklo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpacklo_epi32 + #define _mm512_maskz_unpacklo_epi32(k, a, b) simde_mm512_maskz_unpacklo_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpacklo_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpacklo_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_unpacklo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpacklo_epi32 + #define _mm256_mask_unpacklo_epi32(src, k, a, b) simde_mm256_mask_unpacklo_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpacklo_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpacklo_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_unpacklo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpacklo_epi32 + #define _mm256_maskz_unpacklo_epi32(k, a, b) simde_mm256_maskz_unpacklo_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpacklo_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpacklo_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_unpacklo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpacklo_epi32 + #define _mm_mask_unpacklo_epi32(src, k, a, b) simde_mm_mask_unpacklo_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpacklo_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpacklo_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_unpacklo_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpacklo_epi32 + #define _mm_maskz_unpacklo_epi32(k, a, b) simde_mm_maskz_unpacklo_epi32(k, 
a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpacklo_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpacklo_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 0, 8, 2, 10, 4, 12, 6, 14); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpacklo_epi64(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpacklo_epi64(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i]; + r_.i64[2 * i + 1] = b_.i64[2 * i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpacklo_epi64 + #define _mm512_unpacklo_epi64(a, b) simde_mm512_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpacklo_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpacklo_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpacklo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpacklo_epi64 + #define _mm512_mask_unpacklo_epi64(src, k, a, b) simde_mm512_mask_unpacklo_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpacklo_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpacklo_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpacklo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpacklo_epi64 + #define _mm512_maskz_unpacklo_epi64(k, a, b) simde_mm512_maskz_unpacklo_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpacklo_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpacklo_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_unpacklo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpacklo_epi64 + #define _mm256_mask_unpacklo_epi64(src, k, a, b) simde_mm256_mask_unpacklo_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpacklo_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpacklo_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_unpacklo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpacklo_epi64 + #define _mm256_maskz_unpacklo_epi64(k, a, b) simde_mm256_maskz_unpacklo_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpacklo_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpacklo_epi64(src, k, a, b); + #else + return 
simde_mm_mask_mov_epi64(src, k, simde_mm_unpacklo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpacklo_epi64 + #define _mm_mask_unpacklo_epi64(src, k, a, b) simde_mm_mask_unpacklo_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpacklo_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpacklo_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_unpacklo_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpacklo_epi64 + #define _mm_maskz_unpacklo_epi64(k, a, b) simde_mm_maskz_unpacklo_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_unpacklo_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpacklo_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, + 0, 16, 1, 17, 4, 20, 5, 21, + 8, 24, 9, 25, 12, 28, 13, 29); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256[0] = simde_mm256_unpacklo_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_unpacklo_ps(a_.m256[1], b_.m256[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0]) / 2) ; i++) { + r_.f32[2 * i] = a_.f32[i + ~(~i | 1)]; + r_.f32[2 * i + 1] = b_.f32[i + ~(~i | 1)]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpacklo_ps + #define _mm512_unpacklo_ps(a, b) simde_mm512_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_unpacklo_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpacklo_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpacklo_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpacklo_ps + #define _mm512_mask_unpacklo_ps(src, k, a, b) simde_mm512_mask_unpacklo_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_unpacklo_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpacklo_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_unpacklo_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpacklo_ps + #define _mm512_maskz_unpacklo_ps(k, a, b) simde_mm512_maskz_unpacklo_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_unpacklo_ps(simde__m256 src, simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpacklo_ps(src, k, a, b); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_unpacklo_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpacklo_ps + #define _mm256_mask_unpacklo_ps(src, k, a, b) simde_mm256_mask_unpacklo_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 
+simde_mm256_maskz_unpacklo_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpacklo_ps(k, a, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_unpacklo_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpacklo_ps + #define _mm256_maskz_unpacklo_ps(k, a, b) simde_mm256_maskz_unpacklo_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_unpacklo_ps(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpacklo_ps(src, k, a, b); + #else + return simde_mm_mask_mov_ps(src, k, simde_mm_unpacklo_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpacklo_ps + #define _mm_mask_unpacklo_ps(src, k, a, b) simde_mm_mask_unpacklo_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_unpacklo_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpacklo_ps(k, a, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_unpacklo_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpacklo_ps + #define _mm_maskz_unpacklo_ps(k, a, b) simde_mm_maskz_unpacklo_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_unpacklo_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpacklo_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 0, 8, 2, 10, 4, 12, 6, 14); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_unpacklo_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_unpacklo_pd(a_.m256d[1], b_.m256d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0]) / 2) ; i++) { + r_.f64[2 * i] = a_.f64[2 * i]; + r_.f64[2 * i + 1] = b_.f64[2 * i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpacklo_pd + #define _mm512_unpacklo_pd(a, b) simde_mm512_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_unpacklo_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpacklo_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpacklo_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpacklo_pd + #define _mm512_mask_unpacklo_pd(src, k, a, b) simde_mm512_mask_unpacklo_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_unpacklo_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpacklo_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_unpacklo_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpacklo_pd + #define _mm512_maskz_unpacklo_pd(k, 
a, b) simde_mm512_maskz_unpacklo_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_unpacklo_pd(simde__m256d src, simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpacklo_pd(src, k, a, b); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_unpacklo_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpacklo_pd + #define _mm256_mask_unpacklo_pd(src, k, a, b) simde_mm256_mask_unpacklo_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_unpacklo_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpacklo_pd(k, a, b); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_unpacklo_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpacklo_pd + #define _mm256_maskz_unpacklo_pd(k, a, b) simde_mm256_maskz_unpacklo_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_unpacklo_pd(simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpacklo_pd(src, k, a, b); + #else + return simde_mm_mask_mov_pd(src, k, simde_mm_unpacklo_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpacklo_pd + #define _mm_mask_unpacklo_pd(src, k, a, b) simde_mm_mask_unpacklo_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_unpacklo_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpacklo_pd(k, a, b); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_unpacklo_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpacklo_pd + #define _mm_maskz_unpacklo_pd(k, a, b) simde_mm_maskz_unpacklo_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_UNPACKLO_H) */ +/* :: End simde/x86/avx512/unpacklo.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/unpackhi.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_UNPACKHI_H) +#define SIMDE_X86_AVX512_UNPACKHI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpackhi_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_unpackhi_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 64, a_.i8, b_.i8, + 8, 72, 9, 73, 10, 74, 11, 75, + 12, 76, 13, 77, 14, 78, 15, 79, + 24, 88, 25, 89, 26, 90, 27, 91, + 28, 92, 29, 93, 30, 94, 31, 95, + 40, 104, 41, 105, 42, 106, 43, 107, + 44, 108, 45, 109, 46, 110, 47, 111, + 56, 120, 57, 121, 58, 122, 59, 123, + 60, 124, 61, 125, 62, 126, 63, 127); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpackhi_epi8(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpackhi_epi8(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpackhi_epi8 + #define _mm512_unpackhi_epi8(a, b) simde_mm512_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpackhi_epi8(simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_unpackhi_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_unpackhi_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpackhi_epi8 + #define _mm512_mask_unpackhi_epi8(src, k, a, b) simde_mm512_mask_unpackhi_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpackhi_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_unpackhi_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_unpackhi_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpackhi_epi8 + #define _mm512_maskz_unpackhi_epi8(k, a, b) simde_mm512_maskz_unpackhi_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpackhi_epi8(simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpackhi_epi8(src, k, a, b); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_unpackhi_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpackhi_epi8 + #define _mm256_mask_unpackhi_epi8(src, k, a, b) simde_mm256_mask_unpackhi_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpackhi_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpackhi_epi8(k, a, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_unpackhi_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpackhi_epi8 + #define _mm256_maskz_unpackhi_epi8(k, a, b) simde_mm256_maskz_unpackhi_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpackhi_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpackhi_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_unpackhi_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpackhi_epi8 + #define _mm_mask_unpackhi_epi8(src, k, a, b) simde_mm_mask_unpackhi_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpackhi_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpackhi_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_unpackhi_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpackhi_epi8 + #define _mm_maskz_unpackhi_epi8(k, a, b) simde_mm_maskz_unpackhi_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpackhi_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_unpackhi_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 64, a_.i16, b_.i16, + 4, 36, 5, 37, 6, 38, 7, 39, 12, 44, 13, 45, 14, 46, 15, 47, + 20, 52, 21, 53, 22, 54, 23, 55, 28, 60, 29, 61, 30, 62, 31, 63); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpackhi_epi16(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpackhi_epi16(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpackhi_epi16 + #define _mm512_unpackhi_epi16(a, b) simde_mm512_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpackhi_epi16(simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_unpackhi_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_unpackhi_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpackhi_epi16 + #define _mm512_mask_unpackhi_epi16(src, k, a, b) 
simde_mm512_mask_unpackhi_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpackhi_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_unpackhi_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_unpackhi_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpackhi_epi16 + #define _mm512_maskz_unpackhi_epi16(k, a, b) simde_mm512_maskz_unpackhi_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpackhi_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpackhi_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_unpackhi_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpackhi_epi16 + #define _mm256_mask_unpackhi_epi16(src, k, a, b) simde_mm256_mask_unpackhi_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpackhi_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpackhi_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_unpackhi_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpackhi_epi16 + #define _mm256_maskz_unpackhi_epi16(k, a, b) simde_mm256_maskz_unpackhi_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpackhi_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpackhi_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_unpackhi_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpackhi_epi16 + #define _mm_mask_unpackhi_epi16(src, k, a, b) simde_mm_mask_unpackhi_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpackhi_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpackhi_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_unpackhi_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpackhi_epi16 + #define _mm_maskz_unpackhi_epi16(k, a, b) simde_mm_maskz_unpackhi_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpackhi_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpackhi_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, + 2, 18, 3 , 19, 6, 22, 7, 23, + 10, 26, 11, 27, 14, 30, 15, 31); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpackhi_epi32(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = 
simde_mm256_unpackhi_epi32(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpackhi_epi32 + #define _mm512_unpackhi_epi32(a, b) simde_mm512_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpackhi_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpackhi_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_unpackhi_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpackhi_epi32 + #define _mm512_mask_unpackhi_epi32(src, k, a, b) simde_mm512_mask_unpackhi_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpackhi_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpackhi_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_unpackhi_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpackhi_epi32 + #define _mm512_maskz_unpackhi_epi32(k, a, b) simde_mm512_maskz_unpackhi_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpackhi_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpackhi_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_unpackhi_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpackhi_epi32 + #define _mm256_mask_unpackhi_epi32(src, k, a, b) simde_mm256_mask_unpackhi_epi32(src, k, a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpackhi_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpackhi_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_unpackhi_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpackhi_epi32 + #define _mm256_maskz_unpackhi_epi32(k, a, b) simde_mm256_maskz_unpackhi_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpackhi_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpackhi_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_unpackhi_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpackhi_epi32 + #define _mm_mask_unpackhi_epi32(src, k, a, b) simde_mm_mask_unpackhi_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpackhi_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return 
_mm_maskz_unpackhi_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_unpackhi_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpackhi_epi32 + #define _mm_maskz_unpackhi_epi32(k, a, b) simde_mm_maskz_unpackhi_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_unpackhi_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpackhi_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.i64, b_.i64, 1, 9, 3, 11, 5, 13, 7, 15); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_unpackhi_epi64(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_unpackhi_epi64(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i + 1]; + r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpackhi_epi64 + #define _mm512_unpackhi_epi64(a, b) simde_mm512_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_unpackhi_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpackhi_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_unpackhi_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpackhi_epi64 + #define _mm512_mask_unpackhi_epi64(src, k, a, b) simde_mm512_mask_unpackhi_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_unpackhi_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpackhi_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_unpackhi_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpackhi_epi64 + #define _mm512_maskz_unpackhi_epi64(k, a, b) simde_mm512_maskz_unpackhi_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_unpackhi_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpackhi_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_unpackhi_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpackhi_epi64 + #define _mm256_mask_unpackhi_epi64(src, k, a, b) simde_mm256_mask_unpackhi_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_unpackhi_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpackhi_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_unpackhi_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpackhi_epi64 + #define _mm256_maskz_unpackhi_epi64(k, a, b) 
simde_mm256_maskz_unpackhi_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_unpackhi_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpackhi_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_unpackhi_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpackhi_epi64 + #define _mm_mask_unpackhi_epi64(src, k, a, b) simde_mm_mask_unpackhi_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_unpackhi_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpackhi_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_unpackhi_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpackhi_epi64 + #define _mm_maskz_unpackhi_epi64(k, a, b) simde_mm_maskz_unpackhi_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_unpackhi_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpackhi_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.f32, b_.f32, + 2, 18, 3 , 19, 6, 22, 7, 23, + 10, 26, 11, 27, 14, 30, 15, 31); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256[0] = simde_mm256_unpackhi_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_unpackhi_ps(a_.m256[1], b_.m256[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0]) / 2) ; i++) { + r_.f32[2 * i] = a_.f32[i + 2 + ~(~i | 1)]; + r_.f32[2 * i + 1] = b_.f32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpackhi_ps + #define _mm512_unpackhi_ps(a, b) simde_mm512_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_unpackhi_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpackhi_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_unpackhi_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpackhi_ps + #define _mm512_mask_unpackhi_ps(src, k, a, b) simde_mm512_mask_unpackhi_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_unpackhi_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpackhi_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_unpackhi_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpackhi_ps + #define _mm512_maskz_unpackhi_ps(k, a, b) simde_mm512_maskz_unpackhi_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_unpackhi_ps(simde__m256 src, simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpackhi_ps(src, k, a, b); + #else + return simde_mm256_mask_mov_ps(src, k, 
simde_mm256_unpackhi_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpackhi_ps + #define _mm256_mask_unpackhi_ps(src, k, a, b) simde_mm256_mask_unpackhi_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_unpackhi_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpackhi_ps(k, a, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_unpackhi_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpackhi_ps + #define _mm256_maskz_unpackhi_ps(k, a, b) simde_mm256_maskz_unpackhi_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_unpackhi_ps(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpackhi_ps(src, k, a, b); + #else + return simde_mm_mask_mov_ps(src, k, simde_mm_unpackhi_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpackhi_ps + #define _mm_mask_unpackhi_ps(src, k, a, b) simde_mm_mask_unpackhi_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_unpackhi_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpackhi_ps(k, a, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_unpackhi_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpackhi_ps + #define _mm_maskz_unpackhi_ps(k, a, b) simde_mm_maskz_unpackhi_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_unpackhi_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_unpackhi_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 64, a_.f64, b_.f64, 1, 9, 3, 11, 5, 13, 7, 15); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_unpackhi_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_unpackhi_pd(a_.m256d[1], b_.m256d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0]) / 2) ; i++) { + r_.f64[2 * i] = a_.f64[2 * i + 1]; + r_.f64[2 * i + 1] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_unpackhi_pd + #define _mm512_unpackhi_pd(a, b) simde_mm512_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_unpackhi_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_unpackhi_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_unpackhi_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_unpackhi_pd + #define _mm512_mask_unpackhi_pd(src, k, a, b) simde_mm512_mask_unpackhi_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_unpackhi_pd(simde__mmask8 k, 
simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_unpackhi_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_unpackhi_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_unpackhi_pd + #define _mm512_maskz_unpackhi_pd(k, a, b) simde_mm512_maskz_unpackhi_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_unpackhi_pd(simde__m256d src, simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_unpackhi_pd(src, k, a, b); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_unpackhi_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_unpackhi_pd + #define _mm256_mask_unpackhi_pd(src, k, a, b) simde_mm256_mask_unpackhi_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_unpackhi_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_unpackhi_pd(k, a, b); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_unpackhi_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_unpackhi_pd + #define _mm256_maskz_unpackhi_pd(k, a, b) simde_mm256_maskz_unpackhi_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_unpackhi_pd(simde__m128d src, simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_unpackhi_pd(src, k, a, b); + #else + return simde_mm_mask_mov_pd(src, k, simde_mm_unpackhi_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_unpackhi_pd + #define _mm_mask_unpackhi_pd(src, k, a, b) simde_mm_mask_unpackhi_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_unpackhi_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_unpackhi_pd(k, a, b); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_unpackhi_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_unpackhi_pd + #define _mm_maskz_unpackhi_pd(k, a, b) simde_mm_maskz_unpackhi_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_UNPACKHI_H) */ +/* :: End simde/x86/avx512/unpackhi.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#endif +/* :: End simde/x86/avx512.h :: */ diff --git a/include/simde/x86/clmul.h b/include/simde/x86/clmul.h new file mode 100644 index 00000000..01147981 --- /dev/null +++ b/include/simde/x86/clmul.h @@ -0,0 +1,35915 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/clmul.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * 
files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2016 Thomas Pornin + */ + +/* The portable version is based on the implementation in BearSSL, + * which is MIT licensed, constant-time / branch-free, and documented + * at https://www.bearssl.org/constanttime.html (specifically, we use + * the implementation from ghash_ctmul64.c). */ + +#if !defined(SIMDE_X86_CLMUL_H) +#define SIMDE_X86_CLMUL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SET_H) +#define SIMDE_X86_AVX512_SET_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_TYPES_H) +#define SIMDE_X86_AVX512_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. 
Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementing the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code to disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
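For reference, the usual way to silence that MSVC C4996 (_CRT_SECURE_NO_WARNINGS) warning mentioned above is to define the macro before the first CRT header is pulled in, either at the very top of the translation unit or on the compiler command line; a minimal sketch:

  /* Must come before the first standard-library include. */
  #if !defined(_CRT_SECURE_NO_WARNINGS)
    #define _CRT_SECURE_NO_WARNINGS
  #endif
  #include <string.h>

Since SIMDe cannot rely on that include ordering, the pragma-based macro above is used instead.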
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
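For readers unfamiliar with the term, a "statement expr" is the GNU extension of a parenthesized compound statement used as an expression; a tiny illustrative (non-SIMDe) example:

  /* GNU C statement expression: the macro evaluates to twice its argument. */
  #define SIMDE_EXAMPLE_TWICE_(x) (__extension__ ({ \
      int simde_example_tmp_ = (x);                 \
      simde_example_tmp_ * 2;                       \
    }))

On clang older than 5.0 a diagnostic disabled inside such a block was not reliably silenced, which is why the extra -Wcast-qual / -Wcast-align disables follow below.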
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
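A small sketch of the `(__assume(0), code)` situation described in the unreachable-code note above; the helper is hypothetical, and HEDLEY_UNREACHABLE() is the Hedley macro that typically expands to __assume(0) under MSVC:

  HEDLEY_DIAGNOSTIC_PUSH
  SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_
  static HEDLEY_INLINE int
  simde_example_lane_count_(int kind) {
    switch (kind) {
      case 0: return 8;
      case 1: return 16;
      default:
        HEDLEY_UNREACHABLE();
        return 0;  /* kept for compilers that don't understand the hint */
    }
  }
  HEDLEY_DIAGNOSTIC_POP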
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
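As a sketch of how the machinery defined just below this comment is meant to be consumed, code typically keys a SIMD path off the natural vector size and keeps a serial fallback; the function name here is made up, and SIMDE_NATURAL_VECTOR_SIZE_GE is the helper defined a few lines further down:

  static int
  simde_example_has_wide_vectors_(void) {
  #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
    return 1;  /* at least 128-bit vectors expected on this target */
  #else
    return 0;  /* expect plain scalar code to win here */
  #endif
  }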
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
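A minimal sketch of the bit-level reinterpretation that the IEEE 754 storage assumption above makes safe to rely on; the helper name is illustrative rather than SIMDe API, and it assumes <stdint.h> and <string.h> are available:

  static HEDLEY_INLINE uint32_t
  simde_example_f32_bits_(float v) {
    uint32_t r;
    memcpy(&r, &v, sizeof(r));  /* well-defined way to read the bit pattern */
    return r;                   /* e.g. (r >> 31) is the IEEE 754 sign bit */
  }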
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
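In practice, opting in from user code looks something like the following sketch (the include path depends on how SIMDe is installed, and as noted above the SLEEF headers and libsleef must be available):

  #define SIMDE_MATH_SLEEF_ENABLE   /* or simply #include <sleef.h> first */
  #include "simde/arm/neon.h"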
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
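The SIMDE_MATH_BUILTIN_LIBM detection macro defined above is consumed throughout the rest of this header with a pattern like the following sketch; simde_example_sqrt is a made-up name, but the real simde_math_* definitions below follow the same shape (builtin first, then <cmath>, then <math.h>):

  #if SIMDE_MATH_BUILTIN_LIBM(sqrt)
    #define simde_example_sqrt(v) __builtin_sqrt(v)
  #elif defined(SIMDE_MATH_HAVE_CMATH)
    #define simde_example_sqrt(v) std::sqrt(v)
  #elif defined(SIMDE_MATH_HAVE_MATH_H)
    #define simde_example_sqrt(v) sqrt(v)
  #endif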
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
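+/* Illustrative note, not part of upstream SIMDe: the SIMDE_BUG_* symbols
+ * defined in this section carry no behaviour on their own; they are only
+ * tested at the affected call sites elsewhere in the headers, along the
+ * lines of the hypothetical guard
+ *
+ *   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
+ *     (fast native code path)
+ *   #else
+ *     (portable workaround)
+ *   #endif
+ */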
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */
+#if \
+ (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \
+ HEDLEY_GCC_VERSION_CHECK(4,3,0)
+# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \
+ HEDLEY_DIAGNOSTIC_PUSH \
+ _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \
+ __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \
+ HEDLEY_DIAGNOSTIC_POP \
+ simde_bug_ignore_sign_conversion_v_; \
+ }))
+#else
+# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr)
+#endif
+
+/* Usually the shift count is signed (for example, NEON or SSE).
+ * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K.
+ * Further info: https://github.com/simd-everywhere/simde/pull/700
+ */
+#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER)
+ #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value))
+#else
+ #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value))
+#endif
+
+/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_COMMON_H) */
+/* :: End simde/simde-common.h :: */
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+
+#if defined(SIMDE_X86_MMX_NATIVE)
+ #define SIMDE_X86_MMX_USE_NATIVE_TYPE
+#elif defined(SIMDE_X86_SSE_NATIVE)
+ #define SIMDE_X86_MMX_USE_NATIVE_TYPE
+#endif
+
+#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
+ #include <mmintrin.h>
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ #include <arm_neon.h>
+#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ #include <loongson-mmiintrin.h>
+#endif
+
+#include <stdint.h>
+#include <limits.h>
+
+SIMDE_BEGIN_DECLS_
+
+typedef union {
+ #if defined(SIMDE_VECTOR_SUBSCRIPT)
+ SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS;
+ #else
+ SIMDE_ALIGN_TO_8 int8_t i8[8];
+ SIMDE_ALIGN_TO_8 int16_t i16[4];
+ SIMDE_ALIGN_TO_8 int32_t i32[2];
+ SIMDE_ALIGN_TO_8 int64_t i64[1];
+ SIMDE_ALIGN_TO_8 uint8_t u8[8];
+ SIMDE_ALIGN_TO_8 uint16_t u16[4];
+ SIMDE_ALIGN_TO_8 uint32_t u32[2];
+ SIMDE_ALIGN_TO_8 uint64_t u64[1];
+ SIMDE_ALIGN_TO_8 simde_float32 f32[2];
+ SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)];
+ SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)];
+ #endif
+
+ #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
+ __m64 n;
+ #endif
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ int8x8_t neon_i8;
+ int16x4_t neon_i16;
+ int32x2_t neon_i32;
+ int64x1_t neon_i64;
+ uint8x8_t neon_u8;
+ uint16x4_t neon_u16;
+ uint32x2_t neon_u32;
+ uint64x1_t neon_u64;
+ float32x2_t neon_f32;
+ #endif
+ #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ int8x8_t mmi_i8;
+ int16x4_t mmi_i16;
+ int32x2_t mmi_i32;
+ int64_t mmi_i64;
+ uint8x8_t mmi_u8;
+ uint16x4_t mmi_u16;
+ uint32x2_t mmi_u32;
+ uint64_t mmi_u64;
+ #endif
+} simde__m64_private;
+
+#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE)
+ typedef __m64 simde__m64;
+#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64
+simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_adds_pu8(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8);
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
+ const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]);
+ if (x > UINT8_MAX)
+ r_.u8[i] = UINT8_MAX;
+ else
+ r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x);
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b)
+# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_adds_pi16(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16);
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) {
+ r_.i16[i] = INT16_MAX;
+ } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (INT16_MIN - (b_.i16[i]))))) {
+ r_.i16[i] = INT16_MIN;
+ } else {
+ r_.i16[i] = (a_.i16[i]) + (b_.i16[i]);
+ }
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b)
+# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_adds_pu16(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16);
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ const uint32_t x = a_.u16[i] + b_.u16[i];
+ if (x > UINT16_MAX)
+ r_.u16[i] = UINT16_MAX;
+ else
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x);
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b)
+# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_and_si64 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_and_si64(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff));
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b)
+# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_or_si64 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_or_si64(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32);
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+ r_.i64 = a_.i64 | b_.i64;
+ #else
+ r_.i64[0] = a_.i64[0] | b_.i64[0];
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_por(a, b) simde_mm_or_si64(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_or_si64(a, b) simde_mm_or_si64(a, b)
+# define _m_por(a, b) simde_mm_or_si64(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_packs_pi16(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16));
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ if (a_.i16[i] < INT8_MIN) {
+ r_.i8[i] = INT8_MIN;
+ } else if (a_.i16[i] > INT8_MAX) {
+ r_.i8[i] = INT8_MAX;
+ } else {
+ r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]);
+ }
+ }
+
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ if (b_.i16[i] < INT8_MIN) {
+ r_.i8[i + 4] = INT8_MIN;
+ } else if (b_.i16[i] > INT8_MAX) {
+ r_.i8[i + 4] = INT8_MAX;
+ } else {
+ r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]);
+ }
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b)
+# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_packs_pi32(a, b);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32));
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) {
+ if (a_.i32[i] < INT16_MIN) {
+ r_.i16[i] = INT16_MIN;
+ } else if (a_.i32[i] > INT16_MAX) {
+ r_.i16[i] = INT16_MAX;
+ } else {
+ r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]);
+ }
+ }
+
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) {
+ if (b_.i32[i] < INT16_MIN) {
+ r_.i16[i + 2] = INT16_MIN;
+ } else if (b_.i32[i] > INT16_MAX) {
+ 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64();
+
+ r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count);
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.i16 = a_.i16 << count;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count));
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
+ r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count);
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count)
+# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_slli_pi32 (simde__m64 a, int count) {
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
+ return _mm_slli_pi32(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.i32 = a_.i32 << count;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count));
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i32 = psllw_s(a_.mmi_i32, count);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
+ r_.u32[i] = a_.u32[i] << count;
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count)
+# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_slli_si64 (simde__m64 a, int count) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_slli_si64(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.i64 = a_.i64 << count;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count));
+ #else
+ r_.u64[0] = a_.u64[0] << count;
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count)
+# define _m_psllqi(a, count) simde_mm_slli_si64(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_sll_si64 (simde__m64 a, simde__m64 count) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_sll_si64(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private count_ = simde__m64_to_private(count);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64);
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+ r_.i64 = a_.i64 << count_.i64;
+ #else
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
+ simde_memset(&r_, 0, sizeof(r_));
+ return simde__m64_from_private(r_);
+ }
+
+ r_.u64[0] = a_.u64[0] << count_.u64[0];
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count)
+# define _m_psllq(a, count) simde_mm_sll_si64(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_srl_pi16(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private count_ = simde__m64_to_private(count);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT)
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15))
+ return simde_mm_setzero_si64();
+
+ r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]);
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.u16 = a_.u16 >> count_.u64[0];
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0))));
+ #else
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) {
+ simde_memset(&r_, 0, sizeof(r_));
+ return simde__m64_from_private(r_);
+ }
+
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) {
+ r_.u16[i] = a_.u16[i] >> count_.u64[0];
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count)
+# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_srl_pi32(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private count_ = simde__m64_to_private(count);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.u32 = a_.u32 >> count_.u64[0];
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0))));
+ #else
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) {
+ simde_memset(&r_, 0, sizeof(r_));
+ return simde__m64_from_private(r_);
+ }
+
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) {
+ r_.u32[i] = a_.u32[i] >> count_.u64[0];
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count)
+# define _m_psrld(a, count) simde_mm_srl_pi32(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srli_pi16 (simde__m64 a, int count) {
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
+ return _mm_srli_pi16(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.u16 = a_.u16 >> count;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count)));
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i16 = psrlh_s(a_.mmi_i16, count);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
+ r_.u16[i] = a_.u16[i] >> count;
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count)
+# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srli_pi32 (simde__m64 a, int count) {
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
+ return _mm_srli_pi32(a, count);
+ #else
+ simde__m64_private 
r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.u32 = a_.u32 >> count;
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count)));
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i32 = psrlw_s(a_.mmi_i32, count);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) {
+ r_.u32[i] = a_.u32[i] >> count;
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count)
+# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srli_si64 (simde__m64 a, int count) {
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
+ return _mm_srli_si64(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count));
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.u64 = a_.u64 >> count;
+ #else
+ r_.u64[0] = a_.u64[0] >> count;
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count)
+# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srl_si64 (simde__m64 a, simde__m64 count) {
+ #if defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_srl_si64(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+ simde__m64_private count_ = simde__m64_to_private(count);
+
+ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+ r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64));
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+ r_.u64 = a_.u64 >> count_.u64;
+ #else
+ if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) {
+ simde_memset(&r_, 0, sizeof(r_));
+ return simde__m64_from_private(r_);
+ }
+
+ r_.u64[0] = a_.u64[0] >> count_.u64[0];
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count)
+# define _m_psrlq(a, count) simde_mm_srl_si64(a, count)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_srai_pi16 (simde__m64 a, int count) {
+ #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
+ return _mm_srai_pi16(a, count);
+ #else
+ simde__m64_private r_;
+ simde__m64_private a_ = simde__m64_to_private(a);
+
+ #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR)
+ r_.i16 = a_.i16 >> (count & 0xff);
+ #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count)));
+ #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+ r_.mmi_i16 = psrah_s(a_.mmi_i16, count);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ r_.i16[i] = a_.i16[i] >> (count & 0xff);
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count)
+#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES)
+# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count)
+# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
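+      /* vrndq_f32 lowers to the ARMv8 FRINTZ instruction (round toward zero),
+       * so this branch matches the truncation semantics of
+       * SIMDE_MM_FROUND_TO_ZERO. */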
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
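+      // (i.e. the remnants of a: lanes 1..3 are computed as a + 0)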
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
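+/* A minimal usage sketch (not part of upstream SIMDE): it shows how
+ * simde_mm_and_ps, simde_mm_andnot_ps and simde_mm_or_ps compose into the
+ * classic branchless blend that simde_x_mm_select_ps (defined further below)
+ * performs in a single call.  The helper name `blend_by_mask` is hypothetical
+ * and assumes each 32-bit lane of `mask` is either all ones or all zeros:
+ *
+ *   static simde__m128 blend_by_mask(simde__m128 a, simde__m128 b, simde__m128 mask) {
+ *     // (mask & b) | (~mask & a): take b where mask is ~0, otherwise a.
+ *     return simde_mm_or_ps(simde_mm_and_ps(mask, b),
+ *                           simde_mm_andnot_ps(mask, a));
+ *   }
+ */
+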
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for + * __m512/__m512i/__m512d. 
Since our private union has an __m512 member it will be 64-byte + * aligned even if we reduce the alignment requirements of other members. + * + * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the + * to/from private functions will break, and I'm not willing to change their APIs to use + * pointers (which would also require more verbose code on the caller side) just to make + * MSVC happy. + * + * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, + * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to + * fix this without requiring API changes (except transparently through macros), patches + * are welcome. + */ + +# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) +# if defined(SIMDE_X86_AVX512F_NATIVE) +# undef SIMDE_X86_AVX512F_NATIVE +# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") +# endif +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 +# else +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 +# endif + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_ALIGN_TO_16 __m128bh n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + 
v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_BF16_NATIVE) + SIMDE_ALIGN_TO_32 __m256bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) 
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_AVX512_ALIGN __m512bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t 
u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) 
SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; + SIMDE_AVX512_ALIGN simde__m128d m128d[4]; + SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; + SIMDE_AVX512_ALIGN simde__m256d m256d[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + 
SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; + SIMDE_AVX512_ALIGN simde__m128i m128i[4]; + SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; + SIMDE_AVX512_ALIGN simde__m256i m256i[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512i_private; + +/* Intel uses the same header (immintrin.h) for everything AVX and + * later. If native aliases are enabled, and the machine has native + * support for AVX imintrin.h will already have been included, which + * means simde__m512* will already have been defined. So, even + * if the machine doesn't support AVX512F we need to use the native + * type; it has already been defined. + * + * However, we also can't just assume that including immintrin.h does + * actually define these. It could be a compiler which supports AVX + * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we + * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, + * so we assume that if it's present AVX-512F has already been + * declared. + * + * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC + * uses the preprocessor to define all the _MM_CMPINT_* members, + * in most compilers they are simply normal enum members. However, + * all compilers I've looked at use an object-like macro for + * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT + * is included in case a compiler does the reverse, though I haven't + * run into one which does. + * + * As for the ICC check, unlike other compilers, merely using the + * AVX-512 types causes ICC to generate AVX-512 instructions. 
*/ +#if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && (defined(SIMDE_X86_AVX512F_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m512 simde__m512; + typedef __m512i simde__m512i; + typedef __m512d simde__m512d; + + typedef __mmask8 simde__mmask8; + typedef __mmask16 simde__mmask16; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m512_private simde__m512; + typedef simde__m512i_private simde__m512i; + typedef simde__m512d_private simde__m512d; + #endif + + typedef uint8_t simde__mmask8; + typedef uint16_t simde__mmask16; +#endif + +#if (defined(_AVX512BF16INTRIN_H_INCLUDED) || defined(__AVX512BF16INTRIN_H)) && (defined(SIMDE_X86_AVX512BF16_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m128bh simde__m128bh; + typedef __m256bh simde__m256bh; + typedef __m512bh simde__m512bh; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128bh SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m256bh SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m512bh SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m128bh_private simde__m128bh; + typedef simde__m256bh_private simde__m256bh; + typedef simde__m512bh_private simde__m512bh; + #endif +#endif + +/* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is + * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang + * both are in avx512bwintrin.h), not AVX-512F. However, we don't have + * a good (not-compiler-specific) way to detect if these headers have + * been included. In compilers which support AVX-512F but not + * AVX-512BW/VL (e.g., GCC 4.9) we need typedefs since __mmask{32,64) + * won't exist. + * + * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t + * in all compilers, so it's safe to use these instead of typedefs to + * __mmask{16,32}. If you run into a problem with this please file an + * issue and we'll try to figure out a work-around. 
*/ +typedef uint32_t simde__mmask32; +typedef uint64_t simde__mmask64; +#if !defined(__mmask32) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef uint32_t __mmask32; + #else + #define __mmask32 uint32_t; + #endif +#endif +#if !defined(__mmask64) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + #if defined(HEDLEY_GCC_VERSION) + typedef unsigned long long __mmask64; + #else + typedef uint64_t __mmask64; + #endif + #else + #define __mmask64 uint64_t; + #endif +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m512 __m512; + typedef simde__m512i __m512i; + typedef simde__m512d __m512d; + #else + #define __m512 simde__m512 + #define __m512i simde__m512i + #define __m512d simde__m512d + #endif +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m128bh __m128bh; + typedef simde__m256bh __m256bh; + typedef simde__m512bh __m512bh; + #else + #define __m128bh simde__m128bh + #define __m256bh simde__m256bh + #define __m512bh simde__m512bh + #endif +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh), "simde__m128bh size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh_private), "simde__m128bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh), "simde__m256bh size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh_private), "simde__m256bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh), "simde__m512bh size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh_private), "simde__m512bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh) == 16, "simde__m128bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh_private) == 16, "simde__m128bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh) == 32, "simde__m256bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh_private) == 32, "simde__m256bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh) == 32, "simde__m512bh is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh_private) == 32, "simde__m512bh_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); 
+HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); +#endif + +#define SIMDE_MM_CMPINT_EQ 0 +#define SIMDE_MM_CMPINT_LT 1 +#define SIMDE_MM_CMPINT_LE 2 +#define SIMDE_MM_CMPINT_FALSE 3 +#define SIMDE_MM_CMPINT_NE 4 +#define SIMDE_MM_CMPINT_NLT 5 +#define SIMDE_MM_CMPINT_NLE 6 +#define SIMDE_MM_CMPINT_TRUE 7 +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && !defined(_MM_CMPINT_EQ) +#define _MM_CMPINT_EQ SIMDE_MM_CMPINT_EQ +#define _MM_CMPINT_LT SIMDE_MM_CMPINT_LT +#define _MM_CMPINT_LE SIMDE_MM_CMPINT_LE +#define _MM_CMPINT_FALSE SIMDE_MM_CMPINT_FALSE +#define _MM_CMPINT_NE SIMDE_MM_CMPINT_NE +#define _MM_CMPINT_NLT SIMDE_MM_CMPINT_NLT +#define _MM_CMPINT_NLE SIMDE_MM_CMPINT_NLE +#define _MM_CMPINT_TRUE SIMDE_CMPINT_TRUE +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh +simde__m128bh_from_private(simde__m128bh_private v) { + simde__m128bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh_private +simde__m128bh_to_private(simde__m128bh v) { + simde__m128bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh +simde__m256bh_from_private(simde__m256bh_private v) { + simde__m256bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh_private +simde__m256bh_to_private(simde__m256bh v) { + simde__m256bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh +simde__m512bh_from_private(simde__m512bh_private v) { + simde__m512bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh_private +simde__m512bh_to_private(simde__m512bh v) { + simde__m512bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde__m512_from_private(simde__m512_private v) { + simde__m512 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512_private +simde__m512_to_private(simde__m512 v) { + simde__m512_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde__m512i_from_private(simde__m512i_private v) { + simde__m512i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i_private +simde__m512i_to_private(simde__m512i v) { + simde__m512i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde__m512d_from_private(simde__m512d_private v) { + simde__m512d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d_private +simde__m512d_to_private(simde__m512d v) { + simde__m512d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TYPES_H) */ +/* :: End simde/x86/avx512/types.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/load.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_LOAD_H) +#define SIMDE_X86_AVX512_LOAD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_load_pd (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_pd(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d)); + #else + simde__m512d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_pd + #define _mm512_load_pd(a) simde_mm512_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_load_ps (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_ps(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512)); + #else + simde__m512 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_ps + #define _mm512_load_ps(a) simde_mm512_load_ps(a) +#endif +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_load_si512 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); + #else + simde__m512i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); + return r; + #endif +} +#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_epi8 + #undef _mm512_load_epi16 + #undef _mm512_load_epi32 + #undef _mm512_load_epi64 + #undef _mm512_load_si512 + #define _mm512_load_si512(a) simde_mm512_load_si512(a) + #define _mm512_load_epi8(a) simde_mm512_load_si512(a) + #define _mm512_load_epi16(a) simde_mm512_load_si512(a) + #define _mm512_load_epi32(a) simde_mm512_load_si512(a) + #define _mm512_load_epi64(a) simde_mm512_load_si512(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ +/* :: End simde/x86/avx512/load.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, + int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, + int16_t 
e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + simde__m512i_private r_; + + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + r_.i16[16] = e16; + r_.i16[17] = e17; + r_.i16[18] = e18; + r_.i16[19] = e19; + r_.i16[20] = e20; + r_.i16[21] = e21; + r_.i16[22] = e22; + r_.i16[23] = e23; + r_.i16[24] = e24; + r_.i16[25] = e25; + r_.i16[26] = e26; + r_.i16[27] = e27; + r_.i16[28] = e28; + r_.i16[29] = e29; + r_.i16[30] = e30; + r_.i16[31] = e31; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi16 + #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + simde__m512i_private r_; + + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + r_.i32[ 8] = e8; + r_.i32[ 9] = e9; + r_.i32[10] = e10; + r_.i32[11] = e11; + r_.i32[12] = e12; + r_.i32[13] = e13; + r_.i32[14] = e14; + r_.i32[15] = e15; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi32 + #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + simde__m512i_private r_; + + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + r_.i64[4] = e4; + r_.i64[5] = e5; + r_.i64[6] = e6; + r_.i64[7] = e7; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi64 + #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, + uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, + uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, + uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, + uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + 
uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m512i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; + r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + r_.u8[32] = e32; + r_.u8[33] = e33; + r_.u8[34] = e34; + r_.u8[35] = e35; + r_.u8[36] = e36; + r_.u8[37] = e37; + r_.u8[38] = e38; + r_.u8[39] = e39; + r_.u8[40] = e40; + r_.u8[41] = e41; + r_.u8[42] = e42; + r_.u8[43] = e43; + r_.u8[44] = e44; + r_.u8[45] = e45; + r_.u8[46] = e46; + r_.u8[47] = e47; + r_.u8[48] = e48; + r_.u8[49] = e49; + r_.u8[50] = e50; + r_.u8[51] = e51; + r_.u8[52] = e52; + r_.u8[53] = e53; + r_.u8[54] = e54; + r_.u8[55] = e55; + r_.u8[56] = e56; + r_.u8[57] = e57; + r_.u8[58] = e58; + r_.u8[59] = e59; + r_.u8[60] = e60; + r_.u8[61] = e61; + r_.u8[62] = e62; + r_.u8[63] = e63; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, + uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, + uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m512i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + r_.u16[16] = e16; + r_.u16[17] = e17; + r_.u16[18] = e18; + r_.u16[19] = e19; + r_.u16[20] = e20; + r_.u16[21] = e21; + r_.u16[22] = e22; + r_.u16[23] = e23; + r_.u16[24] = e24; + r_.u16[25] = e25; + r_.u16[26] = e26; + r_.u16[27] = e27; + r_.u16[28] = e28; + r_.u16[29] = e29; + r_.u16[30] = e30; + r_.u16[31] = e31; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, + uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + simde__m512i_private r_; + + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + r_.u32[ 8] = e8; + r_.u32[ 9] = e9; + r_.u32[10] = e10; + r_.u32[11] = e11; + r_.u32[12] = e12; + r_.u32[13] = e13; + r_.u32[14] = e14; + r_.u32[15] = e15; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + 
simde__m512i_private r_; + + r_.u64[ 0] = e0; + r_.u64[ 1] = e1; + r_.u64[ 2] = e2; + r_.u64[ 3] = e3; + r_.u64[ 4] = e4; + r_.u64[ 5] = e5; + r_.u64[ 6] = e6; + r_.u64[ 7] = e7; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, + int8_t e55, int8_t e54, int8_t e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, + int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, + int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (HEDLEY_GCC_VERSION_CHECK(10,0,0) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) + return _mm512_set_epi8( + e63, e62, e61, e60, e59, e58, e57, e56, + e55, e54, e53, e52, e51, e50, e49, e48, + e47, e46, e45, e44, e43, e42, e41, e40, + e39, e38, e37, e36, e35, e34, e33, e32, + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0 + ); + #else + simde__m512i_private r_; + + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + r_.i8[32] = e32; + r_.i8[33] = e33; + r_.i8[34] = e34; + r_.i8[35] = e35; + r_.i8[36] = e36; + r_.i8[37] = e37; + r_.i8[38] = e38; + r_.i8[39] = e39; + r_.i8[40] = e40; + r_.i8[41] = e41; + r_.i8[42] = e42; + r_.i8[43] = e43; + r_.i8[44] = e44; + r_.i8[45] = e45; + r_.i8[46] = e46; + r_.i8[47] = e47; + r_.i8[48] = e48; + r_.i8[49] = e49; + r_.i8[50] = e50; + r_.i8[51] = e51; + r_.i8[52] = e52; + r_.i8[53] = e53; + r_.i8[54] = e54; + r_.i8[55] = e55; + r_.i8[56] = e56; + r_.i8[57] = e57; + r_.i8[58] = e58; + r_.i8[59] = e59; + r_.i8[60] = e60; + r_.i8[61] = e61; + r_.i8[62] = e62; + r_.i8[63] = e63; + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi8 + #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, 
e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_16(simde__m128i) simde__m128i v[] = { d, c, b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + r_.m128i[0] = d; + r_.m128i[1] = c; + r_.m128i[2] = b; + r_.m128i[3] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_32(simde__m256i) simde__m256i v[] = { b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + r_.m256i[0] = b; + r_.m256i[1] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, + simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + simde__m512_private r_; + + r_.f32[ 0] = e0; + r_.f32[ 1] = e1; + r_.f32[ 2] = e2; + r_.f32[ 3] = e3; + r_.f32[ 4] = e4; + r_.f32[ 5] = e5; + r_.f32[ 6] = e6; + r_.f32[ 7] = e7; + r_.f32[ 8] = e8; + r_.f32[ 9] = e9; + r_.f32[10] = e10; + r_.f32[11] = e11; + r_.f32[12] = e12; + r_.f32[13] = e13; + r_.f32[14] = e14; + r_.f32[15] = e15; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_ps + #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + simde__m512d_private r_; + + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + r_.f64[4] = e4; + r_.f64[5] = e5; + r_.f64[6] = e6; + r_.f64[7] = e7; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_pd + #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET_H) */ +/* :: End simde/x86/avx512/set.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setzero.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SETZERO_H) +#define SIMDE_X86_AVX512_SETZERO_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cast.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_CAST_H) +#define SIMDE_X86_AVX512_CAST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castpd_ps (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_ps + #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castpd_si512 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_si512 + #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castps_pd (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_pd + #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castps_si512 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_si512 + #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castsi512_ps (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_ps + #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castsi512_pd (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_pd + #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd128_pd512 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd128_pd512(a); + #else + simde__m512d_private r_; + r_.m128d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd128_pd512 + #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd256_pd512 (simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd256_pd512(a); + #else + simde__m512d_private r_; + r_.m256d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd256_pd512 + #define 
_mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm512_castpd512_pd128 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd128(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd128 + #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm512_castpd512_pd256 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd256(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m256d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd256 + #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps128_ps512 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps128_ps512(a); + #else + simde__m512_private r_; + r_.m128[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps128_ps512 + #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps256_ps512 (simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps256_ps512(a); + #else + simde__m512_private r_; + r_.m256[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps256_ps512 + #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm512_castps512_ps128 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps128(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps128 + #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm512_castps512_ps256 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps256(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m256[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps256 + #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi128_si512 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi128_si512(a); + #else + simde__m512i_private r_; + r_.m128i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi128_si512 + #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi256_si512 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi256_si512(a); + #else + simde__m512i_private r_; + r_.m256i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi256_si512 + #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i 
+simde_mm512_castsi512_si128 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si128(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si128 + #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_castsi512_si256 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si256(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m256i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si256 + #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ +/* :: End simde/x86/avx512/cast.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setzero_si512(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_si512(); + #else + simde__m512i r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_si512 + #define _mm512_setzero_si512() simde_mm512_setzero_si512() + #undef _mm512_setzero_epi32 + #define _mm512_setzero_epi32() simde_mm512_setzero_si512() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_setzero_ps(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_ps(); + #else + return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_ps + #define _mm512_setzero_ps() simde_mm512_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_setzero_pd(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_pd(); + #else + return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_pd + #define _mm512_setzero_pd() simde_mm512_setzero_pd() +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ +/* :: End simde/x86/avx512/setzero.h :: */ + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_x_clmul_u64(uint64_t x, uint64_t y) { + uint64_t x0, x1, x2, x3; + uint64_t y0, y1, y2, y3; + uint64_t z0, z1, z2, z3; + + x0 = x & UINT64_C(0x1111111111111111); + x1 = x & UINT64_C(0x2222222222222222); + x2 = x & UINT64_C(0x4444444444444444); + x3 = x & UINT64_C(0x8888888888888888); + y0 = y & UINT64_C(0x1111111111111111); + y1 = y & UINT64_C(0x2222222222222222); + y2 = y & UINT64_C(0x4444444444444444); + y3 = y & UINT64_C(0x8888888888888888); + + z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1); + z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2); + z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3); + z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0); + + z0 &= UINT64_C(0x1111111111111111); + z1 &= UINT64_C(0x2222222222222222); + z2 &= UINT64_C(0x4444444444444444); + z3 &= UINT64_C(0x8888888888888888); + + return z0 
| z1 | z2 | z3; +} + +static uint64_t +simde_x_bitreverse_u64(uint64_t v) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t bytes = vreinterpret_u8_u64(vmov_n_u64(v)); + bytes = vrbit_u8(bytes); + bytes = vrev64_u8(bytes); + return vget_lane_u64(vreinterpret_u64_u8(bytes), 0); + #elif defined(SIMDE_X86_GFNI_NATIVE) + /* I don't think there is (or likely will ever be) a CPU with GFNI + * but not pclmulq, but this may be useful for things other than + * _mm_clmulepi64_si128. */ + __m128i vec = _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, v)); + + /* Reverse bits within each byte */ + vec = _mm_gf2p8affine_epi64_epi8(vec, _mm_cvtsi64_si128(HEDLEY_STATIC_CAST(int64_t, UINT64_C(0x8040201008040201))), 0); + + /* Reverse bytes */ + #if defined(SIMDE_X86_SSSE3_NATIVE) + vec = _mm_shuffle_epi8(vec, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); + #else + vec = _mm_or_si128(_mm_slli_epi16(vec, 8), _mm_srli_epi16(vec, 8)); + vec = _mm_shufflelo_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); + vec = _mm_shufflehi_epi16(vec, _MM_SHUFFLE(0, 1, 2, 3)); + #endif + + return HEDLEY_STATIC_CAST(uint64_t, _mm_cvtsi128_si64(vec)); + #elif HEDLEY_HAS_BUILTIN(__builtin_bitreverse64) + return __builtin_bitreverse64(v); + #else + v = ((v >> 1) & UINT64_C(0x5555555555555555)) | ((v & UINT64_C(0x5555555555555555)) << 1); + v = ((v >> 2) & UINT64_C(0x3333333333333333)) | ((v & UINT64_C(0x3333333333333333)) << 2); + v = ((v >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((v & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); + v = ((v >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((v & UINT64_C(0x00FF00FF00FF00FF)) << 8); + v = ((v >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((v & UINT64_C(0x0000FFFF0000FFFF)) << 16); + return (v >> 32) | (v << 32); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_clmulepi64_si128 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT(imm8) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) + uint64x1_t A = ((imm8) & 0x01) ? vget_high_u64(a_.neon_u64) : vget_low_u64(a_.neon_u64); + uint64x1_t B = ((imm8) & 0x10) ? 
vget_high_u64(b_.neon_u64) : vget_low_u64(b_.neon_u64); + #if defined(SIMDE_BUG_CLANG_48257) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif + poly64_t A_ = vget_lane_p64(vreinterpret_p64_u64(A), 0); + poly64_t B_ = vget_lane_p64(vreinterpret_p64_u64(B), 0); + #if defined(SIMDE_BUG_CLANG_48257) + HEDLEY_DIAGNOSTIC_POP + #endif + poly128_t R = vmull_p64(A_, B_); + r_.neon_u64 = vreinterpretq_u64_p128(R); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #if defined(SIMDE_SHUFFLE_VECTOR_) + switch (imm8 & 0x11) { + case 0x00: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); + break; + case 0x01: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 0, 0); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); + break; + case 0x10: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 0, 0); + break; + case 0x11: + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, b_.u64, 1, 1); + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, a_.u64, 1, 1); + break; + } + #else + { + const uint64_t A = a_.u64[(imm8 ) & 1]; + const uint64_t B = b_.u64[(imm8 >> 4) & 1]; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + a_.u64[i] = A; + b_.u64[i] = B; + } + } + #endif + + simde__m128i_private reversed_; + { + #if defined(SIMDE_SHUFFLE_VECTOR_) + reversed_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, b_.u64, 1, 3); + #else + reversed_.u64[0] = a_.u64[1]; + reversed_.u64[1] = b_.u64[1]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { + reversed_.u64[i] = simde_x_bitreverse_u64(reversed_.u64[i]); + } + } + + #if defined(SIMDE_SHUFFLE_VECTOR_) + a_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.u64, reversed_.u64, 0, 2); + b_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 16, b_.u64, reversed_.u64, 1, 3); + #else + a_.u64[1] = reversed_.u64[0]; + b_.u64[1] = reversed_.u64[1]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(reversed_.u64) / sizeof(reversed_.u64[0])) ; i++) { + r_.u64[i] = simde_x_clmul_u64(a_.u64[i], b_.u64[i]); + } + + r_.u64[1] = simde_x_bitreverse_u64(r_.u64[1]) >> 1; + #else + r_.u64[0] = simde_x_clmul_u64( a_.u64[imm8 & 1], b_.u64[(imm8 >> 4) & 1]); + r_.u64[1] = simde_x_bitreverse_u64(simde_x_clmul_u64(simde_x_bitreverse_u64(a_.u64[imm8 & 1]), simde_x_bitreverse_u64(b_.u64[(imm8 >> 4) & 1]))) >> 1; + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_PCLMUL_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_clmulepi64_si128(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_clmulepi64_si128((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_clmulepi64_si128(a, b, imm8) simde_mm_clmulepi64_si128(a, b, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_AES) + #define simde_mm_clmulepi64_si128(a, b, imm8) \ + simde__m128i_from_neon_u64( \ + vreinterpretq_u64_p128( \ + vmull_p64( \ + vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(a)), (imm8 ) & 1), \ + vgetq_lane_p64(vreinterpretq_p64_u64(simde__m128i_to_neon_u64(b)), (imm8 >> 4) & 1) \ + ) \ + ) \ + ) +#endif +#if defined(SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES) + #undef _mm_clmulepi64_si128 + #define _mm_clmulepi64_si128(a, b, imm8) simde_mm_clmulepi64_si128(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256i +simde_mm256_clmulepi64_epi128 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT(imm8) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X86_PCLMUL_NATIVE) + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS + switch (imm8 & 0x11) { + case 0x00: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x00); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x00); + break; + case 0x01: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x01); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x01); + break; + case 0x10: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x10); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x10); + break; + case 0x11: + r_.m128i[0] = _mm_clmulepi64_si128(a_.m128i[0], b_.m128i[0], 0x11); + r_.m128i[1] = _mm_clmulepi64_si128(a_.m128i[1], b_.m128i[1], 0x11); + break; + } + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS + #else + simde__m128i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + switch (imm8 & 0x01) { + case 0x00: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2); + break; + case 0x01: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3); + break; + } + switch (imm8 & 0x10) { + case 0x00: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2); + break; + case 0x10: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3); + break; + } + #else + a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; + a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; + b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; + b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { + a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); + b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); + + r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); + r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); + + r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; + } + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 2, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_ = simde__m256i_to_private(simde_mm256_set_m128i(simde__m128i_from_private(r_hi_), simde__m128i_from_private(r_lo_))); + r_.u64 = SIMDE_SHUFFLE_VECTOR_(64, 32, r_.u64, r_.u64, 0, 2, 1, 3); + #else + r_.u64[0] = r_lo_.u64[0]; + r_.u64[1] = r_hi_.u64[0]; + r_.u64[2] = r_lo_.u64[1]; + r_.u64[3] = r_hi_.u64[1]; + #endif + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_clmulepi64_epi128(a, b, imm8) _mm256_clmulepi64_epi128(a, b, imm8) +#endif +#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_clmulepi64_epi128 + #define _mm256_clmulepi64_epi128(a, b, imm8) simde_mm256_clmulepi64_epi128(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_clmulepi64_epi128 (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT(imm8) { + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(HEDLEY_MSVC_VERSION) + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); + #endif + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + switch (imm8 & 0x11) { + case 0x00: + 
r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x00); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x00); + break; + case 0x01: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x01); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x01); + break; + case 0x10: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x10); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x10); + break; + case 0x11: + r_.m256i[0] = simde_mm256_clmulepi64_epi128(a_.m256i[0], b_.m256i[0], 0x11); + r_.m256i[1] = simde_mm256_clmulepi64_epi128(a_.m256i[1], b_.m256i[1], 0x11); + break; + } + #else + simde__m256i_private a_lo_, b_lo_, r_lo_, a_hi_, b_hi_, r_hi_; + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + switch (imm8 & 0x01) { + case 0x00: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 0, 2, 4, 6); + break; + case 0x01: + a_lo_.u64 = __builtin_shufflevector(a_.u64, a_.u64, 1, 3, 5, 7); + break; + } + switch (imm8 & 0x10) { + case 0x00: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 0, 2, 4, 6); + break; + case 0x10: + b_lo_.u64 = __builtin_shufflevector(b_.u64, b_.u64, 1, 3, 5, 7); + break; + } + #else + a_lo_.u64[0] = a_.u64[((imm8 >> 0) & 1) + 0]; + a_lo_.u64[1] = a_.u64[((imm8 >> 0) & 1) + 2]; + a_lo_.u64[2] = a_.u64[((imm8 >> 0) & 1) + 4]; + a_lo_.u64[3] = a_.u64[((imm8 >> 0) & 1) + 6]; + b_lo_.u64[0] = b_.u64[((imm8 >> 4) & 1) + 0]; + b_lo_.u64[1] = b_.u64[((imm8 >> 4) & 1) + 2]; + b_lo_.u64[2] = b_.u64[((imm8 >> 4) & 1) + 4]; + b_lo_.u64[3] = b_.u64[((imm8 >> 4) & 1) + 6]; + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_hi_.u64) / sizeof(r_hi_.u64[0])) ; i++) { + a_hi_.u64[i] = simde_x_bitreverse_u64(a_lo_.u64[i]); + b_hi_.u64[i] = simde_x_bitreverse_u64(b_lo_.u64[i]); + + r_lo_.u64[i] = simde_x_clmul_u64(a_lo_.u64[i], b_lo_.u64[i]); + r_hi_.u64[i] = simde_x_clmul_u64(a_hi_.u64[i], b_hi_.u64[i]); + + r_hi_.u64[i] = simde_x_bitreverse_u64(r_hi_.u64[i]) >> 1; + } + + #if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(HEDLEY_IBM_VERSION) + r_.u64 = __builtin_shufflevector(r_lo_.u64, r_hi_.u64, 0, 4, 1, 5, 2, 6, 3, 7); + #else + r_.u64[0] = r_lo_.u64[0]; + r_.u64[1] = r_hi_.u64[0]; + r_.u64[2] = r_lo_.u64[1]; + r_.u64[3] = r_hi_.u64[1]; + r_.u64[4] = r_lo_.u64[2]; + r_.u64[5] = r_hi_.u64[2]; + r_.u64[6] = r_lo_.u64[3]; + r_.u64[7] = r_hi_.u64[3]; + #endif + #endif + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_clmulepi64_epi128(a, b, imm8) _mm512_clmulepi64_epi128(a, b, imm8) +#endif +#if defined(SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_clmulepi64_epi128 + #define _mm512_clmulepi64_epi128(a, b, imm8) simde_mm512_clmulepi64_epi128(a, b, imm8) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_CLMUL_H) */ +/* :: End simde/x86/clmul.h :: */ diff --git a/include/simde/x86/f16c.h b/include/simde/x86/f16c.h new file mode 100644 index 00000000..bfec53e3 --- /dev/null +++ b/include/simde/x86/f16c.h @@ -0,0 +1,34146 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/f16c.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in 
the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
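+ *
+ * As a rough illustration of the mechanism described above (all of the
+ * macro names here are the ones defined further down in this header or
+ * in the bundled Hedley header), the SIMDe headers wrap their own
+ * declarations like so:
+ *
+ *   HEDLEY_DIAGNOSTIC_PUSH
+ *   SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+ *   ...SIMDe types and functions...
+ *   HEDLEY_DIAGNOSTIC_POP
+ *
+ * so the disabled diagnostics only cover SIMDe's own code and the
+ * caller's warning settings are restored afterwards.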
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
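+ *
+ * For example, with the Intel compiler that advice amounts to an
+ * invocation along the lines of (the file name is only a placeholder):
+ *
+ *   icc -qopenmp-simd -DSIMDE_ENABLE_OPENMP -c file.c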
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
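+ *
+ * A minimal usage sketch (illustrative only; the _GE/_LE helper macros
+ * used here are defined immediately below):
+ *
+ *   #if SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(256)
+ *     ...use a 256-bit float implementation...
+ *   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
+ *     ...fall back to a 128-bit implementation...
+ *   #else
+ *     ...plain serial code; vectorization is not expected to pay off...
+ *   #endif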
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
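+ *
+ * As an illustration of what this assumption enables (the expression
+ * below mirrors the simde_math_issubnormalf fallback that appears later
+ * in this amalgamated file), a float can be classified as subnormal
+ * directly from its bit pattern: the exponent bits are all zero while
+ * the mantissa bits are not, i.e.
+ *
+ *   (simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == 0 &&
+ *   (simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != 0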
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include <cmath> + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MIN
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) ==
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f, + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ?
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a significant performance + * hit (a switch statement is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function is constant, pretty + * much any compiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters. + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...)
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
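+ *
+ * As a quick orientation, the pieces are typically combined along the
+ * following lines. This is an illustrative sketch only, not upstream
+ * SIMDe documentation; the example_i32x4 and example_sum names are
+ * made up:
+ *
+ *   typedef struct {
+ *     SIMDE_ALIGN_TO_16 int32_t values[4];   // declare with 16-byte alignment
+ *   } example_i32x4;
+ *
+ *   static int32_t example_sum(const example_i32x4* v) {
+ *     const int32_t* p = SIMDE_ALIGN_ASSUME_LIKE(&v->values[0], example_i32x4);
+ *     return p[0] + p[1] + p[2] + p[3];      // compiler may assume p is aligned
+ *   }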
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
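+ *
+ * To make that limitation concrete (an illustrative sketch, not from
+ * the upstream documentation; `buf` is a made-up name):
+ *
+ *   // OK with __attribute__((aligned)), _Alignas and alignas,
+ *   // but rejected by MSVC's __declspec(align()):
+ *   SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(double)) char buf[8];
+ *
+ *   // Portable spelling, including MSVC (defined below):
+ *   SIMDE_ALIGN_TO_8 char buf[8];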
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
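+ *
+ * For instance (an illustrative sketch, not upstream documentation;
+ * the include path stands in for whichever SIMDe header a project
+ * actually uses):
+ *
+ *   #define SIMDE_FAST_MATH        // opt in to every trade-off, or ...
+ *   // #define SIMDE_FAST_NANS     // ... opt in selectively, or ...
+ *   // #define SIMDE_NO_FAST_NANS  // ... keep faithful NaNs even under -ffast-math
+ *   #include "simde/arm/neon.h"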
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
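+ *
+ * A usage sketch (illustrative only, not upstream documentation; it
+ * assumes a compiler where SIMDE_STATEMENT_EXPR_ is available, and
+ * `a` and `b` are made-up floats being compared exactly on purpose):
+ *
+ *   int same = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(
+ *     SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);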
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
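+
+ Until that detection exists, the #if !defined() guards below mean the
+ choice can be overridden by defining SIMDE_FLOAT32_TYPE and/or
+ SIMDE_FLOAT64_TYPE before including any SIMDe header. A sketch
+ (illustrative only; `my_binary32_t` is a made-up typedef that would
+ have to be a real IEEE 754 binary32 type on the target):
+
+   #define SIMDE_FLOAT32_TYPE my_binary32_t
+   #include "simde/arm/neon.h"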
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
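+ * The guards below can be short-circuited by defining the macros before
+ * any SIMDe header is included, which is the escape hatch the comment
+ * further down describes for freestanding builds. A sketch
+ * (illustrative only; my_memcpy, my_memset and my_memcmp stand in for
+ * whatever the environment really provides):
+ *
+ *   #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+ *   #define simde_memset(s, c, n) my_memset((s), (c), (n))
+ *   #define simde_memcmp(s1, s2, n) my_memcmp((s1), (s2), (n))
+ *   #include "simde/arm/neon.h"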
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include(<string.h>) + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include <string.h> + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + const char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUNCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1); + const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2); + for (size_t i = 0 ; i < n ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include(<fenv.h>) + #include <fenv.h> + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include <fenv.h> + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include(<stdlib.h>) + #include <stdlib.h> + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
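+/* A minimal usage sketch of the assertion helpers above (illustrative only;
+ * the function name and the checked values are arbitrary and are not part of
+ * check.h). Note that unless SIMDE_DEBUG is defined, SIMDE_NDEBUG is set
+ * above and these macros reduce to compiler hints or no-ops. */
+static HEDLEY_INLINE void
+simde_check_usage_sketch_ (void) {
+  int observed = 3 + 4;
+  simde_assert_int(observed, ==, 7);            /* prints both operands on failure */
+  simde_assert_double_equal(0.1 + 0.2, 0.3, 9); /* equal to within 1e-9 */
+  simde_assert_string_equal("simde", "simde");  /* strcmp()-based comparison */
+}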
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether they operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * We use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long long are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-f16.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if !defined(SIMDE_FLOAT16_H) +#define SIMDE_FLOAT16_H + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* Portable version which should work on pretty much any compiler. + * Obviously you can't rely on compiler support for things like + * conversion to/from 32-bit floats, so make sure you always use the + * functions and macros in this file! + * + * The portable implementations are (heavily) based on CC0 code by + * Fabian Giesen: (see also + * ). + * I have basically just modified it to get rid of some UB (lots of + * aliasing, right shifting a negative value), use fixed-width types, + * and work in C. 
*/ +#define SIMDE_FLOAT16_API_PORTABLE 1 +/* _Float16, per C standard (TS 18661-3; + * ). */ +#define SIMDE_FLOAT16_API_FLOAT16 2 +/* clang >= 6.0 supports __fp16 as an interchange format on all + * targets, but only allows you to use them for arguments and return + * values on targets which have defined an ABI. We get around the + * restriction by wrapping the __fp16 in a struct, but we can't do + * that on Arm since it would break compatibility with the NEON F16 + * functions. */ +#define SIMDE_FLOAT16_API_FP16_NO_ABI 3 +/* This is basically __fp16 as specified by Arm, where arugments and + * return values are raw __fp16 values not structs. */ +#define SIMDE_FLOAT16_API_FP16 4 + +/* Choosing an implementation. This is a bit rough, but I don't have + * any ideas on how to improve it. If you do, patches are definitely + * welcome. */ +#if !defined(SIMDE_FLOAT16_API) + #if 0 && !defined(__cplusplus) + /* I haven't found a way to detect this. It seems like defining + * __STDC_WANT_IEC_60559_TYPES_EXT__, then including float.h, then + * checking for defined(FLT16_MAX) should work, but both gcc and + * clang will define the constants even if _Float16 is not + * supported. Ideas welcome. */ + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FLOAT16 + #elif defined(__ARM_FP16_FORMAT_IEEE) && defined(SIMDE_ARM_NEON_FP16) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16 + #elif defined(__FLT16_MIN__) && (defined(__clang__) && (!defined(SIMDE_ARCH_AARCH64) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))) + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_FP16_NO_ABI + #else + #define SIMDE_FLOAT16_API SIMDE_FLOAT16_API_PORTABLE + #endif +#endif + +#if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16 + typedef _Float16 simde_float16; + #define SIMDE_FLOAT16_C(value) value##f16 +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI + typedef struct { __fp16 value; } simde_float16; + #if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_FLOAT16_C(value) (__extension__({ ((simde_float16) { HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ HEDLEY_STATIC_CAST(__fp16, (value)) }); HEDLEY_DIAGNOSTIC_POP })) + #else + #define SIMDE_FLOAT16_C(value) ((simde_float16) { HEDLEY_STATIC_CAST(__fp16, (value)) }) + #endif +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 + typedef __fp16 simde_float16; + #define SIMDE_FLOAT16_C(value) HEDLEY_STATIC_CAST(__fp16, (value)) +#elif SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_PORTABLE + typedef struct { uint16_t value; } simde_float16; +#else + #error No 16-bit floating point API. +#endif + +#if \ + defined(SIMDE_VECTOR_OPS) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE) && \ + (SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI) + #define SIMDE_FLOAT16_VECTOR +#endif + +/* Reinterpret -- you *generally* shouldn't need these, they're really + * intended for internal use. However, on x86 half-precision floats + * get stuffed into a __m128i/__m256i, so it may be useful. */ + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float16_as_uint16, uint16_t, simde_float16) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint16_as_float16, simde_float16, uint16_t) + +#define SIMDE_NANHF simde_uint16_as_float16(0x7E00) +#define SIMDE_INFINITYHF simde_uint16_as_float16(0x7C00) + +/* Conversion -- convert between single-precision and half-precision + * floats. 
*/ + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float16 +simde_float16_from_float32 (simde_float32 value) { + simde_float16 res; + + #if \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FLOAT16) || \ + (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16) + res = HEDLEY_STATIC_CAST(simde_float16, value); + #elif (SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16_NO_ABI) + res.value = HEDLEY_STATIC_CAST(__fp16, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint32_t f32u = simde_float32_as_uint32(value); + static const uint32_t f32u_infty = UINT32_C(255) << 23; + static const uint32_t f16u_max = (UINT32_C(127) + UINT32_C(16)) << 23; + static const uint32_t denorm_magic = + ((UINT32_C(127) - UINT32_C(15)) + (UINT32_C(23) - UINT32_C(10)) + UINT32_C(1)) << 23; + uint16_t f16u; + + uint32_t sign = f32u & (UINT32_C(1) << 31); + f32u ^= sign; + + /* NOTE all the integer compares in this function cast the operands + * to signed values to help compilers vectorize to SSE2, which lacks + * unsigned comparison instructions. This is fine since all + * operands are below 0x80000000 (we clear the sign bit). */ + + if (f32u > f16u_max) { /* result is Inf or NaN (all exponent bits set) */ + f16u = (f32u > f32u_infty) ? UINT32_C(0x7e00) : UINT32_C(0x7c00); /* NaN->qNaN and Inf->Inf */ + } else { /* (De)normalized number or zero */ + if (f32u < (UINT32_C(113) << 23)) { /* resulting FP16 is subnormal or zero */ + /* use a magic value to align our 10 mantissa bits at the bottom of + * the float. as long as FP addition is round-to-nearest-even this + * just works. */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) + simde_uint32_as_float32(denorm_magic)); + + /* and one integer subtract of the bias later, we have our final float! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u - denorm_magic); + } else { + uint32_t mant_odd = (f32u >> 13) & 1; + + /* update exponent, rounding bias part 1 */ + f32u += (HEDLEY_STATIC_CAST(uint32_t, 15 - 127) << 23) + UINT32_C(0xfff); + /* rounding bias part 2 */ + f32u += mant_odd; + /* take the bits! */ + f16u = HEDLEY_STATIC_CAST(uint16_t, f32u >> 13); + } + } + + f16u |= sign >> 16; + res = simde_uint16_as_float16(f16u); + #endif + + return res; +} + +static HEDLEY_ALWAYS_INLINE HEDLEY_CONST +simde_float32 +simde_float16_to_float32 (simde_float16 value) { + simde_float32 res; + + #if defined(SIMDE_FLOAT16_FLOAT16) || defined(SIMDE_FLOAT16_FP16) + res = HEDLEY_STATIC_CAST(simde_float32, value); + #else + /* This code is CC0, based heavily on code by Fabian Giesen. */ + uint16_t half = simde_float16_as_uint16(value); + const simde_float32 denorm_magic = simde_uint32_as_float32((UINT32_C(113) << 23)); + const uint32_t shifted_exp = UINT32_C(0x7c00) << 13; /* exponent mask after shift */ + uint32_t f32u; + + f32u = (half & UINT32_C(0x7fff)) << 13; /* exponent/mantissa bits */ + uint32_t exp = shifted_exp & f32u; /* just the exponent */ + f32u += (UINT32_C(127) - UINT32_C(15)) << 23; /* exponent adjust */ + + /* handle exponent special cases */ + if (exp == shifted_exp) /* Inf/NaN? */ + f32u += (UINT32_C(128) - UINT32_C(16)) << 23; /* extra exp adjust */ + else if (exp == 0) { /* Zero/Denormal? 
*/ + f32u += (1) << 23; /* extra exp adjust */ + f32u = simde_float32_as_uint32(simde_uint32_as_float32(f32u) - denorm_magic); /* renormalize */ + } + + f32u |= (half & UINT32_C(0x8000)) << 16; /* sign bit */ + res = simde_uint32_as_float32(f32u); + #endif + + return res; +} + +#ifdef SIMDE_FLOAT16_C + #define SIMDE_FLOAT16_VALUE(value) SIMDE_FLOAT16_C(value) +#else + #define SIMDE_FLOAT16_VALUE(value) simde_float16_from_float32(SIMDE_FLOAT32_C(value)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_FLOAT16_H) */ +/* :: End simde/simde-f16.h :: */ + +#if !defined(SIMDE_X86_F16C_H) +#define SIMDE_X86_F16C_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. 
Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + 
uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + 
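+/* A minimal sketch (illustrative only, not part of the imported header) of
+ * the pattern the portable fallbacks below follow: unpack the opaque
+ * simde__m64 into the private union, operate on the portable lane array,
+ * then repack. The function name and the add-one operation are arbitrary
+ * examples. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_x_incr_pi8_sketch_ (simde__m64 a) {
+  simde__m64_private a_ = simde__m64_to_private(a);
+  for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) {
+    a_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] + 1);
+  }
+  return simde__m64_from_private(a_);
+}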
+SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && 
((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) 
simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
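      /* Note: on NEON, vrndq_f32 maps to FRINTZ/VRINTZ, i.e. rounding toward
       * zero, which is exactly the truncation SIMDE_MM_FROUND_TO_ZERO asks for. */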
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of a.
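      // value is (b0, 0, 0, 0), so the addition below produces a[0]+b[0] in
      // lane 0 while the upper three lanes just add 0.0f to a's values.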
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
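/* Illustrative sketch (editorial example, not part of the upstream SIMDe
 * sources): the bitwise helpers defined above are commonly combined with a
 * sign-bit mask to build other float operations, similar to what
 * simde_x_mm_abs_ps and simde_x_mm_copysign_ps further below do.  The sketch
 * only relies on functions already defined at this point (simde_mm_set1_ps,
 * simde_mm_andnot_ps, simde_mm_xor_ps); the example_* names are hypothetical
 * and the block is guarded by "#if 0" so it does not change what this header
 * compiles to.  Note the operand order of andnot: it computes (~a) & b. */
#if 0
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
example_abs_ps (simde__m128 x) {
  /* -0.0f has only the sign bit set; (~mask) & x clears that bit in every
   * lane, giving |x| lane-wise. */
  return simde_mm_andnot_ps(simde_mm_set1_ps(-SIMDE_FLOAT32_C(0.0)), x);
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128
example_negate_ps (simde__m128 x) {
  /* XOR with the sign-bit mask flips the sign of every lane. */
  return simde_mm_xor_ps(x, simde_mm_set1_ps(-SIMDE_FLOAT32_C(0.0)));
}
#endif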
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
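/* Editor's note, not part of the upstream SIMDE sources: the blendv helpers in
 * this file select each lane from `b` when the corresponding lane of `mask`
 * has its sign bit set, and from `a` otherwise.  A minimal usage sketch,
 * assuming only the simde_mm_* functions defined earlier in this amalgamated
 * header; the function name `example_blendv_usage` is hypothetical and exists
 * only for illustration. */
static void example_blendv_usage(void) {
  simde__m128i a = simde_mm_set1_epi8(1);
  simde__m128i b = simde_mm_set1_epi8(2);
  /* Sign bit set in lanes 0..7, clear in lanes 8..15. */
  simde__m128i mask = simde_mm_set_epi8( 0,  0,  0,  0,  0,  0,  0,  0,
                                        -1, -1, -1, -1, -1, -1, -1, -1);
  /* r holds 2 in lanes 0..7 (taken from b) and 1 in lanes 8..15 (taken from a). */
  simde__m128i r = simde_mm_blendv_epi8(a, b, mask);
  (void) r;
}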
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
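/* Editor's note, not part of the upstream SIMDE sources: the cvtepi* widening
 * conversions above sign-extend the low lanes of their argument, while the
 * neighbouring cvtepu* variants zero-extend them, so the same 0x80 byte widens
 * to -128 in one case and to 128 in the other.  A minimal sketch, assuming
 * only simde_mm_* functions defined earlier in this amalgamated header; the
 * function name `example_widening_usage` is hypothetical and exists only for
 * illustration. */
static void example_widening_usage(void) {
  /* Every byte is 0x80: -128 when read as signed, 128 when read as unsigned. */
  simde__m128i v = simde_mm_set1_epi8(INT8_MIN);
  int32_t as_signed   = simde_mm_cvtsi128_si32(simde_mm_cvtepi8_epi32(v)); /* -128 */
  int32_t as_unsigned = simde_mm_cvtsi128_si32(simde_mm_cvtepu8_epi32(v)); /*  128 */
  (void) as_signed;
  (void) as_unsigned;
}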
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +#if !defined(SIMDE_X86_PF16C_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_PF16C_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_ph(simde__m128 a, 
const int imm8) { + simde__m128_private a_ = simde__m128_to_private(a); + simde__m128i_private r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + + HEDLEY_STATIC_CAST(void, imm8); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + r_.neon_f16 = vcombine_f16(vcvt_f16_f32(a_.neon_f32), vdup_n_f16(SIMDE_FLOAT16_C(0.0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i])); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_F16C_NATIVE) + #define simde_mm_cvtps_ph(a, imm8) _mm_cvtps_ph(a, imm8) +#endif +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_ph(a, sae) simde_mm_cvtps_ph(a, sae) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtph_ps(simde__m128i a) { + #if defined(SIMDE_X86_F16C_NATIVE) + return _mm_cvtph_ps(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + r_.neon_f32 = vcvt_f32_f16(vget_low_f16(a_.neon_f16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm_cvtph_ps(a) simde_mm_cvtph_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtps_ph(simde__m256 a, const int imm8) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128i_private r_; + + HEDLEY_STATIC_CAST(void, imm8); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.u16[i] = simde_float16_as_uint16(simde_float16_from_float32(a_.f32[i])); + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_F16C_NATIVE) + #define simde_mm256_cvtps_ph(a, imm8) _mm256_cvtps_ph(a, imm8) +#endif +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm256_cvtps_ph(a, imm8) simde_mm256_cvtps_ph(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cvtph_ps(simde__m128i a) { + #if defined(SIMDE_X86_F16C_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtph_ps(a); + #elif defined(SIMDE_X86_F16C_NATIVE) + return _mm256_setr_m128( + _mm_cvtph_ps(a), + _mm_cvtph_ps(_mm_castps_si128(_mm_permute_ps(_mm_castsi128_ps(a), 0xee))) + ); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__m256_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_float16_to_float32(simde_uint16_as_float16(a_.u16[i])); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES) + #define _mm256_cvtph_ps(a) simde_mm256_cvtph_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_F16C_H) */ +/* :: End simde/x86/f16c.h :: */ diff --git a/include/simde/x86/fma.h b/include/simde/x86/fma.h new file mode 100644 index 00000000..8ffa763a --- /dev/null +++ b/include/simde/x86/fma.h @@ -0,0 +1,34500 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/fma.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this 
software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2019 Evan Nemerson + */ + +#if !defined(SIMDE_X86_FMA_H) +#define SIMDE_X86_FMA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define 
HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define 
HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef 
HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) +# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION +#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define 
HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP 
__pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define 
HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION 
_Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) 
|| \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) +#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if 
defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + 
(HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + 
HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include <immintrin.h> +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include <nmmintrin.h> +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include <smmintrin.h> +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include <tmmintrin.h> +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include <pmmintrin.h> +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include <emmintrin.h> +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include <xmmintrin.h> +#elif defined(SIMDE_X86_MMX_NATIVE) + #include <mmintrin.h> +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include <intrin.h> + #else + #include <x86intrin.h> + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include <arm_neon.h> + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include <arm_fp16.h> + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include <arm_sve.h> + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include <wasm_simd128.h> +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using namespace std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include <altivec.h> + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include <vecintrin.h> + #endif + + /* Use these instead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include <loongson-mmiintrin.h> +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include <msa.h> +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extension, as well as whether + * we expect any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang, IEEE 754 isn't strictly followed, therefore IEEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaguely modern architecture I'm aware of uses IEEE 754, + * so we just assume IEEE 754 for now. There is a test which verifies + * this; if that test fails somewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include <lasxintrin.h> +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include <lsxintrin.h> +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in <math.h>, + * <cmath>, compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include <stdint.h> +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include <arm_neon.h> +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include <sleef.h> prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does require linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include <sleef.h> + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like <math.h> or <cmath> has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cmath>) + #define SIMDE_MATH_HAVE_CMATH + #include <cmath> + #elif __has_include(<math.h>) + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include <cmath> + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
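+   *
+   * For reference, the code below evaluates the closed-form
+   * approximation from the answer linked above; writing `a' for the
+   * tuned constant discussed above and u = ln((1 - x) * (1 + x)),
+   * i.e. u = ln(1 - x*x), it computes
+   *
+   *   tt1 = 2 / (pi * a) + u / 2
+   *   tt2 = u / a
+   *   erfinv(x) ~ sign(x) * sqrt(sqrt(tt1 * tt1 - tt2) - tt1)
+   *
+   * (a sketch of the math only, using the same tt1/tt2 names as the
+   * variables below).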
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
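+ *
+ * A short usage sketch (illustrative only; the type and function
+ * names below are placeholders, not part of the API) combining the
+ * macros documented in the rest of this header:
+ *
+ *   typedef struct {
+ *     SIMDE_ALIGN_TO_16 float f32[4];
+ *   } example_vec;
+ *
+ *   static void example_fn(float* mem) {
+ *     float* aligned = SIMDE_ALIGN_ASSUME_TO(mem, 16);
+ *     (void) aligned;  // the compiler may now assume 16-byte alignment
+ *   }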
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
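+ * (Concretely, the checked helper defined below prints a diagnostic of
+ * the form "file:line: alignment check failed for `ptr'" to stderr when
+ * the promise is violated, and still returns the pointer unchanged.)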
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
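+ *
+ * As an illustrative sketch only (the header choice is arbitrary and
+ * the combination is just an example, not a recommendation), a build
+ * that never feeds NaNs to these functions but still wants checked
+ * conversions could define the knobs before the first SIMDe include:
+ *
+ *   #define SIMDE_FAST_NANS
+ *   #define SIMDE_NO_FAST_CONVERSION_RANGE
+ *   #include "simde/arm/neon.h"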
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
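+ *
+ * A sketch of a typical call site (here `a' and `b' are placeholder
+ * operands, and the -Wfloat-equal suppressor defined later in this
+ * header is used as the example diagnostic):
+ *
+ *   r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);
+ *
+ * i.e. the first argument names the diagnostic-suppressing macro and
+ * the second is the expression evaluated while it is in effect.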
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
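+ * (Since this whole chain is guarded by !defined(SIMDE_ENDIAN_ORDER),
+ * a build for an undetected platform can also just predefine it, for
+ * example
+ *
+ *   #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
+ *
+ * before including any SIMDe header; this is only an illustration of
+ * the existing guard, not an additional mechanism.)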
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
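+ * (On a freestanding target the simde_memcpy/simde_memset/simde_memcmp
+ * macros below can be pointed at custom routines before the first
+ * SIMDe include; as a sketch, with `my_memcpy' standing in for
+ * whatever the platform provides:
+ *
+ *   #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+ * )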
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
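+ *
+ * As a concrete illustration (assuming the common data models), when
+ * int64_t is `long' the 64-bit suffix is `l', so
+ * SIMDE_BUILTIN_64_(clz) expands to __builtin_clzl and
+ * SIMDE_BUILTIN_TYPE_64_ is `long'; when int64_t is `long long' the
+ * same expression yields __builtin_clzll instead.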
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
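+ *
+ * Call sites just wrap the offending expression, e.g. (with `a' and
+ * `b' as placeholder operands):
+ *
+ *   r = SIMDE_BUG_IGNORE_SIGN_CONVERSION(a + b);
+ *
+ * which evaluates the expression with -Wsign-conversion suppressed on
+ * the affected compiler versions and is a plain pass-through
+ * elsewhere.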
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of a.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +#if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmadd_pd (simde__m128d a, 
simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmadd_pd(a, b, c); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmaq_f64(c_.neon_f64, b_.neon_f64, a_.neon_f64); + #elif defined(simde_math_fma) && (defined(__FP_FAST_FMA) || defined(FP_FAST_FMA)) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fma(a_.f64[i], b_.f64[i], c_.f64[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_pd + #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmadd_pd(a, b, c); + #else + return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmadd_pd + #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmadd_ps(a, b, c); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); + #elif defined(simde_math_fmaf) && (defined(__FP_FAST_FMAF) || defined(FP_FAST_FMAF)) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_ps + #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmadd_ps(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_fmadd_ps(a_.m128[i], b_.m128[i], c_.m128[i]); + } + + return simde__m256_from_private(r_); + #else + return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmadd_ps + #define _mm256_fmadd_ps(a, b, c) 
simde_mm256_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmadd_sd(a, b, c); + #else + return simde_mm_add_sd(simde_mm_mul_sd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_sd + #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmadd_ss(a, b, c); + #else + return simde_mm_add_ss(simde_mm_mul_ss(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_ss + #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmaddsub_pd(a, b, c); + #else + return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmaddsub_pd + #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmaddsub_pd(a, b, c); + #else + return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmaddsub_pd + #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmaddsub_ps(a, b, c); + #else + return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmaddsub_ps + #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmaddsub_ps(a, b, c); + #else + return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmaddsub_ps + #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsub_pd(a, b, c); + #else + return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_pd + #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsub_pd(a, b, c); + #else + return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsub_pd + #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return 
_mm_fmsub_ps(a, b, c); + #else + return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_ps + #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsub_ps(a, b, c); + #else + return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsub_ps + #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmsub_sd(a, b, c); + #else + return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_sd + #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmsub_ss(a, b, c); + #else + return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_ss + #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsubadd_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; + r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsubadd_pd + #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsubadd_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; + r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsubadd_pd + #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsubadd_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; + r_.f32[i + 1] = (a_.f32[i + 
1] * b_.f32[i + 1]) - c_.f32[i + 1]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsubadd_ps + #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsubadd_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; + r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsubadd_ps + #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmadd_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmsq_f64(c_.neon_f64, a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_pd + #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmadd_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmadd_pd + #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmadd_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_ps + #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if 
defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmadd_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmadd_ps + #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmadd_sd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + r_ = a_; + r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_sd + #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmadd_ss(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + r_ = a_; + r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_ss + #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmsub_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_pd + #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmsub_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmsub_pd + #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmsub_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_ps + #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmsub_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmsub_ps + #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmsub_sd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + r_ = a_; + r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_sd + #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmsub_ss(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + r_ = simde__m128_to_private(a); + r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_ss + #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_FMA_H) */ +/* :: End simde/x86/fma.h :: */ diff --git a/include/simde/x86/gfni.h b/include/simde/x86/gfni.h new file mode 100644 index 00000000..7100a491 --- /dev/null +++ b/include/simde/x86/gfni.h @@ -0,0 +1,53764 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/gfni.h :: */ +/* Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Christopher Moore + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_GFNI_H) +#define SIMDE_X86_GFNI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/add.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_ADD_H) +#define SIMDE_X86_AVX512_ADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_TYPES_H) +#define SIMDE_X86_AVX512_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. 
Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
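      /* a value that is not normal, not zero, not NaN and not infinite can
         only be subnormal, so the final fallback value below needs no
         explicit test */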
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
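/* A note on the simde_math_exp10/simde_math_exp10f wrappers above: unlike
 * most wrappers in this header they fall back to pow(10.0, v) rather than
 * to a <math.h> function, because exp10 is a GNU extension that a
 * conforming libm is not required to provide.  Mathematically 10^v and
 * pow(10, v) are the same quantity, although the two library routines may
 * round slightly differently. */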
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
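/* Usage sketch for the wrappers above (the function name below is ours and
 * purely illustrative, and the block is kept under "#if 0" so it is
 * documentation only): simde_math_expm1 and simde_math_log1p exist because
 * exp(x) - 1.0 and log(1.0 + x) lose most of their significant digits to
 * cancellation when x is tiny, while expm1/log1p stay accurate.  A caller
 * that benefits looks like this: */
#if 0
static HEDLEY_INLINE
double
my_compound_growth(double rate, double periods) {
  /* (1 + rate)^periods - 1, computed without cancellation for small rates */
  return simde_math_expm1(periods * simde_math_log1p(rate));
}
#endif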
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
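     *
     * For reference, the approximation implemented below, with
     * a = 0.14829094707965850830078125 and L = ln(1 - x*x), is
     *
     *   erfinv(x) ~= sgn(x) * sqrt( sqrt( (2/(pi*a) + L/2)^2 - L/a )
     *                               - (2/(pi*a) + L/2) )
     *
     * so in the code tt1 = 2/(pi*a) + L/2 and tt2 = L/a.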
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
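    /* trick used here and in the wider subs_i16/i32/i64 fallbacks below:
       do the subtraction in unsigned arithmetic (where wraparound is well
       defined), build the saturation value from a's sign bit, since
       (a_ >> 7) + INT8_MAX gives INT8_MAX for non-negative a and the
       INT8_MIN bit pattern for negative a, and substitute it only when
       (a_ ^ b_) & (a_ ^ r_) has its top bit set, i.e. when a and b have
       different signs and the raw result's sign differs from a's, which is
       exactly the signed-overflow condition for subtraction */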
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
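The constify macros above let a value known only at run time be forwarded to a macro or intrinsic that insists on an integer constant expression, by expanding to a switch with one case per admissible constant. A small sketch of the internal-use SIMDE_CONSTIFY_32_ macro; my_srli_epi32 is a hypothetical placeholder for an ICE-only shift macro, not part of SIMDe:

#include <stdint.h>

#define my_srli_epi32(v, imm) ((v) >> (imm))  /* placeholder for an ICE-only macro */

static uint32_t
shift_right_runtime(uint32_t v, int count) {  /* count need not be a constant */
  uint32_t r;
  /* Expands to a switch over count; each case calls my_srli_epi32 with a literal 0..31,
   * and anything out of range falls through to the default value (0 here). */
  SIMDE_CONSTIFY_32_(my_srli_epi32, r, 0, count, v);
  return r;
}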
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
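A brief sketch of the declaration-side macros above, assuming the surrounding definitions are in scope; my_i32x4 is an illustrative type, not part of the library. The numeric SIMDE_ALIGN_TO_16 form is used because it stays within what MSVC's __declspec(align) accepts, and it is capped to the platform maximum:

#include <stdint.h>
#include <stdio.h>

typedef struct {
  SIMDE_ALIGN_TO_16 int32_t values[4];   /* 16-byte-aligned four-lane buffer */
} my_i32x4;

int main(void) {
  my_i32x4 v = { { 1, 2, 3, 4 } };
#if defined(SIMDE_ALIGN_OF)
  /* SIMDE_ALIGN_OF is only defined when the compiler has an alignof-like
   * operator, so guard any use of it. */
  printf("alignment: %u\n", (unsigned) SIMDE_ALIGN_OF(my_i32x4));
#endif
  return (int) v.values[0];
}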
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
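Pulling the pointer-side macros above together, a hedged sketch of a load helper: SIMDE_ALIGN_CAST silences the cast-align warning, and SIMDE_ALIGN_ASSUME_TO tells the compiler (or, with SIMDE_ALIGN_DEBUG defined, checks at run time) that the pointer really is 16-byte aligned. The simde_my_* names are illustrative only, and the caller is assumed to pass a 16-byte-aligned pointer:

#include <stdint.h>

typedef struct {
  SIMDE_ALIGN_TO_16 int32_t values[4];
} simde_my_i32x4;                        /* illustrative type */

static simde_my_i32x4
simde_my_load(const int32_t ptr[4]) {    /* caller promises 16-byte alignment */
  const simde_my_i32x4* p =
    SIMDE_ALIGN_ASSUME_TO(SIMDE_ALIGN_CAST(const simde_my_i32x4*, ptr), 16);
  return *p;
}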
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
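SIMDE_REQUIRE_CONSTANT_RANGE is attached after a function's parameter list so that, on compilers with the needed diagnostics, passing a non-constant or out-of-range argument is reported at compile time; on other compilers it expands to nothing. A hedged sketch with an invented extract-lane helper (my_extract_lane and my_i32x4 are not SIMDe names):

#include <stdint.h>

typedef struct { int32_t values[4]; } my_i32x4;   /* illustrative type */

static int32_t
my_extract_lane(my_i32x4 v, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  return v.values[lane];
}

/* my_extract_lane(v, 2) compiles quietly; my_extract_lane(v, i) or
 * my_extract_lane(v, 9) can trigger a diagnostic where supported. */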
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
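Where the compiler provides the GCC-style extensions detected above, SIMDE_VECTOR declares a native vector type and SIMDE_SHUFFLE_VECTOR_ abstracts over __builtin_shufflevector/__builtin_shuffle; SIMDE_VECTORIZE marks a loop for the best available vectorization pragma (and is a no-op otherwise). A guarded sketch; the v4i32 typedef and the helper functions are illustrative only:

#include <stddef.h>
#include <stdint.h>

#if defined(SIMDE_VECTOR_SUBSCRIPT) && defined(SIMDE_SHUFFLE_VECTOR_)
typedef int32_t v4i32 SIMDE_VECTOR(16);   /* four 32-bit lanes in 16 bytes */

static v4i32
reverse4(v4i32 a) {
  /* elem_size is in bits, vec_size in bytes; the indices pick lanes of a (0..3). */
  return SIMDE_SHUFFLE_VECTOR_(32, 16, a, a, 3, 2, 1, 0);
}
#endif

static void
add_arrays(float* dest, const float* a, const float* b, size_t n) {
  SIMDE_VECTORIZE
  for (size_t i = 0 ; i < n ; i++) {
    dest[i] = a[i] + b[i];
  }
}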
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
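simde_bswap64 byte-swaps a 64-bit value (via a builtin where possible, the shift-and-mask fallback otherwise), and simde_endian_bswap64_be/_le convert between host order and the named order using the endianness detected above. A small sketch, assuming a hosted <stdio.h>:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t host = UINT64_C(0x0102030405060708);
  /* Byte swapping is its own inverse. */
  if (simde_bswap64(simde_bswap64(host)) != host) return 1;
  /* On a little-endian build this swaps; on a big-endian build it is a no-op. */
  uint64_t be = simde_endian_bswap64_be(host);
  printf("%016llx\n", (unsigned long long) be);
  return 0;
}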
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
+ */
+#if !defined(simde_memcpy)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
+    #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
+  #endif
+#endif
+#if !defined(simde_memset)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memset)
+    #define simde_memset(s, c, n) __builtin_memset(s, c, n)
+  #endif
+#endif
+#if !defined(simde_memcmp)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
+    #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
+  #endif
+#endif
+
+#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
+  #if !defined(SIMDE_NO_STRING_H)
+    #if defined(__has_include)
+      #if !__has_include(<string.h>)
+        #define SIMDE_NO_STRING_H
+      #endif
+    #elif (SIMDE_STDC_HOSTED == 0)
+      #define SIMDE_NO_STRING_H
+    #endif
+  #endif
+
+  #if !defined(SIMDE_NO_STRING_H)
+    #include <string.h>
+    #if !defined(simde_memcpy)
+      #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
+    #endif
+    #if !defined(simde_memset)
+      #define simde_memset(s, c, n) memset(s, c, n)
+    #endif
+    #if !defined(simde_memcmp)
+      #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
+    #endif
+  #else
+    /* These are meant to be portable, not fast.  If you're hitting them you
+     * should think about providing your own (by defining the simde_memcpy
+     * macro prior to including any SIMDe files) or submitting a patch to
+     * SIMDe so we can detect your system-provided memcpy/memset, like by
+     * adding your compiler to the checks for __builtin_memcpy and/or
+     * __builtin_memset. */
+    #if !defined(simde_memcpy)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memcpy_(void* dest, const void* src, size_t len) {
+        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
+        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
+        for (size_t i = 0 ; i < len ; i++) {
+          dest_[i] = src_[i];
+        }
+      }
+      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
+    #endif
+
+    #if !defined(simde_memset)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memset_(void* s, int c, size_t len) {
+        char* s_ = HEDLEY_STATIC_CAST(char*, s);
+        char c_ = HEDLEY_STATIC_CAST(char, c);
+        for (size_t i = 0 ; i < len ; i++) {
+          s_[i] = c_;
+        }
+      }
+      #define simde_memset(s, c, n) simde_memset_(s, c, n)
+    #endif
+
+    #if !defined(simde_memcmp)
+      SIMDE_FUNCTION_ATTRIBUTES
+      int
+      simde_memcmp_(const void *s1, const void *s2, size_t n) {
+        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
+        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
+        for (size_t i = 0 ; i < n ; i++) {
+          if (s1_[i] != s2_[i]) {
+            return (int) (s1_[i] - s2_[i]);
+          }
+        }
+        return 0;
+      }
+      #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
+    #endif
+  #endif
+#endif
+
+/*** Functions that quiet a signaling NaN ***/
+
+static HEDLEY_INLINE
+double
+simde_math_quiet(double x) {
+  uint64_t tmp, mask;
+  if (!simde_math_isnan(x)) {
+    return x;
+  }
+  /* Copy the bits out, set the quiet bit (top bit of the mantissa), copy back. */
+  simde_memcpy(&tmp, &x, 8);
+  mask = 0x7ff80000;
+  mask <<= 32;
+  tmp |= mask;
+  simde_memcpy(&x, &tmp, 8);
+  return x;
+}
+
+static HEDLEY_INLINE
+float
+simde_math_quietf(float x) {
+  uint32_t tmp;
+  if (!simde_math_isnanf(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 4);
+  tmp |= 0x7fc00000lu;
+  simde_memcpy(&x, &tmp, 4);
+  return x;
+}
+
+#if defined(FE_ALL_EXCEPT)
+  #define SIMDE_HAVE_FENV_H
+#elif defined(__has_include)
+  #if __has_include(<fenv.h>)
+    #include <fenv.h>
+    #define SIMDE_HAVE_FENV_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+  #include <fenv.h>
+  #define SIMDE_HAVE_FENV_H
+#endif
+
+#if defined(EXIT_FAILURE)
+  #define SIMDE_HAVE_STDLIB_H
+#elif defined(__has_include)
+  #if __has_include(<stdlib.h>)
+    #include <stdlib.h>
+    #define SIMDE_HAVE_STDLIB_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
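
/* Illustrative only -- not part of SIMDe.  A minimal sketch showing how the
 * typed assertion helpers defined above are meant to be invoked; the name
 * simde_assert_example_ is made up for this example.  The checks compile to
 * no-ops unless assertions are enabled (i.e. SIMDE_NDEBUG is left undefined,
 * for instance because SIMDE_DEBUG was requested). */
SIMDE_FUNCTION_ATTRIBUTES
void
simde_assert_example_(size_t len, int value) {
  /* fails with a formatted error if len > 16 (when assertions are on) */
  simde_assert_size(len, <=, HEDLEY_STATIC_CAST(size_t, 16));
  /* same idea for plain ints, using the "%d" variant defined above */
  simde_assert_int(value, >=, 0);
}
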
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
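 *
 * A hedged illustration of the intended pattern (c and v are hypothetical
 * variables; assume a platform where int is 32 bits wide, so that
 * SIMDE_BUILTIN_SUFFIX_32_ is empty and SIMDE_BUILTIN_TYPE_32_ is int):
 *
 *   #if SIMDE_BUILTIN_HAS_32_(popcount)
 *     c = HEDLEY_STATIC_CAST(int32_t,
 *           SIMDE_BUILTIN_32_(popcount)(
 *             HEDLEY_STATIC_CAST(SIMDE_BUILTIN_TYPE_32_, v)));
 *   #endif
 *
 * On such a platform this expands to a guarded call to __builtin_popcount;
 * on a target where int is not 32 bits wide but long is, the same macros
 * resolve to __builtin_popcountl instead.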
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
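/* Illustrative sketch, not SIMDe code: elsewhere in the headers these
 * SIMDE_BUG_* macros simply gate individual code paths, roughly
 *
 *   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_95399)
 *     ...use the native path...
 *   #else
 *     ...portable fallback...
 *   #endif
 *
 * (the particular pairing above is chosen only for illustration), so defining
 * one of them merely routes an operation to its portable implementation. */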
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
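 *
 * Elsewhere in SIMDe the macro defined just below is wrapped around a single
 * expression, along the lines of (illustrative; r_, a_ and b_ stand for the
 * usual private views):
 *
 *   r_.u8[i] = SIMDE_BUG_IGNORE_SIGN_CONVERSION(a_.u8[i] + b_.u8[i]);
 *
 * On the affected GCC/clang versions this silences -Wsign-conversion for just
 * that one expression; everywhere else it is a plain pass-through.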
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
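/* Illustrative only -- not part of the MMX API.  A hedged sketch of the
 * round-trip idiom every function in this file uses: build a
 * simde__m64_private, operate on one of its portable views, then convert
 * back with simde__m64_from_private().  The name simde_x_example_splat_i8_
 * is made up for this example. */
simde__m64
simde_x_example_splat_i8_ (int8_t value) {
  simde__m64_private r_;

  /* fill all eight lanes of the 8-bit view with the same value */
  for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
    r_.i8[i] = value;
  }

  return simde__m64_from_private(r_);
}

SIMDE_FUNCTION_ATTRIBUTES /* attributes for simde_mm_adds_pu8 below */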
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
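+        /* Shift counts above 15 clear every 16-bit lane, matching the MMX
+         * psllw behaviour: e.g. simde_mm_slli_pi16(x, 16) is an all-zero
+         * vector (illustrative example, not an upstream test case). */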
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
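+/* Usage sketch contrasting arithmetic and logical right shifts
+ * (illustrative values only, not taken from the upstream tests):
+ *
+ *   simde__m64 v = simde_mm_set1_pi16(-8);
+ *   simde_mm_srai_pi16(v, 1)   -> every lane is -4     (sign bit replicated)
+ *   simde_mm_srli_pi16(v, 1)   -> every lane is 0x7FFC (32764, zero filled)
+ */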
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
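+    /* Unsigned saturating subtraction: lanes never wrap below zero.  A small
+     * sketch with illustrative inputs (not from the upstream test suite):
+     *
+     *   simde_mm_subs_pu16(simde_x_mm_set_pu16(10, 1, 0, 9),
+     *                      simde_x_mm_set_pu16( 3, 2, 5, 4))
+     *
+     * gives the lanes (7, 0, 0, 5) in the same e3..e0 order, instead of
+     * letting 1 - 2 and 0 - 5 wrap around to 65535 and 65531. */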
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
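+      /* Portable interleave of the high 32-bit lanes: with a = {a0, a1} and
+       * b = {b0, b1} (low lane first), the result is {a1, b1}; the two
+       * assignments below spell that out. */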
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
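+    /* Typical save/restore pattern around rounding-sensitive code
+     * (illustrative sketch; any SIMDE_MM_ROUND_* value handled by this
+     * switch can be requested):
+     *
+     *   unsigned int saved = SIMDE_MM_GET_ROUNDING_MODE();
+     *   SIMDE_MM_SET_ROUNDING_MODE(SIMDE_MM_ROUND_TOWARD_ZERO);
+     *   ... rounding-sensitive work ...
+     *   SIMDE_MM_SET_ROUNDING_MODE(saved);
+     */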
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_x_mm_broadcastlow_ps(simde__m128 a) {
+  /* This function broadcasts the first element in the input vector to
+   * all lanes.  It is used to avoid generating spurious exceptions in
+   * *_ss functions since there may be garbage in the upper lanes. */
+
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_shuffle_ps(a, a, 0);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a);
+
+    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+      r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_splat(a_.altivec_f32, 0);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]);
+    #elif defined(SIMDE_SHUFFLE_VECTOR_)
+      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[0];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ps (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ps(a, b);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32);
+    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+      r_.f32 = a_.f32 + b_.f32;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[i] + b_.f32[i];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ss (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ss(a, b);
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+    return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+    return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
+      float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
+      // the upper values in the result must be the remnants of a.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
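+/* A minimal usage sketch (illustrative only, not part of upstream SIMDe; it
+ * relies solely on the wrappers defined above): because simde_mm_and_ps,
+ * simde_mm_andnot_ps and simde_mm_xor_ps treat the 128-bit register as raw
+ * bits, they compose exactly like the native intrinsics.  Negating every
+ * lane, for instance, is just an XOR with the sign-bit pattern:
+ *
+ *   static inline simde__m128 negate_ps(simde__m128 v) {   // hypothetical helper
+ *     // simde_mm_set1_ps(-0.0f) broadcasts a lone sign bit to all four lanes;
+ *     // XOR-ing it in flips exactly that bit, leaving exponent/mantissa intact.
+ *     return simde_mm_xor_ps(v, simde_mm_set1_ps(-0.0f));
+ *   }
+ *
+ * The same idiom appears below in simde_x_mm_xorsign_ps and
+ * simde_x_mm_copysign_ps. */
+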
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
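/* Usage sketch (illustrative, not taken from the upstream sources): with this
 * amalgamated header on the include path -- the exact path below is an
 * assumption -- the emulated SSE API can be called exactly like the native one.
 * simde_mm_load_ps expects a 16-byte-aligned pointer, mirroring _mm_load_ps;
 * simde_mm_loadu_ps (defined a little further down) has no alignment
 * requirement, and simde_mm_storeu_ps is assumed to be provided elsewhere in
 * this header as the unaligned-store counterpart.
 *
 *   #include "simde/x86/sse.h"
 *
 *   void scale4(float dst[4], const float src[4], float s) {
 *     simde__m128 v = simde_mm_loadu_ps(src);        // unaligned load of 4 floats
 *     v = simde_mm_mul_ps(v, simde_mm_set1_ps(s));   // element-wise multiply by s
 *     simde_mm_storeu_ps(dst, v);                    // unaligned store of the result
 *   }
 *
 * On x86 with SSE enabled these calls resolve to the native intrinsics; on
 * other targets they fall back to the portable implementations in this file. */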
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
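/* Illustration: for a = { -1.0f, 2.0f, -3.0f, 4.0f } the per-lane sign bits are
 * 1,0,1,0. After the 32-bit shift each u32 lane holds just that bit; the
 * shift-right-and-accumulate (vsraq_n_u64) packs each pair of lanes into the low
 * two bits of its 64-bit half, so byte 0 of 'paired' carries lanes 0-1 and byte 8
 * carries lanes 2-3. The expression below then yields 0b01 | (0b01 << 2) = 5,
 * the same mask _mm_movemask_ps would return for that input. */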
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
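+/* Usage note: a minimal illustrative sketch of the transpose macro defined
+ * just above; the names m and r0..r3 below are placeholders, not part of the
+ * API. SIMDE_MM_TRANSPOSE4_PS transposes a 4x4 float matrix held in four
+ * simde__m128 rows in place, mirroring _MM_TRANSPOSE4_PS from <xmmintrin.h>.
+ * Assuming float m[4][4] and rows loaded with simde_mm_loadu_ps:
+ *
+ *   simde__m128 r0 = simde_mm_loadu_ps(&m[0][0]);
+ *   simde__m128 r1 = simde_mm_loadu_ps(&m[1][0]);
+ *   simde__m128 r2 = simde_mm_loadu_ps(&m[2][0]);
+ *   simde__m128 r3 = simde_mm_loadu_ps(&m[3][0]);
+ *   SIMDE_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
+ *   // r0 now holds the first column of m, r1 the second, and so on.
+ */
+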
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
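+/* A minimal scalar sketch of the portable fallback used by
+ * simde_mm_avg_epu8/simde_mm_avg_epu16 above (illustration only): widen so
+ * the sum cannot overflow, then take the rounded average (a + b + 1) >> 1
+ * per lane, which is what _mm_avg_epu8 computes.
+ *
+ *   static inline uint8_t example_avg_u8(uint8_t a, uint8_t b) {
+ *     uint16_t wa = a, wb = b;                 // widen to avoid overflow
+ *     return (uint8_t) ((wa + wb + 1) >> 1);   // rounded unsigned average
+ *   }
+ *
+ * example_avg_u8 is a hypothetical helper name used only for this sketch.
+ */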
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+      a_ = simde__m128d_to_private(a),
+      b_ = simde__m128d_to_private(b);
+
+    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+      r_.f64 = a_.f64 * b_.f64;
+    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+      r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
+        r_.f64[i] = a_.f64[i] * b_.f64[i];
+      }
+    #endif
+
+    return simde__m128d_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
+  #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128d
+simde_mm_mul_sd (simde__m128d a, simde__m128d b) {
+  #if defined(SIMDE_X86_SSE2_NATIVE)
+    return _mm_mul_sd(a, b);
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+    return simde_mm_move_sd(a, simde_mm_mul_pd(a, b));
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+    return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));
+  #else
+    simde__m128d_private
+      r_,
+      a_ = simde__m128d_to_private(a),
+      b_ = simde__m128d_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+      float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64);
+      r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(a_.neon_f64, 1), temp, 1);
+    #else
+      r_.f64[0] = a_.f64[0] * b_.f64[0];
+      r_.f64[1] = a_.f64[1];
+    #endif
+
+    return simde__m128d_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
+  #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_mul_su32 (simde__m64 a, simde__m64 b) {
+  #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI)
+    return _mm_mul_su32(a, b);
+  #else
+    simde__m64_private
+      r_,
+      a_ = simde__m64_to_private(a),
+      b_ = simde__m64_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0);
+    #else
+      r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]);
+    #endif
+
+    return simde__m64_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
+  #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) {
+  #if defined(SIMDE_X86_SSE2_NATIVE)
+    return _mm_mulhi_epi16(a, b);
+  #else
+    simde__m128i_private
+      r_,
+      a_ = simde__m128i_to_private(a),
+      b_ = simde__m128i_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      int16x4_t a3210 = vget_low_s16(a_.neon_i16);
+      int16x4_t b3210 = vget_low_s16(b_.neon_i16);
+      int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */
+      #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+        int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16);
+        r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654));
+      #else
+        int16x4_t a7654 = vget_high_s16(a_.neon_i16);
+        int16x4_t b7654 = vget_high_s16(b_.neon_i16);
+        int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */
+        uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654));
+        r_.neon_u16 = rv.val[1];
+      #endif
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128);
+      const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128);
+      
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
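  /* Annotation (illustrative, not part of the simde sources): in these
   * blendv helpers only the sign bit of each mask lane is consulted; lane i
   * of the result is b[i] when that bit is set and a[i] otherwise.  That is
   * why the portable fallbacks arithmetically shift the mask right by
   * (lane width - 1) bits to turn it into an all-ones / all-zeros selector.
   * Example usage with hypothetical values:
   *
   *   simde__m128i a = simde_mm_set1_epi8(1);
   *   simde__m128i b = simde_mm_set1_epi8(2);
   *   simde__m128i m = simde_mm_set1_epi8(INT8_MIN);   // sign bit set
   *   simde__m128i r = simde_mm_blendv_epi8(a, b, m);  // every lane == 2
   */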
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for + * __m512/__m512i/__m512d. 
Since our private union has an __m512 member it will be 64-byte + * aligned even if we reduce the alignment requirements of other members. + * + * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the + * to/from private functions will break, and I'm not willing to change their APIs to use + * pointers (which would also require more verbose code on the caller side) just to make + * MSVC happy. + * + * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, + * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to + * fix this without requiring API changes (except transparently through macros), patches + * are welcome. + */ + +# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) +# if defined(SIMDE_X86_AVX512F_NATIVE) +# undef SIMDE_X86_AVX512F_NATIVE +# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") +# endif +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 +# else +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 +# endif + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_ALIGN_TO_16 __m128bh n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + 
v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_BF16_NATIVE) + SIMDE_ALIGN_TO_32 __m256bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) 
+ SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_AVX512_ALIGN __m512bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t 
u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) 
SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; + SIMDE_AVX512_ALIGN simde__m128d m128d[4]; + SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; + SIMDE_AVX512_ALIGN simde__m256d m256d[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + 
SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; + SIMDE_AVX512_ALIGN simde__m128i m128i[4]; + SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; + SIMDE_AVX512_ALIGN simde__m256i m256i[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512i_private; + +/* Intel uses the same header (immintrin.h) for everything AVX and + * later. If native aliases are enabled, and the machine has native + * support for AVX imintrin.h will already have been included, which + * means simde__m512* will already have been defined. So, even + * if the machine doesn't support AVX512F we need to use the native + * type; it has already been defined. + * + * However, we also can't just assume that including immintrin.h does + * actually define these. It could be a compiler which supports AVX + * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we + * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, + * so we assume that if it's present AVX-512F has already been + * declared. + * + * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC + * uses the preprocessor to define all the _MM_CMPINT_* members, + * in most compilers they are simply normal enum members. However, + * all compilers I've looked at use an object-like macro for + * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT + * is included in case a compiler does the reverse, though I haven't + * run into one which does. + * + * As for the ICC check, unlike other compilers, merely using the + * AVX-512 types causes ICC to generate AVX-512 instructions. 
*/ +#if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && (defined(SIMDE_X86_AVX512F_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m512 simde__m512; + typedef __m512i simde__m512i; + typedef __m512d simde__m512d; + + typedef __mmask8 simde__mmask8; + typedef __mmask16 simde__mmask16; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m512_private simde__m512; + typedef simde__m512i_private simde__m512i; + typedef simde__m512d_private simde__m512d; + #endif + + typedef uint8_t simde__mmask8; + typedef uint16_t simde__mmask16; +#endif + +#if (defined(_AVX512BF16INTRIN_H_INCLUDED) || defined(__AVX512BF16INTRIN_H)) && (defined(SIMDE_X86_AVX512BF16_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m128bh simde__m128bh; + typedef __m256bh simde__m256bh; + typedef __m512bh simde__m512bh; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128bh SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m256bh SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m512bh SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m128bh_private simde__m128bh; + typedef simde__m256bh_private simde__m256bh; + typedef simde__m512bh_private simde__m512bh; + #endif +#endif + +/* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is + * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang + * both are in avx512bwintrin.h), not AVX-512F. However, we don't have + * a good (not-compiler-specific) way to detect if these headers have + * been included. In compilers which support AVX-512F but not + * AVX-512BW/VL (e.g., GCC 4.9) we need typedefs since __mmask{32,64) + * won't exist. + * + * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t + * in all compilers, so it's safe to use these instead of typedefs to + * __mmask{16,32}. If you run into a problem with this please file an + * issue and we'll try to figure out a work-around. 
*/ +typedef uint32_t simde__mmask32; +typedef uint64_t simde__mmask64; +#if !defined(__mmask32) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef uint32_t __mmask32; + #else + #define __mmask32 uint32_t; + #endif +#endif +#if !defined(__mmask64) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + #if defined(HEDLEY_GCC_VERSION) + typedef unsigned long long __mmask64; + #else + typedef uint64_t __mmask64; + #endif + #else + #define __mmask64 uint64_t; + #endif +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m512 __m512; + typedef simde__m512i __m512i; + typedef simde__m512d __m512d; + #else + #define __m512 simde__m512 + #define __m512i simde__m512i + #define __m512d simde__m512d + #endif +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m128bh __m128bh; + typedef simde__m256bh __m256bh; + typedef simde__m512bh __m512bh; + #else + #define __m128bh simde__m128bh + #define __m256bh simde__m256bh + #define __m512bh simde__m512bh + #endif +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh), "simde__m128bh size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh_private), "simde__m128bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh), "simde__m256bh size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh_private), "simde__m256bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh), "simde__m512bh size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh_private), "simde__m512bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh) == 16, "simde__m128bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh_private) == 16, "simde__m128bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh) == 32, "simde__m256bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh_private) == 32, "simde__m256bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh) == 32, "simde__m512bh is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh_private) == 32, "simde__m512bh_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); 
+HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); +#endif + +#define SIMDE_MM_CMPINT_EQ 0 +#define SIMDE_MM_CMPINT_LT 1 +#define SIMDE_MM_CMPINT_LE 2 +#define SIMDE_MM_CMPINT_FALSE 3 +#define SIMDE_MM_CMPINT_NE 4 +#define SIMDE_MM_CMPINT_NLT 5 +#define SIMDE_MM_CMPINT_NLE 6 +#define SIMDE_MM_CMPINT_TRUE 7 +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && !defined(_MM_CMPINT_EQ) +#define _MM_CMPINT_EQ SIMDE_MM_CMPINT_EQ +#define _MM_CMPINT_LT SIMDE_MM_CMPINT_LT +#define _MM_CMPINT_LE SIMDE_MM_CMPINT_LE +#define _MM_CMPINT_FALSE SIMDE_MM_CMPINT_FALSE +#define _MM_CMPINT_NE SIMDE_MM_CMPINT_NE +#define _MM_CMPINT_NLT SIMDE_MM_CMPINT_NLT +#define _MM_CMPINT_NLE SIMDE_MM_CMPINT_NLE +#define _MM_CMPINT_TRUE SIMDE_CMPINT_TRUE +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh +simde__m128bh_from_private(simde__m128bh_private v) { + simde__m128bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh_private +simde__m128bh_to_private(simde__m128bh v) { + simde__m128bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh +simde__m256bh_from_private(simde__m256bh_private v) { + simde__m256bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh_private +simde__m256bh_to_private(simde__m256bh v) { + simde__m256bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh +simde__m512bh_from_private(simde__m512bh_private v) { + simde__m512bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh_private +simde__m512bh_to_private(simde__m512bh v) { + simde__m512bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde__m512_from_private(simde__m512_private v) { + simde__m512 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512_private +simde__m512_to_private(simde__m512 v) { + simde__m512_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde__m512i_from_private(simde__m512i_private v) { + simde__m512i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i_private +simde__m512i_to_private(simde__m512i v) { + simde__m512i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde__m512d_from_private(simde__m512d_private v) { + simde__m512d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d_private +simde__m512d_to_private(simde__m512d v) { + simde__m512d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TYPES_H) */ +/* :: End simde/x86/avx512/types.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to 
do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2019-2020 Michael R. Crusoe + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX2_H) +#define SIMDE_X86_AVX2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi8(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi8 + #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi16(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi16 + #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi32(simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi32(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? 
-a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi32 + #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi8 + #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi16 + #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi16(a, b); + #else + return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi16 + #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi32 + #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi32(a, b); + #else + return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), 
simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi32 + #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi64 + #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm256_setzero_si256(); + + for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.m128i_private[h].i8[i] = 0; + } else if (srcpos > 15) { + r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; + } else { + r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; + } + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) +# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_alignr_epi8(a, b, count) \ + simde_mm256_set_m128i( \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_alignr_epi8 + #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_and_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_si256 + #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256i +simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_andnot_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_si256 + #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi8 + #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi16 + #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadds_epi16(a, b); + #else + return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadds_epi16 + #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu8 + #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu16 + #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu8 + #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu16 + #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) +# define simde_mm_blend_epi32(a, b, imm8) \ + simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi32 + #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((imm8 >> i%8) & 1) ? 
b_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) +#elif defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi16(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi16 + #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi32(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi32 + #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_blendv_epi8(a, b, mask); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + mask_ = simde__m256i_to_private(mask); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); + r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(mask_.i8) tmp = mask_.i8 >> 7; + r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + int8_t tmp = mask_.i8[i] >> 7; + r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_epi8 + #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastb_epi8(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return 
simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastb_epi8 + #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastb_epi8(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastb_epi8 + #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastw_epi16(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastw_epi16 + #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastw_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastw_epi16 + #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastd_epi32(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastd_epi32 + #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastd_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastd_epi32 + #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastq_epi64(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastq_epi64 + #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastq_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastq_epi64 + #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastss_ps(a); + #elif defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_shuffle_ps(a, a, 0); + #else + simde__m128_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastss_ps + #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastss_ps(a); + #else + simde__m256_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + __m128 tmp = _mm_permute_ps(a_.n, 0); + r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); + #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) + r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastss_ps + #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_broadcastsd_pd (simde__m128d a) { + return simde_mm_movedup_pd(a); +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastsd_pd + #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastsd_pd(a); + #else + simde__m256d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsd_pd + #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) 
+ return _mm256_broadcastsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = a_; + r_.m128i_private[1] = a_; + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = a_.i64[1]; + r_.i64[2] = a_.i64[0]; + r_.i64[3] = a_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsi128_si256 + #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) + #undef _mm_broadcastsi128_si256 + #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i - imm8; + if(i >= (ssize/2)) { + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i + imm8; + if(i < (ssize/2)) { + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi8 + #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi16 + #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi32 + #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi64 + #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi8 + #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 > b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi16 + #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi32 + #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi64 + #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi16 + #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi32 + #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi64 + #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi32 + #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi64 + #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_epi64 + #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi16 + #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi32 + #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi64 + #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi32 + #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if 
defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi64 + #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu32_epi64 + #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi8 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31){ + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i8[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi8 + #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi16 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i16[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi16 + #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extracti128_si256 + #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return 
simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi32 + #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi32 + #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi32 + #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi32 + #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi32 + #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { 
+ r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi32 + #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi32 + #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi32 + #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi64 + #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi64 + #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + 
simde__m256i_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi64 + #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi64 + #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi64 + #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi64 + #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi64 + #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi64 + #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_ps + #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_ps + #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_ps + #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + src_ = simde__m256_to_private(src), + mask_ = simde__m256_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const 
uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_ps + #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_ps + #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, 
SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_ps + #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_ps + #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_ps + #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + 
HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_pd + #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_pd + #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m256d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, 
base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_pd + #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_pd + #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_pd + #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = 
simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_pd + #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_pd + #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + 
#define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_pd + #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[ imm8 & 1 ] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_inserti128_si256 + #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_madd_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); + SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); + + SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); + SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); + product = a32x16 * b32x16; + + even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); + odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); + + r_.i32 = even + odd; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_madd_epi16 + #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maddubs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maddubs_epi16 + #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi32(mem_addr, mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi32 + #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi32(mem_addr, mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi32 + #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = mask_shr_.i64[i] ? mem_addr[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi64 + #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : INT64_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi64 + #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi32(mem_addr, mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi32 + #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi32(mem_addr, mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi32 + #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi64 + #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi64 + #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_max_epi8(a, b); + #else + 
simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi8 + #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu8 + #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu16 + #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu32 + #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi16 + #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi32 + #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_min_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi8 + #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi16 + #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi32 + #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu8 + #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu16 + #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu32 + #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_movemask_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_movemask_epi8(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + uint32_t r = 0; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); + } + #else + r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); + } + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_epi8 + #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + const int a_offset1 = imm8 & 4; + const int b_offset1 = (imm8 & 3) << 2; + const int a_offset2 = (imm8 >> 3) & 4; + const int b_offset2 = ((imm8 >> 3) & 3) << 2; + + #if defined(simde_math_abs) + const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; + for (int i = 0 ; i < halfway_point ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); + r_.u16[halfway_point + i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_mpsadbw_epu8 + #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhrs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / 
sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi16(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi16 + #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi32(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi32 + #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_or_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_si256 + #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; + const size_t 
quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); + r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); + r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi16 + #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi32(a, b); + #else + simde__m256i_private + r_, + v_[] = { + simde__m256i_to_private(a), + simde__m256i_to_private(b) + }; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); + r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi32 + #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); + r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); + r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); + r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi16 + #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); + r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); + r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); + r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi32 + #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2x128_si256 + #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; + r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; + r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; + r_.i64[3] = (imm8 & 0x80) ? 
a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_epi64 + #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; + r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; + r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; + r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_pd + #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 7]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_epi32 + #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); + #else + return _mm256_permutevar8x32_ps(a, idx); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[idx_.i32[i] & 7]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_ps + #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sad_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sad_epu8 + #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_shuffle_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { + r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; + r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi8 + #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_shuffle_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 32, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi32 + #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 
3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4, \ + 8, 9, 10, 11, \ + ((((imm8) ) & 3) + 8 + 4), \ + ((((imm8) >> 2) & 3) + 8 + 4), \ + ((((imm8) >> 4) & 3) + 8 + 4), \ + ((((imm8) >> 6) & 3) + 8 + 4) \ + ) }); })) +#else +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflehi_epi16 + #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7, \ + ((((imm8) ) & 3) + 8), \ + ((((imm8) >> 2) & 3) + 8), \ + ((((imm8) >> 4) & 3) + 8), \ + ((((imm8) >> 6) & 3) + 8), \ + 12, 13, 14, 15) }); })) +#else +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflelo_epi16 + #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi8 + #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi16 + #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi32 + #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi16 + #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 31) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi32 + #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = 
simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 63) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi64 + #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* Note: There is no consistency in how compilers handle values outside of + the expected range, hence the discrepancy between what we allow and what + Intel specifies. Some compilers will return 0, others seem to just mask + off everything outside of the range. */ + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { + r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi16 + #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { + r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi32 + #define _mm256_slli_epi32(a, imm8) 
simde_mm256_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi64 + #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) - imm8; + r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_si256 + #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); + r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi32 + #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi32 + #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); + r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi64 + #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi64 + #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi16 + #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi32 + #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi16 + #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi32 + #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_srav_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); + r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srav_epi32 + #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srav_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + count_ = simde__m256i_to_private(count); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); + r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + if (shift > 31) shift = 31; + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srav_epi32 + #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi16 + #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi32 + #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi64 + #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + if (imm8 > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { + r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); + } + #else + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi16 + #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { + r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi32 + #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi64 + #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = imm8 + HEDLEY_STATIC_CAST(int, i); + r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_si256 + #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi32 + #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi32 + #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi64 + #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi64 + #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi8 + #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ 
= simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi16 + #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi16(a, b); + #else + return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi16 + #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi32 + #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi32(a, b); + #else + return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi32 + #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi64 + #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + 
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi8 + #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi16 + #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsubs_epi16(a, b); + #else + return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsubs_epi16 + #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu8 + #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + 
+ #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu16 + #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_x_mm256_test_all_ones (simde__m256i a) { + simde__m256i_private a_ = simde__m256i_to_private(a); + int r; + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi8 + #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi16 + #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); + 
#elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 0, 8, 1, 9, 4, 12, 5, 13); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi32 + #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i]; + r_.i64[2 * i + 1] = b_.i64[2 * i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi64 + #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi8 + #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 4, 20, 5, 21, 6, 22, 7, 23, + 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi16 + #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 2, 10, 3, 11, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi32 + #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i + 1]; + r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi64 + #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_xor_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_si256 + #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX2_H) */ +/* :: End simde/x86/avx2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mov.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including 
without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_MOV_H) +#define SIMDE_X86_AVX512_MOV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cast.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_CAST_H) +#define SIMDE_X86_AVX512_CAST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castpd_ps (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_ps + #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castpd_si512 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_si512 + #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castps_pd (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_pd + #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castps_si512 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_si512 + #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castsi512_ps (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_ps + #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castsi512_pd (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_pd + #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd128_pd512 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd128_pd512(a); + #else + simde__m512d_private r_; + r_.m128d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd128_pd512 + #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd256_pd512 (simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd256_pd512(a); + #else + simde__m512d_private r_; + r_.m256d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd256_pd512 + #define 
_mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm512_castpd512_pd128 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd128(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd128 + #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm512_castpd512_pd256 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd256(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m256d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd256 + #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps128_ps512 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps128_ps512(a); + #else + simde__m512_private r_; + r_.m128[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps128_ps512 + #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps256_ps512 (simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps256_ps512(a); + #else + simde__m512_private r_; + r_.m256[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps256_ps512 + #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm512_castps512_ps128 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps128(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps128 + #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm512_castps512_ps256 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps256(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m256[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps256 + #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi128_si512 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi128_si512(a); + #else + simde__m512i_private r_; + r_.m128i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi128_si512 + #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi256_si512 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi256_si512(a); + #else + simde__m512i_private r_; + r_.m256i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi256_si512 + #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i 
+simde_mm512_castsi512_si128 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si128(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si128 + #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_castsi512_si256 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si256(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m256i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si256 + #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ +/* :: End simde/x86/avx512/cast.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SET_H) +#define SIMDE_X86_AVX512_SET_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/load.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_LOAD_H) +#define SIMDE_X86_AVX512_LOAD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_load_pd (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_pd(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d)); + #else + simde__m512d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_pd + #define _mm512_load_pd(a) simde_mm512_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_load_ps (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_ps(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512)); + #else + simde__m512 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_ps + #define _mm512_load_ps(a) simde_mm512_load_ps(a) +#endif +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_load_si512 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); + #else + simde__m512i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); + return r; + #endif +} +#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_epi8 + #undef _mm512_load_epi16 + #undef _mm512_load_epi32 + #undef _mm512_load_epi64 + #undef _mm512_load_si512 + #define _mm512_load_si512(a) simde_mm512_load_si512(a) + #define _mm512_load_epi8(a) simde_mm512_load_si512(a) + #define _mm512_load_epi16(a) simde_mm512_load_si512(a) + #define _mm512_load_epi32(a) simde_mm512_load_si512(a) + #define _mm512_load_epi64(a) simde_mm512_load_si512(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ +/* :: End simde/x86/avx512/load.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, + int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + simde__m512i_private r_; + + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + 
r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + r_.i16[16] = e16; + r_.i16[17] = e17; + r_.i16[18] = e18; + r_.i16[19] = e19; + r_.i16[20] = e20; + r_.i16[21] = e21; + r_.i16[22] = e22; + r_.i16[23] = e23; + r_.i16[24] = e24; + r_.i16[25] = e25; + r_.i16[26] = e26; + r_.i16[27] = e27; + r_.i16[28] = e28; + r_.i16[29] = e29; + r_.i16[30] = e30; + r_.i16[31] = e31; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi16 + #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + simde__m512i_private r_; + + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + r_.i32[ 8] = e8; + r_.i32[ 9] = e9; + r_.i32[10] = e10; + r_.i32[11] = e11; + r_.i32[12] = e12; + r_.i32[13] = e13; + r_.i32[14] = e14; + r_.i32[15] = e15; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi32 + #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + simde__m512i_private r_; + + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + r_.i64[4] = e4; + r_.i64[5] = e5; + r_.i64[6] = e6; + r_.i64[7] = e7; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi64 + #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, + uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, + uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, + uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, + uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m512i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; + r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + 
r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + r_.u8[32] = e32; + r_.u8[33] = e33; + r_.u8[34] = e34; + r_.u8[35] = e35; + r_.u8[36] = e36; + r_.u8[37] = e37; + r_.u8[38] = e38; + r_.u8[39] = e39; + r_.u8[40] = e40; + r_.u8[41] = e41; + r_.u8[42] = e42; + r_.u8[43] = e43; + r_.u8[44] = e44; + r_.u8[45] = e45; + r_.u8[46] = e46; + r_.u8[47] = e47; + r_.u8[48] = e48; + r_.u8[49] = e49; + r_.u8[50] = e50; + r_.u8[51] = e51; + r_.u8[52] = e52; + r_.u8[53] = e53; + r_.u8[54] = e54; + r_.u8[55] = e55; + r_.u8[56] = e56; + r_.u8[57] = e57; + r_.u8[58] = e58; + r_.u8[59] = e59; + r_.u8[60] = e60; + r_.u8[61] = e61; + r_.u8[62] = e62; + r_.u8[63] = e63; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, + uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, + uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m512i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + r_.u16[16] = e16; + r_.u16[17] = e17; + r_.u16[18] = e18; + r_.u16[19] = e19; + r_.u16[20] = e20; + r_.u16[21] = e21; + r_.u16[22] = e22; + r_.u16[23] = e23; + r_.u16[24] = e24; + r_.u16[25] = e25; + r_.u16[26] = e26; + r_.u16[27] = e27; + r_.u16[28] = e28; + r_.u16[29] = e29; + r_.u16[30] = e30; + r_.u16[31] = e31; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, + uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + simde__m512i_private r_; + + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + r_.u32[ 8] = e8; + r_.u32[ 9] = e9; + r_.u32[10] = e10; + r_.u32[11] = e11; + r_.u32[12] = e12; + r_.u32[13] = e13; + r_.u32[14] = e14; + r_.u32[15] = e15; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m512i_private r_; + + r_.u64[ 0] = e0; + r_.u64[ 1] = e1; + r_.u64[ 2] = e2; + r_.u64[ 3] = e3; + r_.u64[ 4] = e4; + r_.u64[ 5] = e5; + r_.u64[ 6] = e6; + r_.u64[ 7] = e7; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, + int8_t e55, int8_t e54, int8_t 
e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, + int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, + int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (HEDLEY_GCC_VERSION_CHECK(10,0,0) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) + return _mm512_set_epi8( + e63, e62, e61, e60, e59, e58, e57, e56, + e55, e54, e53, e52, e51, e50, e49, e48, + e47, e46, e45, e44, e43, e42, e41, e40, + e39, e38, e37, e36, e35, e34, e33, e32, + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0 + ); + #else + simde__m512i_private r_; + + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + r_.i8[32] = e32; + r_.i8[33] = e33; + r_.i8[34] = e34; + r_.i8[35] = e35; + r_.i8[36] = e36; + r_.i8[37] = e37; + r_.i8[38] = e38; + r_.i8[39] = e39; + r_.i8[40] = e40; + r_.i8[41] = e41; + r_.i8[42] = e42; + r_.i8[43] = e43; + r_.i8[44] = e44; + r_.i8[45] = e45; + r_.i8[46] = e46; + r_.i8[47] = e47; + r_.i8[48] = e48; + r_.i8[49] = e49; + r_.i8[50] = e50; + r_.i8[51] = e51; + r_.i8[52] = e52; + r_.i8[53] = e53; + r_.i8[54] = e54; + r_.i8[55] = e55; + r_.i8[56] = e56; + r_.i8[57] = e57; + r_.i8[58] = e58; + r_.i8[59] = e59; + r_.i8[60] = e60; + r_.i8[61] = e61; + r_.i8[62] = e62; + r_.i8[63] = e63; + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi8 + #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_16(simde__m128i) simde__m128i v[] = { d, c, b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + 
r_.m128i[0] = d; + r_.m128i[1] = c; + r_.m128i[2] = b; + r_.m128i[3] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_32(simde__m256i) simde__m256i v[] = { b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + r_.m256i[0] = b; + r_.m256i[1] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, + simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + simde__m512_private r_; + + r_.f32[ 0] = e0; + r_.f32[ 1] = e1; + r_.f32[ 2] = e2; + r_.f32[ 3] = e3; + r_.f32[ 4] = e4; + r_.f32[ 5] = e5; + r_.f32[ 6] = e6; + r_.f32[ 7] = e7; + r_.f32[ 8] = e8; + r_.f32[ 9] = e9; + r_.f32[10] = e10; + r_.f32[11] = e11; + r_.f32[12] = e12; + r_.f32[13] = e13; + r_.f32[14] = e14; + r_.f32[15] = e15; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_ps + #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + simde__m512d_private r_; + + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + r_.f64[4] = e4; + r_.f64[5] = e5; + r_.f64[6] = e6; + r_.f64[7] = e7; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_pd + #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET_H) */ +/* :: End simde/x86/avx512/set.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi8(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : src_.i8[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi8 + #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi16(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi16 + #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi32(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi32 + #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi64(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + /* N.B. CM: No fallbacks as there are only two elements */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi64 + #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_mov_pd(simde__m128d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_pd(src, k, a); + #else + return simde_mm_castsi128_pd(simde_mm_mask_mov_epi64(simde_mm_castpd_si128(src), k, simde_mm_castpd_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_pd + #define _mm_mask_mov_pd(src, k, a) simde_mm_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_mov_ps (simde__m128 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_ps(src, k, a); + #else + return simde_mm_castsi128_ps(simde_mm_mask_mov_epi32(simde_mm_castps_si128(src), k, simde_mm_castps_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_ps + #define _mm_mask_mov_ps(src, k, a) simde_mm_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi8(src, k, a); + #else + simde__m256i_private + r_, + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi8(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi8 + #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi16(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi16 + #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi32(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi32 + #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi64(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi64 + #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_pd(src, k, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_pd + #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_ps(src, k, a); + #else + return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_ps + #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_mov_epi8(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi8 + #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_mov_epi16(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi16 + #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_epi32(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi32 + #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_epi64(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi64 + #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_pd(src, k, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_pd + #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_ps(src, k, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_ps + #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi8(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi8 + #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi16(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi16 + #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi32(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi32 + #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi64(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + /* N.B. CM: No fallbacks as there are only two elements */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi64 + #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_pd(k, a); + #else + return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_pd + #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_ps(k, a); + #else + return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_ps + #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi8(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi8 + #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi16(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi16 + #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi32(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi32 + #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi64(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi64 + #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_pd(k, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_pd + #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_ps(k, a); + #else + return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_ps + #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_mov_epi8(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi8 + #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_mov_epi16(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi16 + #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_epi32(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi32 + #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_epi64(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi64 + #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_pd(k, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_pd + #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_ps(k, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_ps + #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ +/* :: End simde/x86/avx512/mov.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_add_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi8 + #define _mm_mask_add_epi8(src, k, a, b) simde_mm_mask_add_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_add_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi8 + #define _mm_maskz_add_epi8(k, a, b) simde_mm_maskz_add_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_add_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi16 + #define _mm_mask_add_epi16(src, k, a, b) simde_mm_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_add_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi16 + #define _mm_maskz_add_epi16(k, a, b) simde_mm_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if 
defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_add_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi32 + #define _mm_mask_add_epi32(src, k, a, b) simde_mm_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_add_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi32 + #define _mm_maskz_add_epi32(k, a, b) simde_mm_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_add_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi64 + #define _mm_mask_add_epi64(src, k, a, b) simde_mm_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_add_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi64 + #define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return _mm_mask_add_ss(src, k, a, b); + #elif 1 + simde__m128_private + src_ = simde__m128_to_private(src), + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + r_ = simde__m128_to_private(a); + + r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : src_.f32[0]; + + return simde__m128_from_private(r_); + #else + return simde_mm_move_ss(a, simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_ss + #define _mm_mask_add_ss(src, k, a, b) simde_mm_mask_add_ss(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return _mm_maskz_add_ss(k, a, b); + #elif 1 + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + r_ = simde__m128_to_private(a); + + r_.f32[0] = (k & 1) ? 
(a_.f32[0] + b_.f32[0]) : 0.0f; + + return simde__m128_from_private(r_); + #else + return simde_mm_move_ss(a, simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_ss + #define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_add_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi16 + #define _mm256_mask_add_epi16(src, k, a, b) simde_mm256_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_add_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi16 + #define _mm256_maskz_add_epi16(k, a, b) simde_mm256_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_add_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi32 + #define _mm256_mask_add_epi32(src, k, a, b) simde_mm256_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_add_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi32 + #define _mm256_maskz_add_epi32(k, a, b) simde_mm256_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_add_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi64 + #define _mm256_mask_add_epi64(src, k, a, b) simde_mm256_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_add_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi64 + #define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_add_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = 
simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi8 + #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_add_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi8 + #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_add_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi8 + #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_add_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi16 + #define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_add_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi16 + #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_add_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi16 + #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_ARM_SVE_NATIVE) + const size_t n = sizeof(a_.i32) / 
sizeof(a_.i32[0]); + size_t i = 0; + svbool_t pg = svwhilelt_b32(i, n); + do { + svint32_t + va = svld1_s32(pg, &(a_.i32[i])), + vb = svld1_s32(pg, &(b_.i32[i])); + svst1_s32(pg, &(r_.i32[i]), svadd_s32_x(pg, va, vb)); + i += svcntw(); + pg = svwhilelt_b32(i, n); + } while (svptest_any(svptrue_b32(), pg)); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi32 + #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi32 + #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi32 + #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi64 + #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi64 + #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + 
return _mm512_maskz_add_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi64 + #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_add_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_ps + #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_ps + #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_ps + #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_add_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_pd + #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_pd + #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_pd + #define _mm512_maskz_add_pd(k, a, b) 
simde_mm512_maskz_add_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ADD_H) */ +/* :: End simde/x86/avx512/add.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/and.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_AND_H) +#define SIMDE_X86_AVX512_AND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_and_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_and_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_pd + #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_and_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_and_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); 
+ #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_ps + #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_and_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_ps + #define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_and_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_ps + #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_and_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_pd + #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_and_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_pd + #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_epi32 + #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_and_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_epi32 + #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_and_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_epi32 + #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_epi64 + #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_and_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_epi64 + #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_and_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_epi64 + #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_si512 + #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_AND_H) */ +/* :: End simde/x86/avx512/and.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/broadcast.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice 
shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_BROADCAST_H) +#define SIMDE_X86_AVX512_BROADCAST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SET1_H) +#define SIMDE_X86_AVX512_SET1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi8(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi8 + #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_set1_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi8 + #define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_set1_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi8 + #define _mm512_maskz_set1_epi8(k, a) simde_mm512_maskz_set1_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi16(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi16 + #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_set1_epi16(src, k, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi16 + #define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_set1_epi16(k, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi16 + #define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi32(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + + return 
simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi32 + #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_set1_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi32 + #define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_set1_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi32 + #define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi64 (int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi64(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi64 + #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_set1_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi64 + #define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_set1_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi64 + #define _mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu8 (uint8_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu16 (uint16_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu32 (uint32_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu64 (uint64_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a; + } + + 
return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_ps(a); + #else + simde__m512_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_ps + #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_pd(a); + #else + simde__m512d_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_pd + #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET1_H) */ +/* :: End simde/x86/avx512/set1.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_f32x2 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_broadcast_f32x2(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_f32x2 + #define _mm256_broadcast_f32x2(a) simde_mm256_broadcast_f32x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_broadcast_f32x2(simde__m256 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_mask_broadcast_f32x2(src, k, a); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_broadcast_f32x2 + #define _mm256_mask_broadcast_f32x2(src, k, a) simde_mm256_mask_broadcast_f32x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_broadcast_f32x2(simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_maskz_broadcast_f32x2(k, a); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_broadcast_f32x2 + #define _mm256_maskz_broadcast_f32x2(k, a) simde_mm256_maskz_broadcast_f32x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcast_f32x2 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_broadcast_f32x2(a); + #else + simde__m512_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 
1, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f32x2 + #define _mm512_broadcast_f32x2(a) simde_mm512_broadcast_f32x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcast_f32x2(simde__m512 src, simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_broadcast_f32x2(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f32x2 + #define _mm512_mask_broadcast_f32x2(src, k, a) simde_mm512_mask_broadcast_f32x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcast_f32x2(simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_broadcast_f32x2(k, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f32x2 + #define _mm512_maskz_broadcast_f32x2(k, a) simde_mm512_maskz_broadcast_f32x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcast_f32x8 (simde__m256 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_broadcast_f32x8(a); + #else + simde__m512_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=8) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + r_.f32[i + 2] = a_.f32[2]; + r_.f32[i + 3] = a_.f32[3]; + r_.f32[i + 4] = a_.f32[4]; + r_.f32[i + 5] = a_.f32[5]; + r_.f32[i + 6] = a_.f32[6]; + r_.f32[i + 7] = a_.f32[7]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f32x8 + #define _mm512_broadcast_f32x8(a) simde_mm512_broadcast_f32x8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcast_f32x8(simde__m512 src, simde__mmask16 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_broadcast_f32x8(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f32x8 + #define _mm512_mask_broadcast_f32x8(src, k, a) simde_mm512_mask_broadcast_f32x8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcast_f32x8(simde__mmask16 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_broadcast_f32x8(k, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f32x8 + #define _mm512_maskz_broadcast_f32x8(k, a) simde_mm512_maskz_broadcast_f32x8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_broadcast_f64x2 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_broadcast_f64x2(a); + #else + simde__m512d_private 
r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[0]; + r_.f64[i + 1] = a_.f64[1]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f64x2 + #define _mm512_broadcast_f64x2(a) simde_mm512_broadcast_f64x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_broadcast_f64x2(simde__m512d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_broadcast_f64x2(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f64x2 + #define _mm512_mask_broadcast_f64x2(src, k, a) simde_mm512_mask_broadcast_f64x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_broadcast_f64x2(k, a); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f64x2 + #define _mm512_maskz_broadcast_f64x2(k, a) simde_mm512_maskz_broadcast_f64x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_f32x4 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_broadcast_f32x4(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = a_; + r_.m128_private[1] = a_; + #elif defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 1, 2, 3, 0, 1, 2, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 4) { + r_.f32[ i ] = a_.f32[0]; + r_.f32[i + 1] = a_.f32[1]; + r_.f32[i + 2] = a_.f32[2]; + r_.f32[i + 3] = a_.f32[3]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_f32x4 + #define _mm256_broadcast_f32x4(a) simde_mm256_broadcast_f32x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_broadcast_f32x4(simde__m256 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_broadcast_f32x4(src, k, a); + #else + return simde_mm256_mask_mov_ps(src, k, simde_mm256_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_broadcast_f32x4 + #define _mm256_mask_broadcast_f32x4(src, k, a) simde_mm256_mask_broadcast_f32x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_broadcast_f32x4(simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_broadcast_f32x4(k, a); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_broadcast_f32x4 + #define _mm256_maskz_broadcast_f32x4(k, a) simde_mm256_maskz_broadcast_f32x4(k, 
a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_f64x2 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_broadcast_f64x2(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + /* I don't have a bug # for this, but when compiled with clang-10 without optimization on aarch64 + * the __builtin_shufflevector version doesn't work correctly. clang 9 and 11 aren't a problem */ + #if defined(SIMDE_VECTOR_SUBSCRIPT) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && \ + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION < 100000 || SIMDE_DETECT_CLANG_VERSION > 100000)) + r_.f64 = __builtin_shufflevector(a_.f64, a_.f64, 0, 1, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[0]; + r_.f64[i + 1] = a_.f64[1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_f64x2 + #define _mm256_broadcast_f64x2(a) simde_mm256_broadcast_f64x2(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_broadcast_f64x2(simde__m256d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_mask_broadcast_f64x2(src, k, a); + #else + return simde_mm256_mask_mov_pd(src, k, simde_mm256_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_broadcast_f64x2 + #define _mm256_mask_broadcast_f64x2(src, k, a) simde_mm256_mask_broadcast_f64x2(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_broadcast_f64x2(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_maskz_broadcast_f64x2(k, a); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_broadcast_f64x2(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_broadcast_f64x2 + #define _mm256_maskz_broadcast_f64x2(k, a) simde_mm256_maskz_broadcast_f64x2(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcast_f32x4 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_f32x4(a); + #else + simde__m512_private r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256[1] = r_.m256[0] = simde_mm256_castsi256_ps(simde_mm256_broadcastsi128_si256(simde_mm_castps_si128(a))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = a; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f32x4 + #define _mm512_broadcast_f32x4(a) simde_mm512_broadcast_f32x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcast_f32x4(simde__m512 src, simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_f32x4(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f32x4 + #define _mm512_mask_broadcast_f32x4(src, k, a) simde_mm512_mask_broadcast_f32x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcast_f32x4(simde__mmask16 k, simde__m128 a) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_f32x4(k, a); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_broadcast_f32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f32x4 + #define _mm512_maskz_broadcast_f32x4(k, a) simde_mm512_maskz_broadcast_f32x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_broadcast_f64x4 (simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_f64x4(a); + #else + simde__m512d_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = a; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_f64x4 + #define _mm512_broadcast_f64x4(a) simde_mm512_broadcast_f64x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_broadcast_f64x4(simde__m512d src, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_f64x4(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_broadcast_f64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_f64x4 + #define _mm512_mask_broadcast_f64x4(src, k, a) simde_mm512_mask_broadcast_f64x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_broadcast_f64x4(simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_f64x4(k, a); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_broadcast_f64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_f64x4 + #define _mm512_maskz_broadcast_f64x4(k, a) simde_mm512_maskz_broadcast_f64x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcast_i32x4 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_i32x4(a); + #else + simde__m512i_private r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[1] = r_.m256i[0] = simde_mm256_broadcastsi128_si256(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[3] = r_.m128i[2] = r_.m128i[1] = r_.m128i[0] = a; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = a; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_i32x4 + #define _mm512_broadcast_i32x4(a) simde_mm512_broadcast_i32x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcast_i32x4(simde__m512i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_i32x4(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcast_i32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_i32x4 + #define _mm512_mask_broadcast_i32x4(src, k, a) simde_mm512_mask_broadcast_i32x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcast_i32x4(simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_i32x4(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcast_i32x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_i32x4 + #define 
_mm512_maskz_broadcast_i32x4(k, a) simde_mm512_maskz_broadcast_i32x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcast_i64x4 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcast_i64x4(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcast_i64x4 + #define _mm512_broadcast_i64x4(a) simde_mm512_broadcast_i64x4(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcast_i64x4(simde__m512i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcast_i64x4(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcast_i64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcast_i64x4 + #define _mm512_mask_broadcast_i64x4(src, k, a) simde_mm512_mask_broadcast_i64x4(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcast_i64x4(simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcast_i64x4(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcast_i64x4(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcast_i64x4 + #define _mm512_maskz_broadcast_i64x4(k, a) simde_mm512_maskz_broadcast_i64x4(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastd_epi32(a); + #else + simde__m512i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastd_epi32 + #define _mm512_broadcastd_epi32(a) simde_mm512_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcastd_epi32(simde__m512i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastd_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_broadcastd_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastd_epi32 + #define _mm512_mask_broadcastd_epi32(src, k, a) simde_mm512_mask_broadcastd_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcastd_epi32(simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastd_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_broadcastd_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastd_epi32 + #define _mm512_maskz_broadcastd_epi32(k, a) simde_mm512_maskz_broadcastd_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastq_epi64(a); + #else + simde__m512i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + 
r_.i64[i] = a_.i64[0]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastq_epi64 + #define _mm512_broadcastq_epi64(a) simde_mm512_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcastq_epi64(simde__m512i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastq_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_broadcastq_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastq_epi64 + #define _mm512_mask_broadcastq_epi64(src, k, a) simde_mm512_mask_broadcastq_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcastq_epi64(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastq_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_broadcastq_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastq_epi64 + #define _mm512_maskz_broadcastq_epi64(k, a) simde_mm512_maskz_broadcastq_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastss_ps(a); + #else + simde__m512_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastss_ps + #define _mm512_broadcastss_ps(a) simde_mm512_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_broadcastss_ps(simde__m512 src, simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastss_ps(src, k, a); + #else + simde__m512_private + src_ = simde__m512_to_private(src), + r_; + simde__m128_private + a_ = simde__m128_to_private(a); + + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((k >> i) & 1) ? a_.f32[0] : src_.f32[i]; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastss_ps + #define _mm512_mask_broadcastss_ps(src, k, a) simde_mm512_mask_broadcastss_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_broadcastss_ps(simde__mmask16 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastss_ps(k, a); + #else + simde__m512_private + r_; + simde__m128_private + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((k >> i) & 1) ? 
a_.f32[0] : INT32_C(0); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastss_ps + #define _mm512_maskz_broadcastss_ps(k, a) simde_mm512_maskz_broadcastss_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_broadcastsd_pd(a); + #else + simde__m512d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastsd_pd + #define _mm512_broadcastsd_pd(a) simde_mm512_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_broadcastsd_pd(simde__m512d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_broadcastsd_pd(src, k, a); + #else + simde__m512d_private + src_ = simde__m512d_to_private(src), + r_; + simde__m128d_private + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((k >> i) & 1) ? a_.f64[0] : src_.f64[i]; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastsd_pd + #define _mm512_mask_broadcastsd_pd(src, k, a) simde_mm512_mask_broadcastsd_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_broadcastsd_pd(simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_broadcastsd_pd(k, a); + #else + simde__m512d_private + r_; + simde__m128d_private + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((k >> i) & 1) ? 
a_.f64[0] : INT64_C(0); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastsd_pd + #define _mm512_maskz_broadcastsd_pd(k, a) simde_mm512_maskz_broadcastsd_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_broadcastb_epi8(a); + #else + simde__m128i_private a_= simde__m128i_to_private(a); + return simde_mm512_set1_epi8(a_.i8[0]); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastb_epi8 + #define _mm512_broadcastb_epi8(a) simde_mm512_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_broadcastb_epi8 (simde__m512i src, simde__mmask64 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_broadcastb_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_broadcastb_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_broadcastb_epi8 + #define _mm512_mask_broadcastb_epi8(src, k, a) simde_mm512_mask_broadcastb_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_broadcastb_epi8 (simde__mmask64 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_broadcastb_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_broadcastb_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_broadcastb_epi8 + #define _mm512_maskz_broadcastb_epi8(k, a) simde_mm512_maskz_broadcastb_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_broadcastw_epi16(a); + #else + simde__m128i_private a_= simde__m128i_to_private(a); + return simde_mm512_set1_epi16(a_.i16[0]); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_broadcastw_epi16 + #define _mm512_broadcastw_epi16(a) simde_mm512_broadcastw_epi16(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_BROADCAST_H) */ +/* :: End simde/x86/avx512/broadcast.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpeq.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_CMPEQ_H) +#define SIMDE_X86_AVX512_CMPEQ_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mov_mask.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_MOV_MASK_H) +#define SIMDE_X86_AVX512_MOV_MASK_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_movepi8_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movepi8_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movemask_epi8(a)); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask16 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi8_mask + #define _mm_movepi8_mask(a) simde_mm_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi16_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movepi16_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* There is no 32-bit _mm_movemask_* function, so we use + * _mm_movemask_epi8 then extract the odd bits. */ + uint_fast16_t r = HEDLEY_STATIC_CAST(uint_fast16_t, simde_mm_movemask_epi8(a)); + r = ( (r >> 1)) & UINT32_C(0x5555); + r = (r | (r >> 1)) & UINT32_C(0x3333); + r = (r | (r >> 2)) & UINT32_C(0x0f0f); + r = (r | (r >> 4)) & UINT32_C(0x00ff); + return HEDLEY_STATIC_CAST(simde__mmask8, r); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi16_mask + #define _mm_movepi16_mask(a) simde_mm_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi32_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movepi32_mask(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_ps(simde_mm_castsi128_ps(a))); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi32_mask + #define _mm_movepi32_mask(a) simde_mm_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi64_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movepi64_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_pd(simde_mm_castsi128_pd(a))); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi64_mask + #define _mm_movepi64_mask(a) simde_mm_movepi64_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_movepi8_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_movepi8_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_movepi8_mask(a_.m128i[i])) << (i * 16); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return HEDLEY_STATIC_CAST(simde__mmask32, r); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi8_mask + #define _mm256_movepi8_mask(a) simde_mm256_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_movepi16_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_movepi16_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask16 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi16_mask(a_.m128i[i])) << (i * 8); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi16_mask + #define _mm256_movepi16_mask(a) simde_mm256_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_movepi32_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movepi32_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi32_mask(a_.m128i[i])) << (i * 4); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi32_mask + #define _mm256_movepi32_mask(a) simde_mm256_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_movepi64_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movepi64_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movepi64_mask(a_.m128i[i])) << (i * 2); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi64_mask + #define _mm256_movepi64_mask(a) simde_mm256_movepi64_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_movepi8_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movepi8_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask64 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_movepi8_mask(a_.m256i[i])) << (i * 32); + } + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return HEDLEY_STATIC_CAST(simde__mmask64, r); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi8_mask + #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_movepi16_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movepi16_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm256_movepi16_mask(a_.m256i[i])) << (i * 16); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi16_mask + #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_movepi32_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movepi32_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask16 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm256_movepi32_mask(a_.m256i[i])) << (i * 8); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi32_mask + #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_movepi64_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movepi64_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= simde_mm256_movepi64_mask(a_.m256i[i]) << (i * 4); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? 
(UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi64_mask + #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOV_MASK_H) */ +/* :: End simde/x86/avx512/mov_mask.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmp.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_CMP_H) +#define SIMDE_X86_AVX512_CMP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setzero.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SETZERO_H) +#define SIMDE_X86_AVX512_SETZERO_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setzero_si512(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_si512(); + #else + simde__m512i r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_si512 + #define _mm512_setzero_si512() simde_mm512_setzero_si512() + #undef _mm512_setzero_epi32 + #define _mm512_setzero_epi32() simde_mm512_setzero_si512() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_setzero_ps(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_ps(); + #else + return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_ps + #define _mm512_setzero_ps() simde_mm512_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_setzero_pd(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_pd(); + #else + return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_pd + #define _mm512_setzero_pd() simde_mm512_setzero_pd() +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ +/* :: End simde/x86/avx512/setzero.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setone.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SETONE_H) +#define SIMDE_X86_AVX512_SETONE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_setone_si512(void) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } + + return simde__m512i_from_private(r_); +} +#define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_setone_ps(void) { + return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_setone_pd(void) { + return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETONE_H) */ +/* :: End simde/x86/avx512/setone.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m512_to_private(simde_mm512_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_))); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_cmp_ps_mask(a, b, imm8) _mm512_cmp_ps_mask((a), (b), (imm8)) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ + simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m128) / sizeof(simde_mm512_cmp_ps_mask_r_.m128[0])) ; i++) { \ + simde_mm512_cmp_ps_mask_r_.m128[i] = simde_mm_cmp_ps(simde_mm512_cmp_ps_mask_a_.m128[i], simde_mm512_cmp_ps_mask_b_.m128[i], (imm8)); \ + } \ + \ + simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ + })) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) + #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ + simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m256) / sizeof(simde_mm512_cmp_ps_mask_r_.m256[0])) ; i++) { \ + simde_mm512_cmp_ps_mask_r_.m256[i] = simde_mm256_cmp_ps(simde_mm512_cmp_ps_mask_a_.m256[i], simde_mm512_cmp_ps_mask_b_.m256[i], (imm8)); \ + } \ + \ + simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ + })) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_ps_mask + #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_cmp_ps_mask(a, b, imm8) _mm256_cmp_ps_mask((a), (b), (imm8)) +#else + #define simde_mm256_cmp_ps_mask(a, b, imm8) simde_mm256_movepi32_mask(simde_mm256_castps_si256(simde_mm256_cmp_ps((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps_mask + #define _mm256_cmp_ps_mask(a, b, imm8) simde_mm256_cmp_ps_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_cmp_ps_mask(a, b, imm8) _mm_cmp_ps_mask((a), (b), (imm8)) +#else + #define simde_mm_cmp_ps_mask(a, b, imm8) simde_mm_movepi32_mask(simde_mm_castps_si128(simde_mm_cmp_ps((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps_mask + #define _mm_cmp_ps_mask(a, b, imm8) simde_mm_cmp_ps_mask((a), (b), (imm8)) 
+#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_cmp_pd_mask(a, b, imm8) _mm512_cmp_pd_mask((a), (b), (imm8)) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ + simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ + \ + for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m128d) / sizeof(simde_mm512_cmp_pd_mask_r_.m128d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ + simde_mm512_cmp_pd_mask_r_.m128d[simde_mm512_cmp_pd_mask_i] = simde_mm_cmp_pd(simde_mm512_cmp_pd_mask_a_.m128d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m128d[simde_mm512_cmp_pd_mask_i], (imm8)); \ + } \ + \ + simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ + })) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) + #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ + simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ + \ + for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m256d) / sizeof(simde_mm512_cmp_pd_mask_r_.m256d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ + simde_mm512_cmp_pd_mask_r_.m256d[simde_mm512_cmp_pd_mask_i] = simde_mm256_cmp_pd(simde_mm512_cmp_pd_mask_a_.m256d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m256d[simde_mm512_cmp_pd_mask_i], (imm8)); \ + } \ + \ + simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ + })) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_pd_mask + #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_cmp_pd_mask(a, b, imm8) _mm256_cmp_pd_mask((a), (b), (imm8)) +#else + #define simde_mm256_cmp_pd_mask(a, b, imm8) simde_mm256_movepi64_mask(simde_mm256_castpd_si256(simde_mm256_cmp_pd((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd_mask + #define _mm256_cmp_pd_mask(a, b, imm8) simde_mm256_cmp_pd_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_cmp_pd_mask(a, b, imm8) _mm_cmp_pd_mask((a), (b), (imm8)) +#else + #define simde_mm_cmp_pd_mask(a, b, imm8) simde_mm_movepi64_mask(simde_mm_castpd_si128(simde_mm_cmp_pd((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd_mask + #define _mm_cmp_pd_mask(a, b, imm8) simde_mm_cmp_pd_mask((a), (b), (imm8)) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES 
+simde__mmask32 +simde_mm512_cmp_epu16_mask (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + switch (imm8) { + case SIMDE_MM_CMPINT_EQ: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 == b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_LT: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 < b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_LE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 <= b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_FALSE: + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); + break; + + + case SIMDE_MM_CMPINT_NE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 != b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_NLT: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 < b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = !(a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_NLE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 <= b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = !(a_.u16[i] <= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_TRUE: + r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_cmp_epu16_mask(a, b, imm8) _mm512_cmp_epu16_mask((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_epu16_mask + #define _mm512_cmp_epu16_mask(a, b, imm8) simde_mm512_cmp_epu16_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) +#else + #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu16_mask(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmp_epu16_mask +#define _mm512_mask_cmp_epu16_mask(a, b, imm8) simde_mm512_mask_cmp_epu16_mask((a), (b), (imm8)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMP_H) */ +/* :: End simde/x86/avx512/cmp.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpeq_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpeq_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpeq_epi8(a_.m256i[i], b_.m256i[i]))); + r |= HEDLEY_STATIC_CAST(uint64_t, t) << (i * 32); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 == b_.i8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[i] == b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epi8_mask + #define _mm512_cmpeq_epi8_mask(a, b) simde_mm512_cmpeq_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmpeq_epi8_mask(simde__mmask64 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpeq_epi8_mask(k1, a, b); + #else + return simde_mm512_cmpeq_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epi8_mask + #define _mm512_mask_cmpeq_epi8_mask(k1, a, b) simde_mm512_mask_cmpeq_epi8_mask((k1), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpeq_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpeq_epi32_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpeq_epi32(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epi32_mask + #define _mm512_cmpeq_epi32_mask(a, b) simde_mm512_cmpeq_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpeq_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpeq_epi32_mask(k1, a, b); + #else + return simde_mm512_cmpeq_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epi32_mask + #define _mm512_mask_cmpeq_epi32_mask(k1, a, b) simde_mm512_mask_cmpeq_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpeq_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpeq_epi64_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpeq_epi64(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epi64_mask + #define _mm512_cmpeq_epi64_mask(a, b) simde_mm512_cmpeq_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpeq_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpeq_epi64_mask(k1, a, b); + #else + return simde_mm512_cmpeq_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epi64_mask + #define _mm512_mask_cmpeq_epi64_mask(k1, a, b) simde_mm512_mask_cmpeq_epi64_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpeq_epu16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpeq_epu16_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask32 r; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.u16), a_.u16 == b_.u16); + r = 
simde_mm512_movepi16_mask(simde__m512i_from_private(tmp)); + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r |= (a_.u16[i] == b_.u16[i]) ? (UINT16_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_epu31_mask + #define _mm512_cmpeq_epu32_mask(a, b) simde_mm512_cmpeq_epu32_mask(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmpeq_epu16_mask(simde__mmask32 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpeq_epu16_mask(k1, a, b); + #else + return k1 & simde_mm512_cmpeq_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpeq_epu16_mask + #define _mm512_mask_cmpeq_epu16_mask(k1, a, b) simde_mm512_mask_cmpeq_epu16_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpeq_ps_mask (simde__m512 a, simde__m512 b) { + return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_ps_mask + #define _mm512_cmpeq_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_EQ_OQ) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpeq_pd_mask (simde__m512d a, simde__m512d b) { + return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpeq_pd_mask + #define _mm512_cmpeq_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_EQ_OQ) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPEQ_H) */ +/* :: End simde/x86/avx512/cmpeq.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpge.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Christopher Moore + * 2021 Andrew Rodriguez + */ + +#if !defined(SIMDE_X86_AVX512_CMPGE_H) +#define SIMDE_X86_AVX512_CMPGE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/movm.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_MOVM_H) +#define SIMDE_X86_AVX512_MOVM_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi8 (simde__mmask16 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_movm_epi8(k); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + const simde__m128i zero = simde_mm_setzero_si128(); + const simde__m128i bits = simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80); + const simde__m128i shuffle = simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_shuffle_epi8(r, shuffle); + r = simde_mm_cmpgt_epi8(zero, r); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int8_t pos_data[] = { 7, 6, 5, 4, 3, 2, 1, 0 }; + int8x8_t pos = vld1_s8(pos_data); + r_.neon_i8 = vcombine_s8( + vshr_n_s8(vshl_s8(vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, k)), pos), 7), + vshr_n_s8(vshl_s8(vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, k >> 8)), pos), 7)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi8 + #define _mm_movm_epi8(k) simde_mm_movm_epi8(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi8 (simde__mmask32 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_movm_epi8(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const simde__m256i zero = simde_mm256_setzero_si256(); + const simde__m256i bits = simde_mm256_broadcastsi128_si256(simde_mm_set_epi16(0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80)); + const simde__m256i shuffle = simde_mm256_broadcastsi128_si256(simde_mm_set_epi8(15, 13, 11, 9, 7, 5, 3, 1, 14, 12, 10, 8, 6, 4, 2, 0)); + simde__m256i r; + + r = simde_mm256_set_m128i(_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k >> 16)), _mm_set1_epi16(HEDLEY_STATIC_CAST(short, k))); + r = simde_mm256_mullo_epi16(r, bits); + r = simde_mm256_shuffle_epi8(r, shuffle); + r = simde_mm256_cmpgt_epi8(zero, r); + + return r; + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k)); + r_.m128i[1] = simde_mm_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi8 + #define _mm256_movm_epi8(k) simde_mm256_movm_epi8(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi8 (simde__mmask64 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movm_epi8(k); + #else + simde__m512i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k)); + r_.m256i[1] = simde_mm256_movm_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi8 + #define _mm512_movm_epi8(k) simde_mm512_movm_epi8(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi16 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_movm_epi16(k); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const simde__m128i bits = simde_mm_set_epi16(0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_srai_epi16(r, 15); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int16_t pos_data[] = { 15, 14, 13, 12, 11, 10, 9, 8 }; + const int16x8_t pos = vld1q_s16(pos_data); + r_.neon_i16 = vshrq_n_s16(vshlq_s16(vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, k)), pos), 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi16 + #define _mm_movm_epi16(k) simde_mm_movm_epi16(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi16 (simde__mmask16 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_movm_epi16(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i bits = _mm256_set_epi16(0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, + 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000, INT16_MIN /* 0x8000 */); + __m256i r; + + r = _mm256_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = _mm256_mullo_epi16(r, bits); + r = _mm256_srai_epi16(r, 15); + + return r; + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k)); + r_.m128i[1] = simde_mm_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi16 + #define _mm256_movm_epi16(k) simde_mm256_movm_epi16(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi16 (simde__mmask32 k) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm512_movm_epi16(k); + #else + simde__m512i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k)); + r_.m256i[1] = simde_mm256_movm_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi16 + #define _mm512_movm_epi16(k) simde_mm512_movm_epi16(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi32 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movm_epi32(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m128i shifts = _mm_set_epi32(28, 29, 30, 31); + __m128i r; + + r = _mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm_sllv_epi32(r, shifts); + r = _mm_srai_epi32(r, 31); + + return r; + #elif defined(SIMDE_X86_SSE2_NATIVE) + const simde__m128i bits = simde_mm_set_epi32(0x10000000, 0x20000000, 0x40000000, INT32_MIN /* 0x80000000 */); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_srai_epi32(r, 31); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int32_t pos_data[] = { 31, 30, 29, 28 }; + const int32x4_t pos = vld1q_s32(pos_data); + r_.neon_i32 = vshrq_n_s32(vshlq_s32(vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, k)), pos), 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi32 + #define _mm_movm_epi32(k) simde_mm_movm_epi32(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi32 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movm_epi32(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i shifts = _mm256_set_epi32(24, 25, 26, 27, 28, 29, 30, 31); + __m256i r; + + r = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm256_sllv_epi32(r, shifts); + r = _mm256_srai_epi32(r, 31); + + return r; + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi32(k ); + r_.m128i[1] = simde_mm_movm_epi32(k >> 4); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi32 + #define _mm256_movm_epi32(k) simde_mm256_movm_epi32(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi32 (simde__mmask16 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movm_epi32(k); + #else + simde__m512i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k )); + r_.m256i[1] = simde_mm256_movm_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi32 + #define _mm512_movm_epi32(k) simde_mm512_movm_epi32(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movm_epi64 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_movm_epi64(k); + /* N.B. CM: These fallbacks may not be faster as there are only two elements */ + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m128i shifts = _mm_set_epi32(30, 30, 31, 31); + __m128i r; + + r = _mm_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm_sllv_epi32(r, shifts); + r = _mm_srai_epi32(r, 31); + + return r; + #elif defined(SIMDE_X86_SSE2_NATIVE) + const simde__m128i bits = simde_mm_set_epi32(0x40000000, 0x40000000, INT32_MIN /* 0x80000000 */, INT32_MIN /* 0x80000000 */); + simde__m128i r; + + r = simde_mm_set1_epi16(HEDLEY_STATIC_CAST(short, k)); + r = simde_mm_mullo_epi16(r, bits); + r = simde_mm_srai_epi32(r, 31); + + return r; + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const int64_t pos_data[] = { 63, 62 }; + const int64x2_t pos = vld1q_s64(pos_data); + r_.neon_i64 = vshrq_n_s64(vshlq_s64(vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, k)), pos), 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_movm_epi64 + #define _mm_movm_epi64(k) simde_mm_movm_epi64(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_movm_epi64 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_movm_epi64(k); + #elif defined(SIMDE_X86_AVX2_NATIVE) + const __m256i shifts = _mm256_set_epi32(28, 28, 29, 29, 30, 30, 31, 31); + __m256i r; + + r = _mm256_set1_epi32(HEDLEY_STATIC_CAST(int, k)); + r = _mm256_sllv_epi32(r, shifts); + r = _mm256_srai_epi32(r, 31); + + return r; + #else + simde__m256i_private r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_movm_epi64(k ); + r_.m128i[1] = simde_mm_movm_epi64(k >> 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_movm_epi64 + #define _mm256_movm_epi64(k) simde_mm256_movm_epi64(k) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_movm_epi64 (simde__mmask8 k) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movm_epi64(k); + #else + simde__m512i_private r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256i[0] = simde_mm256_movm_epi64(k ); + r_.m256i[1] = simde_mm256_movm_epi64(k >> 4); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movm_epi64 + #define _mm512_movm_epi64(k) simde_mm512_movm_epi64(k) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOVM_H) */ +/* :: End simde/x86/avx512/movm.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi8(_mm_cmpge_epi8_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpge(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmpge_epi8_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epi8_mask(a, b); + #else + return simde_mm_movepi8_mask(simde_x_mm_cmpge_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi8_mask + #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmpge_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epi8_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi8_mask + #define _mm_mask_cmpge_epi8_mask(src, k, a, b) simde_mm_mask_cmpge_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi8(_mm256_cmpge_epi8_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi8(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmpge_epi8_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epi8_mask(a, b); + #else + return simde_mm256_movepi8_mask(simde_x_mm256_cmpge_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi8_mask + #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmpge_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epi8_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi8_mask + #define _mm256_mask_cmpge_epi8_mask(src, k, a, b) simde_mm256_mask_cmpge_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi8(_mm512_cmpge_epi8_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi8(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi8(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpge_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epi8_mask(a, b); + #else + return simde_mm512_movepi8_mask(simde_x_mm512_cmpge_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi8_mask + #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmpge_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epi8_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi8_mask + #define _mm512_mask_cmpge_epi8_mask(src, k, a, b) simde_mm512_mask_cmpge_epi8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi8(_mm_cmpge_epu8_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpge(a_.altivec_u8, b_.altivec_u8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_cmpge_epu8_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epu8_mask(a, b); + #else + return simde_mm_movepi8_mask(simde_x_mm_cmpge_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu8_mask + #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_mask_cmpge_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epu8_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu8_mask + #define _mm_mask_cmpge_epu8_mask(src, k, a, b) simde_mm_mask_cmpge_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi8(_mm256_cmpge_epu8_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu8(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_cmpge_epu8_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epu8_mask(a, b); + #else + return simde_mm256_movepi8_mask(simde_x_mm256_cmpge_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu8_mask + #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_mask_cmpge_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epu8_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu8_mask + #define _mm256_mask_cmpge_epu8_mask(src, k, a, b) simde_mm256_mask_cmpge_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi8(_mm512_cmpge_epu8_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu8(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu8(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpge_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epu8_mask(a, b); + #else + return simde_mm512_movepi8_mask(simde_x_mm512_cmpge_epu8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu8_mask + #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_cmpge_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epu8_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu8_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu8_mask + #define _mm512_mask_cmpge_epu8_mask(src, k, a, b) simde_mm512_mask_cmpge_epu8_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi16(_mm_cmpge_epi16_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpge(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epi16_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epi16_mask(a, b); + #else + return simde_mm_movepi16_mask(simde_x_mm_cmpge_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi16_mask + #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epi16_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi16_mask + #define _mm_mask_cmpge_epi16_mask(src, k, a, b) simde_mm_mask_cmpge_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi16(_mm256_cmpge_epi16_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmpge_epi16_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epi16_mask(a, b); + #else + return simde_mm256_movepi16_mask(simde_x_mm256_cmpge_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi16_mask + #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmpge_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epi16_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi16_mask + #define _mm256_mask_cmpge_epi16_mask(src, k, a, b) simde_mm256_mask_cmpge_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi16(_mm512_cmpge_epi16_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi16(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpge_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epi16_mask(a, b); + #else + return simde_mm512_movepi16_mask(simde_x_mm512_cmpge_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi16_mask + #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmpge_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epi16_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi16_mask + #define _mm512_mask_cmpge_epi16_mask(src, k, a, b) simde_mm512_mask_cmpge_epi16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movm_epi16(_mm_cmpge_epu16_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), vec_cmpge(a_.altivec_u16, b_.altivec_u16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epu16_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_cmpge_epu16_mask(a, b); + #else + return simde_mm_movepi16_mask(simde_x_mm_cmpge_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu16_mask + #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_cmpge_epu16_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu16_mask + #define _mm_mask_cmpge_epu16_mask(src, k, a, b) simde_mm_mask_cmpge_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm256_movm_epi16(_mm256_cmpge_epu16_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu16(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_cmpge_epu16_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_cmpge_epu16_mask(a, b); + #else + return simde_mm256_movepi16_mask(simde_x_mm256_cmpge_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu16_mask + #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_mask_cmpge_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_cmpge_epu16_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu16_mask + #define _mm256_mask_cmpge_epu16_mask(src, k, a, b) simde_mm256_mask_cmpge_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return simde_mm512_movm_epi16(_mm512_cmpge_epu16_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu16(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu16(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u16) / sizeof(a_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpge_epu16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpge_epu16_mask(a, b); + #else + return simde_mm512_movepi16_mask(simde_x_mm512_cmpge_epu16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu16_mask + #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_cmpge_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_cmpge_epu16_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu16_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu16_mask + #define _mm512_mask_cmpge_epu16_mask(src, k, a, b) simde_mm512_mask_cmpge_epu16_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi32(_mm_cmpge_epi32_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpge(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epi32_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epi32_mask(a, b); + #else + return simde_mm_movepi32_mask(simde_x_mm_cmpge_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi32_mask + #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epi32_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi32_mask + #define _mm_mask_cmpge_epi32_mask(src, k, a, b) simde_mm_mask_cmpge_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi32(_mm256_cmpge_epi32_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epi32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epi32_mask(a, b); + #else + return simde_mm256_movepi32_mask(simde_x_mm256_cmpge_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi32_mask + #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epi32_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi32_mask + #define _mm256_mask_cmpge_epi32_mask(src, k, a, b) simde_mm256_mask_cmpge_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi32(_mm512_cmpge_epi32_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpge_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epi32_mask(a, b); + #else + return simde_mm512_movepi32_mask(simde_x_mm512_cmpge_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi32_mask + #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpge_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epi32_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi32_mask + #define _mm512_mask_cmpge_epi32_mask(src, k, a, b) simde_mm512_mask_cmpge_epi32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi32(_mm_cmpge_epu32_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_cmpge(a_.altivec_u32, b_.altivec_u32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epu32_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epu32_mask(a, b); + #else + return simde_mm_movepi32_mask(simde_x_mm_cmpge_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu32_mask + #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epu32_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu32_mask + #define _mm_mask_cmpge_epu32_mask(src, k, a, b) simde_mm_mask_cmpge_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi32(_mm256_cmpge_epu32_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu32(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epu32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epu32_mask(a, b); + #else + return simde_mm256_movepi32_mask(simde_x_mm256_cmpge_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu32_mask + #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epu32_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu32_mask + #define _mm256_mask_cmpge_epu32_mask(src, k, a, b) simde_mm256_mask_cmpge_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi32(_mm512_cmpge_epu32_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu32(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu32(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpge_epu32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epu32_mask(a, b); + #else + return simde_mm512_movepi32_mask(simde_x_mm512_cmpge_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu32_mask + #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpge_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epu32_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu32_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu32_mask + #define _mm512_mask_cmpge_epu32_mask(src, k, a, b) simde_mm512_mask_cmpge_epu32_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi64(_mm_cmpge_epi64_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpge(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epi64_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epi64_mask(a, b); + #else + return simde_mm_movepi64_mask(simde_x_mm_cmpge_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpge_epi64_mask + #define _mm_cmpge_epi64_mask(a, b) simde_mm_cmpge_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epi64_mask(k, a, b); + #else + return k & simde_mm_cmpge_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epi64_mask + #define _mm_mask_cmpge_epi64_mask(src, k, a, b) simde_mm_mask_cmpge_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi64(_mm256_cmpge_epi64_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi64(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epi64_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epi64_mask(a, b); + #else + return simde_mm256_movepi64_mask(simde_x_mm256_cmpge_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpge_epi64_mask + #define _mm256_cmpge_epi64_mask(a, b) simde_mm256_cmpge_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epi64_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epi64_mask + #define _mm256_mask_cmpge_epi64_mask(src, k, a, b) simde_mm256_mask_cmpge_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return simde_mm512_movm_epi64(_mm512_cmpge_epi64_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epi64(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpge_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epi64_mask(a, b); + #else + return simde_mm512_movepi64_mask(simde_x_mm512_cmpge_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epi64_mask + #define _mm512_cmpge_epi64_mask(a, b) simde_mm512_cmpge_epi64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epi64_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epi64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epi64_mask + #define _mm512_mask_cmpge_epi64_mask(src, k, a, b) simde_mm512_mask_cmpge_epi64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cmpge_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm_movm_epi64(_mm_cmpge_epu64_mask(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpge(a_.altivec_u64, b_.altivec_u64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_cmpge_epu64_mask (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_cmpge_epu64_mask(a, b); + #else + return simde_mm_movepi64_mask(simde_x_mm_cmpge_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu64_mask + #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_cmpge_epu64_mask(k, a, b); + #else + return k & simde_mm_cmpge_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_cmpge_epu64_mask + #define _mm_mask_cmpge_epu64_mask(src, k, a, b) simde_mm_mask_cmpge_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_cmpge_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm256_movm_epi64(_mm256_cmpge_epu64_mask(a, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu64(a_.m128i[i], b_.m128i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_cmpge_epu64_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_cmpge_epu64_mask(a, b); + #else + return simde_mm256_movepi64_mask(simde_x_mm256_cmpge_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu64_mask + #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_cmpge_epu64_mask(k, a, b); + #else + return k & simde_mm256_cmpge_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_cmpge_epu64_mask + #define _mm256_mask_cmpge_epu64_mask(src, k, a, b) simde_mm256_mask_cmpge_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_cmpge_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return simde_mm512_movm_epi64(_mm512_cmpge_epu64_mask(a, b)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_cmpge_epu64(a_.m128i[i], b_.m128i[i]); + } + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_cmpge_epu64(a_.m256i[i], b_.m256i[i]); + } + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpge_epu64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpge_epu64_mask(a, b); + #else + return simde_mm512_movepi64_mask(simde_x_mm512_cmpge_epu64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpge_epu64_mask + #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpge_epu64_mask(k, a, b); + #else + return k & simde_mm512_cmpge_epu64_mask(a, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpge_epu64_mask + #define _mm512_mask_cmpge_epu64_mask(src, k, a, b) simde_mm512_mask_cmpge_epu64_mask((src), (k), (a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPGE_H) */ +/* :: End simde/x86/avx512/cmpge.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmpgt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_CMPGT_H) +#define SIMDE_X86_AVX512_CMPGT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpgt_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpgt_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + const uint32_t t = HEDLEY_STATIC_CAST(uint32_t, simde_mm256_movemask_epi8(simde_mm256_cmpgt_epi8(a_.m256i[i], b_.m256i[i]))); + r |= HEDLEY_STATIC_CAST(uint64_t, t) << HEDLEY_STATIC_CAST(uint64_t, i * 32); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 > b_.i8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] > b_.i8[i]) ? (UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi8_mask + #define _mm512_cmpgt_epi8_mask(a, b) simde_mm512_cmpgt_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmpgt_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpgt_epu8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 > b_.u8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[i] > b_.u8[i]) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epu8_mask + #define _mm512_cmpgt_epu8_mask(a, b) simde_mm512_cmpgt_epu8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_cmpgt_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmpgt_epi16_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpgt_epi16(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi16_mask + #define _mm512_cmpgt_epi16_mask(a, b) simde_mm512_cmpgt_epi16_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmpgt_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpgt_epi32_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpgt_epi32(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi32_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi32_mask + #define _mm512_cmpgt_epi32_mask(a, b) simde_mm512_cmpgt_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_cmpgt_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpgt_epi32_mask(k1, a, b); + #else + return simde_mm512_cmpgt_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpgt_epi32_mask + #define _mm512_mask_cmpgt_epi32_mask(k1, a, b) simde_mm512_mask_cmpgt_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmpgt_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cmpgt_epi64_mask(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_cmpgt_epi64(a_.m256i[i], b_.m256i[i]); + } + + return simde_mm512_movepi64_mask(simde__m512i_from_private(r_)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmpgt_epi64_mask + #define _mm512_cmpgt_epi64_mask(a, b) simde_mm512_cmpgt_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_cmpgt_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cmpgt_epi64_mask(k1, a, b); + #else + return simde_mm512_cmpgt_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmpgt_epi64_mask + #define _mm512_mask_cmpgt_epi64_mask(k1, a, b) simde_mm512_mask_cmpgt_epi64_mask(k1, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPGT_H) */ +/* :: End simde/x86/avx512/cmpgt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/x86/avx512/cmplt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_CMPLT_H) +#define SIMDE_X86_AVX512_CMPLT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmplt_ps_mask (simde__m512 a, simde__m512 b) { + return simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_ps_mask + #define _mm512_cmplt_ps_mask(a, b) simde_mm512_cmp_ps_mask(a, b, SIMDE_CMP_LT_OQ) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmplt_pd_mask (simde__m512d a, simde__m512d b) { + return simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_pd_mask + #define _mm512_cmplt_pd_mask(a, b) simde_mm512_cmp_pd_mask(a, b, SIMDE_CMP_LT_OQ) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmplt_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmplt_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.i8 < b_.i8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < b_.i8[i]) ? 
(UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_epi8_mask + #define _mm512_cmplt_epi8_mask(a, b) simde_mm512_cmplt_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_cmplt_epu8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cmplt_epu8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m512i_private tmp; + + tmp.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(tmp.i8), a_.u8 < b_.u8); + r = simde_mm512_movepi8_mask(simde__m512i_from_private(tmp)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[i] < b_.u8[i]) ? (UINT64_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmplt_epu8_mask + #define _mm512_cmplt_epu8_mask(a, b) simde_mm512_cmplt_epu8_mask(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMPLT_H) */ +/* :: End simde/x86/avx512/cmplt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/extract.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_EXTRACT_H) +#define SIMDE_X86_AVX512_EXTRACT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm512_extractf32x4_ps (simde__m512 a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512_private a_ = simde__m512_to_private(a); + + /* GCC 6 generates an ICE */ + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(7,0,0) + return a_.m128[imm8 & 3]; + #else + simde__m128_private r_; + const size_t offset = HEDLEY_STATIC_CAST(size_t, imm8 & 3) * (sizeof(r_.f32) / sizeof(r_.f32[0])); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i + offset]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_extractf32x4_ps(a, imm8) _mm512_extractf32x4_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extractf32x4_ps + #define _mm512_extractf32x4_ps(a, imm8) simde_mm512_extractf32x4_ps(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) _mm512_mask_extractf32x4_ps(src, k, a, imm8) +#else + #define simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm_mask_mov_ps(src, k, simde_mm512_extractf32x4_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extractf32x4_ps + #define _mm512_mask_extractf32x4_ps(src, k, a, imm8) simde_mm512_mask_extractf32x4_ps(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) _mm512_maskz_extractf32x4_ps(k, a, imm8) +#else + #define simde_mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm_maskz_mov_ps(k, simde_mm512_extractf32x4_ps(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extractf32x4_ps + #define _mm512_maskz_extractf32x4_ps(k, a, imm8) simde_mm512_maskz_extractf32x4_ps(k, a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm512_extractf32x8_ps (simde__m512 a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512_private a_ = simde__m512_to_private(a); + + return a_.m256[imm8 & 1]; +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_extractf32x8_ps(a, imm8) _mm512_extractf32x8_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_extractf32x8_ps + #define _mm512_extractf32x8_ps(a, imm8) simde_mm512_extractf32x8_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm512_extractf64x4_pd (simde__m512d a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + return a_.m256d[imm8 & 1]; +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_extractf64x4_pd(a, imm8) _mm512_extractf64x4_pd(a, imm8) +#endif +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extractf64x4_pd + #define _mm512_extractf64x4_pd(a, imm8) simde_mm512_extractf64x4_pd(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) _mm512_mask_extractf64x4_pd(src, k, a, imm8) +#else + #define simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm512_extractf64x4_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extractf64x4_pd + #define _mm512_mask_extractf64x4_pd(src, k, a, imm8) simde_mm512_mask_extractf64x4_pd(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) _mm512_maskz_extractf64x4_pd(k, a, imm8) +#else + #define simde_mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm256_maskz_mov_pd(k, simde_mm512_extractf64x4_pd(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extractf64x4_pd + #define _mm512_maskz_extractf64x4_pd(k, a, imm8) simde_mm512_maskz_extractf64x4_pd(k, a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm512_extracti32x4_epi32 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + return a_.m128i[imm8 & 3]; +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_extracti32x4_epi32(a, imm8) _mm512_extracti32x4_epi32(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extracti32x4_epi32 + #define _mm512_extracti32x4_epi32(a, imm8) simde_mm512_extracti32x4_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) _mm512_mask_extracti32x4_epi32(src, k, a, imm8) +#else + #define simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm_mask_mov_epi32(src, k, simde_mm512_extracti32x4_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extracti32x4_epi32 + #define _mm512_mask_extracti32x4_epi32(src, k, a, imm8) simde_mm512_mask_extracti32x4_epi32(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) _mm512_maskz_extracti32x4_epi32(k, a, imm8) +#else + #define simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm_maskz_mov_epi32(k, simde_mm512_extracti32x4_epi32(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extracti32x4_epi32 + #define _mm512_maskz_extracti32x4_epi32(k, a, imm8) simde_mm512_maskz_extracti32x4_epi32(k, a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_extracti64x4_epi64 (simde__m512i a, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + return a_.m256i[imm8 & 1]; +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define 
simde_mm512_extracti64x4_epi64(a, imm8) _mm512_extracti64x4_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_extracti64x4_epi64 + #define _mm512_extracti64x4_epi64(a, imm8) simde_mm512_extracti64x4_epi64(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) _mm512_mask_extracti64x4_epi64(src, k, a, imm8) +#else + #define simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm512_extracti64x4_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_extracti64x4_epi64 + #define _mm512_mask_extracti64x4_epi64(src, k, a, imm8) simde_mm512_mask_extracti64x4_epi64(src, k, a, imm8) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_CLANG_REV_299346) + #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) _mm512_maskz_extracti64x4_epi64(k, a, imm8) +#else + #define simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm512_extracti64x4_epi64(a, imm8)) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_extracti64x4_epi64 + #define _mm512_maskz_extracti64x4_epi64(k, a, imm8) simde_mm512_maskz_extracti64x4_epi64(k, a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_EXTRACT_H) */ +/* :: End simde/x86/avx512/extract.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/insert.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_INSERT_H) +#define SIMDE_X86_AVX512_INSERT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_insertf32x4 (simde__m512 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + simde__m512 r; + switch(imm8) { + case 0: r = _mm512_insertf32x4(a, b, 0); break; + case 1: r = _mm512_insertf32x4(a, b, 1); break; + case 2: r = _mm512_insertf32x4(a, b, 2); break; + case 3: r = _mm512_insertf32x4(a, b, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_mm512_setzero_ps(); break; + } + return r; + #else + simde__m512_private a_ = simde__m512_to_private(a); + + a_.m128[imm8 & 3] = b; + + return simde__m512_from_private(a_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf32x4 + #define _mm512_insertf32x4(a, b, imm8) simde_mm512_insertf32x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_insertf32x4 (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512 r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_mask_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_mask_mov_ps(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf32x4 + #define _mm512_mask_insertf32x4(src, k, a, b, imm8) simde_mm512_mask_insertf32x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_insertf32x4 (simde__mmask16 k, simde__m512 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512 r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_maskz_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_insertf32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_maskz_mov_ps(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf32x4 + #define _mm512_maskz_insertf32x4(k, a, b, imm8) simde_mm512_maskz_insertf32x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_insertf64x4 (simde__m512d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + a_.m256d[imm8 & 1] = b; + + return simde__m512d_from_private(a_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_insertf64x4(a, b, imm8) _mm512_insertf64x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf64x4 + #define _mm512_insertf64x4(a, b, imm8) simde_mm512_insertf64x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_insertf64x4 (simde__m512d src, 
simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_mask_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_mask_mov_pd(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf64x4 + #define _mm512_mask_insertf64x4(src, k, a, b, imm8) simde_mm512_mask_insertf64x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_insertf64x4 (simde__mmask8 k, simde__m512d a, simde__m256d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512d r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_maskz_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_insertf64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_maskz_mov_pd(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf64x4 + #define _mm512_maskz_insertf64x4(k, a, b, imm8) simde_mm512_maskz_insertf64x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti32x4 (simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m128i[imm8 & 3] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_inserti32x4(a, b, imm8) _mm512_inserti32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti32x4 + #define _mm512_inserti32x4(a, b, imm8) simde_mm512_inserti32x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti32x4 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_mask_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_mask_mov_epi32(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti32x4 + #define _mm512_mask_inserti32x4(src, k, a, b, imm8) simde_mm512_mask_inserti32x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti32x4 (simde__mmask16 k, simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,0,0)) + SIMDE_CONSTIFY_4_(_mm512_maskz_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_4_(simde_mm512_inserti32x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi32(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti32x4 + #define 
_mm512_maskz_inserti32x4(k, a, b, imm8) simde_mm512_maskz_inserti32x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti64x4 (simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m256i[imm8 & 1] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_inserti64x4(a, b, imm8) _mm512_inserti64x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti64x4 + #define _mm512_inserti64x4(a, b, imm8) simde_mm512_inserti64x4(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti64x4 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_mask_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_mask_mov_epi64(src, k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti64x4 + #define _mm512_mask_inserti64x4(src, k, a, b, imm8) simde_mm512_mask_inserti64x4(src, k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti64x4 (simde__mmask8 k, simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 2) { + simde__m512i r; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_CONSTIFY_2_(_mm512_maskz_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); + return r; + #else + SIMDE_CONSTIFY_2_(simde_mm512_inserti64x4, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi64(k, r); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti64x4 + #define _mm512_maskz_inserti64x4(k, a, b, imm8) simde_mm512_maskz_inserti64x4(k, a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_insertf32x8 (simde__m512 a, simde__m256 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512_private a_ = simde__m512_to_private(a); + + a_.m256[imm8 & 1] = b; + + return simde__m512_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_insertf32x8(a, b, imm8) _mm512_insertf32x8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf32x8 + #define _mm512_insertf32x8(a, b, imm8) simde_mm512_insertf32x8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_insertf32x8(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512 r; + SIMDE_CONSTIFY_2_(_mm512_mask_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, src, k, a, b); + return r; + #else + simde__m512 r; + SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_mask_mov_ps(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf32x8 + #define _mm512_mask_insertf32x8(src, k, a, b, imm8) simde_mm512_mask_insertf32x8(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 
+simde_mm512_maskz_insertf32x8(simde__mmask16 k, simde__m512 a, simde__m256 b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512 r; + SIMDE_CONSTIFY_2_(_mm512_maskz_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, k, a, b); + return r; + #else + simde__m512 r; + SIMDE_CONSTIFY_2_(simde_mm512_insertf32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_ps ()), imm8, a, b); + return simde_mm512_maskz_mov_ps(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf32x8 + #define _mm512_maskz_insertf32x8(k, a, b, imm8) simde_mm512_maskz_insertf32x8(k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_insertf64x2 (simde__m512d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512d_private a_ = simde__m512d_to_private(a); + + a_.m128d[imm8 & 3] = b; + + return simde__m512d_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_insertf64x2(a, b, imm8) _mm512_insertf64x2(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_insertf64x2 + #define _mm512_insertf64x2(a, b, imm8) simde_mm512_insertf64x2(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_insertf64x2(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512d r; + SIMDE_CONSTIFY_4_(_mm512_mask_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, src, k, a, b); + return r; + #else + simde__m512d r; + SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_mask_mov_pd(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_insertf64x2 + #define _mm512_mask_insertf64x2(src, k, a, b, imm8) simde_mm512_mask_insertf64x2(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_insertf64x2(simde__mmask8 k, simde__m512d a, simde__m128d b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512d r; + SIMDE_CONSTIFY_4_(_mm512_maskz_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, k, a, b); + return r; + #else + simde__m512d r; + SIMDE_CONSTIFY_4_(simde_mm512_insertf64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_pd ()), imm8, a, b); + return simde_mm512_maskz_mov_pd(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_insertf64x2 + #define _mm512_maskz_insertf64x2(k, a, b, imm8) simde_mm512_maskz_insertf64x2(k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti32x8 (simde__m512i a, simde__m256i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m256i[imm8 & 1] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_inserti32x8(a, b, imm8) _mm512_inserti32x8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti32x8 + #define _mm512_inserti32x8(a, b, imm8) simde_mm512_inserti32x8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti32x8(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_2_(_mm512_mask_inserti32x8, r, 
(HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, src, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); + return simde_mm512_mask_mov_epi32(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti32x8 + #define _mm512_mask_inserti32x8(src, k, a, b, imm8) simde_mm512_mask_inserti32x8(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti32x8(simde__mmask16 k, simde__m512i a, simde__m256i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_2_(_mm512_maskz_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_2_(simde_mm512_inserti32x8, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_epi32 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi32(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti32x8 + #define _mm512_maskz_inserti32x8(k, a, b, imm8) simde_mm512_maskz_inserti32x8(k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_inserti64x2 (simde__m512i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m512i_private a_ = simde__m512i_to_private(a); + + a_.m128i[imm8 & 3] = b; + + return simde__m512i_from_private(a_); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_inserti64x2(a, b, imm8) _mm512_inserti64x2(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_inserti64x2 + #define _mm512_inserti64x2(a, b, imm8) simde_mm512_inserti64x2(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_inserti64x2(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_4_(_mm512_mask_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, src, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_mask_mov_epi64(src, k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_inserti64x2 + #define _mm512_mask_inserti64x2(src, k, a, b, imm8) simde_mm512_mask_inserti64x2(src, k, a, b, imms8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_inserti64x2(simde__mmask8 k, simde__m512i a, simde__m128i b, const int imm8) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + simde__m512i r; + SIMDE_CONSTIFY_4_(_mm512_maskz_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, k, a, b); + return r; + #else + simde__m512i r; + SIMDE_CONSTIFY_4_(simde_mm512_inserti64x2, r, (HEDLEY_UNREACHABLE(), simde_mm512_setzero_si512 ()), imm8, a, b); + return simde_mm512_maskz_mov_epi64(k, r); + #endif + } +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_inserti64x2 + #define _mm512_maskz_inserti64x2(k, a, b, imm8) simde_mm512_maskz_inserti64x2(k, a, b, imms8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_INSERT_H) */ +/* :: End simde/x86/avx512/insert.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/kshift.h :: */ +/* 
SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_KSHIFT_H) +#define SIMDE_X86_AVX512_KSHIFT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kshiftli_mask16 (simde__mmask16 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a << count) : 0); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask16(a, count) _kshiftli_mask16(a, count) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask16 + #define _kshiftli_mask16(a, count) simde_kshiftli_mask16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kshiftli_mask32 (simde__mmask32 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 31) ? (a << count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask32(a, count) _kshiftli_mask32(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask32 + #define _kshiftli_mask32(a, count) simde_kshiftli_mask32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kshiftli_mask64 (simde__mmask64 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 63) ? 
(a << count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask64(a, count) _kshiftli_mask64(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask64 + #define _kshiftli_mask64(a, count) simde_kshiftli_mask64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kshiftli_mask8 (simde__mmask8 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? (a << count) : 0); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftli_mask8(a, count) _kshiftli_mask8(a, count) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kshiftli_mask8 + #define _kshiftli_mask8(a, count) simde_kshiftli_mask8(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_kshiftri_mask16 (simde__mmask16 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask16, (count <= 15) ? (a >> count) : 0); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask16(a, count) _kshiftri_mask16(a, count) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask16 + #define _kshiftri_mask16(a, count) simde_kshiftri_mask16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_kshiftri_mask32 (simde__mmask32 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 31) ? (a >> count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask32(a, count) _kshiftri_mask32(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask32 + #define _kshiftri_mask32(a, count) simde_kshiftri_mask32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_kshiftri_mask64 (simde__mmask64 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return (count <= 63) ? (a >> count) : 0; +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask64(a, count) _kshiftri_mask64(a, count) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask64 + #define _kshiftri_mask64(a, count) simde_kshiftri_mask64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_kshiftri_mask8 (simde__mmask8 a, unsigned int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + return HEDLEY_STATIC_CAST(simde__mmask8, (count <= 7) ? 
(a >> count) : 0); +} +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) && (!defined(SIMDE_DETECT_CLANG_VERSION) && SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0)) + #define simde_kshiftri_mask8(a, count) _kshiftri_mask8(a, count) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _kshiftri_mask8 + #define _kshiftri_mask8(a, count) simde_kshiftri_mask8(a, count) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_KSHIFT_H) */ +/* :: End simde/x86/avx512/kshift.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/permutex2var.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) +#define SIMDE_X86_AVX512_PERMUTEX2VAR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/andnot.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_ANDNOT_H) +#define SIMDE_X86_AVX512_ANDNOT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) +#else + #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_ps + #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) +#else + #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_ps + #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) +#else + #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_ps + #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) +#else + #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_pd + #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) +#else + #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_pd + #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) +#else + #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_maskz_andnot_pd + #define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_andnot_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) +#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_si512 + #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b) + #undef _mm512_andnot_epi32 + #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) + #undef _mm512_andnot_epi64 + #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_andnot_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_epi32 + #define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_andnot_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_epi32 + #define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_andnot_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_epi64 + #define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_andnot_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_epi64 + #define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ANDNOT_H) */ +/* :: End simde/x86/avx512/andnot.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/x86/avx512/blend.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_BLEND_H) +#define SIMDE_X86_AVX512_BLEND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_blend_epi8(k, a, b); + #else + return simde_mm_mask_mov_epi8(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi8 + #define _mm_mask_blend_epi8(k, a, b) simde_mm_mask_blend_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_blend_epi16(k, a, b); + #else + return simde_mm_mask_mov_epi16(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi16 + #define _mm_mask_blend_epi16(k, a, b) simde_mm_mask_blend_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_epi32(k, a, b); + #else + return simde_mm_mask_mov_epi32(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi32 + #define _mm_mask_blend_epi32(k, a, b) simde_mm_mask_blend_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_blend_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_epi64(k, a, b); + #else + return simde_mm_mask_mov_epi64(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_epi64 + #define _mm_mask_blend_epi64(k, a, b) simde_mm_mask_blend_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_blend_ps(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + 
return _mm_mask_blend_ps(k, a, b); + #else + return simde_mm_mask_mov_ps(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_ps + #define _mm_mask_blend_ps(k, a, b) simde_mm_mask_blend_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_blend_pd(simde__mmask8 k, simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_blend_pd(k, a, b); + #else + return simde_mm_mask_mov_pd(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_blend_pd + #define _mm_mask_blend_pd(k, a, b) simde_mm_mask_blend_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi8(simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_blend_epi8(k, a, b); + #else + return simde_mm256_mask_mov_epi8(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi8 + #define _mm256_mask_blend_epi8(k, a, b) simde_mm256_mask_blend_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_blend_epi16(k, a, b); + #else + return simde_mm256_mask_mov_epi16(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi16 + #define _mm256_mask_blend_epi16(k, a, b) simde_mm256_mask_blend_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_epi32(k, a, b); + #else + return simde_mm256_mask_mov_epi32(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi32 + #define _mm256_mask_blend_epi32(k, a, b) simde_mm256_mask_blend_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_blend_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_epi64(k, a, b); + #else + return simde_mm256_mask_mov_epi64(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_epi64 + #define _mm256_mask_blend_epi64(k, a, b) simde_mm256_mask_blend_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_blend_ps(simde__mmask8 k, simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_ps(k, a, b); + #else + return simde_mm256_mask_mov_ps(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_ps + #define _mm256_mask_blend_ps(k, a, b) simde_mm256_mask_blend_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_blend_pd(simde__mmask8 k, simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_blend_pd(k, a, b); + #else + return simde_mm256_mask_mov_pd(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_blend_pd + #define _mm256_mask_blend_pd(k, a, b) simde_mm256_mask_blend_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi8(simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if 
defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_blend_epi8(k, a, b); + #else + return simde_mm512_mask_mov_epi8(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi8 + #define _mm512_mask_blend_epi8(k, a, b) simde_mm512_mask_blend_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi16(simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_blend_epi16(k, a, b); + #else + return simde_mm512_mask_mov_epi16(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi16 + #define _mm512_mask_blend_epi16(k, a, b) simde_mm512_mask_blend_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_epi32(k, a, b); + #else + return simde_mm512_mask_mov_epi32(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi32 + #define _mm512_mask_blend_epi32(k, a, b) simde_mm512_mask_blend_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_blend_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_epi64(k, a, b); + #else + return simde_mm512_mask_mov_epi64(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_epi64 + #define _mm512_mask_blend_epi64(k, a, b) simde_mm512_mask_blend_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_blend_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_ps(k, a, b); + #else + return simde_mm512_mask_mov_ps(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_ps + #define _mm512_mask_blend_ps(k, a, b) simde_mm512_mask_blend_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_blend_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_blend_pd(k, a, b); + #else + return simde_mm512_mask_mov_pd(a, k, b); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_blend_pd + #define _mm512_mask_blend_pd(k, a, b) simde_mm512_mask_blend_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_BLEND_H) */ +/* :: End simde/x86/avx512/blend.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/or.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_OR_H) +#define SIMDE_X86_AVX512_OR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_or_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_or_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256[0] = simde_mm256_or_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_or_ps(a_.m256[1], b_.m256[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_ps + #define _mm512_or_ps(a, b) simde_mm512_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_or_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_or_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_or_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_ps + #define _mm512_mask_or_ps(src, k, a, b) simde_mm512_mask_or_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_or_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_or_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_or_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_ps + #define _mm512_maskz_or_ps(k, a, b) simde_mm512_maskz_or_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_or_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_or_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_or_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_or_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + 
return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_pd + #define _mm512_or_pd(a, b) simde_mm512_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_or_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_or_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_or_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_pd + #define _mm512_mask_or_pd(src, k, a, b) simde_mm512_mask_or_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_or_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_or_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_or_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_pd + #define _mm512_maskz_or_pd(k, a, b) simde_mm512_maskz_or_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 | b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] | b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_epi32 + #define _mm512_or_epi32(a, b) simde_mm512_or_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_or_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_or_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_or_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_epi32 + #define _mm512_mask_or_epi32(src, k, v2, v3) simde_mm512_mask_or_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_or_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_or_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_or_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_epi32 + #define _mm512_maskz_or_epi32(k, a, b) simde_mm512_maskz_or_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_or_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_epi64 + #define _mm512_or_epi64(a, b) simde_mm512_or_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_or_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_or_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_or_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_epi64 + #define _mm512_mask_or_epi64(src, k, a, b) simde_mm512_mask_or_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_or_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_or_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_or_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_epi64 + #define _mm512_maskz_or_epi64(k, a, b) simde_mm512_maskz_or_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_si512 + #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_OR_H) */ +/* :: End simde/x86/avx512/or.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/slli.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SLLI_H) +#define SIMDE_X86_AVX512_SLLI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_slli_epi16 (simde__m512i a, const unsigned int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_16_(_mm512_slli_epi16, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi16(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + if(imm8 < 16) + r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8))); + else + return simde_mm512_setzero_si512(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (imm8 < 16) ? HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)) : 0; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_slli_epi16 + #define _mm512_slli_epi16(a, imm8) simde_mm512_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_slli_epi32 (simde__m512i a, unsigned int imm8) { + /* I guess the restriction was added in 6.4, back-ported to 5.5, then + * removed (fixed) in 7? */ + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_32_(_mm512_slli_epi32, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi32(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are + * used. In this case we should do "imm8 &= 0xff". However in + * practice all bits are used. 
*/ + if (imm8 > 31) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_slli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_slli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_slli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_slli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_slli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_slli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << imm8; + } + #endif + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_slli_epi32 + #define _mm512_slli_epi32(a, imm8) simde_mm512_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_slli_epi64 (simde__m512i a, unsigned int imm8) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_64_(_mm512_slli_epi64, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_slli_epi64(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are + * used. In this case we should do "imm8 &= 0xff". However in + * practice all bits are used. 
*/ + if (imm8 > 63) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_slli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_slli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_slli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_slli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_slli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_slli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) + r_.u64 = a_.u64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << imm8; + } + #endif + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_slli_epi64 + #define _mm512_slli_epi64(a, imm8) simde_mm512_slli_epi64(a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SLLI_H) */ +/* :: End simde/x86/avx512/slli.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/srli.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SRLI_H) +#define SIMDE_X86_AVX512_SRLI_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srli_epi16 (simde__m512i a, const unsigned int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_16_(_mm512_srli_epi16, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi16(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) + return simde_mm512_setzero_si512(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_srli_epi16(a, imm8) _mm512_srli_epi16(a, imm8) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_srli_epi16 + #define _mm512_srli_epi16(a, imm8) simde_mm512_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srli_epi32 (simde__m512i a, unsigned int imm8) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_32_(_mm512_srli_epi32, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi32(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_srli_epi32(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_srli_epi32(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_srli_epi32(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_srli_epi32(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_srli_epi32(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_srli_epi32(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #else + if (imm8 > 31) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srli_epi32 + #define _mm512_srli_epi32(a, imm8) simde_mm512_srli_epi32(a, 
imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_srli_epi64 (simde__m512i a, unsigned int imm8) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (defined(HEDLEY_GCC_VERSION) && ((__GNUC__ == 5 && __GNUC_MINOR__ == 5) || (__GNUC__ == 6 && __GNUC_MINOR__ >= 4))) + simde__m512i r; + + SIMDE_CONSTIFY_64_(_mm512_srli_epi64, r, simde_mm512_setzero_si512(), imm8, a); + + return r; + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return SIMDE_BUG_IGNORE_SIGN_CONVERSION(_mm512_srli_epi64(a, imm8)); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_srli_epi64(a_.m256i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m256i[1] = simde_mm256_srli_epi64(a_.m256i[1], HEDLEY_STATIC_CAST(int, imm8)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_srli_epi64(a_.m128i[0], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[1] = simde_mm_srli_epi64(a_.m128i[1], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[2] = simde_mm_srli_epi64(a_.m128i[2], HEDLEY_STATIC_CAST(int, imm8)); + r_.m128i[3] = simde_mm_srli_epi64(a_.m128i[3], HEDLEY_STATIC_CAST(int, imm8)); + #else + /* The Intel Intrinsics Guide says that only the 8 LSBits of imm8 are + * used. In this case we should do "imm8 &= 0xff" here. However in + * practice all bits are used. */ + if (imm8 > 63) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_97248) + r_.u64 = a_.u64 >> imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_srli_epi64 + #define _mm512_srli_epi64(a, imm8) simde_mm512_srli_epi64(a, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SRLI_H) */ +/* :: End simde/x86/avx512/srli.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/test.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + * 2020 Christopher Moore + * 2021 Andrew Rodriguez + */ + +#if !defined(SIMDE_X86_AVX512_TEST_H) +#define SIMDE_X86_AVX512_TEST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_test_epi32_mask (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_test_epi32_mask(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_test_epi32_mask +#define _mm256_test_epi32_mask(a, b) simde_mm256_test_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_mask_test_epi32_mask (simde__mmask8 k1, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_test_epi32_mask(k1, a, b); + #else + return simde_mm256_test_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_test_epi32_mask + #define _mm256_mask_test_epi32_mask(k1, a, b) simde_mm256_mask_test_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_test_epi16_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_test_epi16_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask32 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, !!(a_.i16[i] & b_.i16[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi16_mask + #define _mm512_test_epi16_mask(a, b) simde_mm512_test_epi16_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_test_epi32_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_test_epi32_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask16 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, !!(a_.i32[i] & b_.i32[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi32_mask +#define _mm512_test_epi32_mask(a, b) simde_mm512_test_epi32_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_test_epi64_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_test_epi64_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask8, !!(a_.i64[i] & b_.i64[i]) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi64_mask + #define 
_mm512_test_epi64_mask(a, b) simde_mm512_test_epi64_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_test_epi8_mask (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_test_epi8_mask(a, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + simde__mmask64 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask64, HEDLEY_STATIC_CAST(uint64_t, !!(a_.i8[i] & b_.i8[i])) << i); + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_test_epi8_mask + #define _mm512_test_epi8_mask(a, b) simde_mm512_test_epi8_mask(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_mask_test_epi16_mask (simde__mmask32 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_test_epi16_mask(k1, a, b); + #else + return simde_mm512_test_epi16_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi16_mask + #define _mm512_mask_test_epi16_mask(k1, a, b) simde_mm512_mask_test_epi16_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_mask_test_epi32_mask (simde__mmask16 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_test_epi32_mask(k1, a, b); + #else + return simde_mm512_test_epi32_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi32_mask + #define _mm512_mask_test_epi32_mask(k1, a, b) simde_mm512_mask_test_epi32_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_mask_test_epi64_mask (simde__mmask8 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_test_epi64_mask(k1, a, b); + #else + return simde_mm512_test_epi64_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi64_mask + #define _mm512_mask_test_epi64_mask(k1, a, b) simde_mm512_mask_test_epi64_mask(k1, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_mask_test_epi8_mask (simde__mmask64 k1, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_test_epi8_mask(k1, a, b); + #else + return simde_mm512_test_epi8_mask(a, b) & k1; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_test_epi8_mask + #define _mm512_mask_test_epi8_mask(k1, a, b) simde_mm512_mask_test_epi8_mask(k1, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TEST_H) */ +/* :: End simde/x86/avx512/test.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* The following generic code avoids many, nearly identical, repetitions of fairly complex code. + * If the compiler optimizes well, in particular extracting invariant code from loops + * and simplifying code involving constants passed as arguments, it should not be + * significantly slower than specific code. + * Note that when the original vector contains few elements, these implementations + * may not be faster than portable code. 
+ */ +#if defined(SIMDE_X86_SSSE3_NATIVE) || defined(SIMDE_ARM_NEON_A64V8_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_X_PERMUTEX2VAR_USE_GENERIC +#endif + +#if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_permutex2var128 (const simde__m128i *a, const simde__m128i idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { + const int idx_mask = (1 << (5 - log2_index_size + log2_data_length)) - 1; + + #if defined(SIMDE_X86_SSE3_NATIVE) + __m128i ra, rb, t, test, select, index; + const __m128i sixteen = _mm_set1_epi8(16); + + /* Avoid the mullo intrinsics which have high latency (and the 32-bit one requires SSE4.1) */ + switch (log2_index_size) { + default: /* Avoid uninitialized variable warning/error */ + case 0: + index = _mm_and_si128(idx, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index = _mm_and_si128(idx, _mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index = _mm_slli_epi32(index, 1); + t = _mm_slli_epi32(index, 8); + index = _mm_or_si128(index, t); + index = _mm_add_epi16(index, _mm_set1_epi16(0x0100)); + break; + case 2: + index = _mm_and_si128(idx, _mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index = _mm_slli_epi32(index, 2); + t = _mm_slli_epi32(index, 8); + index = _mm_or_si128(index, t); + t = _mm_slli_epi32(index, 16); + index = _mm_or_si128(index, t); + index = _mm_add_epi32(index, _mm_set1_epi32(0x03020100)); + break; + } + + test = index; + index = _mm_and_si128(index, _mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, (1 << (4 + log2_data_length)) - 1))); + test = _mm_cmpgt_epi8(test, index); + + ra = _mm_shuffle_epi8(a[0], index); + rb = _mm_shuffle_epi8(b[0], index); + + #if defined(SIMDE_X86_SSE4_1_NATIVE) + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + select = _mm_cmplt_epi8(index, sixteen); + index = _mm_sub_epi8(index, sixteen); + ra = _mm_blendv_epi8(_mm_shuffle_epi8(a[i], index), ra, select); + rb = _mm_blendv_epi8(_mm_shuffle_epi8(b[i], index), rb, select); + } + + return _mm_blendv_epi8(ra, rb, test); + #else + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + select = _mm_cmplt_epi8(index, sixteen); + index = _mm_sub_epi8(index, sixteen); + ra = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(a[i], index)), _mm_and_si128(select, ra)); + rb = _mm_or_si128(_mm_andnot_si128(select, _mm_shuffle_epi8(b[i], index)), _mm_and_si128(select, rb)); + } + + return _mm_or_si128(_mm_andnot_si128(test, ra), _mm_and_si128(test, rb)); + #endif + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16_t index, r; + uint16x8_t index16; + uint32x4_t index32; + uint8x16x2_t table2_a, table2_b; + uint8x16x4_t table4_a, table4_b; + + switch (log2_index_size) { + case 0: + index = vandq_u8(simde__m128i_to_neon_u8(idx), vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); + break; + case 1: + index16 = vandq_u16(simde__m128i_to_neon_u16(idx), vdupq_n_u16(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); + index16 = vmulq_n_u16(index16, 0x0202); + index16 = vaddq_u16(index16, vdupq_n_u16(0x0100)); + index = vreinterpretq_u8_u16(index16); + break; + case 2: + index32 = vandq_u32(simde__m128i_to_neon_u32(idx), vdupq_n_u32(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); + index32 = vmulq_n_u32(index32, 0x04040404); + index32 = vaddq_u32(index32, vdupq_n_u32(0x03020100)); + index = vreinterpretq_u8_u32(index32); + break; + } + + uint8x16_t mask = 
vdupq_n_u8(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1)); + + switch (log2_data_length) { + case 0: + r = vqtbx1q_u8(vqtbl1q_u8(simde__m128i_to_neon_u8(b[0]), vandq_u8(index, mask)), simde__m128i_to_neon_u8(a[0]), index); + break; + case 1: + table2_a.val[0] = simde__m128i_to_neon_u8(a[0]); + table2_a.val[1] = simde__m128i_to_neon_u8(a[1]); + table2_b.val[0] = simde__m128i_to_neon_u8(b[0]); + table2_b.val[1] = simde__m128i_to_neon_u8(b[1]); + r = vqtbx2q_u8(vqtbl2q_u8(table2_b, vandq_u8(index, mask)), table2_a, index); + break; + case 2: + table4_a.val[0] = simde__m128i_to_neon_u8(a[0]); + table4_a.val[1] = simde__m128i_to_neon_u8(a[1]); + table4_a.val[2] = simde__m128i_to_neon_u8(a[2]); + table4_a.val[3] = simde__m128i_to_neon_u8(a[3]); + table4_b.val[0] = simde__m128i_to_neon_u8(b[0]); + table4_b.val[1] = simde__m128i_to_neon_u8(b[1]); + table4_b.val[2] = simde__m128i_to_neon_u8(b[2]); + table4_b.val[3] = simde__m128i_to_neon_u8(b[3]); + r = vqtbx4q_u8(vqtbl4q_u8(table4_b, vandq_u8(index, mask)), table4_a, index); + break; + } + + return simde__m128i_from_neon_u8(r); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) r, ra, rb, t, index, s, thirty_two = vec_splats(HEDLEY_STATIC_CAST(uint8_t, 32)); + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) index16; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) temp32, index32; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) select, test; + + switch (log2_index_size) { + default: /* Avoid uninitialized variable warning/error */ + case 0: + index = vec_and(simde__m128i_to_altivec_u8(idx), vec_splats(HEDLEY_STATIC_CAST(uint8_t, idx_mask))); + break; + case 1: + index16 = simde__m128i_to_altivec_u16(idx); + index16 = vec_and(index16, vec_splats(HEDLEY_STATIC_CAST(uint16_t, idx_mask))); + index16 = vec_mladd(index16, vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0202)), vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0100))); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index16); + break; + case 2: + index32 = simde__m128i_to_altivec_u32(idx); + index32 = vec_and(index32, vec_splats(HEDLEY_STATIC_CAST(uint32_t, idx_mask))); + + /* Multiply index32 by 0x04040404; unfortunately vec_mul isn't available so (mis)use 16-bit vec_mladd */ + temp32 = vec_sl(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 16))); + index32 = vec_add(index32, temp32); + index32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), + vec_mladd(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), index32), + vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x0404)), + vec_splat_u16(0))); + + index32 = vec_add(index32, vec_splats(HEDLEY_STATIC_CAST(unsigned int, 0x03020100))); + index = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index32); + break; + } + + if (log2_data_length == 0) { + r = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(b[0]), HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), index)); + } + else { + s = index; + index = vec_and(index, vec_splats(HEDLEY_STATIC_CAST(uint8_t, (1 << (4 + log2_data_length)) - 1))); + test = vec_cmpgt(s, index); + + ra = vec_perm(simde__m128i_to_altivec_u8(a[0]), simde__m128i_to_altivec_u8(a[1]), index); + rb = vec_perm(simde__m128i_to_altivec_u8(b[0]), simde__m128i_to_altivec_u8(b[1]), index); + + SIMDE_VECTORIZE + for (int i = 2 ; i < (1 << log2_data_length) ; i += 2) { + select = vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed 
char), index), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), thirty_two)); + index = vec_sub(index, thirty_two); + t = vec_perm(simde__m128i_to_altivec_u8(a[i]), simde__m128i_to_altivec_u8(a[i + 1]), index); + ra = vec_sel(t, ra, select); + t = vec_perm(simde__m128i_to_altivec_u8(b[i]), simde__m128i_to_altivec_u8(b[i + 1]), index); + rb = vec_sel(t, rb, select); + } + + r = vec_sel(ra, rb, test); + } + + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sixteen = wasm_i8x16_splat(16); + + v128_t index = simde__m128i_to_wasm_v128(idx); + + switch (log2_index_size) { + case 0: + index = wasm_v128_and(index, wasm_i8x16_splat(HEDLEY_STATIC_CAST(int8_t, idx_mask))); + break; + case 1: + index = wasm_v128_and(index, wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, idx_mask))); + index = wasm_i16x8_mul(index, wasm_i16x8_splat(0x0202)); + index = wasm_i16x8_add(index, wasm_i16x8_splat(0x0100)); + break; + case 2: + index = wasm_v128_and(index, wasm_i32x4_splat(HEDLEY_STATIC_CAST(int32_t, idx_mask))); + index = wasm_i32x4_mul(index, wasm_i32x4_splat(0x04040404)); + index = wasm_i32x4_add(index, wasm_i32x4_splat(0x03020100)); + break; + } + + v128_t r = wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(a[0]), index); + + SIMDE_VECTORIZE + for (int i = 1 ; i < (1 << log2_data_length) ; i++) { + index = wasm_i8x16_sub(index, sixteen); + r = wasm_v128_or(r, wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(a[i]), index)); + } + + SIMDE_VECTORIZE + for (int i = 0 ; i < (1 << log2_data_length) ; i++) { + index = wasm_i8x16_sub(index, sixteen); + r = wasm_v128_or(r, wasm_i8x16_swizzle(simde__m128i_to_wasm_v128(b[i]), index)); + } + + return simde__m128i_from_wasm_v128(r); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_permutex2var (simde__m128i *r, const simde__m128i *a, const simde__m128i *idx, const simde__m128i *b, const unsigned int log2_index_size, const unsigned int log2_data_length) { + SIMDE_VECTORIZE + for (int i = 0 ; i < (1 << log2_data_length) ; i++) { + r[i] = simde_x_permutex2var128(a, idx[i], b, log2_index_size, log2_data_length); + } +} +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi16(a, idx, b); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 1, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 8) ? 
b_ : a_).i16[idx_.i16[i] & 7]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi16 + #define _mm_permutex2var_epi16(a, idx, b) simde_mm_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi16 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm_mask_mov_epi16(a, k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi16 +#define _mm_mask_permutex2var_epi16(a, k, idx, b) simde_mm_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi16 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm_mask_mov_epi16(idx, k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi16 +#define _mm_mask2_permutex2var_epi16(a, idx, k, b) simde_mm_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi16 +#define _mm_maskz_permutex2var_epi16(k, a, idx, b) simde_mm_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi32(a, idx, b); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) /* This may not be faster than the portable version */ + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 2, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 4) ? 
b_ : a_).i32[idx_.i32[i] & 3]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi32 + #define _mm_permutex2var_epi32(a, idx, b) simde_mm_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi32 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm_mask_mov_epi32(a, k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi32 +#define _mm_mask_permutex2var_epi32(a, k, idx, b) simde_mm_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi32 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm_mask_mov_epi32(idx, k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi32 +#define _mm_mask2_permutex2var_epi32(a, idx, k, b) simde_mm_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi32 +#define _mm_maskz_permutex2var_epi32(k, a, idx, b) simde_mm_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi64(a, idx, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 2) ? 
b_ : a_).i64[idx_.i64[i] & 1]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi64 + #define _mm_permutex2var_epi64(a, idx, b) simde_mm_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi64 (simde__m128i a, simde__mmask8 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm_mask_mov_epi64(a, k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi64 +#define _mm_mask_permutex2var_epi64(a, k, idx, b) simde_mm_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi64 (simde__m128i a, simde__m128i idx, simde__mmask8 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm_mask_mov_epi64(idx, k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi64 +#define _mm_mask2_permutex2var_epi64(a, idx, k, b) simde_mm_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi64 +#define _mm_maskz_permutex2var_epi64(k, a, idx, b) simde_mm_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cvtepi32_epi8(_mm512_permutex2var_epi32(_mm512_cvtepu8_epi32(a), _mm512_cvtepu8_epi32(idx), _mm512_cvtepu8_epi32(b))); + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde__m128i r; + + simde_x_permutex2var(&r, &a, &idx, &b, 0, 0); + + return r; + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + idx_ = simde__m128i_to_private(idx), + b_ = simde__m128i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x10) ? 
b_ : a_).i8[idx_.i8[i] & 0x0F]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_epi8 + #define _mm_permutex2var_epi8(a, idx, b) simde_mm_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_permutex2var_epi8 (simde__m128i a, simde__mmask16 k, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm_mask_mov_epi8(a, k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_epi8 +#define _mm_mask_permutex2var_epi8(a, k, idx, b) simde_mm_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask2_permutex2var_epi8 (simde__m128i a, simde__m128i idx, simde__mmask16 k, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm_mask_mov_epi8(idx, k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_epi8 +#define _mm_mask2_permutex2var_epi8(a, idx, k, b) simde_mm_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_permutex2var_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i idx, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_epi8 +#define _mm_maskz_permutex2var_epi8(k, a, idx, b) simde_mm_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_pd(a, idx, b); + #else + return simde_mm_castsi128_pd(simde_mm_permutex2var_epi64(simde_mm_castpd_si128(a), idx, simde_mm_castpd_si128(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_pd + #define _mm_permutex2var_pd(a, idx, b) simde_mm_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_permutex2var_pd (simde__m128d a, simde__mmask8 k, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm_mask_mov_pd(a, k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_pd +#define _mm_mask_permutex2var_pd(a, k, idx, b) simde_mm_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask2_permutex2var_pd (simde__m128d a, simde__m128i idx, simde__mmask8 k, 
simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm_mask_mov_pd(simde_mm_castsi128_pd(idx), k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_pd +#define _mm_mask2_permutex2var_pd(a, idx, k, b) simde_mm_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_permutex2var_pd (simde__mmask8 k, simde__m128d a, simde__m128i idx, simde__m128d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm_maskz_mov_pd(k, simde_mm_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_pd +#define _mm_maskz_permutex2var_pd(k, a, idx, b) simde_mm_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_permutex2var_ps(a, idx, b); + #else + return simde_mm_castsi128_ps(simde_mm_permutex2var_epi32(simde_mm_castps_si128(a), idx, simde_mm_castps_si128(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_permutex2var_ps + #define _mm_permutex2var_ps(a, idx, b) simde_mm_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_permutex2var_ps (simde__m128 a, simde__mmask8 k, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm_mask_mov_ps(a, k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_permutex2var_ps +#define _mm_mask_permutex2var_ps(a, k, idx, b) simde_mm_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask2_permutex2var_ps (simde__m128 a, simde__m128i idx, simde__mmask8 k, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm_mask_mov_ps(simde_mm_castsi128_ps(idx), k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask2_permutex2var_ps +#define _mm_mask2_permutex2var_ps(a, idx, k, b) simde_mm_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_permutex2var_ps (simde__mmask8 k, simde__m128 a, simde__m128i idx, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm_maskz_mov_ps(k, simde_mm_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_permutex2var_ps +#define _mm_maskz_permutex2var_ps(k, a, idx, b) simde_mm_maskz_permutex2var_ps(k, a, idx, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi16(a, idx, b); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i hilo, hilo2, hi, lo, idx2, ta, tb, select; + const __m256i ones = _mm256_set1_epi16(1); + + idx2 = _mm256_srli_epi32(idx, 1); + + ta = _mm256_permutevar8x32_epi32(a, idx2); + tb = _mm256_permutevar8x32_epi32(b, idx2); + select = _mm256_slli_epi32(idx2, 28); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + idx2 = _mm256_srli_epi32(idx2, 16); + + ta = _mm256_permutevar8x32_epi32(a, idx2); + tb = _mm256_permutevar8x32_epi32(b, idx2); + select = _mm256_slli_epi32(idx2, 28); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + + lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo, 0x55); + hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo, 16), 0x55); + + select = _mm256_cmpeq_epi16(_mm256_and_si256(idx, ones), ones); + return _mm256_blendv_epi8(lo, hi, select); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 0x10) ? b_ : a_).i16[idx_.i16[i] & 0x0F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi16 + #define _mm256_permutex2var_epi16(a, idx, b) simde_mm256_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi16 (simde__m256i a, simde__mmask16 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi16(a, k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi16 +#define _mm256_mask_permutex2var_epi16(a, k, idx, b) simde_mm256_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi16 (simde__m256i a, simde__m256i idx, simde__mmask16 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi16(idx, k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi16 +#define _mm256_mask2_permutex2var_epi16(a, idx, k, b) simde_mm256_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return 
_mm256_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi16 +#define _mm256_maskz_permutex2var_epi16(k, a, idx, b) simde_mm256_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi32(a, idx, b); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i ta, tb, select; + ta = _mm256_permutevar8x32_epi32(a, idx); + tb = _mm256_permutevar8x32_epi32(b, idx); + select = _mm256_slli_epi32(idx, 28); + return _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 8) ? b_ : a_).i32[idx_.i32[i] & 7]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi32 + #define _mm256_permutex2var_epi32(a, idx, b) simde_mm256_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi32 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi32(a, k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi32 +#define _mm256_mask_permutex2var_epi32(a, k, idx, b) simde_mm256_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi32 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi32(idx, k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi32 +#define _mm256_mask2_permutex2var_epi32(a, idx, k, b) simde_mm256_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi32 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_maskz_permutex2var_epi32 +#define _mm256_maskz_permutex2var_epi32(k, a, idx, b) simde_mm256_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi64(a, idx, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 4) ? b_ : a_).i64[idx_.i64[i] & 3]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi64 + #define _mm256_permutex2var_epi64(a, idx, b) simde_mm256_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi64 (simde__m256i a, simde__mmask8 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi64(a, k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi64 +#define _mm256_mask_permutex2var_epi64(a, k, idx, b) simde_mm256_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi64 (simde__m256i a, simde__m256i idx, simde__mmask8 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi64(idx, k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi64 +#define _mm256_mask2_permutex2var_epi64(a, idx, k, b) simde_mm256_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi64 +#define _mm256_maskz_permutex2var_epi64(k, a, idx, b) simde_mm256_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_cvtepi16_epi8(_mm512_permutex2var_epi16(_mm512_cvtepu8_epi16(a), _mm512_cvtepu8_epi16(idx), _mm512_cvtepu8_epi16(b))); + #elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t0, t1, index, select0x10, select0x20, a01, b01; + const __m256i mask = _mm256_set1_epi8(0x3F); + const __m256i a0 = 
_mm256_permute4x64_epi64(a, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a1 = _mm256_permute4x64_epi64(a, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b0 = _mm256_permute4x64_epi64(b, (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b1 = _mm256_permute4x64_epi64(b, (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + + index = _mm256_and_si256(idx, mask); + t0 = _mm256_shuffle_epi8(a0, index); + t1 = _mm256_shuffle_epi8(a1, index); + select0x10 = _mm256_slli_epi64(index, 3); + a01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(b0, index); + t1 = _mm256_shuffle_epi8(b1, index); + b01 = _mm256_blendv_epi8(t0, t1, select0x10); + select0x20 = _mm256_slli_epi64(index, 2); + return _mm256_blendv_epi8(a01, b01, select0x20); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x20) ? b_ : a_).i8[idx_.i8[i] & 0x1F]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_epi8 + #define _mm256_permutex2var_epi8(a, idx, b) simde_mm256_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_permutex2var_epi8 (simde__m256i a, simde__mmask32 k, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm256_mask_mov_epi8(a, k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_epi8 +#define _mm256_mask_permutex2var_epi8(a, k, idx, b) simde_mm256_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask2_permutex2var_epi8 (simde__m256i a, simde__m256i idx, simde__mmask32 k, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm256_mask_mov_epi8(idx, k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_epi8 +#define _mm256_mask2_permutex2var_epi8(a, idx, k, b) simde_mm256_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_permutex2var_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i idx, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_epi8 +#define _mm256_maskz_permutex2var_epi8(k, a, idx, b) simde_mm256_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutex2var_pd 
(simde__m256d a, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_pd(a, idx, b); + #else + return simde_mm256_castsi256_pd(simde_mm256_permutex2var_epi64(simde_mm256_castpd_si256(a), idx, simde_mm256_castpd_si256(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_pd + #define _mm256_permutex2var_pd(a, idx, b) simde_mm256_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_permutex2var_pd (simde__m256d a, simde__mmask8 k, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm256_mask_mov_pd(a, k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_pd +#define _mm256_mask_permutex2var_pd(a, k, idx, b) simde_mm256_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask2_permutex2var_pd (simde__m256d a, simde__m256i idx, simde__mmask8 k, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm256_mask_mov_pd(simde_mm256_castsi256_pd(idx), k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_pd +#define _mm256_mask2_permutex2var_pd(a, idx, k, b) simde_mm256_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_permutex2var_pd (simde__mmask8 k, simde__m256d a, simde__m256i idx, simde__m256d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm256_maskz_mov_pd(k, simde_mm256_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_pd +#define _mm256_maskz_permutex2var_pd(k, a, idx, b) simde_mm256_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_permutex2var_ps(a, idx, b); + #else + return simde_mm256_castsi256_ps(simde_mm256_permutex2var_epi32(simde_mm256_castps_si256(a), idx, simde_mm256_castps_si256(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutex2var_ps + #define _mm256_permutex2var_ps(a, idx, b) simde_mm256_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_permutex2var_ps (simde__m256 a, simde__mmask8 k, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm256_mask_mov_ps(a, k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_permutex2var_ps +#define _mm256_mask_permutex2var_ps(a, k, idx, b) simde_mm256_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask2_permutex2var_ps (simde__m256 a, simde__m256i idx, simde__mmask8 k, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm256_mask_mov_ps(simde_mm256_castsi256_ps(idx), k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask2_permutex2var_ps +#define _mm256_mask2_permutex2var_ps(a, idx, k, b) simde_mm256_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_permutex2var_ps (simde__mmask8 k, simde__m256 a, simde__m256i idx, simde__m256 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm256_maskz_mov_ps(k, simde_mm256_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_permutex2var_ps +#define _mm256_maskz_permutex2var_ps(k, a, idx, b) simde_mm256_maskz_permutex2var_ps(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_permutex2var_epi16(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i hilo, hilo1, hilo2, hi, lo, idx1, idx2, ta, tb, select; + const __m256i ones = _mm256_set1_epi16(1); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + idx1 = idx_.m256i[i]; + idx2 = _mm256_srli_epi32(idx1, 1); + + select = _mm256_slli_epi32(idx2, 27); + ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); + hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + select = _mm256_add_epi32(select, select); + hilo1 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), + _mm256_castsi256_ps(hilo1), + _mm256_castsi256_ps(select))); + + idx2 = _mm256_srli_epi32(idx2, 16); + + select = _mm256_slli_epi32(idx2, 27); + ta = _mm256_permutevar8x32_epi32(a_.m256i[0], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[0], idx2); + hilo = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + ta = _mm256_permutevar8x32_epi32(a_.m256i[1], idx2); + tb = _mm256_permutevar8x32_epi32(b_.m256i[1], idx2); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(ta), + _mm256_castsi256_ps(tb), + _mm256_castsi256_ps(select))); + select = _mm256_add_epi32(select, select); + hilo2 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(hilo), + 
_mm256_castsi256_ps(hilo2), + _mm256_castsi256_ps(select))); + + lo = _mm256_blend_epi16(_mm256_slli_epi32(hilo2, 16), hilo1, 0x55); + hi = _mm256_blend_epi16(hilo2, _mm256_srli_epi32(hilo1, 16), 0x55); + + select = _mm256_cmpeq_epi16(_mm256_and_si256(idx1, ones), ones); + r_.m256i[i] = _mm256_blendv_epi8(lo, hi, select); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 1, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((idx_.i16[i] & 0x20) ? b_ : a_).i16[idx_.i16[i] & 0x1F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi16 + #define _mm512_permutex2var_epi16(a, idx, b) simde_mm512_permutex2var_epi16(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi16 (simde__m512i a, simde__mmask32 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_permutex2var_epi16(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi16(a, k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi16 +#define _mm512_mask_permutex2var_epi16(a, k, idx, b) simde_mm512_mask_permutex2var_epi16(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi16 (simde__m512i a, simde__m512i idx, simde__mmask32 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask2_permutex2var_epi16(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi16(idx, k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi16 +#define _mm512_mask2_permutex2var_epi16(a, idx, k, b) simde_mm512_mask2_permutex2var_epi16(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_permutex2var_epi16(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_permutex2var_epi16(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi16 +#define _mm512_maskz_permutex2var_epi16(k, a, idx, b) simde_mm512_maskz_permutex2var_epi16(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_epi32(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i index, t0, t1, a01, b01, select; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = idx_.m256i[i]; + t0 = _mm256_permutevar8x32_epi32(a_.m256i[0], index); + t1 = _mm256_permutevar8x32_epi32(a_.m256i[1], index); + select = _mm256_slli_epi32(index, 28); + a01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), + _mm256_castsi256_ps(t1), + _mm256_castsi256_ps(select))); + t0 = _mm256_permutevar8x32_epi32(b_.m256i[0], index); + t1 = 
_mm256_permutevar8x32_epi32(b_.m256i[1], index); + b01 = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(t0), + _mm256_castsi256_ps(t1), + _mm256_castsi256_ps(select))); + select = _mm256_slli_epi32(index, 27); + r_.m256i[i] = _mm256_castps_si256(_mm256_blendv_ps(_mm256_castsi256_ps(a01), + _mm256_castsi256_ps(b01), + _mm256_castsi256_ps(select))); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((idx_.i32[i] & 0x10) ? b_ : a_).i32[idx_.i32[i] & 0x0F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi32 + #define _mm512_permutex2var_epi32(a, idx, b) simde_mm512_permutex2var_epi32(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi32 (simde__m512i a, simde__mmask16 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_epi32(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi32(a, k, simde_mm512_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi32 +#define _mm512_mask_permutex2var_epi32(a, k, idx, b) simde_mm512_mask_permutex2var_epi32(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi32 (simde__m512i a, simde__m512i idx, simde__mmask16 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_epi32(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi32(idx, k, simde_mm512_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi32 +#define _mm512_mask2_permutex2var_epi32(a, idx, k, b) simde_mm512_mask2_permutex2var_epi32(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi32 (simde__mmask16 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_epi32(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_permutex2var_epi32(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi32 +#define _mm512_maskz_permutex2var_epi32(k, a, idx, b) simde_mm512_maskz_permutex2var_epi32(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_epi64(a, idx, b); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((idx_.i64[i] & 8) ? 
b_ : a_).i64[idx_.i64[i] & 7]; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi64 + #define _mm512_permutex2var_epi64(a, idx, b) simde_mm512_permutex2var_epi64(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi64 (simde__m512i a, simde__mmask8 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_epi64(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi64(a, k, simde_mm512_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi64 +#define _mm512_mask_permutex2var_epi64(a, k, idx, b) simde_mm512_mask_permutex2var_epi64(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi64 (simde__m512i a, simde__m512i idx, simde__mmask8 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_epi64(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi64(idx, k, simde_mm512_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi64 +#define _mm512_mask2_permutex2var_epi64(a, idx, k, b) simde_mm512_mask2_permutex2var_epi64(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi64 (simde__mmask8 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_epi64(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_permutex2var_epi64(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_epi64 +#define _mm512_maskz_permutex2var_epi64(k, a, idx, b) simde_mm512_maskz_permutex2var_epi64(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_permutex2var_epi8(a, idx, b); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + __m512i hilo, hi, lo, hi2, lo2, idx2; + const __m512i ones = _mm512_set1_epi8(1); + const __m512i low_bytes = _mm512_set1_epi16(0x00FF); + + idx2 = _mm512_srli_epi16(idx, 1); + hilo = _mm512_permutex2var_epi16(a, idx2, b); + __mmask64 mask = _mm512_test_epi8_mask(idx, ones); + lo = _mm512_and_si512(hilo, low_bytes); + hi = _mm512_srli_epi16(hilo, 8); + + idx2 = _mm512_srli_epi16(idx, 9); + hilo = _mm512_permutex2var_epi16(a, idx2, b); + lo2 = _mm512_slli_epi16(hilo, 8); + hi2 = _mm512_andnot_si512(low_bytes, hilo); + + lo = _mm512_or_si512(lo, lo2); + hi = _mm512_or_si512(hi, hi2); + + return _mm512_mask_blend_epi8(mask, lo, hi); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + idx_ = simde__m512i_to_private(idx), + b_ = simde__m512i_to_private(b), + r_; + + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i t0, t1, index, select0x10, select0x20, select0x40, t01, t23, a0123, b0123; + const __m256i mask = _mm256_set1_epi8(0x7F); + const __m256i a0 = _mm256_permute4x64_epi64(a_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a1 = _mm256_permute4x64_epi64(a_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i a2 = _mm256_permute4x64_epi64(a_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i a3 = 
_mm256_permute4x64_epi64(a_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b0 = _mm256_permute4x64_epi64(b_.m256i[0], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b1 = _mm256_permute4x64_epi64(b_.m256i[0], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + const __m256i b2 = _mm256_permute4x64_epi64(b_.m256i[1], (1 << 6) + (0 << 4) + (1 << 2) + (0 << 0)); + const __m256i b3 = _mm256_permute4x64_epi64(b_.m256i[1], (3 << 6) + (2 << 4) + (3 << 2) + (2 << 0)); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i_private) / sizeof(r_.m256i_private[0])) ; i++) { + index = _mm256_and_si256(idx_.m256i[i], mask); + t0 = _mm256_shuffle_epi8(a0, index); + t1 = _mm256_shuffle_epi8(a1, index); + select0x10 = _mm256_slli_epi64(index, 3); + t01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(a2, index); + t1 = _mm256_shuffle_epi8(a3, index); + t23 = _mm256_blendv_epi8(t0, t1, select0x10); + select0x20 = _mm256_slli_epi64(index, 2); + a0123 = _mm256_blendv_epi8(t01, t23, select0x20); + t0 = _mm256_shuffle_epi8(b0, index); + t1 = _mm256_shuffle_epi8(b1, index); + t01 = _mm256_blendv_epi8(t0, t1, select0x10); + t0 = _mm256_shuffle_epi8(b2, index); + t1 = _mm256_shuffle_epi8(b3, index); + t23 = _mm256_blendv_epi8(t0, t1, select0x10); + b0123 = _mm256_blendv_epi8(t01, t23, select0x20); + select0x40 = _mm256_slli_epi64(index, 1); + r_.m256i[i] = _mm256_blendv_epi8(a0123, b0123, select0x40); + } + #elif defined(SIMDE_X_PERMUTEX2VAR_USE_GENERIC) + simde_x_permutex2var(r_.m128i, a_.m128i, idx_.m128i, b_.m128i, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((idx_.i8[i] & 0x40) ? b_ : a_).i8[idx_.i8[i] & 0x3F]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_epi8 + #define _mm512_permutex2var_epi8(a, idx, b) simde_mm512_permutex2var_epi8(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_permutex2var_epi8 (simde__m512i a, simde__mmask64 k, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask_permutex2var_epi8(a, k, idx, b); + #else + return simde_mm512_mask_mov_epi8(a, k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_epi8 +#define _mm512_mask_permutex2var_epi8(a, k, idx, b) simde_mm512_mask_permutex2var_epi8(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask2_permutex2var_epi8 (simde__m512i a, simde__m512i idx, simde__mmask64 k, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_mask2_permutex2var_epi8(a, idx, k, b); + #else + return simde_mm512_mask_mov_epi8(idx, k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_epi8 +#define _mm512_mask2_permutex2var_epi8(a, idx, k, b) simde_mm512_mask2_permutex2var_epi8(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_permutex2var_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i idx, simde__m512i b) { + #if defined(SIMDE_X86_AVX512VBMI_NATIVE) + return _mm512_maskz_permutex2var_epi8(k, a, idx, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_permutex2var_epi8(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_maskz_permutex2var_epi8 +#define _mm512_maskz_permutex2var_epi8(k, a, idx, b) simde_mm512_maskz_permutex2var_epi8(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__m512d b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_permutex2var_pd(a, idx, b); + #else + return simde_mm512_castsi512_pd(simde_mm512_permutex2var_epi64(simde_mm512_castpd_si512(a), idx, simde_mm512_castpd_si512(b))); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_pd + #define _mm512_permutex2var_pd(a, idx, b) simde_mm512_permutex2var_pd(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_permutex2var_pd (simde__m512d a, simde__mmask8 k, simde__m512i idx, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_pd(a, k, idx, b); + #else + return simde_mm512_mask_mov_pd(a, k, simde_mm512_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_pd +#define _mm512_mask_permutex2var_pd(a, k, idx, b) simde_mm512_mask_permutex2var_pd(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask2_permutex2var_pd (simde__m512d a, simde__m512i idx, simde__mmask8 k, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_pd(a, idx, k, b); + #else + return simde_mm512_mask_mov_pd(simde_mm512_castsi512_pd(idx), k, simde_mm512_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_pd +#define _mm512_mask2_permutex2var_pd(a, idx, k, b) simde_mm512_mask2_permutex2var_pd(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_permutex2var_pd (simde__mmask8 k, simde__m512d a, simde__m512i idx, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_pd(k, a, idx, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_permutex2var_pd(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_pd +#define _mm512_maskz_permutex2var_pd(k, a, idx, b) simde_mm512_maskz_permutex2var_pd(k, a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_permutex2var_ps(a, idx, b); + #else + return simde_mm512_castsi512_ps(simde_mm512_permutex2var_epi32(simde_mm512_castps_si512(a), idx, simde_mm512_castps_si512(b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_permutex2var_ps + #define _mm512_permutex2var_ps(a, idx, b) simde_mm512_permutex2var_ps(a, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_permutex2var_ps (simde__m512 a, simde__mmask16 k, simde__m512i idx, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_permutex2var_ps(a, k, idx, b); + #else + return simde_mm512_mask_mov_ps(a, k, simde_mm512_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_permutex2var_ps +#define _mm512_mask_permutex2var_ps(a, k, idx, b) simde_mm512_mask_permutex2var_ps(a, k, idx, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask2_permutex2var_ps (simde__m512 a, simde__m512i idx, simde__mmask16 k, simde__m512 b) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask2_permutex2var_ps(a, idx, k, b); + #else + return simde_mm512_mask_mov_ps(simde_mm512_castsi512_ps(idx), k, simde_mm512_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask2_permutex2var_ps +#define _mm512_mask2_permutex2var_ps(a, idx, k, b) simde_mm512_mask2_permutex2var_ps(a, idx, k, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_permutex2var_ps (simde__mmask16 k, simde__m512 a, simde__m512i idx, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_permutex2var_ps(k, a, idx, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_permutex2var_ps(a, idx, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_permutex2var_ps +#define _mm512_maskz_permutex2var_ps(k, a, idx, b) simde_mm512_maskz_permutex2var_ps(k, a, idx, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_PERMUTEX2VAR_H) */ +/* :: End simde/x86/avx512/permutex2var.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/shuffle.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SHUFFLE_H) +#define SIMDE_X86_AVX512_SHUFFLE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_shuffle_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_shuffle_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_shuffle_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (b_.u8[i] & 0x80) ? 0 : a_.u8[(b_.u8[i] & 0x0f) + (i & 0x30)]; + } + #endif + + return simde__m512i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_epi8 + #define _mm512_shuffle_epi8(a, b) simde_mm512_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_shuffle_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_shuffle_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_shuffle_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_shuffle_epi8 + #define _mm512_mask_shuffle_epi8(src, k, a, b) simde_mm512_mask_shuffle_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_shuffle_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_shuffle_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_shuffle_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_shuffle_epi8 + #define _mm512_maskz_shuffle_epi8(k, a, b) simde_mm512_maskz_shuffle_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i[0] = a_.m128i[ imm8 & 1]; + r_.m128i[1] = b_.m128i[(imm8 >> 1) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_shuffle_i32x4(a, b, imm8) _mm256_shuffle_i32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_i32x4 + #define _mm256_shuffle_i32x4(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) +#endif + +#define simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_mov_epi32(k, simde_mm256_shuffle_i32x4(a, b, imm8)) +#define simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_mov_epi32(src, k, simde_mm256_shuffle_i32x4(a, b, imm8)) + +#define simde_mm256_shuffle_f32x4(a, b, imm8) 
simde_mm256_castsi256_ps(simde_mm256_shuffle_i32x4(simde_mm256_castps_si256(a), simde_mm256_castps_si256(b), imm8)) +#define simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_mov_ps(k, simde_mm256_shuffle_f32x4(a, b, imm8)) +#define simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_mov_ps(src, k, simde_mm256_shuffle_f32x4(a, b, imm8)) + +#define simde_mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i32x4(a, b, imm8) +#define simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_mov_epi64(k, simde_mm256_shuffle_i64x2(a, b, imm8)) +#define simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_mov_epi64(src, k, simde_mm256_shuffle_i64x2(a, b, imm8)) + +#define simde_mm256_shuffle_f64x2(a, b, imm8) simde_mm256_castsi256_pd(simde_mm256_shuffle_i64x2(simde_mm256_castpd_si256(a), simde_mm256_castpd_si256(b), imm8)) +#define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) +#define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + r_.m128i[0] = a_.m128i[ imm8 & 3]; + r_.m128i[1] = a_.m128i[(imm8 >> 2) & 3]; + r_.m128i[2] = b_.m128i[(imm8 >> 4) & 3]; + r_.m128i[3] = b_.m128i[(imm8 >> 6) & 3]; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_i32x4(a, b, imm8) _mm512_shuffle_i32x4(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_i32x4 + #define _mm512_shuffle_i32x4(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) +#endif + +#define simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_mov_epi32(k, simde_mm512_shuffle_i32x4(a, b, imm8)) +#define simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_mov_epi32(src, k, simde_mm512_shuffle_i32x4(a, b, imm8)) + +#define simde_mm512_shuffle_f32x4(a, b, imm8) simde_mm512_castsi512_ps(simde_mm512_shuffle_i32x4(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b), imm8)) +#define simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_mov_ps(k, simde_mm512_shuffle_f32x4(a, b, imm8)) +#define simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_mov_ps(src, k, simde_mm512_shuffle_f32x4(a, b, imm8)) + +#define simde_mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i32x4(a, b, imm8) +#define simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_mov_epi64(k, simde_mm512_shuffle_i64x2(a, b, imm8)) +#define simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_mov_epi64(src, k, simde_mm512_shuffle_i64x2(a, b, imm8)) + +#define simde_mm512_shuffle_f64x2(a, b, imm8) simde_mm512_castsi512_pd(simde_mm512_shuffle_i64x2(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b), imm8)) +#define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) +#define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) + +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_ps(a, b, imm8) _mm512_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) + 
#define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ + simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ + \ + simde_mm512_shuffle_ps_a_.m256[0] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[0], simde_mm512_shuffle_ps_b_.m256[0], imm8); \ + simde_mm512_shuffle_ps_a_.m256[1] = simde_mm256_shuffle_ps(simde_mm512_shuffle_ps_a_.m256[1], simde_mm512_shuffle_ps_b_.m256[1], imm8); \ + \ + simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm512_shuffle_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_shuffle_ps_a_ = simde__m512_to_private(a), \ + simde_mm512_shuffle_ps_b_ = simde__m512_to_private(b); \ + \ + simde_mm512_shuffle_ps_a_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 64, \ + simde_mm512_shuffle_ps_a_.f32, \ + simde_mm512_shuffle_ps_b_.f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 16, \ + (((imm8) >> 6) & 3) + 16, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 20, \ + (((imm8) >> 6) & 3) + 20, \ + (((imm8) ) & 3) + 8, \ + (((imm8) >> 2) & 3) + 8, \ + (((imm8) >> 4) & 3) + 24, \ + (((imm8) >> 6) & 3) + 24, \ + (((imm8) ) & 3) + 12, \ + (((imm8) >> 2) & 3) + 12, \ + (((imm8) >> 4) & 3) + 28, \ + (((imm8) >> 6) & 3) + 28 \ + ); \ + \ + simde__m512_from_private(simde_mm512_shuffle_ps_a_); \ + })) +#else + SIMDE_FUNCTION_ATTRIBUTES + simde__m512 + simde_mm512_shuffle_ps(simde__m512 a, simde__m512 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + const size_t halfway = (sizeof(r_.m128_private[0].f32) / sizeof(r_.m128_private[0].f32[0]) / 2); + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + SIMDE_VECTORIZE + for (size_t j = 0 ; j < halfway ; j++) { + r_.m128_private[i].f32[j] = a_.m128_private[i].f32[(imm8 >> (j * 2)) & 3]; + r_.m128_private[i].f32[halfway + j] = b_.m128_private[i].f32[(imm8 >> ((halfway + j) * 2)) & 3]; + } + } + + return simde__m512_from_private(r_); + } +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_ps + #define _mm512_shuffle_ps(a, b, imm8) simde_mm512_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_shuffle_pd(simde__m512d a, simde__m512d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE (imm8, 0, 255) { + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.f64) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[i * 2] = (imm8 & ( 1 << (i*2) )) ? a_.f64[i * 2 + 1]: a_.f64[i * 2]; + r_.f64[i * 2 + 1] = (imm8 & ( 1 << (i*2+1) )) ? 
b_.f64[i * 2 + 1]: b_.f64[i * 2]; + } + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_shuffle_pd(a, b, imm8) _mm512_shuffle_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_shuffle_pd + #define _mm512_shuffle_pd(a, b, imm8) simde_mm512_shuffle_pd(a, b, imm8) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SHUFFLE_H) */ +/* :: End simde/x86/avx512/shuffle.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/xor.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_XOR_H) +#define SIMDE_X86_AVX512_XOR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_xor_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + /* TODO: generate reduced case to give to Intel */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_xor_ps(a_.m256[i], b_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_ps + #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_xor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_xor_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_xor_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_ps + #define _mm512_mask_xor_ps(src, k, a, b) simde_mm512_mask_xor_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_xor_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_xor_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_xor_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_ps + #define _mm512_maskz_xor_ps(k, a, b) simde_mm512_maskz_xor_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_xor_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_pd + #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_xor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if 
defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_xor_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_xor_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_pd + #define _mm512_mask_xor_pd(src, k, a, b) simde_mm512_mask_xor_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_xor_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_xor_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_xor_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_pd + #define _mm512_maskz_xor_pd(k, a, b) simde_mm512_maskz_xor_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_epi32 + #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_xor_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_xor_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_epi32 + #define _mm512_mask_xor_epi32(src, k, v2, v3) simde_mm512_mask_xor_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_xor_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_xor_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_xor_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_epi32 + #define _mm512_maskz_xor_epi32(k, a, b) simde_mm512_maskz_xor_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_epi64 + #define 
_mm512_xor_epi64(a, b) simde_mm512_xor_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_xor_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_xor_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_xor_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_epi64 + #define _mm512_mask_xor_epi64(src, k, a, b) simde_mm512_mask_xor_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_xor_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_xor_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_xor_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_epi64 + #define _mm512_maskz_xor_epi64(k, a, b) simde_mm512_maskz_xor_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]); + r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_si512 + #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_XOR_H) */ +/* :: End simde/x86/avx512/xor.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* In all the *gf2p8affine* intrinsics the argument b must be a compile-time constant so we must use macros and simde_x_mm* helper functions */ + +/* N.B. 
The _mm*gf2p8affineinv_epi64_epi8 and _mm*gf2p8mul_epi8 intrinsics are for a Field Generator Polynomial (FGP) (aka reduction polynomial) of 0x11B */ +/* Only the _mm*gf2p8affine_epi64_epi8 intrinsics do not assume this specific FGP */ + +/* The field generator polynomial is 0x11B but we make the 0x100 bit implicit to fit inside 8 bits */ +#define SIMDE_X86_GFNI_FGP 0x1B + +/* Computing the inverse of a GF element is expensive so use this LUT for an FGP of 0x11B */ + +static const union { + uint8_t u8[256]; + simde__m128i m128i[16]; +} simde_x_gf2p8inverse_lut = { + { + 0x00, 0x01, 0x8d, 0xf6, 0xcb, 0x52, 0x7b, 0xd1, 0xe8, 0x4f, 0x29, 0xc0, 0xb0, 0xe1, 0xe5, 0xc7, + 0x74, 0xb4, 0xaa, 0x4b, 0x99, 0x2b, 0x60, 0x5f, 0x58, 0x3f, 0xfd, 0xcc, 0xff, 0x40, 0xee, 0xb2, + 0x3a, 0x6e, 0x5a, 0xf1, 0x55, 0x4d, 0xa8, 0xc9, 0xc1, 0x0a, 0x98, 0x15, 0x30, 0x44, 0xa2, 0xc2, + 0x2c, 0x45, 0x92, 0x6c, 0xf3, 0x39, 0x66, 0x42, 0xf2, 0x35, 0x20, 0x6f, 0x77, 0xbb, 0x59, 0x19, + 0x1d, 0xfe, 0x37, 0x67, 0x2d, 0x31, 0xf5, 0x69, 0xa7, 0x64, 0xab, 0x13, 0x54, 0x25, 0xe9, 0x09, + 0xed, 0x5c, 0x05, 0xca, 0x4c, 0x24, 0x87, 0xbf, 0x18, 0x3e, 0x22, 0xf0, 0x51, 0xec, 0x61, 0x17, + 0x16, 0x5e, 0xaf, 0xd3, 0x49, 0xa6, 0x36, 0x43, 0xf4, 0x47, 0x91, 0xdf, 0x33, 0x93, 0x21, 0x3b, + 0x79, 0xb7, 0x97, 0x85, 0x10, 0xb5, 0xba, 0x3c, 0xb6, 0x70, 0xd0, 0x06, 0xa1, 0xfa, 0x81, 0x82, + 0x83, 0x7e, 0x7f, 0x80, 0x96, 0x73, 0xbe, 0x56, 0x9b, 0x9e, 0x95, 0xd9, 0xf7, 0x02, 0xb9, 0xa4, + 0xde, 0x6a, 0x32, 0x6d, 0xd8, 0x8a, 0x84, 0x72, 0x2a, 0x14, 0x9f, 0x88, 0xf9, 0xdc, 0x89, 0x9a, + 0xfb, 0x7c, 0x2e, 0xc3, 0x8f, 0xb8, 0x65, 0x48, 0x26, 0xc8, 0x12, 0x4a, 0xce, 0xe7, 0xd2, 0x62, + 0x0c, 0xe0, 0x1f, 0xef, 0x11, 0x75, 0x78, 0x71, 0xa5, 0x8e, 0x76, 0x3d, 0xbd, 0xbc, 0x86, 0x57, + 0x0b, 0x28, 0x2f, 0xa3, 0xda, 0xd4, 0xe4, 0x0f, 0xa9, 0x27, 0x53, 0x04, 0x1b, 0xfc, 0xac, 0xe6, + 0x7a, 0x07, 0xae, 0x63, 0xc5, 0xdb, 0xe2, 0xea, 0x94, 0x8b, 0xc4, 0xd5, 0x9d, 0xf8, 0x90, 0x6b, + 0xb1, 0x0d, 0xd6, 0xeb, 0xc6, 0x0e, 0xcf, 0xad, 0x08, 0x4e, 0xd7, 0xe3, 0x5d, 0x50, 0x1e, 0xb3, + 0x5b, 0x23, 0x38, 0x34, 0x68, 0x46, 0x03, 0x8c, 0xdd, 0x9c, 0x7d, 0xa0, 0xcd, 0x1a, 0x41, 0x1c + } +}; + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_gf2p8matrix_multiply_epi64_epi8 (simde__m128i x, simde__m128i A) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + const __m128i byte_select = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1); + const __m128i zero = _mm_setzero_si128(); + __m128i r, a, p, X; + + a = _mm_shuffle_epi8(A, _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8)); + X = x; + r = zero; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = _mm_insert_epi16(zero, _mm_movemask_epi8(a), 0); + p = _mm_shuffle_epi8(p, byte_select); + p = _mm_and_si128(p, _mm_cmpgt_epi8(zero, X)); + r = _mm_xor_si128(r, p); + a = _mm_add_epi8(a, a); + X = _mm_add_epi8(X, X); + } + + return r; + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i zero = _mm_setzero_si128(); + __m128i r, a, p, X; + + a = _mm_shufflehi_epi16(A, (0 << 6) + (1 << 4) + (2 << 2) + (3 << 0)); + a = _mm_shufflelo_epi16(a, (0 << 6) + (1 << 4) + (2 << 2) + (3 << 0)); + a = _mm_or_si128(_mm_slli_epi16(a, 8), _mm_srli_epi16(a, 8)); + X = _mm_unpacklo_epi8(x, _mm_unpackhi_epi64(x, x)); + r = zero; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = _mm_set1_epi16(HEDLEY_STATIC_CAST(short, _mm_movemask_epi8(a))); + p = _mm_and_si128(p, _mm_cmpgt_epi8(zero, X)); + r = _mm_xor_si128(r, p); + a 
= _mm_add_epi8(a, a); + X = _mm_add_epi8(X, X); + } + + return _mm_packus_epi16(_mm_srli_epi16(_mm_slli_epi16(r, 8), 8), _mm_srli_epi16(r, 8)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + static const uint8_t byte_interleave[16] = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; + static const uint8_t byte_deinterleave[16] = {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}; + static const uint8_t mask_d[16] = {128, 128, 64, 64, 32, 32, 16, 16, 8, 8, 4, 4, 2, 2, 1, 1}; + const int8x16_t mask = vreinterpretq_s8_u8(vld1q_u8(mask_d)); + int8x16_t r, a, t, X; + + t = simde__m128i_to_neon_i8(A); + a = vqtbl1q_s8(t, vld1q_u8(byte_interleave)); + t = simde__m128i_to_neon_i8(x); + X = vqtbl1q_s8(t, vld1q_u8(byte_interleave)); + r = vdupq_n_s8(0); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + t = vshrq_n_s8(a, 7); + t = vandq_s8(t, mask); + t = vreinterpretq_s8_u16(vdupq_n_u16(vaddvq_u16(vreinterpretq_u16_s8(t)))); + t = vandq_s8(t, vshrq_n_s8(X, 7)); + r = veorq_s8(r, t); + a = vshlq_n_s8(a, 1); + X = vshlq_n_s8(X, 1); + } + + r = vqtbl1q_s8(r, vld1q_u8(byte_deinterleave)); + return simde__m128i_from_neon_i8(r); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint8_t mask_d[16] = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1}; + const int8x16_t mask = vreinterpretq_s8_u8(vld1q_u8(mask_d)); + int8x16_t r, a, t, X; + int16x8_t t16; + int32x4_t t32; + + a = simde__m128i_to_neon_i8(A); + X = simde__m128i_to_neon_i8(x); + r = vdupq_n_s8(0); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + t = vshrq_n_s8(a, 7); + t = vandq_s8(t, mask); + t16 = vreinterpretq_s16_s8 (vorrq_s8 (t , vrev64q_s8 (t ))); + t32 = vreinterpretq_s32_s16(vorrq_s16(t16, vrev64q_s16(t16))); + t = vreinterpretq_s8_s32 (vorrq_s32(t32, vrev64q_s32(t32))); + t = vandq_s8(t, vshrq_n_s8(X, 7)); + r = veorq_s8(r, t); + a = vshlq_n_s8(a, 1); + X = vshlq_n_s8(X, 1); + } + + return simde__m128i_from_neon_i8(r); + #elif defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_interleave = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_deinterleave= {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}; + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bit_select = {0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120}; + static const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) X; + + X = simde__m128i_to_altivec_i8(x); + a = simde__m128i_to_altivec_u8(A); + X = vec_perm(X, X, byte_interleave); + r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + #if defined(SIMDE_BUG_CLANG_50932) + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_bperm(HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a), bit_select)); + #else + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm_u128(a, bit_select)); + #endif + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_splat(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), p), 3)); + p &= X < zero; + r ^= p; + a += a; + X += X; + } + + r = vec_perm(r, r, 
byte_deinterleave); + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) mask = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1}; + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_select = {7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15}; + static const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) p, r; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) a, X; + + X = simde__m128i_to_altivec_i8(x); + a = simde__m128i_to_altivec_i8(A); + r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = a < zero; + p &= mask; + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_sum2(vec_sum4(p, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero)), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), zero))); + p = vec_perm(p, p, byte_select); + p &= X < zero; + r ^= p; + a += a; + X += X; + } + + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_interleave = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_deinterleave= {0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}; + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) bit_select = {64, 72, 80, 88, 96, 104, 112, 120, 0, 8, 16, 24, 32, 40, 48, 56}; + const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splats(HEDLEY_STATIC_CAST(signed char, 0)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) X; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r; + + X = simde__m128i_to_altivec_i8(x); + a = simde__m128i_to_altivec_u8(A); + X = vec_perm(X, X, byte_interleave); + r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + #if defined(SIMDE_BUG_CLANG_50932) + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_bperm(HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a), bit_select)); + #else + p = vec_bperm(a, bit_select); + #endif + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_splat(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), p), 4)); + p = vec_and(p, vec_cmplt(X, zero)); + r = vec_xor(r, p); + a = vec_add(a, a); + X = vec_add(X, X); + } + + r = vec_perm(r, r, byte_deinterleave); + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) mask = {128, 64, 32, 16, 8, 4, 2, 1, 128, 64, 32, 16, 8, 4, 2, 1}; + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) byte_select = {4, 4, 4, 4, 4, 4, 4, 4, 12, 12, 12, 12, 12, 12, 12, 12}; + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) sevens = vec_splats(HEDLEY_STATIC_CAST(unsigned char, 7)); + const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splats(HEDLEY_STATIC_CAST(signed char, 0)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) X; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) a, p, r; + + X = simde__m128i_to_altivec_i8(x); + a = simde__m128i_to_altivec_u8(A); + r = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), zero); + + #if 
!defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = vec_sr(a, sevens); + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_msum(p, + mask, + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), zero))); + p = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), + vec_sum2s(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), p), + HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), zero))); + p = vec_perm(p, p, byte_select); + p = vec_and(p, vec_cmplt(X, zero)); + r = vec_xor(r, p); + a = vec_add(a, a); + X = vec_add(X, X); + } + + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t zero = wasm_i8x16_splat(0); + v128_t a, p, r, X; + + X = simde__m128i_to_wasm_v128(x); + a = simde__m128i_to_wasm_v128(A); + a = wasm_i8x16_shuffle(a, a, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + X = wasm_i8x16_shuffle(X, X, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); + r = zero; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = wasm_i16x8_splat(HEDLEY_STATIC_CAST(int16_t, wasm_i8x16_bitmask(a))); + p = wasm_v128_and(p, wasm_i8x16_lt(X, zero)); + r = wasm_v128_xor(r, p); + a = wasm_i8x16_add(a, a); + X = wasm_i8x16_add(X, X); + } + + r = wasm_i8x16_shuffle(r, r, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + return simde__m128i_from_wasm_v128(r); + #else + simde__m128i_private + r_, + x_ = simde__m128i_to_private(x), + A_ = simde__m128i_to_private(A); + + const uint64_t ones = UINT64_C(0x0101010101010101); + const uint64_t mask = UINT64_C(0x0102040810204080); + uint64_t q; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + q = simde_endian_bswap64_le(A_.u64[i / 8]); + q &= HEDLEY_STATIC_CAST(uint64_t, x_.u8[i]) * ones; + q ^= q >> 4; + q ^= q >> 2; + q ^= q >> 1; + q &= ones; + q *= 255; + q &= mask; + q |= q >> 32; + q |= q >> 16; + q |= q >> 8; + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, q); + } + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_gf2p8matrix_multiply_epi64_epi8 (simde__m256i x, simde__m256i A) { + #if defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i r, a, p; + const simde__m256i byte_select = simde_x_mm256_set_epu64x(UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202), + UINT64_C(0x0101010101010101), UINT64_C(0x0000000000000000)); + a = simde_mm256_shuffle_epi8(A, simde_mm256_broadcastsi128_si256(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607)))); + r = simde_mm256_setzero_si256(); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = simde_mm256_set1_epi32(simde_mm256_movemask_epi8(a)); + p = simde_mm256_shuffle_epi8(p, byte_select); + p = simde_mm256_xor_si256(r, p); + r = simde_mm256_blendv_epi8(r, p, x); + a = simde_mm256_add_epi8(a, a); + x = simde_mm256_add_epi8(x, x); + } + + return r; + #else + simde__m256i_private + r_, + x_ = simde__m256i_to_private(x), + A_ = simde__m256i_to_private(A); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x_.m128i[i], A_.m128i[i]); + } + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i 
+simde_x_mm512_gf2p8matrix_multiply_epi64_epi8 (simde__m512i x, simde__m512i A) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + simde__m512i r, a, p; + const simde__m512i byte_select = simde_x_mm512_set_epu64(UINT64_C(0x0707070707070707), UINT64_C(0x0606060606060606), UINT64_C(0x0505050505050505), UINT64_C(0x0404040404040404), + UINT64_C(0x0303030303030303), UINT64_C(0x0202020202020202), UINT64_C(0x0101010101010101), UINT64_C(0X0000000000000000)); + a = simde_mm512_shuffle_epi8(A, simde_mm512_broadcast_i32x4(simde_x_mm_set_epu64x(UINT64_C(0x08090A0B0C0D0E0F), UINT64_C(0x0001020304050607)))); + r = simde_mm512_setzero_si512(); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 8 ; i++) { + p = simde_mm512_set1_epi64(HEDLEY_STATIC_CAST(int64_t, simde_mm512_movepi8_mask(a))); + p = simde_mm512_maskz_shuffle_epi8(simde_mm512_movepi8_mask(x), p, byte_select); + r = simde_mm512_xor_si512(r, p); + a = simde_mm512_add_epi8(a, a); + x = simde_mm512_add_epi8(x, x); + } + + return r; + #else + simde__m512i_private + r_, + x_ = simde__m512i_to_private(x), + A_ = simde__m512i_to_private(A); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x_.m256i[i], A_.m256i[i]); + } + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_gf2p8inverse_epi8 (simde__m128i x) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + /* N.B. CM: this fallback may not be faster */ + simde__m128i r, u, t, test; + const simde__m128i sixteens = simde_mm_set1_epi8(16); + const simde__m128i masked_x = simde_mm_and_si128(x, simde_mm_set1_epi8(0x0F)); + + test = simde_mm_set1_epi8(INT8_MIN /* 0x80 */); + x = simde_mm_xor_si128(x, test); + r = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[0], masked_x); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 1 ; i < 16 ; i++) { + t = simde_mm_shuffle_epi8(simde_x_gf2p8inverse_lut.m128i[i], masked_x); + test = simde_mm_add_epi8(test, sixteens); + u = simde_mm_cmplt_epi8(x, test); + r = simde_mm_blendv_epi8(t, r, u); + } + + return r; + #else + simde__m128i_private + r_, + x_ = simde__m128i_to_private(x); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_x_gf2p8inverse_lut.u8[x_.u8[i]]; + } + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_gf2p8inverse_epi8 (simde__m256i x) { + #if defined(SIMDE_X86_AVX2_NATIVE) + /* N.B. 
CM: this fallback may not be faster */ + simde__m256i r, u, t, test; + const simde__m256i sixteens = simde_mm256_set1_epi8(16); + const simde__m256i masked_x = simde_mm256_and_si256(x, simde_mm256_set1_epi8(0x0F)); + + test = simde_mm256_set1_epi8(INT8_MIN /* 0x80 */); + x = simde_mm256_xor_si256(x, test); + r = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[0]), masked_x); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 1 ; i < 16 ; i++) { + t = simde_mm256_shuffle_epi8(simde_mm256_broadcastsi128_si256(simde_x_gf2p8inverse_lut.m128i[i]), masked_x); + test = simde_mm256_add_epi8(test, sixteens); + u = simde_mm256_cmpgt_epi8(test, x); + r = simde_mm256_blendv_epi8(t, r, u); + } + + return r; + #else + simde__m256i_private + r_, + x_ = simde__m256i_to_private(x); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_x_mm_gf2p8inverse_epi8(x_.m128i[i]); + } + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_gf2p8inverse_epi8 (simde__m512i x) { + /* N.B. CM: TODO: later add VBMI version using just two _mm512_permutex2var_epi8 and friends */ + /* But except for Cannon Lake all processors with VBMI also have GFNI */ + #if defined(SIMDE_X86_AVX512BW_NATIVE) + /* N.B. CM: this fallback may not be faster */ + simde__m512i r, test; + const simde__m512i sixteens = simde_mm512_set1_epi8(16); + const simde__m512i masked_x = simde_mm512_and_si512(x, simde_mm512_set1_epi8(0x0F)); + + r = simde_mm512_shuffle_epi8(simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[0]), masked_x); + test = sixteens; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 1 ; i < 16 ; i++) { + r = simde_mm512_mask_shuffle_epi8(r, simde_mm512_cmpge_epu8_mask(x, test), simde_mm512_broadcast_i32x4(simde_x_gf2p8inverse_lut.m128i[i]), masked_x); + test = simde_mm512_add_epi8(test, sixteens); + } + + return r; + #else + simde__m512i_private + r_, + x_ = simde__m512i_to_private(x); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_x_mm256_gf2p8inverse_epi8(x_.m256i[i]); + } + + return simde__m512i_from_private(r_); + #endif +} + +#define simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm_gf2p8matrix_multiply_epi64_epi8(simde_x_mm_gf2p8inverse_epi8(x), A) +#define simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(simde_x_mm256_gf2p8inverse_epi8(x), A) +#define simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A) simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(simde_x_mm512_gf2p8inverse_epi8(x), A) + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_gf2p8affine_epi64_epi8 (simde__m128i x, simde__m128i A, int b) + SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { + return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); +} +#if defined(SIMDE_X86_GFNI_NATIVE) + #define simde_mm_gf2p8affine_epi64_epi8(x, A, b) _mm_gf2p8affine_epi64_epi8(x, A, b) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm_gf2p8affine_epi64_epi8 + #define _mm_gf2p8affine_epi64_epi8(x, A, b) simde_mm_gf2p8affine_epi64_epi8(x, A, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_gf2p8affine_epi64_epi8 (simde__m256i x, 
simde__m256i A, int b) + SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { + return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); +} +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_gf2p8affine_epi64_epi8(x, A, b) _mm256_gf2p8affine_epi64_epi8(x, A, b) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_gf2p8affine_epi64_epi8 + #define _mm256_gf2p8affine_epi64_epi8(x, A, b) simde_mm256_gf2p8affine_epi64_epi8(x, A, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_gf2p8affine_epi64_epi8 (simde__m512i x, simde__m512i A, int b) + SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { + return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); +} +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_gf2p8affine_epi64_epi8(x, A, b) _mm512_gf2p8affine_epi64_epi8(x, A, b) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_gf2p8affine_epi64_epi8 + #define _mm512_gf2p8affine_epi64_epi8(x, A, b) simde_mm512_gf2p8affine_epi64_epi8(x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) +#else + #define simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affine_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_gf2p8affine_epi64_epi8 + #define _mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) +#else + #define simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_gf2p8affine_epi64_epi8 + #define _mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) +#else + #define simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_gf2p8affine_epi64_epi8 + #define _mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affine_epi64_epi8(src, k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) +#else + #define simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affine_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_gf2p8affine_epi64_epi8 + #define _mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affine_epi64_epi8(k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && 
defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) +#else + #define simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affine_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_gf2p8affine_epi64_epi8 + #define _mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affine_epi64_epi8(k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) +#else + #define simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affine_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_gf2p8affine_epi64_epi8 + #define _mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affine_epi64_epi8(k, x, A, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_gf2p8affineinv_epi64_epi8 (simde__m128i x, simde__m128i A, int b) + SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { + return simde_mm_xor_si128(simde_x_mm_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); +} +#if defined(SIMDE_X86_GFNI_NATIVE) + #define simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) _mm_gf2p8affineinv_epi64_epi8(x, A, b) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm_gf2p8affineinv_epi64_epi8 + #define _mm_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm_gf2p8affineinv_epi64_epi8(x, A, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_gf2p8affineinv_epi64_epi8 (simde__m256i x, simde__m256i A, int b) + SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { + return simde_mm256_xor_si256(simde_x_mm256_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm256_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); +} +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) _mm256_gf2p8affineinv_epi64_epi8(x, A, b) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_gf2p8affineinv_epi64_epi8 + #define _mm256_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_gf2p8affineinv_epi64_epi8 (simde__m512i x, simde__m512i A, int b) + SIMDE_REQUIRE_CONSTANT_RANGE(b, 0, 255) { + return simde_mm512_xor_si512(simde_x_mm512_gf2p8matrix_multiply_inverse_epi64_epi8(x, A), simde_mm512_set1_epi8(HEDLEY_STATIC_CAST(int8_t, b))); +} +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) _mm512_gf2p8affineinv_epi64_epi8(x, A, b) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_gf2p8affineinv_epi64_epi8 + #define _mm512_gf2p8affineinv_epi64_epi8(x, A, b) simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) +#else + #define simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_gf2p8affineinv_epi64_epi8 + #define 
_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) +#else + #define simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_gf2p8affineinv_epi64_epi8 + #define _mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm256_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) +#else + #define simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_gf2p8affineinv_epi64_epi8 + #define _mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) simde_mm512_mask_gf2p8affineinv_epi64_epi8(src, k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) +#else + #define simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8affineinv_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_gf2p8affineinv_epi64_epi8 + #define _mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) +#else + #define simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8affineinv_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_gf2p8affineinv_epi64_epi8 + #define _mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm256_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) +#endif + +#if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) +#else + #define simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8affineinv_epi64_epi8(x, A, b)) +#endif +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_gf2p8affineinv_epi64_epi8 + #define _mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) simde_mm512_maskz_gf2p8affineinv_epi64_epi8(k, x, A, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i simde_mm_gf2p8mul_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && (defined(SIMDE_X86_AVX512VL_NATIVE) || !defined(SIMDE_X86_AVX512F_NATIVE)) + return _mm_gf2p8mul_epi8(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const poly8x16_t pa = vreinterpretq_p8_u8(simde__m128i_to_neon_u8(a)); + const poly8x16_t pb = vreinterpretq_p8_u8(simde__m128i_to_neon_u8(b)); + const uint8x16_t lo = vreinterpretq_u8_p16(vmull_p8(vget_low_p8(pa), vget_low_p8(pb))); + #if 
defined (SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x16_t hi = vreinterpretq_u8_p16(vmull_high_p8(pa, pb)); + #else + uint8x16_t hi = vreinterpretq_u8_p16(vmull_p8(vget_high_p8(pa), vget_high_p8(pb))); + #endif + uint8x16x2_t hilo = vuzpq_u8(lo, hi); + uint8x16_t r = hilo.val[0]; + hi = hilo.val[1]; + const uint8x16_t idxHi = vshrq_n_u8(hi, 4); + const uint8x16_t idxLo = vandq_u8(hi, vdupq_n_u8(0xF)); + + #if defined (SIMDE_ARM_NEON_A64V8_NATIVE) + static const uint8_t reduceLutHiData[] = { + 0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, + 0x2f, 0x84, 0x62, 0xc9, 0xb5, 0x1e, 0xf8, 0x53 + }; + static const uint8_t reduceLutLoData[] = { + 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, + 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 + }; + const uint8x16_t reduceLutHi = vld1q_u8(reduceLutHiData); + const uint8x16_t reduceLutLo = vld1q_u8(reduceLutLoData); + r = veorq_u8(r, vqtbl1q_u8(reduceLutHi, idxHi)); + r = veorq_u8(r, vqtbl1q_u8(reduceLutLo, idxLo)); + #else + static const uint8_t reduceLutHiData[] = { + 0x00, 0x2f, + 0xab, 0x84, + 0x4d, 0x62, + 0xe6, 0xc9, + 0x9a, 0xb5, + 0x31, 0x1e, + 0xd7, 0xf8, + 0x7c, 0x53 + }; + static const uint8_t reduceLutLoData[] = { + 0x00, 0xd8, + 0x1b, 0xc3, + 0x36, 0xee, + 0x2d, 0xf5, + 0x6c, 0xb4, + 0x77, 0xaf, + 0x5a, 0x82, + 0x41, 0x99 + }; + const uint8x8x2_t reduceLutHi = vld2_u8(reduceLutHiData); + const uint8x8x2_t reduceLutLo = vld2_u8(reduceLutLoData); + r = veorq_u8(r, vcombine_u8(vtbl2_u8(reduceLutHi, vget_low_u8(idxHi)), vtbl2_u8(reduceLutHi, vget_high_u8(idxHi)))); + r = veorq_u8(r, vcombine_u8(vtbl2_u8(reduceLutLo, vget_low_u8(idxLo)), vtbl2_u8(reduceLutLo, vget_high_u8(idxLo)))); + #endif + return simde__m128i_from_neon_u8(r); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) x, y, lo, hi; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) even, odd, mask0x00FF; + x = simde__m128i_to_altivec_u8(a); + y = simde__m128i_to_altivec_u8(b); + mask0x00FF = vec_splats(HEDLEY_STATIC_CAST(unsigned short, 0x00FF)); + lo = y & HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), mask0x00FF); + hi = y ^ lo; + even = vec_gfmsum(x, lo); + odd = vec_gfmsum(x, hi); + lo = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_sel(vec_rli(odd, 8), even, mask0x00FF)); + hi = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_sel(odd, vec_rli(even, 8), mask0x00FF)); + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) reduceLutHi = {0x00, 0xab, 0x4d, 0xe6, 0x9a, 0x31, 0xd7, 0x7c, 0x2f, 0x84, 0x62, 0xc9, 0xb5, 0x1e, 0xf8, 0x53}; + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) reduceLutLo = {0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41, 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99}; + lo = lo ^ vec_perm(reduceLutHi, reduceLutHi, vec_rli(hi, 4)); + lo = lo ^ vec_perm(reduceLutLo, reduceLutLo, hi); + return simde__m128i_from_altivec_u8(lo); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) x, y, r, t, m; + x = simde__m128i_to_altivec_u8(a); + y = simde__m128i_to_altivec_u8(b); + + const SIMDE_POWER_ALTIVEC_VECTOR(signed char) zero = vec_splat_s8(0); + + m = vec_splat_u8(0x01); + + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) fgp = vec_splats(HEDLEY_STATIC_CAST(unsigned char, SIMDE_X86_GFNI_FGP)); + t = vec_and(y, m); + t = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(t, m)); + r = vec_and(x, t); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 7 ; i++) { + t = 
HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmplt(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), x), zero)); + x = vec_add(x, x); + t = vec_and(fgp, t); + x = vec_xor(x, t); + m = vec_add(m, m); + t = vec_and(y, m); + t = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_cmpeq(t, m)); + t = vec_and(x, t); + r = vec_xor(r, t); + } + + return simde__m128i_from_altivec_u8(r); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t x, y, r, t, m; + x = simde__m128i_to_wasm_v128(a); + y = simde__m128i_to_wasm_v128(b); + + m = wasm_i8x16_splat(0x01); + + const v128_t fgp = wasm_i8x16_splat(SIMDE_X86_GFNI_FGP); + + t = wasm_v128_and(y, m); + t = wasm_i8x16_eq(t, m); + r = wasm_v128_and(x, t); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 7 ; i++) { + t = wasm_i8x16_shr(x, 7); + x = wasm_i8x16_add(x, x); + t = wasm_v128_and(fgp, t); + x = wasm_v128_xor(x, t); + m = wasm_i8x16_add(m, m); + t = wasm_v128_and(y, m); + t = wasm_i8x16_eq(t, m); + t = wasm_v128_and(x, t); + r = wasm_v128_xor(r, t); + } + + return simde__m128i_from_wasm_v128(r); + #elif defined(SIMDE_X86_AVX512BW_NATIVE) + simde__m512i r4, t4, u4; + simde__mmask64 ma, mb; + + simde__m512i a4 = simde_mm512_broadcast_i32x4(a); + const simde__m512i zero = simde_mm512_setzero_si512(); + simde__mmask16 m8 = simde_mm512_cmpeq_epi32_mask(zero, zero); + + const simde__m512i b4 = simde_mm512_broadcast_i32x4(b); + + simde__m512i bits = simde_mm512_set_epi64(0x4040404040404040, + 0x4040404040404040, + 0x1010101010101010, + 0x1010101010101010, + 0x0404040404040404, + 0x0404040404040404, + 0x0101010101010101, + 0x0101010101010101); + + const simde__m512i fgp = simde_mm512_set1_epi8(SIMDE_X86_GFNI_FGP); + + for (int i = 0 ; i < 3 ; i++) { + m8 = simde_kshiftli_mask16(m8, 4); + + ma = simde_mm512_cmplt_epi8_mask(a4, zero); + u4 = simde_mm512_add_epi8(a4, a4); + t4 = simde_mm512_maskz_mov_epi8(ma, fgp); + u4 = simde_mm512_xor_epi32(u4, t4); + + ma = simde_mm512_cmplt_epi8_mask(u4, zero); + u4 = simde_mm512_add_epi8(u4, u4); + t4 = simde_mm512_maskz_mov_epi8(ma, fgp); + a4 = simde_mm512_mask_xor_epi32(a4, m8, u4, t4); + } + + mb = simde_mm512_test_epi8_mask(b4, bits); + bits = simde_mm512_add_epi8(bits, bits); + ma = simde_mm512_cmplt_epi8_mask(a4, zero); + r4 = simde_mm512_maskz_mov_epi8(mb, a4); + mb = simde_mm512_test_epi8_mask(b4, bits); + a4 = simde_mm512_add_epi8(a4, a4); + t4 = simde_mm512_maskz_mov_epi8(ma, fgp); + a4 = simde_mm512_xor_si512(a4, t4); + t4 = simde_mm512_maskz_mov_epi8(mb, a4); + r4 = simde_mm512_xor_si512(r4, t4); + + r4 = simde_mm512_xor_si512(r4, simde_mm512_shuffle_i32x4(r4, r4, (1 << 6) + (0 << 4) + (3 << 2) + 2)); + r4 = simde_mm512_xor_si512(r4, simde_mm512_shuffle_i32x4(r4, r4, (0 << 6) + (3 << 4) + (2 << 2) + 1)); + + return simde_mm512_extracti32x4_epi32(r4, 0); + #elif defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i r2, t2; + simde__m256i a2 = simde_mm256_broadcastsi128_si256(a); + const simde__m256i zero = simde_mm256_setzero_si256(); + const simde__m256i fgp = simde_mm256_set1_epi8(SIMDE_X86_GFNI_FGP); + const simde__m256i ones = simde_mm256_set1_epi8(0x01); + simde__m256i b2 = simde_mm256_set_m128i(simde_mm_srli_epi64(b, 4), b); + + for (int i = 0 ; i < 4 ; i++) { + t2 = simde_mm256_cmpgt_epi8(zero, a2); + t2 = simde_mm256_and_si256(fgp, t2); + a2 = simde_mm256_add_epi8(a2, a2); + a2 = simde_mm256_xor_si256(a2, t2); + } + + a2 = simde_mm256_inserti128_si256(a2, a, 0); + + t2 = simde_mm256_and_si256(b2, ones); + t2 = 
simde_mm256_cmpeq_epi8(t2, ones); + r2 = simde_mm256_and_si256(a2, t2); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 3 ; i++) { + t2 = simde_mm256_cmpgt_epi8(zero, a2); + t2 = simde_mm256_and_si256(fgp, t2); + a2 = simde_mm256_add_epi8(a2, a2); + a2 = simde_mm256_xor_si256(a2, t2); + b2 = simde_mm256_srli_epi64(b2, 1); + t2 = simde_mm256_and_si256(b2, ones); + t2 = simde_mm256_cmpeq_epi8(t2, ones); + t2 = simde_mm256_and_si256(a2, t2); + r2 = simde_mm256_xor_si256(r2, t2); + } + + return simde_mm_xor_si128(simde_mm256_extracti128_si256(r2, 1), + simde_mm256_extracti128_si256(r2, 0)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + simde__m128i r, t; + const simde__m128i zero = simde_mm_setzero_si128(); + const simde__m128i ones = simde_mm_set1_epi8(0x01); + + const simde__m128i fgp = simde_mm_set1_epi8(SIMDE_X86_GFNI_FGP); + + t = simde_mm_and_si128(b, ones); + t = simde_mm_cmpeq_epi8(t, ones); + r = simde_mm_and_si128(a, t); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 7 ; i++) { + t = simde_mm_cmpgt_epi8(zero, a); + t = simde_mm_and_si128(fgp, t); + a = simde_mm_add_epi8(a, a); + a = simde_mm_xor_si128(a, t); + b = simde_mm_srli_epi64(b, 1); + t = simde_mm_and_si128(b, ones); + t = simde_mm_cmpeq_epi8(t, ones); + t = simde_mm_and_si128(a, t); + r = simde_mm_xor_si128(r, t); + } + + return r; + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const uint8_t fgp = SIMDE_X86_GFNI_FGP; + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = 0; + while ((a_.u8[i] != 0) && (b_.u8[i] != 0)) { + if (b_.u8[i] & 1) + r_.u8[i] ^= a_.u8[i]; + + if (a_.u8[i] & 0x80) + a_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.u8[i] << 1) ^ fgp); + else + a_.u8[i] <<= 1; + + b_.u8[i] >>= 1; + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_gf2p8mul_epi8 + #define _mm_gf2p8mul_epi8(a, b) simde_mm_gf2p8mul_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_gf2p8mul_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && (defined(SIMDE_X86_AVX512VL_NATIVE) || (defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE))) + return _mm256_gf2p8mul_epi8(a, b); + #elif !defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + simde__mmask64 ma, mb; + simde__m512i r, t, s; + simde__m512i a2 = simde_mm512_broadcast_i64x4(a); + const simde__m512i zero = simde_mm512_setzero_si512(); + + const simde__m512i fgp = simde_mm512_set1_epi8(SIMDE_X86_GFNI_FGP); + + s = simde_mm512_set1_epi8(0x01); + + for (int i = 0 ; i < 4 ; i++) { + ma = simde_mm512_cmplt_epi8_mask(a2, zero); + a2 = simde_mm512_add_epi8(a2, a2); + t = simde_mm512_xor_si512(a2, fgp); + a2 = simde_mm512_mask_mov_epi8(a2, ma, t); + } + + simde__m512i b2 = simde_mm512_inserti64x4(zero, simde_mm256_srli_epi64(b, 4), 1); + b2 = simde_mm512_inserti64x4(b2, b, 0); + a2 = simde_mm512_inserti64x4(a2, a, 0); + + mb = simde_mm512_test_epi8_mask(b2, s); + r = simde_mm512_maskz_mov_epi8(mb, a2); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 3 ; i++) { + ma = simde_mm512_cmplt_epi8_mask(a2, zero); + s = simde_mm512_add_epi8(s, s); + mb = simde_mm512_test_epi8_mask(b2, s); + a2 = simde_mm512_add_epi8(a2, a2); + t = 
simde_mm512_maskz_mov_epi8(ma, fgp); + a2 = simde_mm512_xor_si512(a2, t); + t = simde_mm512_maskz_mov_epi8(mb, a2); + r = simde_mm512_xor_si512(r, t); + } + + return simde_mm256_xor_si256(simde_mm512_extracti64x4_epi64(r, 1), + simde_mm512_extracti64x4_epi64(r, 0)); + #elif !defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX2_NATIVE) + simde__m256i r, t; + const simde__m256i zero = simde_mm256_setzero_si256(); + const simde__m256i ones = simde_mm256_set1_epi8(0x01); + + const simde__m256i fgp = simde_mm256_set1_epi8(SIMDE_X86_GFNI_FGP); + + t = simde_mm256_and_si256(b, ones); + t = simde_mm256_cmpeq_epi8(t, ones); + r = simde_mm256_and_si256(a, t); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 7 ; i++) { + t = simde_mm256_cmpgt_epi8(zero, a); + t = simde_mm256_and_si256(fgp, t); + a = simde_mm256_add_epi8(a, a); + a = simde_mm256_xor_si256(a, t); + b = simde_mm256_srli_epi64(b, 1); + t = simde_mm256_and_si256(b, ones); + t = simde_mm256_cmpeq_epi8(t, ones); + t = simde_mm256_and_si256(a, t); + r = simde_mm256_xor_si256(r, t); + } + + return r; + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_gf2p8mul_epi8(a_.m128i[i], b_.m128i[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_gf2p8mul_epi8 + #define _mm256_gf2p8mul_epi8(a, b) simde_mm256_gf2p8mul_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_gf2p8mul_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_gf2p8mul_epi8(a, b); + #elif !defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + simde__m512i r, s, t; + simde__mmask64 ma, mb; + const simde__m512i zero = simde_mm512_setzero_si512(); + + const simde__m512i fgp = simde_mm512_set1_epi8(SIMDE_X86_GFNI_FGP); + + s = simde_mm512_set1_epi8(0x01); + + mb = simde_mm512_test_epi8_mask(b, s); + r = simde_mm512_maskz_mov_epi8(mb, a); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (int i = 0 ; i < 7 ; i++) { + ma = simde_mm512_cmplt_epi8_mask(a, zero); + s = simde_mm512_add_epi8(s, s); + mb = simde_mm512_test_epi8_mask(b, s); + a = simde_mm512_add_epi8(a, a); + t = simde_mm512_maskz_mov_epi8(ma, fgp); + a = simde_mm512_xor_si512(a, t); + t = simde_mm512_maskz_mov_epi8(mb, a); + r = simde_mm512_xor_si512(r, t); + } + + return r; + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if !defined(__INTEL_COMPILER) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_gf2p8mul_epi8(a_.m128i[i], b_.m128i[i]); + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_gf2p8mul_epi8 + #define _mm512_gf2p8mul_epi8(a, b) simde_mm512_gf2p8mul_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_gf2p8mul_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return 
_mm_mask_gf2p8mul_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_gf2p8mul_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_gf2p8mul_epi8 + #define _mm_mask_gf2p8mul_epi8(src, k, a, b) simde_mm_mask_gf2p8mul_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_gf2p8mul_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_gf2p8mul_epi8(src, k, a, b); + #else + return simde_mm256_mask_mov_epi8(src, k, simde_mm256_gf2p8mul_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_gf2p8mul_epi8 + #define _mm256_mask_gf2p8mul_epi8(src, k, a, b) simde_mm256_mask_gf2p8mul_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_gf2p8mul_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_gf2p8mul_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_gf2p8mul_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_gf2p8mul_epi8 + #define _mm512_mask_gf2p8mul_epi8(src, k, a, b) simde_mm512_mask_gf2p8mul_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_gf2p8mul_epi8 (simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_gf2p8mul_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_gf2p8mul_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_gf2p8mul_epi8 + #define _mm_maskz_gf2p8mul_epi8(k, a, b) simde_mm_maskz_gf2p8mul_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_gf2p8mul_epi8 (simde__mmask32 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_gf2p8mul_epi8(k, a, b); + #else + return simde_mm256_maskz_mov_epi8(k, simde_mm256_gf2p8mul_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_gf2p8mul_epi8 + #define _mm256_maskz_gf2p8mul_epi8(k, a, b) simde_mm256_maskz_gf2p8mul_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_gf2p8mul_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_GFNI_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_gf2p8mul_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_gf2p8mul_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_gf2p8mul_epi8 + #define _mm512_maskz_gf2p8mul_epi8(k, a, b) simde_mm512_maskz_gf2p8mul_epi8(k, a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_GFNI_H) */ +/* :: End simde/x86/gfni.h :: */ diff --git a/include/simde/x86/mmx.h b/include/simde/x86/mmx.h new file mode 100644 index 00000000..e2ee3e51 --- /dev/null +++ 
b/include/simde/x86/mmx.h @@ -0,0 +1,10667 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
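/*
 * A rough sketch of how HEDLEY_NO_RETURN, HEDLEY_ASSUME and
 * HEDLEY_UNREACHABLE_RETURN (all defined above) are meant to be
 * combined; the function names below are hypothetical. Each macro
 * degrades gracefully (to an empty token, a cast to void, or a plain
 * return) on compilers without the underlying attribute or builtin,
 * so the code stays portable.
 *
 *   HEDLEY_NO_RETURN static void fatal(const char *msg);
 *
 *   static int handle(int tag) {
 *     HEDLEY_ASSUME(tag >= 0);
 *     switch (tag) {
 *       case 0: return 10;
 *       case 1: return 20;
 *       default:
 *         fatal("unexpected tag");
 *         HEDLEY_UNREACHABLE_RETURN(-1);
 *     }
 *   }
 */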
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
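 *
 * As a rough usage sketch, the resulting macros (defined further down
 * in this file) are typically consumed like this; the version range
 * and the worked-around issue are purely illustrative, not a real
 * known clang bug, and the feature macro name is hypothetical:
 *
 *   #if SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \
 *       SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)
 *   #  define MYLIB_WORK_AROUND_HYPOTHETICAL_CLANG_ISSUE 1
 *   #endif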
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
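/* Illustrative sketch, not part of the imported SIMDe sources: one way a
 * consumer could branch on the natural-vector-size macros defined above.
 * The macros report the preferred vector width in bits (0 when only
 * serial code is expected), so the _GE/_LE guards stay false in the
 * serial case.  The function name and block sizes below are hypothetical,
 * chosen only for the example. */
#if 0 /* disabled: illustration only */
#include <stddef.h>

static size_t example_block_elems_f32(void) {
  #if SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(256)
    return 8;  /* 256-bit registers hold 8 floats */
  #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(128)
    return 4;  /* 128-bit registers hold 4 floats */
  #else
    return 1;  /* no usable vector ISA detected: stay scalar */
  #endif
}
#endif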
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
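/* Illustrative sketch, not part of the imported SIMDe sources: what the
 * SIMDE_ENABLE_NATIVE_ALIASES switch handled above is for.  When a given
 * ISA is unavailable, SIMDe can expose the vendor-style names (for
 * example _mm_add_epi32) as aliases of its portable simde_* versions, so
 * existing intrinsics code can compile unmodified.  The include path
 * follows this patch's layout; the function is a hypothetical consumer. */
#if 0 /* disabled: illustration only */
#define SIMDE_ENABLE_NATIVE_ALIASES
#include "simde/x86/sse2.h"

static __m128i example_add(__m128i a, __m128i b) {
  /* On non-x86 targets this resolves to simde_mm_add_epi32. */
  return _mm_add_epi32(a, b);
}
#endif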
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
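/* Illustrative sketch, not part of the imported SIMDe sources: the
 * simde_math_* wrappers defined further below resolve to a compiler
 * builtin, <cmath>, or <math.h> depending on what the probes above find,
 * and to SLEEF when SIMDE_MATH_SLEEF_ENABLE is defined (which also
 * requires linking libsleef).  The function below is a hypothetical
 * consumer, not SIMDe API. */
#if 0 /* disabled: illustration only */
/* #define SIMDE_MATH_SLEEF_ENABLE */  /* opt in to SLEEF before inclusion */
static float example_cos_deg(float degrees) {
  return simde_math_cosf(degrees * SIMDE_MATH_PI_OVER_180F);
}
#endif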
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
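/* Illustrative sketch, not part of the imported SIMDe sources: the
 * classification helpers in this block mirror C99 fpclassify(), returning
 * one of the SIMDE_MATH_FP_* values even when the host libm does not
 * provide the macro.  The expected results are noted in the comments. */
#if 0 /* disabled: illustration only */
static int example_classify(void) {
  int a = simde_math_fpclassifyf(1.0f);                  /* SIMDE_MATH_FP_NORMAL */
  int b = simde_math_fpclassifyf(0.0f);                  /* SIMDE_MATH_FP_ZERO */
  int c = simde_math_fpclassifyf(SIMDE_MATH_INFINITYF);  /* SIMDE_MATH_FP_INFINITE */
  return a + b + c;
}
#endif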
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
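/* Illustrative sketch, not part of the imported SIMDe sources: the
 * roundeven fallbacks above implement round-half-to-even ("banker's
 * rounding"), so ties go to the nearest even integer rather than away
 * from zero.  Expected values are noted in the comments. */
#if 0 /* disabled: illustration only */
static double example_roundeven_ties(void) {
  double a = simde_math_roundeven(0.5);  /* 0.0, not 1.0 */
  double b = simde_math_roundeven(1.5);  /* 2.0 */
  double c = simde_math_roundeven(2.5);  /* 2.0, not 3.0 */
  return a + b + c;                      /* 4.0 */
}
#endif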
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
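/* Illustrative sketch, not part of the imported SIMDe sources: the helpers
 * above provide a standard normal CDF (via the Abramowitz & Stegun 7.1.26
 * erf approximation) and its inverse (a rational approximation), so the
 * values noted in the comments are close approximations, not exact. */
#if 0 /* disabled: illustration only */
static double example_normal_quantiles(void) {
  double p = simde_math_cdfnorm(0.0);       /* ~0.5 */
  double z = simde_math_cdfnorminv(0.975);  /* ~1.96, the familiar 95% two-sided z */
  return p + z;
}
#endif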
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ diff --git a/include/simde/x86/sse.h b/include/simde/x86/sse.h new file mode 100644 index 00000000..08d1ac62 --- /dev/null +++ b/include/simde/x86/sse.h @@ -0,0 +1,15489 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. 
Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include <immintrin.h> +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include <nmmintrin.h> +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include <smmintrin.h> +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include <tmmintrin.h> +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include <pmmintrin.h> +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include <emmintrin.h> +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include <xmmintrin.h> +#elif defined(SIMDE_X86_MMX_NATIVE) + #include <mmintrin.h> +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include <intrin.h> + #else + #include <x86intrin.h> + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include <arm_neon.h> + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include <arm_fp16.h> + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include <arm_sve.h> + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include <wasm_simd128.h> +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + 
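+/* Illustrative usage sketch (not part of the upstream header): consumer code
+ * normally tests the cascaded *_NATIVE macros defined above rather than raw
+ * compiler macros, so e.g. an AVX2 build automatically satisfies the SSE2
+ * check as well.  The helper name below is hypothetical. */
+#if 0 /* sketch only, never compiled */
+static int example_has_native_128bit_simd(void) {
+  #if defined(SIMDE_X86_SSE2_NATIVE) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \
+      defined(SIMDE_WASM_SIMD128_NATIVE) || defined(SIMDE_POWER_ALTIVEC_P5_NATIVE)
+    return 1;  /* a native 128-bit vector ISA was detected above */
+  #else
+    return 0;  /* SIMDe will fall back to portable scalar implementations */
+  #endif
+}
+#endif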
#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using namespace std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include <altivec.h> + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include <vecintrin.h> + #endif + + /* Use these instead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include <loongson-mmiintrin.h> +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include <msa.h> +#endif + +/* This is used to determine whether or not to fall back on a vector + * function from an earlier ISA extension, as well as whether + * we expect any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + 
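+/* Illustrative sketch (not part of the upstream header): with
+ * SIMDE_ENABLE_NATIVE_ALIASES defined before the first SIMDe include, the
+ * aliases enabled above let existing code keep using the vendor intrinsic
+ * names on any target.  The include path and function name below are
+ * hypothetical. */
+#if 0 /* sketch only, never compiled */
+#define SIMDE_ENABLE_NATIVE_ALIASES
+#include "simde/x86/sse2.h"
+static simde__m128i example_add_epi32(simde__m128i a, simde__m128i b) {
+  return _mm_add_epi32(a, b);  /* resolves to simde_mm_add_epi32 on non-x86 */
+}
+#endif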
/* Are floating point values stored using IEEE 754? Knowing + * this during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaguely modern architecture I'm aware of uses IEEE 754, + * so we just assume IEEE 754 for now. There is a test which verifies + * this; if that test fails somewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. + */ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include <lasxintrin.h> +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include <lsxintrin.h> +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in <cmath>, + * <math.h>, compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include <stdint.h> +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include <arm_neon.h> +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + 
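+/* Illustrative sketch (not part of the upstream header): the
+ * SIMDE_IEEE754_STORAGE assumption made above is what allows bit-level
+ * tricks such as reading a float's storage as a uint32_t and inspecting
+ * the sign/exponent fields directly.  The helper name is hypothetical. */
+#if 0 /* sketch only, never compiled */
+#include <string.h>
+static int example_float_sign_bit(float f) {
+  uint32_t bits;
+  memcpy(&bits, &f, sizeof bits);  /* valid because float is IEEE 754 binary32 */
+  return (int) (bits >> 31);       /* the top bit is the sign bit */
+}
+#endif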
/* SLEEF support + * https://sleef.org/ + * + * If you include <sleef.h> prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does require linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include <sleef.h> + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like <math.h> or <cmath> has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guard + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cmath>) + #define SIMDE_MATH_HAVE_CMATH + #include <cmath> + #elif __has_include(<math.h>) + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include <cmath> + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include <math.h> + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + 
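+/* Illustrative sketch (not part of the upstream header): the SIMDE_MATH_*
+ * constants defined above are usable even when the platform's math header
+ * does not provide INFINITY, NAN or M_PI.  The function name is
+ * hypothetical. */
+#if 0 /* sketch only, never compiled */
+static float example_deg_to_rad(float degrees) {
+  return degrees * SIMDE_MATH_PI_OVER_180F;  /* defined unconditionally above */
+}
+#endif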
#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MIN DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include <cfloat> + #else + #include <float.h> + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
     */
+    double tt1, tt2, lnx;
+    double sgn = simde_math_copysign(1.0, x);
+
+    x = (1.0 - x) * (1.0 + x);
+    lnx = simde_math_log(x);
+
+    tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx;
+    tt2 = (1.0 / 0.14829094707965850830078125) * lnx;
+
+    return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2));
+  }
+  #define simde_math_erfinv simde_math_erfinv
+#endif
+
+#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf)
+  static HEDLEY_INLINE
+  float
+  simde_math_erfinvf(float x) {
+    float tt1, tt2, lnx;
+    float sgn = simde_math_copysignf(1.0f, x);
+
+    x = (1.0f - x) * (1.0f + x);
+    lnx = simde_math_logf(x);
+
+    tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx;
+    tt2 = (1.0f / 0.14829094707965850830078125f) * lnx;
+
+    return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2));
+  }
+  #define simde_math_erfinvf simde_math_erfinvf
+#endif
+
+#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt)
+  static HEDLEY_INLINE
+  double
+  simde_math_erfcinv(double x) {
+    if(x >= 0.0625 && x < 2.0) {
+      return simde_math_erfinv(1.0 - x);
+    } else if (x < 0.0625 && x >= 1.0e-100) {
+      static const double p[6] = {
+         0.1550470003116,
+         1.382719649631,
+         0.690969348887,
+        -1.128081391617,
+         0.680544246825,
+        -0.16444156791
+      };
+      static const double q[3] = {
+        0.155024849822,
+        1.385228141995,
+        1.000000000000
+      };
+
+      const double t = 1.0 / simde_math_sqrt(-simde_math_log(x));
+      return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) /
+             (q[0] + t * (q[1] + t * (q[2])));
+    } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) {
+      static const double p[4] = {
+         0.00980456202915,
+         0.363667889171,
+         0.97302949837,
+        -0.5374947401
+      };
+      static const double q[3] = {
+        0.00980451277802,
+        0.363699971544,
+        1.000000000000
+      };
+
+      const double t = 1.0 / simde_math_sqrt(-simde_math_log(x));
+      return (p[0] / t + p[1] + t * (p[2] + t * p[3])) /
+             (q[0] + t * (q[1] + t * (q[2])));
+    } else if (!simde_math_isnormal(x)) {
+      return SIMDE_MATH_INFINITY;
+    } else {
+      return -SIMDE_MATH_INFINITY;
+    }
+  }
+
+  #define simde_math_erfcinv simde_math_erfcinv
+#endif
+
+#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf)
+  static HEDLEY_INLINE
+  float
+  simde_math_erfcinvf(float x) {
+    if(x >= 0.0625f && x < 2.0f) {
+      return simde_math_erfinvf(1.0f - x);
+    } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) {
+      static const float p[6] = {
+         0.1550470003116f,
+         1.382719649631f,
+         0.690969348887f,
+        -1.128081391617f,
+         0.680544246825f,
+        -0.164441567910f
+      };
+      static const float q[3] = {
+        0.155024849822f,
+        1.385228141995f,
+        1.000000000000f
+      };
+
+      const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x));
+      return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) /
+             (q[0] + t * (q[1] + t * (q[2])));
+    } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) {
+      static const float p[4] = {
+         0.00980456202915f,
+         0.36366788917100f,
+         0.97302949837000f,
+        -0.5374947401000f
+      };
+      static const float q[3] = {
+        0.00980451277802f,
+        0.36369997154400f,
+        1.00000000000000f
+      };
+
+      const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x));
+      return (p[0] / t + p[1] + t * (p[2] + t * p[3])) /
+             (q[0] + t * (q[1] + t * (q[2])));
+    } else {
+      return simde_math_isnormalf(x) ?
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
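+ *
+ * As a rough usage sketch of how these pieces fit together (the
+ * `simde_example_*` names are hypothetical; only the SIMDE_ALIGN_*
+ * macros come from this header), a 16-byte aligned buffer and a
+ * helper that reads from it might look like:
+ *
+ *   typedef struct simde_example_i32x4 {
+ *     SIMDE_ALIGN_TO_16 int32_t values[4];
+ *   } simde_example_i32x4;
+ *
+ *   static int32_t
+ *   simde_example_first(const simde_example_i32x4* v) {
+ *     const int32_t* p = SIMDE_ALIGN_ASSUME_TO(&v->values[0], 16);
+ *     return p[0];
+ *   }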
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
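+ *
+ * A purely illustrative sketch of that limitation (assuming the
+ * declspec fallback described above):
+ *
+ *   SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int)) int a;  // may be rejected by MSVC
+ *   SIMDE_ALIGN_TO_16 int b;                    // hard-coded value, accepted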
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
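+ *
+ * For example (an illustrative invocation, not a requirement; `foo.c`
+ * stands in for your own translation unit):
+ *
+ *   cc -O3 -DSIMDE_FAST_MATH -c foo.c
+ *
+ * or, to opt into a single trade-off from source before including
+ * SIMDe:
+ *
+ *   #define SIMDE_FAST_NANS
+ *   #include "simde/arm/neon.h"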
*/
+
+/* Most programs avoid NaNs by never passing values which can result in
+ * a NaN; for example, if you only pass non-negative values to the sqrt
+ * functions, it won't generate a NaN. On some platforms, similar
+ * functions handle NaNs differently; for example, the _mm_min_ps SSE
+ * function will return 0.0 if you pass it (0.0, NaN), but the NEON
+ * vminq_f32 function will return NaN. Making them behave like one
+ * another is expensive; it requires generating a mask of all lanes
+ * with NaNs, then performing the operation (e.g., vminq_f32), then
+ * blending together the result with another vector using the mask.
+ *
+ * If you don't want SIMDe to worry about the differences between how
+ * NaNs are handled on the two platforms, define this (or pass
+ * -ffinite-math-only) */
+#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__)
+ #define SIMDE_FAST_MATH
+#endif
+
+#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
+ #if defined(SIMDE_FAST_MATH)
+ #define SIMDE_FAST_NANS
+ #elif defined(__FINITE_MATH_ONLY__)
+ #if __FINITE_MATH_ONLY__
+ #define SIMDE_FAST_NANS
+ #endif
+ #endif
+#endif
+
+/* Many functions are defined as using the current rounding mode
+ * (i.e., the SIMD version of fegetround()) when converting to
+ * an integer. For example, _mm_cvtpd_epi32. Unfortunately,
+ * on some platforms (such as ARMv8+ where round-to-nearest is
+ * always used, regardless of the FPSCR register) this means we
+ * have to first query the current rounding mode, then choose
+ * the proper function (round, ceil, floor, etc.) */
+#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH)
+ #define SIMDE_FAST_ROUND_MODE
+#endif
+
+/* This controls how ties are rounded. For example, does 10.5 round to
+ * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for
+ * example) doesn't support it and it must be emulated (which is rather
+ * slow). If you're okay with just using the default for whatever arch
+ * you're on, you should definitely define this.
+ *
+ * Note that we don't use this macro to avoid correct implementations
+ * in functions which are explicitly about rounding (such as vrnd* on
+ * NEON, _mm_round_* on x86, etc.); it is only used for code where
+ * rounding is a component in another function, and even then it isn't
+ * usually a problem since such functions will use the current rounding
+ * mode. */
+#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH)
+ #define SIMDE_FAST_ROUND_TIES
+#endif
+
+/* For functions which convert from one type to another (mostly from
+ * floating point to integer types), sometimes we need to do a range
+ * check and potentially return a different result if the value
+ * falls outside that range. Skipping this check can provide a
+ * performance boost, at the expense of faithfulness to the API we're
+ * emulating. */
+#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
+ #define SIMDE_FAST_CONVERSION_RANGE
+#endif
+
+/* Due to differences across platforms, sometimes it can be much
+ * faster for us to allow spurious floating point exceptions,
+ * or to not generate them when we should.
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
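+ *
+ * Illustrative use only (any SIMDE_DIAGNOSTIC_DISABLE_* macro can be
+ * substituted for the float-equal one shown here; this is a sketch,
+ * not a prescribed pattern):
+ *
+ *   r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(
+ *         SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL,
+ *         (a == b));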
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
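+ *
+ * (However SIMDE_ENDIAN_ORDER ends up being defined, consumers simply
+ * compare it against the two constants above; an illustrative sketch:
+ *
+ *   #if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE
+ *     ... little-endian-only code path ...
+ *   #else
+ *     ... big-endian-only code path ...
+ *   #endif
+ *
+ * which mirrors how the simde_endian_bswap64_* helpers below are
+ * selected.)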
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
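+
+   If `float' is not a suitable 32-bit IEEE type on your target you can
+   pre-define the type yourself before including SIMDe; a hypothetical
+   sketch (the _Float32 spelling assumes a compiler that provides it):
+
+     #define SIMDE_FLOAT32_TYPE _Float32
+     #include "simde/arm/neon.h"
+
+   SIMDE_FLOAT32_C() is then defined below in terms of that type.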
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
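+
+   If you are building for such a freestanding environment you can also
+   supply your own routines before including any SIMDe header; a rough
+   sketch (the my_* functions are placeholders, not part of SIMDe):
+
+     #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+     #define simde_memset(s, c, n)      my_memset((s), (c), (n))
+     #define simde_memcmp(s1, s2, n)    my_memcmp((s1), (s2), (n))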
*/
+#if !defined(simde_memcpy)
+ #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
+ #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
+ #endif
+#endif
+#if !defined(simde_memset)
+ #if HEDLEY_HAS_BUILTIN(__builtin_memset)
+ #define simde_memset(s, c, n) __builtin_memset(s, c, n)
+ #endif
+#endif
+#if !defined(simde_memcmp)
+ #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
+ #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
+ #endif
+#endif
+
+#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
+ #if !defined(SIMDE_NO_STRING_H)
+ #if defined(__has_include)
+ #if !__has_include(<string.h>)
+ #define SIMDE_NO_STRING_H
+ #endif
+ #elif (SIMDE_STDC_HOSTED == 0)
+ #define SIMDE_NO_STRING_H
+ #endif
+ #endif
+
+ #if !defined(SIMDE_NO_STRING_H)
+ #include <string.h>
+ #if !defined(simde_memcpy)
+ #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
+ #endif
+ #if !defined(simde_memset)
+ #define simde_memset(s, c, n) memset(s, c, n)
+ #endif
+ #if !defined(simde_memcmp)
+ #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
+ #endif
+ #else
+ /* These are meant to be portable, not fast. If you're hitting them you
+ * should think about providing your own (by defining the simde_memcpy
+ * macro prior to including any SIMDe files) or submitting a patch to
+ * SIMDe so we can detect your system-provided memcpy/memset, like by
+ * adding your compiler to the checks for __builtin_memcpy and/or
+ * __builtin_memset. */
+ #if !defined(simde_memcpy)
+ SIMDE_FUNCTION_ATTRIBUTES
+ void
+ simde_memcpy_(void* dest, const void* src, size_t len) {
+ char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
+ const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
+ for (size_t i = 0 ; i < len ; i++) {
+ dest_[i] = src_[i];
+ }
+ }
+ #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
+ #endif
+
+ #if !defined(simde_memset)
+ SIMDE_FUNCTION_ATTRIBUTES
+ void
+ simde_memset_(void* s, int c, size_t len) {
+ char* s_ = HEDLEY_STATIC_CAST(char*, s);
+ char c_ = HEDLEY_STATIC_CAST(char, c);
+ for (size_t i = 0 ; i < len ; i++) {
+ s_[i] = c_;
+ }
+ }
+ #define simde_memset(s, c, n) simde_memset_(s, c, n)
+ #endif
+
+ #if !defined(simde_memcmp)
+ SIMDE_FUNCTION_ATTRIBUTES
+ int
+ simde_memcmp_(const void *s1, const void *s2, size_t n) {
+ const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
+ const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
+ for (size_t i = 0 ; i < n ; i++) {
+ if (s1_[i] != s2_[i]) {
+ return (int) (s1_[i] - s2_[i]);
+ }
+ }
+ return 0;
+ }
+ #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
+ #endif
+ #endif
+#endif
+
+/*** Functions that quiet a signaling NaN ***/
+
+static HEDLEY_INLINE
+double
+simde_math_quiet(double x) {
+ uint64_t tmp, mask;
+ if (!simde_math_isnan(x)) {
+ return x;
+ }
+ simde_memcpy(&tmp, &x, 8);
+ mask = 0x7ff80000;
+ mask <<= 32;
+ tmp |= mask;
+ simde_memcpy(&x, &tmp, 8);
+ return x;
+}
+
+static HEDLEY_INLINE
+float
+simde_math_quietf(float x) {
+ uint32_t tmp;
+ if (!simde_math_isnanf(x)) {
+ return x;
+ }
+ simde_memcpy(&tmp, &x, 4);
+ tmp |= 0x7fc00000lu;
+ simde_memcpy(&x, &tmp, 4);
+ return x;
+}
+
+#if defined(FE_ALL_EXCEPT)
+ #define SIMDE_HAVE_FENV_H
+#elif defined(__has_include)
+ #if __has_include(<fenv.h>)
+ #include <fenv.h>
+ #define SIMDE_HAVE_FENV_H
+ #endif
+#elif SIMDE_STDC_HOSTED == 1
+ #include <fenv.h>
+ #define SIMDE_HAVE_FENV_H
+#endif
+
+#if defined(EXIT_FAILURE)
+ #define SIMDE_HAVE_STDLIB_H
+#elif defined(__has_include)
+ #if __has_include(<stdlib.h>)
+ #include <stdlib.h>
+ #define SIMDE_HAVE_STDLIB_H
+ #endif
+#elif SIMDE_STDC_HOSTED == 1
+
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \
+ simde_assert_type(double, "g", a, op, b)
+#define simde_assert_ptr(a, op, b) \
+ simde_assert_type(const void*, "p", a, op, b)
+
+#define simde_assert_int8(a, op, b) \
+ simde_assert_type(int8_t, PRIi8, a, op, b)
+#define simde_assert_uint8(a, op, b) \
+ simde_assert_type(uint8_t, PRIu8, a, op, b)
+#define simde_assert_int16(a, op, b) \
+ simde_assert_type(int16_t, PRIi16, a, op, b)
+#define simde_assert_uint16(a, op, b) \
+ simde_assert_type(uint16_t, PRIu16, a, op, b)
+#define simde_assert_int32(a, op, b) \
+ simde_assert_type(int32_t, PRIi32, a, op, b)
+#define simde_assert_uint32(a, op, b) \
+ simde_assert_type(uint32_t, PRIu32, a, op, b)
+#define simde_assert_int64(a, op, b) \
+ simde_assert_type(int64_t, PRIi64, a, op, b)
+#define simde_assert_uint64(a, op, b) \
+ simde_assert_type(uint64_t, PRIu64, a, op, b)
+
+#define simde_assert_ptr_equal(a, b) \
+ simde_assert_ptr(a, ==, b)
+#define simde_assert_ptr_not_equal(a, b) \
+ simde_assert_ptr(a, !=, b)
+#define simde_assert_null(ptr) \
+ simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_not_null(ptr) \
+ simde_assert_ptr(ptr, !=, NULL)
+#define simde_assert_ptr_null(ptr) \
+ simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_ptr_not_null(ptr) \
+ simde_assert_ptr(ptr, !=, NULL)
+
+#endif /* !defined(SIMDE_CHECK_H) */
+/* :: End simde/check.h :: */
+
+/* GCC/clang have a bunch of functionality in builtins which we would
+ * like to access, but the suffixes indicate whether they operate on
+ * int, long, or long long, not fixed width types (e.g., int32_t).
+ * We use these macros to attempt to map from fixed-width to the
+ * names GCC uses. Note that you should still cast the input(s) and
+ * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
+ * types are the same size they may not be compatible according to the
+ * compiler. For example, on x86 long and long long are generally
+ * both 64 bits, but platforms vary on whether an int64_t is mapped
+ * to a long or long long.
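+ *
+ * A concrete illustration (assuming an LP64 target where INT64_MAX ==
+ * LONG_MAX): the checks below pick
+ *
+ *   #define SIMDE_BUILTIN_SUFFIX_64_ l
+ *   #define SIMDE_BUILTIN_TYPE_64_   long
+ *
+ * so SIMDE_BUILTIN_64_(popcount) expands to __builtin_popcountl, and
+ * its argument should first be cast to SIMDE_BUILTIN_TYPE_64_ (long).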
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
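+ *
+ * Typical (illustrative) use is to wrap a single expression whose
+ * implicit signed/unsigned conversion would otherwise trigger the
+ * warning; signed_value and unsigned_value below are placeholders:
+ *
+ *   sum = SIMDE_BUG_IGNORE_SIGN_CONVERSION(signed_value + unsigned_value);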
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (INT16_MIN - (b_.i16[i]))))) { + r_.i16[i] = INT16_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 =
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < INT16_MIN) { + r_.i16[i] = INT16_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < INT16_MIN) { + r_.i16[i + 2] = INT16_MIN; + } else if (b_.i32[i] > INT16_MAX) { +
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count)
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) { + r_.i16[i] = INT16_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); +
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
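+ * (For example, when the current mode is SIMDE_MM_ROUND_DOWN, a
+ * SIMDE_MM_FROUND_CUR_DIRECTION request is intended to round the same
+ * way as an explicit SIMDE_MM_FROUND_TO_NEG_INF request.)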
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
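+/* Illustrative usage sketch only (the values are made up, not part of the
+ * upstream documentation): the *_ps operations above act on all four
+ * float lanes, while the *_ss forms touch only lane 0 and carry lanes
+ * 1-3 through from the first argument:
+ *
+ *   simde__m128 a = simde_mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);     // lanes {1, 2, 3, 4}
+ *   simde__m128 b = simde_mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f); // lanes {10, 20, 30, 40}
+ *   simde__m128 all = simde_mm_add_ps(a, b);                     // lanes {11, 22, 33, 44}
+ *   simde__m128 low = simde_mm_add_ss(a, b);                     // lanes {11, 2, 3, 4}
+ */
+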
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
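+ *
+ * For example, a lane whose mask bits are all ~0 yields the corresponding
+ * lane of b, while an all-zero mask lane keeps the lane of a, which is
+ * exactly what the portable fallback a ^ ((a ^ b) & mask) below computes.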
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
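+/* Editor's note (not part of upstream SIMDe, which is vendored verbatim here):
+ * a minimal, hypothetical usage sketch of the portable wrappers defined in this
+ * header.  It assumes the amalgamated header is included as "simde/x86/sse.h";
+ * the helper name count_below is invented for illustration, and only functions
+ * provided by this file are used (simde_mm_loadu_ps, simde_mm_set1_ps,
+ * simde_mm_cmplt_ps, simde_mm_movemask_ps).
+ *
+ *   #include "simde/x86/sse.h"
+ *
+ *   // Count how many of the four lanes of v are strictly below limit.
+ *   static int count_below(const simde_float32 v[4], simde_float32 limit) {
+ *     simde__m128 x    = simde_mm_loadu_ps(v);                       // unaligned load
+ *     simde__m128 mask = simde_mm_cmplt_ps(x, simde_mm_set1_ps(limit)); // all-ones per matching lane
+ *     int bits         = simde_mm_movemask_ps(mask);                 // one sign bit per lane
+ *     return ((bits >> 0) & 1) + ((bits >> 1) & 1) +
+ *            ((bits >> 2) & 1) + ((bits >> 3) & 1);                  // 0..4
+ *   }
+ */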
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
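For orientation while reviewing this large import: the wrappers defined above mirror the Intel SSE intrinsics one-for-one (simde_mm_* in place of _mm_*), so calling code written against them compiles unchanged on NEON, AltiVec/VSX, LoongArch LSX and WASM as well as x86. The following is a minimal usage sketch, not part of the patch itself; it assumes the project's include/ directory is on the compiler's include path so that "simde/x86/sse.h" resolves, and it uses only functions and macros defined in the header above (simde_mm_set_ps, simde_mm_min_ps, simde_mm_shuffle_ps, SIMDE_MM_SHUFFLE, simde_mm_storeu_ps).

    #include <stdio.h>
    #include "simde/x86/sse.h"   /* assumed include path; adjust to the build setup */

    int main(void) {
      /* Element-wise minimum of two vectors; note that lane 0 is the
         last argument of simde_mm_set_ps. */
      simde__m128 a = simde_mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
      simde__m128 b = simde_mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f);
      simde__m128 m = simde_mm_min_ps(a, b);

      /* SIMDE_MM_SHUFFLE packs four 2-bit lane selectors, highest destination
         lane first, so (0, 1, 2, 3) reverses the four lanes of m. */
      simde__m128 rev = simde_mm_shuffle_ps(m, m, SIMDE_MM_SHUFFLE(0, 1, 2, 3));

      simde_float32 out[4];
      simde_mm_storeu_ps(out, rev);
      printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
      return 0;
    }

On an SSE-capable build each call lowers to the corresponding _mm_* intrinsic; elsewhere the portable fallbacks shown in this header are used instead.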
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ diff --git a/include/simde/x86/sse2.h b/include/simde/x86/sse2.h new file mode 100644 index 00000000..34f98ef9 --- /dev/null +++ b/include/simde/x86/sse2.h @@ -0,0 +1,23182 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. 
Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
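+ *
+ * As an illustrative sketch only (do_simd_path and do_scalar_path are
+ * hypothetical functions, not part of SIMDe), a caller could gate a
+ * hand-written SIMD code path on the expected natural width using the
+ * SIMDE_NATURAL_VECTOR_SIZE_GE helper defined just below:
+ *
+ *   #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
+ *     do_simd_path();    // vectors of at least 128 bits should pay off
+ *   #else
+ *     do_scalar_path();  // expect to be running serial code
+ *   #endif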
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
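+ *
+ * As an illustrative sketch (is_subnormal and v are hypothetical
+ * names, not part of SIMDe), IEEE 754 storage is what makes
+ * bit-pattern tests such as the following valid; the masks match the
+ * ones used for simde_math_issubnormalf further down in this header:
+ *
+ *   #if defined(SIMDE_IEEE754_STORAGE)
+ *     // subnormal iff the exponent bits are all zero and the
+ *     // mantissa bits are not
+ *     is_subnormal =
+ *       ((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) &&
+ *       ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0));
+ *   #endif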
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/
+
+/* Most programs avoid NaNs by never passing values which can result in
+ * a NaN; for example, if you only pass non-negative values to the sqrt
+ * functions, it won't generate a NaN. On some platforms, similar
+ * functions handle NaNs differently; for example, the _mm_min_ps SSE
+ * function will return 0.0 if you pass it (0.0, NaN), but the NEON
+ * vminq_f32 function will return NaN. Making them behave like one
+ * another is expensive; it requires generating a mask of all lanes
+ * with NaNs, then performing the operation (e.g., vminq_f32), then
+ * blending together the result with another vector using the mask.
+ *
+ * If you don't want SIMDe to worry about the differences between how
+ * NaNs are handled on the two platforms, define this (or pass
+ * -ffinite-math-only) */
+#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__)
+  #define SIMDE_FAST_MATH
+#endif
+
+#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS)
+  #if defined(SIMDE_FAST_MATH)
+    #define SIMDE_FAST_NANS
+  #elif defined(__FINITE_MATH_ONLY__)
+    #if __FINITE_MATH_ONLY__
+      #define SIMDE_FAST_NANS
+    #endif
+  #endif
+#endif
+
+/* Many functions are defined as using the current rounding mode
+ * (i.e., the SIMD version of fegetround()) when converting to
+ * an integer. For example, _mm_cvtpd_epi32. Unfortunately,
+ * on some platforms (such as ARMv8+ where round-to-nearest is
+ * always used, regardless of the FPSCR register) this means we
+ * have to first query the current rounding mode, then choose
+ * the proper function (round, ceil, floor, etc.) */
+#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH)
+  #define SIMDE_FAST_ROUND_MODE
+#endif
+
+/* This controls how ties are rounded. For example, does 10.5 round to
+ * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for
+ * example) doesn't support it and it must be emulated (which is rather
+ * slow). If you're okay with just using the default for whatever arch
+ * you're on, you should definitely define this.
+ *
+ * Note that we don't use this macro to avoid correct implementations
+ * in functions which are explicitly about rounding (such as vrnd* on
+ * NEON, _mm_round_* on x86, etc.); it is only used for code where
+ * rounding is a component in another function, and even then it isn't
+ * usually a problem since such functions will use the current rounding
+ * mode. */
+#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH)
+  #define SIMDE_FAST_ROUND_TIES
+#endif
+
+/* For functions which convert from one type to another (mostly from
+ * floating point to integer types), sometimes we need to do a range
+ * check and potentially return a different result if the value
+ * falls outside that range. Skipping this check can provide a
+ * performance boost, at the expense of faithfulness to the API we're
+ * emulating. */
+#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH)
+  #define SIMDE_FAST_CONVERSION_RANGE
+#endif
+
+/* Due to differences across platforms, sometimes it can be much
+ * faster for us to allow spurious floating point exceptions,
+ * or to not generate them when we should.
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/
+#if !defined(simde_memcpy)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
+    #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
+  #endif
+#endif
+#if !defined(simde_memset)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memset)
+    #define simde_memset(s, c, n) __builtin_memset(s, c, n)
+  #endif
+#endif
+#if !defined(simde_memcmp)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
+    #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
+  #endif
+#endif
+
+#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
+  #if !defined(SIMDE_NO_STRING_H)
+    #if defined(__has_include)
+      #if !__has_include(<string.h>)
+        #define SIMDE_NO_STRING_H
+      #endif
+    #elif (SIMDE_STDC_HOSTED == 0)
+      #define SIMDE_NO_STRING_H
+    #endif
+  #endif
+
+  #if !defined(SIMDE_NO_STRING_H)
+    #include <string.h>
+    #if !defined(simde_memcpy)
+      #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
+    #endif
+    #if !defined(simde_memset)
+      #define simde_memset(s, c, n) memset(s, c, n)
+    #endif
+    #if !defined(simde_memcmp)
+      #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
+    #endif
+  #else
+    /* These are meant to be portable, not fast. If you're hitting them you
+     * should think about providing your own (by defining the simde_memcpy
+     * macro prior to including any SIMDe files) or submitting a patch to
+     * SIMDe so we can detect your system-provided memcpy/memset, like by
+     * adding your compiler to the checks for __builtin_memcpy and/or
+     * __builtin_memset. */
+    #if !defined(simde_memcpy)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memcpy_(void* dest, const void* src, size_t len) {
+        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
+        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
+        for (size_t i = 0 ; i < len ; i++) {
+          dest_[i] = src_[i];
+        }
+      }
+      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
+    #endif
+
+    #if !defined(simde_memset)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memset_(void* s, int c, size_t len) {
+        char* s_ = HEDLEY_STATIC_CAST(char*, s);
+        char c_ = HEDLEY_STATIC_CAST(char, c);
+        for (size_t i = 0 ; i < len ; i++) {
+          s_[i] = c_;
+        }
+      }
+      #define simde_memset(s, c, n) simde_memset_(s, c, n)
+    #endif
+
+    #if !defined(simde_memcmp)
+      SIMDE_FUNCTION_ATTRIBUTES
+      int
+      simde_memcmp_(const void *s1, const void *s2, size_t n) {
+        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
+        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
+        for (size_t i = 0 ; i < n ; i++) {
+          if (s1_[i] != s2_[i]) {
+            return (int) (s1_[i] - s2_[i]);
+          }
+        }
+        return 0;
+      }
+      #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
+    #endif
+  #endif
+#endif
+
+/*** Functions that quiet a signaling NaN ***/
+
+static HEDLEY_INLINE
+double
+simde_math_quiet(double x) {
+  uint64_t tmp, mask;
+  if (!simde_math_isnan(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 8);
+  mask = 0x7ff80000;
+  mask <<= 32;
+  tmp |= mask;
+  simde_memcpy(&x, &tmp, 8);
+  return x;
+}
+
+static HEDLEY_INLINE
+float
+simde_math_quietf(float x) {
+  uint32_t tmp;
+  if (!simde_math_isnanf(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 4);
+  tmp |= 0x7fc00000lu;
+  simde_memcpy(&x, &tmp, 4);
+  return x;
+}
+
+#if defined(FE_ALL_EXCEPT)
+  #define SIMDE_HAVE_FENV_H
+#elif defined(__has_include)
+  #if __has_include(<fenv.h>)
+    #include <fenv.h>
+    #define SIMDE_HAVE_FENV_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+  #include <fenv.h>
+  #define SIMDE_HAVE_FENV_H
+#endif
+
+#if defined(EXIT_FAILURE)
+  #define SIMDE_HAVE_STDLIB_H
+#elif defined(__has_include)
+  #if __has_include(<stdlib.h>)
+    #include <stdlib.h>
+    #define SIMDE_HAVE_STDLIB_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \
+  simde_assert_type(double, "g", a, op, b)
+#define simde_assert_ptr(a, op, b) \
+  simde_assert_type(const void*, "p", a, op, b)
+
+#define simde_assert_int8(a, op, b) \
+  simde_assert_type(int8_t, PRIi8, a, op, b)
+#define simde_assert_uint8(a, op, b) \
+  simde_assert_type(uint8_t, PRIu8, a, op, b)
+#define simde_assert_int16(a, op, b) \
+  simde_assert_type(int16_t, PRIi16, a, op, b)
+#define simde_assert_uint16(a, op, b) \
+  simde_assert_type(uint16_t, PRIu16, a, op, b)
+#define simde_assert_int32(a, op, b) \
+  simde_assert_type(int32_t, PRIi32, a, op, b)
+#define simde_assert_uint32(a, op, b) \
+  simde_assert_type(uint32_t, PRIu32, a, op, b)
+#define simde_assert_int64(a, op, b) \
+  simde_assert_type(int64_t, PRIi64, a, op, b)
+#define simde_assert_uint64(a, op, b) \
+  simde_assert_type(uint64_t, PRIu64, a, op, b)
+
+#define simde_assert_ptr_equal(a, b) \
+  simde_assert_ptr(a, ==, b)
+#define simde_assert_ptr_not_equal(a, b) \
+  simde_assert_ptr(a, !=, b)
+#define simde_assert_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+#define simde_assert_ptr_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_ptr_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+
+#endif /* !defined(SIMDE_CHECK_H) */
+/* :: End simde/check.h :: */
+
+/* GCC/clang have a bunch of functionality in builtins which we would
+ * like to access, but the suffixes indicate whether they operate on
+ * int, long, or long long, not fixed width types (e.g., int32_t).
+ * We use these macros to attempt to map from fixed-width to the
+ * names GCC uses. Note that you should still cast the input(s) and
+ * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
+ * types are the same size they may not be compatible according to the
+ * compiler. For example, on x86 long and long long are generally
+ * both 64 bits, but platforms vary on whether an int64_t is mapped
+ * to a long or long long.
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
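+      /* Loongson MMI counterpart of the NEON branch above: an element-wise,
+       * wrapping 8-bit add. */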
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
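+/* A minimal usage sketch for the wrapping/saturating adds above (assumptions:
+ * the imported headers are reachable as <simde/x86/mmx.h>, and the portable
+ * fallbacks above are acceptable on non-MMX targets):
+ *
+ *   #include <simde/x86/mmx.h>
+ *
+ *   simde__m64 a = simde_mm_set_pi8(1, 2, 3, 4, 5, 6, 7, 127);
+ *   simde__m64 b = simde_mm_set1_pi8(1);
+ *   simde__m64 wrapped   = simde_mm_add_pi8(a, b);   // lane 0: 127 + 1 wraps to -128
+ *   simde__m64 saturated = simde_mm_adds_pi8(a, b);  // lane 0: 127 + 1 stays at 127
+ */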
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
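+/* Worked example of the logical vs. arithmetic right shifts above (a sketch,
+ * values chosen purely for illustration):
+ *
+ *   simde__m64 v = simde_mm_set1_pi16(INT16_MIN);       // every lane 0x8000
+ *   simde_mm_srli_pi16(v, 4);   // logical:    0x0800 ==  2048
+ *   simde_mm_srai_pi16(v, 4);   // arithmetic: 0xF800 == -2048
+ */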
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of a.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
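+/* Illustrative sketch only: the helper below is hypothetical (it is not an
+ * upstream SIMDe API) and is included here to show how the bitwise wrappers
+ * defined above compose.  It uses nothing beyond simde_mm_xor_ps and
+ * simde_mm_and_ps, and mirrors the a ^ ((a ^ b) & mask) bit-select trick that
+ * the portable fallback of simde_x_mm_select_ps (further below) relies on. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_x_example_bitselect_ps (simde__m128 a, simde__m128 b, simde__m128 mask) {
+  /* Keeps a where mask bits are 0 and takes b where they are set; the mask
+   * lanes are assumed to be either all-zeros or all-ones. */
+  return simde_mm_xor_ps(a, simde_mm_and_ps(simde_mm_xor_ps(a, b), mask));
+}
+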
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
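+/* Illustrative usage sketch -- not part of the generated SIMDE sources.
+   The simde_mm_* functions in this header are drop-in stand-ins for the
+   matching _mm_* SSE intrinsics: on x86 with SSE enabled they forward to
+   the native intrinsic, elsewhere they fall back to the portable paths
+   above (NEON, AltiVec, WASM, LSX, or scalar loops).  The helper name
+   hsum4_example is hypothetical; everything it calls (simde_mm_loadu_ps,
+   simde_mm_movehl_ps, simde_mm_add_ps, simde_mm_add_ss,
+   simde_mm_shuffle_ps, simde_mm_cvtss_f32) is provided by this header.
+
+   static inline simde_float32 hsum4_example(const simde_float32 *p) {
+     simde__m128 v  = simde_mm_loadu_ps(p);              // unaligned load of 4 floats
+     simde__m128 hi = simde_mm_movehl_ps(v, v);          // [v2, v3, v2, v3]
+     simde__m128 s  = simde_mm_add_ps(v, hi);            // [v0+v2, v1+v3, ...]
+     s = simde_mm_add_ss(s, simde_mm_shuffle_ps(s, s, 0x55)); // lane 0 = total
+     return simde_mm_cvtss_f32(s);                       // extract the scalar sum
+   }
+*/
+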
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i];
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b)
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b)
+# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_max_ss (simde__m128 a, simde__m128 b) {
+ #if defined(SIMDE_X86_SSE_NATIVE)
+ return _mm_max_ss(a, b);
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+ return simde_mm_move_ss(a, simde_mm_max_ps(a, b));
+ #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+ return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));
+ #else
+ simde__m128_private
+ r_,
+ a_ = simde__m128_to_private(a),
+ b_ = simde__m128_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0);
+ r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0);
+ #else
+ r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0];
+ r_.f32[1] = a_.f32[1];
+ r_.f32[2] = a_.f32[2];
+ r_.f32[3] = a_.f32[3];
+ #endif
+
+ return simde__m128_from_private(r_);
+ #endif
+}
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m64
+simde_mm_min_pi16 (simde__m64 a, simde__m64 b) {
+ #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE)
+ return _mm_min_pi16(a, b);
+ #else
+ simde__m64_private
+ r_,
+ a_ = simde__m64_to_private(a),
+ b_ = simde__m64_to_private(b);
+
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16);
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
+ r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i];
+ }
+ #endif
+
+ return simde__m64_from_private(r_);
+ #endif
+}
+#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b)
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b)
+# define _m_pminsw(a, b) simde_mm_min_pi16(a, b)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_min_ps (simde__m128 a, simde__m128 b) {
+ #if defined(SIMDE_X86_SSE_NATIVE)
+ return _mm_min_ps(a, b);
+ #else
+ simde__m128_private
+ r_,
+ a_ = simde__m128_to_private(a),
+ b_ = simde__m128_to_private(b);
+
+ #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+ r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32);
+ #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+ r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128);
+ #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)
+ #if defined(SIMDE_FAST_NANS)
+ r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32);
+ #else
+ r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32));
+ #endif
+ #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE)
+ r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32);
+ #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+ uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32);
+ r_.f32 =
+ HEDLEY_REINTERPRET_CAST(
+ __typeof__(r_.f32),
+ ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) |
+ (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m)
+ )
+ );
+ #else
+ SIMDE_VECTORIZE
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+ r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
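+    // Descriptive note: after the shift-and-accumulate above, byte 0 of 'paired'
+    // holds the packed sign bits of lanes 0-1 and byte 8 holds those of lanes 2-3,
+    // so combining byte 0 with byte 8 shifted left by 2 yields the 4-bit movemask.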
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
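+/* Illustrative usage sketch for SIMDE_MM_TRANSPOSE4_PS (the identifiers m and
+ * r0..r3 below are hypothetical, not part of the header): given a row-major
+ * float m[16], the macro transposes the four rows in place, e.g.
+ *
+ *   simde__m128 r0 = simde_mm_loadu_ps(&m[ 0]);
+ *   simde__m128 r1 = simde_mm_loadu_ps(&m[ 4]);
+ *   simde__m128 r2 = simde_mm_loadu_ps(&m[ 8]);
+ *   simde__m128 r3 = simde_mm_loadu_ps(&m[12]);
+ *   SIMDE_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
+ *
+ * Afterwards r0 holds column 0 of the original matrix (m[0], m[4], m[8], m[12]),
+ * r1 holds column 1, and so on. */
+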
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + 
#if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ diff --git a/include/simde/x86/sse3.h b/include/simde/x86/sse3.h new file mode 100644 index 00000000..f974f979 --- /dev/null +++ b/include/simde/x86/sse3.h @@ -0,0 +1,23700 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. 
Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
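+ *
+ * (Illustrative note added by the editor, not part of the upstream header:
+ * once this file is included, the macros defined below can be used to guard
+ * version-specific workarounds, e.g.
+ *
+ *   #if SIMDE_DETECT_CLANG_VERSION_NOT(9, 0, 0)
+ *     // extra pragmas / fallbacks for clang-based compilers older than 9.0
+ *   #endif
+ *
+ * where SIMDE_DETECT_CLANG_VERSION_NOT is the "older than" check defined
+ * further down in this header.)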
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
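 * As a small illustration of the "_TO" / "_LIKE" distinction described
 * above (the type and variable names here are placeholders, not part of
 * this API):
 *
 *   SIMDE_ALIGN_TO(16)          int32_t a[4];  // alignment given as a number
 *   SIMDE_ALIGN_LIKE(my_vec128) int32_t b[4];  // alignment derived from a type
 *
 * Both request the same boundary whenever my_vec128 is 16-byte aligned; the
 * _LIKE form just computes the number via SIMDE_ALIGN_OF, subject to the
 * MSVC caveats documented with each macro below.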
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
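 * As a concrete sketch of the build-line usage described above (assuming a
 * GCC/Clang-style driver; the macros and flags are the ones named in this
 * section):
 *
 *   cc -O2 -DSIMDE_FAST_MATH ...                 enable every trade-off
 *   cc -O2 -ffast-math -DSIMDE_NO_FAST_NANS ...  fast-math implies the rest,
 *                                                but SIMDe keeps its
 *                                                NaN-faithful code paths
 *
 * The explicit SIMDE_FAST_* / SIMDE_NO_FAST_* defines always take precedence
 * over whatever -ffast-math or -ffinite-math-only would otherwise imply.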
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
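 * (A worked example for the SIMDE_FAST_ROUND_TIES note above: under IEEE 754
 * round-to-nearest-even, 10.5 rounds to 10 and 11.5 rounds to 12, while
 * round-half-away-from-zero gives 11 and 12. Emulating one convention on a
 * platform that only provides the other is what makes the faithful path
 * slow.)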
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
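 * A usage sketch (the wrapped comparison is illustrative only, not a real
 * call site from SIMDe):
 *
 *   r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL,
 *                                      (a == b));
 *
 * i.e. push the diagnostic state, suppress -Wfloat-equal, evaluate the
 * expression, then pop the state, all within one statement expression.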
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
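 * (Worked example: on x86-64 Linux with GCC or Clang, __BYTE_ORDER__ equals
 * __ORDER_LITTLE_ENDIAN__, so the first branch above fires and
 * SIMDE_ENDIAN_ORDER becomes SIMDE_ENDIAN_LITTLE, i.e. 1234.)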
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
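 * (A worked example for the byte-order helpers defined just above: on a
 * little-endian host, simde_endian_bswap64_be(UINT64_C(0x0102030405060708))
 * yields 0x0807060504030201, while simde_endian_bswap64_le() returns its
 * argument unchanged; on a big-endian host the two roles are swapped.)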
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
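 * For example, a freestanding build that cannot use string.h can route these
 * through its own routines by defining the macros before including any SIMDe
 * header (my_memcpy/my_memset/my_memcmp are hypothetical names):
 *
 *   #define simde_memcpy(d, s, n) my_memcpy((d), (s), (n))
 *   #define simde_memset(s, c, n) my_memset((s), (c), (n))
 *   #define simde_memcmp(a, b, n) my_memcmp((a), (b), (n))
 *
 * Otherwise the fallback order below is: the __builtin_* forms when the
 * compiler provides them, then the string.h functions, then the slow
 * portable loops.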
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \
+  simde_assert_type(double, "g", a, op, b)
+#define simde_assert_ptr(a, op, b) \
+  simde_assert_type(const void*, "p", a, op, b)
+
+#define simde_assert_int8(a, op, b) \
+  simde_assert_type(int8_t, PRIi8, a, op, b)
+#define simde_assert_uint8(a, op, b) \
+  simde_assert_type(uint8_t, PRIu8, a, op, b)
+#define simde_assert_int16(a, op, b) \
+  simde_assert_type(int16_t, PRIi16, a, op, b)
+#define simde_assert_uint16(a, op, b) \
+  simde_assert_type(uint16_t, PRIu16, a, op, b)
+#define simde_assert_int32(a, op, b) \
+  simde_assert_type(int32_t, PRIi32, a, op, b)
+#define simde_assert_uint32(a, op, b) \
+  simde_assert_type(uint32_t, PRIu32, a, op, b)
+#define simde_assert_int64(a, op, b) \
+  simde_assert_type(int64_t, PRIi64, a, op, b)
+#define simde_assert_uint64(a, op, b) \
+  simde_assert_type(uint64_t, PRIu64, a, op, b)
+
+#define simde_assert_ptr_equal(a, b) \
+  simde_assert_ptr(a, ==, b)
+#define simde_assert_ptr_not_equal(a, b) \
+  simde_assert_ptr(a, !=, b)
+#define simde_assert_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+#define simde_assert_ptr_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_ptr_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+
+#endif /* !defined(SIMDE_CHECK_H) */
+/* :: End simde/check.h :: */
+
+/* GCC/clang have a bunch of functionality in builtins which we would
+ * like to access, but the suffixes indicate whether they operate on
+ * int, long, or long long, not fixed width types (e.g., int32_t).
+ * We use these macros to attempt to map from fixed-width to the
+ * names GCC uses. Note that you should still cast the input(s) and
+ * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
+ * types are the same size they may not be compatible according to the
+ * compiler. For example, on x86 long and long long are generally
+ * both 64 bits, but platforms vary on whether an int64_t is mapped
+ * to a long or long long.
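+ * As a concrete illustration (assuming a typical LP64 target, where
+ * int64_t and long are both 64 bits), SIMDE_BUILTIN_SUFFIX_64_ below is
+ * defined to l, so
+ *
+ *   SIMDE_BUILTIN_64_(ffs)
+ *
+ * expands to __builtin_ffsl, and SIMDE_BUILTIN_TYPE_64_ is long, the
+ * type that arguments and results should be cast to and from.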
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
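+/* The SIMDE_BUG_* macros defined in this block only record that the
+ * compiler version being used is affected by a known problem; later code
+ * in SIMDe typically selects a workaround with a guard along the lines of
+ *
+ *   #if !defined(SIMDE_BUG_GCC_100927)
+ *     (native fast path)
+ *   #else
+ *     (portable fallback)
+ *   #endif
+ *
+ * and defining SIMDE_IGNORE_COMPILER_BUGS skips this detection entirely. */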
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
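+ * The wrapped expression keeps its value and type; only the diagnostic is
+ * silenced for that one expression, so a caller can write a use along
+ * these lines (illustrative, not a specific call site):
+ *
+ *   r_.u16[i] = SIMDE_BUG_IGNORE_SIGN_CONVERSION(a_.u16[i] + b_.u16[i]);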
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
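+/* simde_mm_adds_pu8: lane-wise unsigned saturating addition of packed
+ * 8-bit values; any sum above UINT8_MAX is clamped to UINT8_MAX, as the
+ * portable loop below makes explicit. */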
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
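+/* Editorial note (not from the upstream simde sources): simde_mm_srai_pi16 is an
+ * *arithmetic* right shift, so the sign bit is replicated into the vacated bits.
+ * For example, a lane holding -16 (bit pattern 0xFFF0) shifted right by 2 becomes
+ * -4, whereas the logical simde_mm_srli_pi16 above shifts in zeros and yields
+ * 16380 for the same bit pattern. */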
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
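+   *
+   * (Editorial addition, describing the code below: on the targets listed in the
+   * #if that follows, a SIMDE_MM_FROUND_CUR_DIRECTION request is rewritten in
+   * terms of SIMDE_MM_GET_ROUNDING_MODE() before entering the switch.)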
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
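+        // vrndq_f32 lowers to the ARMv8 FRINTZ instruction, which rounds
+        // toward zero (truncation) and so matches SIMDE_MM_FROUND_TO_ZERO.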
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
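+      // (i.e. the remnants of a: `value` is { b0, 0, 0, 0 }, so the vaddq_f32
+      // below only changes lane 0, while lanes 1-3 compute a + 0 and carry
+      // a's upper values through, as _mm_add_ss requires.)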
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
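+/* Usage sketch (illustrative only, not part of the simde API itself): the
+ * bitwise helpers above are the usual building blocks for sign-bit tricks.
+ * For example, assuming `x` is an arbitrary simde__m128, every lane of `x`
+ * can be negated by XORing with the sign-bit mask:
+ *
+ *   simde__m128 sign_mask = simde_mm_set1_ps(-0.0f);   // 0x80000000 per lane
+ *   simde__m128 negated   = simde_mm_xor_ps(x, sign_mask);
+ *
+ * simde_x_mm_xorsign_ps() further below combines simde_mm_and_ps() with the
+ * same XOR trick to flip the sign of one operand wherever the other operand
+ * is negative. */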
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
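Every wrapper above follows the same pattern: a portable simde_mm_* implementation with one branch per target (SSE, NEON, AltiVec/VSX, WASM SIMD128, LoongArch LSX, plus a scalar fallback), and an _mm_* alias that is only defined when SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES or SIMDE_ENABLE_NATIVE_ALIASES is set. As a minimal usage sketch, not part of the imported header and assuming the headers end up reachable as "simde/x86/sse.h" (the include path is an assumption, not taken from this patch), calling code can be written once against these names:

    #include <stdalign.h>
    #include <stdio.h>

    #define SIMDE_ENABLE_NATIVE_ALIASES   /* optional: also exposes the classic _mm_* names */
    #include "simde/x86/sse.h"            /* assumed include path; adjust to the installed location */

    int main(void) {
      alignas(16) float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      alignas(16) float b[4] = {8.0f, 7.0f, 6.0f, 5.0f};
      alignas(16) float out[4];

      /* Aligned loads, element-wise multiply, aligned store; each call resolves to
         whichever SIMDE_*_NATIVE branch applies, or to the scalar loop otherwise. */
      simde__m128 va = simde_mm_load_ps(a);
      simde__m128 vb = simde_mm_load_ps(b);
      simde_mm_store_ps(out, simde_mm_mul_ps(va, vb));

      printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
      return 0;
    }

The same translation unit compiles unchanged on x86 and on the non-x86 targets handled above; when SIMDE_X86_SSE_NATIVE is defined the calls collapse to the genuine SSE intrinsics.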
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
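+/* Usage sketch for the transpose macro above (the row variables here are
+ * placeholders, not part of the generated API): the macro rewrites its four
+ * arguments in place, so after
+ *
+ *   simde__m128 r0 = simde_mm_set_ps(a3, a2, a1, a0);
+ *   simde__m128 r1 = simde_mm_set_ps(b3, b2, b1, b0);
+ *   simde__m128 r2 = simde_mm_set_ps(c3, c2, c1, c0);
+ *   simde__m128 r3 = simde_mm_set_ps(d3, d2, d1, d0);
+ *   SIMDE_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
+ *
+ * r0 holds { a0, b0, c0, d0 }, r1 holds { a1, b1, c1, d1 }, and so on,
+ * which is the same in-place 4x4 transpose performed by the x86
+ * _MM_TRANSPOSE4_PS macro. */
+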
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + 
#if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + 
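+  /* Note: the round-trip through simde_memcpy (rather than a pointer cast)
+   * keeps this public/private conversion well defined under strict aliasing;
+   * compilers generally reduce it to a plain register move. */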
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
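+   *
+   * A minimal usage sketch (the variable names are illustrative only,
+   * assuming a mask produced by one of the SSE2-style comparison
+   * functions in this header):
+   *
+   *   simde__m128d m = simde_mm_cmplt_pd(a, b);        // each lane is 0 or ~0
+   *   simde__m128d r = simde_x_mm_select_pd(a, b, m);  // lane i: m[i] ? b : a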
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
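+   *
+   * Rough illustration (values chosen only for the example): if a is
+   * { 3.0, <possibly garbage> }, the result is { 3.0, 3.0 }, so a
+   * full-width operation on the result can only raise the exceptions
+   * that the low lane itself would raise.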
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ diff --git a/include/simde/x86/sse4.1.h b/include/simde/x86/sse4.1.h new file mode 100644 index 00000000..cbc3a89c --- /dev/null +++ b/include/simde/x86/sse4.1.h @@ -0,0 +1,27123 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
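/* (Editorial aside, illustrative only; not part of the upstream header.)
 * Every simde_math_* wrapper in this file follows the same three-tier
 * dispatch: use the compiler builtin when SIMDE_MATH_BUILTIN_LIBM reports
 * it, otherwise fall back to the <cmath> overloads when
 * SIMDE_MATH_HAVE_CMATH is defined, and finally to the plain <math.h>
 * function.  Callers never see which branch was chosen.  A minimal sketch,
 * where `decay` is a hypothetical helper and not something SIMDe defines:
 *
 *   static inline double decay(double x) {
 *     // resolves to __builtin_exp/__builtin_fabs, std::exp/std::fabs,
 *     // or exp()/fabs() depending on which branch was selected above
 *     return simde_math_exp(-simde_math_fabs(x));
 *   }
 */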
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
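 *
 * (Editorial note, added for clarity.)  The closed form implemented
 * below appears to be Winitzki's approximation with a = 0.14829...;
 * in terms of the variables used in the code:
 *
 *   tt1 = 2 / (pi * a) + ln(1 - x^2) / 2
 *   tt2 = ln(1 - x^2) / a
 *   erfinv(x) ~= sign(x) * sqrt( sqrt(tt1^2 - tt2) - tt1 )
 *
 * which matches the tt1/tt2/lnx computation that follows.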
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
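 *
 * (Editorial aside, illustrative only.)  The SIMDE_CONSTIFY_* helpers
 * defined just above this alignment header are used roughly as follows,
 * where `simde_x_shift` is a hypothetical wrapper whose last argument
 * would normally have to be an integer constant expression:
 *
 *   int r;
 *   // expands to: switch (imm) { case 0: r = simde_x_shift(v, 0); break;
 *   //                            ...     default: r = 0; break; }
 *   SIMDE_CONSTIFY_4_(simde_x_shift, r, 0, imm, v);
 *
 * so a run-time `imm` still reaches the wrapper as a literal constant in
 * every reachable branch, at the cost of an emitted switch.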
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
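+ *
+ * For example (illustrative compiler invocations only):
+ *
+ *   cc -O3 -DSIMDE_FAST_MATH ...                 # all trade-offs enabled
+ *   cc -O3 -ffast-math -DSIMDE_NO_FAST_NANS ...  # fast math, but keep
+ *                                                # faithful NaN handling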
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
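+ *
+ * For example, it can be paired with one of the SIMDE_DIAGNOSTIC_DISABLE_*
+ * macros defined below (the operands a and b are only placeholders):
+ *
+ *   eq = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(
+ *     SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);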
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
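+ *
+ * For instance, a freestanding build can supply its own routine before
+ * including any SIMDe header (my_memcpy is only a placeholder name):
+ *
+ *   #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+ *   #include "simde/x86/sse2.h"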
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
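+ *
+ * As a sketch of the intended pattern, using popcount as the example
+ * builtin:
+ *
+ *   #if SIMDE_BUILTIN_HAS_64_(popcount)
+ *     n = SIMDE_BUILTIN_64_(popcount)(
+ *       HEDLEY_STATIC_CAST(SIMDE_BUILTIN_TYPE_64_, value));
+ *   #endif
+ *
+ * which expands to __builtin_popcountl(...) or __builtin_popcountll(...)
+ * depending on whether int64_t maps to long or long long on the target.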
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
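+        /* Descriptive note (not from the upstream header): every branch of this
+         * SIMDE_MM_FROUND_TO_ZERO case truncates toward zero -- vec_trunc,
+         * FRINTZ via vrndq_f32, __lsx_vfrintrz_s, wasm_f32x4_trunc and
+         * simde_math_truncf -- e.g. 1.9f -> 1.0f and -1.9f -> -1.0f. */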
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
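+    // (i.e. the remnants of a): only lane 0 becomes a[0] + b[0], the other
+    // three lanes pass through from a unchanged, so for example
+    // add_ss({1, 2, 3, 4}, {10, 20, 30, 40}) == {11, 2, 3, 4}.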
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
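+/* Example: the bitwise helpers above (simde_mm_and_ps, simde_mm_andnot_ps,
+ * simde_mm_xor_ps and simde_mm_or_ps below) compose into common branchless
+ * float idioms.  A minimal caller-side sketch, with x/a/b/mask standing in
+ * for whatever vectors the caller already has:
+ *
+ *   // negate every lane by flipping the IEEE-754 sign bit
+ *   simde__m128 neg = simde_mm_xor_ps(x, simde_mm_set1_ps(-0.0f));
+ *
+ *   // blend: take b where a lane of mask is all-ones, else a
+ *   simde__m128 sel = simde_mm_or_ps(simde_mm_and_ps(mask, b),
+ *                                    simde_mm_andnot_ps(mask, a));
+ *
+ * simde_x_mm_select_ps further down implements the same blend with
+ * per-target fast paths. */
+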
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
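+/* Illustrative usage sketch, not part of the generated SIMDe code: the
+ * simde_mm_* wrappers in this header are drop-in stand-ins for the
+ * corresponding SSE intrinsics, so SSE-style code can be written once and
+ * lowered to NEON, AltiVec, WASM SIMD or LSX by the per-architecture branches
+ * implemented here. The helper name clamp01_first is hypothetical; every
+ * simde_* function and macro it uses is defined elsewhere in this header.
+ *
+ *   static simde_float32 clamp01_first (const simde_float32 v[4]) {
+ *     simde__m128 x  = simde_mm_loadu_ps(v);                 // four floats, no alignment requirement
+ *     simde__m128 lo = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0));
+ *     simde__m128 hi = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0));
+ *     x = simde_mm_min_ps(simde_mm_max_ps(x, lo), hi);       // clamp every lane to [0, 1]
+ *     return simde_mm_cvtss_f32(x);                          // read back lane 0
+ *   }
+ *
+ * On x86 with SSE enabled these calls collapse to the native intrinsics; on
+ * other targets they fall through to the portable implementations.
+ */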
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? 
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
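+/* Editorial annotation (not part of the generated SIMDe sources): the
+ * simde_x_mm_blendv_epi{8,16,32,64} helpers around this point all use the same
+ * portable fallback -- arithmetic-shift each mask lane so it becomes all-ones
+ * when its sign bit is set and all-zeros otherwise, then merge bits from b and
+ * a.  A minimal scalar model of one 16-bit lane, assuming arithmetic right
+ * shift of negative values (which the fallback above also relies on); the
+ * helper name blendv16_lane is hypothetical, for illustration only:
+ *
+ *   int16_t blendv16_lane(int16_t a, int16_t b, int16_t mask) {
+ *     int16_t m = mask >> 15;               // 0xFFFF if mask < 0, else 0x0000
+ *     return (int16_t)((m & b) | (~m & a)); // sign bit set -> take b, else a
+ *   }
+ */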
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
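+/* Editorial annotation (not part of the generated SIMDe sources): the
+ * simde_mm_cvtepiNN_epiMM and simde_mm_cvtepuNN_epiMM functions in this block
+ * widen the low lanes of their argument, sign-extending for epi sources and
+ * zero-extending for epu sources.  A minimal usage sketch, assuming only the
+ * portable simde API (simde_mm_set_epi16 comes from the SSE2 portion of these
+ * headers); lane values are listed with lane 0 first:
+ *
+ *   simde__m128i v  = simde_mm_set_epi16(0, 0, 0, 0, -2, -1, 1, 2);
+ *   simde__m128i sx = simde_mm_cvtepi16_epi32(v); // lanes {2, 1, -1, -2}
+ *   simde__m128i zx = simde_mm_cvtepu16_epi32(v); // lanes {2, 1, 65535, 65534}
+ */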
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ diff --git a/include/simde/x86/sse4.2.h b/include/simde/x86/sse4.2.h new file mode 100644 index 00000000..fb32c870 --- /dev/null +++ b/include/simde/x86/sse4.2.h @@ -0,0 +1,27507 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
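+ *
+ * As a rough illustration of how the detected version is meant to be
+ * consumed (using the SIMDE_DETECT_CLANG_VERSION_NOT helper defined
+ * further below; the 10.0.0 cutoff here is only an example, not a real
+ * workaround), a fix needed only on older clang-based compilers could
+ * be guarded like:
+ *
+ *   #if SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)
+ *     // pragmas or fallback code for clang-based compilers < 10
+ *   #endif
+ *
+ * instead of testing __clang_major__/__clang_minor__ directly, since
+ * vendors may redefine those macros to their own version numbers.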
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
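+ * As a purely illustrative invocation (the compiler driver name, file
+ * name, and optimization flags are placeholders), an Intel compiler
+ * build enabling this might look roughly like:
+ *
+ *   icc -qopenmp-simd -DSIMDE_ENABLE_OPENMP -O2 -c foo.c
+ *
+ * so that SIMDe uses OpenMP SIMD rather than falling back to Cilk+.
+ *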
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+  #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR)
+    #define SIMDE_ZARCH_ZVECTOR_15_NATIVE
+  #endif
+#endif
+
+#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+  #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR)
+    #define SIMDE_ZARCH_ZVECTOR_14_NATIVE
+  #endif
+#endif
+
+#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+  #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR)
+    #define SIMDE_ZARCH_ZVECTOR_13_NATIVE
+  #endif
+#endif
+
+#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
+  /* AltiVec conflicts with lots of stuff.  The bool keyword conflicts
+   * with the bool keyword in C++ and the bool macro in C99+ (defined
+   * in stdbool.h).  The vector keyword conflicts with std::vector in
+   * C++ if you are `using std;`.
+   *
+   * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel`
+   * instead, but altivec.h will unconditionally define
+   * `vector`/`bool`/`pixel` so we need to work around that.
+   *
+   * Unfortunately this means that if your code uses AltiVec directly
+   * it may break.  If this is the case you'll want to define
+   * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe.  Or, even
+   * better, port your code to use the double-underscore versions. */
+  #if defined(bool)
+    #undef bool
+  #endif
+
+  #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+    #include <altivec.h>
+
+    #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
+      #if defined(vector)
+        #undef vector
+      #endif
+      #if defined(pixel)
+        #undef pixel
+      #endif
+      #if defined(bool)
+        #undef bool
+      #endif
+    #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */
+  #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
+    #include <vecintrin.h>
+  #endif
+
+  /* Use these instead of vector/pixel/bool in SIMDe. */
+  #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T
+  #define SIMDE_POWER_ALTIVEC_PIXEL __pixel
+  #define SIMDE_POWER_ALTIVEC_BOOL __bool
+
+  /* Re-define bool if we're using stdbool.h */
+  #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF)
+    #define bool _Bool
+  #endif
+#endif
+
+#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+  #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI)
+    #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1
+  #endif
+#endif
+#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE)
+  #include <loongson-mmiintrin.h>
+#endif
+
+#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE)
+  #if defined(SIMDE_ARCH_MIPS_MSA)
+    #define SIMDE_MIPS_MSA_NATIVE 1
+  #endif
+#endif
+#if defined(SIMDE_MIPS_MSA_NATIVE)
+  #include <msa.h>
+#endif
+
+/* This is used to determine whether or not to fall back on a vector
+ * function in an earlier ISA extension, as well as whether
+ * we expect any attempts at vectorization to be fruitful or if we
+ * expect to always be running serial code.
+ *
+ * Note that, for some architectures (okay, *one* architecture) there
+ * can be a split where some types are supported for one vector length
+ * but others only for a shorter length.  Therefore, it is possible to
+ * provide separate values for float/int/double types.
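+ *
+ * A purely illustrative sketch: code built on top of these definitions
+ * typically branches on the helper macros defined just below, e.g.:
+ *
+ *   #if SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(256)
+ *     // provide a 256-bit float implementation
+ *   #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128)
+ *     // provide a 128-bit implementation
+ *   #else
+ *     // portable scalar fallback
+ *   #endif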
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE)
+    #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_FMA_NATIVE)
+    #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512F_NATIVE)
+    #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512VL_NATIVE)
+    #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512VBMI_NATIVE)
+    #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE)
+    #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512BW_NATIVE)
+    #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512VNNI_NATIVE)
+    #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE)
+    #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512BF16_NATIVE)
+    #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512BITALG_NATIVE)
+    #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE)
+    #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512DQ_NATIVE)
+    #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_AVX512CD_NATIVE)
+    #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_GFNI_NATIVE)
+    #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_PCLMUL_NATIVE)
+    #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE)
+    #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_X86_F16C_NATIVE)
+    #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES
+  #endif
+
+  #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE)
+    #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES
+  #endif
+  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+    #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES
+  #endif
+
+  #if !defined(SIMDE_ARM_SVE_NATIVE)
+    #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES
+  #endif
+
+  #if !defined(SIMDE_WASM_SIMD128_NATIVE)
+    #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES
+  #endif
+#endif
+
+/* Are floating point values stored using IEEE 754?  Knowing
+ * this during preprocessing is a bit tricky, mostly because what
+ * we're curious about is how values are stored and not whether the
+ * implementation is fully conformant in terms of rounding, NaN
+ * handling, etc.
+ *
+ * For example, if you use -ffast-math or -Ofast on
+ * GCC or clang, IEEE 754 isn't strictly followed, so IEEE 754
+ * support is not advertised (by defining __STDC_IEC_559__).
+ *
+ * However, what we care about is whether it is safe to assume that
+ * floating point values are stored in IEEE 754 format, in which case
+ * we can provide faster implementations of some functions.
+ *
+ * Luckily every vaguely modern architecture I'm aware of uses IEEE 754,
+ * so we just assume IEEE 754 for now.  There is a test which verifies
+ * this; if that test fails somewhere please let us know and we'll add
+ * an exception for that platform.  Meanwhile, you can define
+ * SIMDE_NO_IEEE754_STORAGE.
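+ *
+ * As a rough illustration of what this assumption buys: when IEEE 754
+ * storage can be assumed, helpers such as the subnormal checks later in
+ * this header can simply reinterpret a float as a uint32_t and test the
+ * exponent and mantissa bit fields directly instead of calling into libm.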
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/
+#if !defined(simde_memcpy)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
+    #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
+  #endif
+#endif
+#if !defined(simde_memset)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memset)
+    #define simde_memset(s, c, n) __builtin_memset(s, c, n)
+  #endif
+#endif
+#if !defined(simde_memcmp)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
+    #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
+  #endif
+#endif
+
+#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
+  #if !defined(SIMDE_NO_STRING_H)
+    #if defined(__has_include)
+      #if !__has_include(<string.h>)
+        #define SIMDE_NO_STRING_H
+      #endif
+    #elif (SIMDE_STDC_HOSTED == 0)
+      #define SIMDE_NO_STRING_H
+    #endif
+  #endif
+
+  #if !defined(SIMDE_NO_STRING_H)
+    #include <string.h>
+    #if !defined(simde_memcpy)
+      #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
+    #endif
+    #if !defined(simde_memset)
+      #define simde_memset(s, c, n) memset(s, c, n)
+    #endif
+    #if !defined(simde_memcmp)
+      #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
+    #endif
+  #else
+    /* These are meant to be portable, not fast. If you're hitting them you
+     * should think about providing your own (by defining the simde_memcpy
+     * macro prior to including any SIMDe files) or submitting a patch to
+     * SIMDe so we can detect your system-provided memcpy/memset, like by
+     * adding your compiler to the checks for __builtin_memcpy and/or
+     * __builtin_memset. */
+    #if !defined(simde_memcpy)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memcpy_(void* dest, const void* src, size_t len) {
+        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
+        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
+        for (size_t i = 0 ; i < len ; i++) {
+          dest_[i] = src_[i];
+        }
+      }
+      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
+    #endif
+
+    #if !defined(simde_memset)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memset_(void* s, int c, size_t len) {
+        char* s_ = HEDLEY_STATIC_CAST(char*, s);
+        char c_ = HEDLEY_STATIC_CAST(char, c);
+        for (size_t i = 0 ; i < len ; i++) {
+          s_[i] = c_;
+        }
+      }
+      #define simde_memset(s, c, n) simde_memset_(s, c, n)
+    #endif
+
+    #if !defined(simde_memcmp)
+      SIMDE_FUNCTION_ATTRIBUTES
+      int
+      simde_memcmp_(const void *s1, const void *s2, size_t n) {
+        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
+        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
+        for (size_t i = 0 ; i < n ; i++) {
+          if (s1_[i] != s2_[i]) {
+            return (int) (s1_[i] - s2_[i]);
+          }
+        }
+        return 0;
+      }
+      #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
+    #endif
+  #endif
+#endif
+
+/*** Functions that quiet a signaling NaN ***/
+
+static HEDLEY_INLINE
+double
+simde_math_quiet(double x) {
+  uint64_t tmp, mask;
+  if (!simde_math_isnan(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 8);
+  mask = 0x7ff80000;
+  mask <<= 32;
+  tmp |= mask;
+  simde_memcpy(&x, &tmp, 8);
+  return x;
+}
+
+static HEDLEY_INLINE
+float
+simde_math_quietf(float x) {
+  uint32_t tmp;
+  if (!simde_math_isnanf(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 4);
+  tmp |= 0x7fc00000lu;
+  simde_memcpy(&x, &tmp, 4);
+  return x;
+}
+
+#if defined(FE_ALL_EXCEPT)
+  #define SIMDE_HAVE_FENV_H
+#elif defined(__has_include)
+  #if __has_include(<fenv.h>)
+    #include <fenv.h>
+    #define SIMDE_HAVE_FENV_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+  #include <fenv.h>
+  #define SIMDE_HAVE_FENV_H
+#endif
+
+#if defined(EXIT_FAILURE)
+  #define SIMDE_HAVE_STDLIB_H
+#elif defined(__has_include)
+  #if __has_include(<stdlib.h>)
+    #include <stdlib.h>
+    #define SIMDE_HAVE_STDLIB_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+  #include <stdlib.h>
+  #define SIMDE_HAVE_STDLIB_H
+#endif
+
+#if defined(__has_include)
+#  if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include(<cfenv>)
+#    include <cfenv>
+#  elif __has_include(<fenv.h>)
+#    include <fenv.h>
+#  endif
+#  if __has_include(<stdlib.h>)
+#    include <stdlib.h>
+#  endif
+#elif SIMDE_STDC_HOSTED == 1
+#  include <stdlib.h>
+#  include <fenv.h>
+#endif
+
+#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \
+  static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \
+  T_To \
+  Name (T_From value) { \
+    T_To r; \
+    simde_memcpy(&r, &value, sizeof(r)); \
+    return r; \
+  }
+
+SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32)
+SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t)
+SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64)
+SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t)
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/check.h :: */
+/* Check (assertions)
+ * Portable Snippets - https://github.com/nemequ/portable-snippets
+ * Created by Evan Nemerson
+ *
+ * To the extent possible under law, the authors have waived all
+ * copyright and related or neighboring rights to this code. For
+ * details, see the Creative Commons Zero 1.0 Universal license at
+ * https://creativecommons.org/publicdomain/zero/1.0/
+ *
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+#if !defined(SIMDE_CHECK_H)
+#define SIMDE_CHECK_H
+
+#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)
+#  define SIMDE_NDEBUG 1
+#endif
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+#include
+
+#if !defined(_WIN32)
+#  define SIMDE_SIZE_MODIFIER "z"
+#  define SIMDE_CHAR_MODIFIER "hh"
+#  define SIMDE_SHORT_MODIFIER "h"
+#else
+#  if defined(_M_X64) || defined(__amd64__)
+#    define SIMDE_SIZE_MODIFIER "I64"
+#  else
+#    define SIMDE_SIZE_MODIFIER ""
+#  endif
+#  define SIMDE_CHAR_MODIFIER ""
+#  define SIMDE_SHORT_MODIFIER ""
+#endif
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1500)
+#  define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127))
+#  define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop))
+#else
+#  define SIMDE_PUSH_DISABLE_MSVC_C4127_
+#  define SIMDE_POP_DISABLE_MSVC_C4127_
+#endif
+
+#if !defined(simde_errorf)
+#  if defined(__has_include)
+#    if __has_include(<stdio.h>)
+#      include <stdio.h>
+#    endif
+#  elif defined(SIMDE_STDC_HOSTED)
+#    if SIMDE_STDC_HOSTED == 1
+#      include <stdio.h>
+#    endif
+#  elif defined(__STDC_HOSTED__)
+#    if __STDC_HOSTED__ == 1
+#      include <stdio.h>
+#    endif
+#  endif
+
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/debug-trap.h :: */
+/* Debugging assertions and traps
+ * Portable Snippets - https://github.com/nemequ/portable-snippets
+ * Created by Evan Nemerson
+ *
+ * To the extent possible under law, the authors have waived all
+ * copyright and related or neighboring rights to this code.
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \
+  simde_assert_type(double, "g", a, op, b)
+#define simde_assert_ptr(a, op, b) \
+  simde_assert_type(const void*, "p", a, op, b)
+
+#define simde_assert_int8(a, op, b) \
+  simde_assert_type(int8_t, PRIi8, a, op, b)
+#define simde_assert_uint8(a, op, b) \
+  simde_assert_type(uint8_t, PRIu8, a, op, b)
+#define simde_assert_int16(a, op, b) \
+  simde_assert_type(int16_t, PRIi16, a, op, b)
+#define simde_assert_uint16(a, op, b) \
+  simde_assert_type(uint16_t, PRIu16, a, op, b)
+#define simde_assert_int32(a, op, b) \
+  simde_assert_type(int32_t, PRIi32, a, op, b)
+#define simde_assert_uint32(a, op, b) \
+  simde_assert_type(uint32_t, PRIu32, a, op, b)
+#define simde_assert_int64(a, op, b) \
+  simde_assert_type(int64_t, PRIi64, a, op, b)
+#define simde_assert_uint64(a, op, b) \
+  simde_assert_type(uint64_t, PRIu64, a, op, b)
+
+#define simde_assert_ptr_equal(a, b) \
+  simde_assert_ptr(a, ==, b)
+#define simde_assert_ptr_not_equal(a, b) \
+  simde_assert_ptr(a, !=, b)
+#define simde_assert_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+#define simde_assert_ptr_null(ptr) \
+  simde_assert_ptr(ptr, ==, NULL)
+#define simde_assert_ptr_not_null(ptr) \
+  simde_assert_ptr(ptr, !=, NULL)
+
+#endif /* !defined(SIMDE_CHECK_H) */
+/* :: End simde/check.h :: */
+
+/* GCC/clang have a bunch of functionality in builtins which we would
+ * like to access, but the suffixes indicate whether they operate on
+ * int, long, or long long, not fixed width types (e.g., int32_t).
+ * we use these macros to attempt to map from fixed-width to the
+ * names GCC uses. Note that you should still cast the input(s) and
+ * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if
+ * types are the same size they may not be compatible according to the
+ * compiler. For example, on x86 long and long long are generally
+ * both 64 bits, but platforms vary on whether an int64_t is mapped
+ * to a long or long long.
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
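+/* Illustrative usage sketch (comment only, with arbitrary example values):
+ * the portable simde_* spellings above are called just like the MMX
+ * originals, e.g.
+ *
+ *   simde__m64 v  = simde_mm_set_pi16(-100, 256, -4, 4);
+ *   simde__m64 s  = simde_mm_srai_pi16(v, 2);
+ *   int32_t    lo = simde_mm_cvtsi64_si32(s);
+ *
+ * The arithmetic shift keeps the sign of each 16-bit lane (-25, 64, -1, 1 in
+ * the same order as the set_pi16 arguments), and simde_mm_cvtsi64_si32
+ * returns the low 32 bits (lanes 0 and 1). Both helpers are defined earlier
+ * in this header. */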
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
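(Editorial aside, not part of the imported SIMDe sources: the SSE1 portion of this amalgamated header ends just below. As a minimal, hedged sketch of how a consumer is expected to use the portable wrappers defined above -- the file name, the include path, and the use of simde_mm_set_ps, which is defined earlier in this header, are assumptions of this example rather than anything the patch itself adds -- a stand-alone translation unit could look like this:

/* transpose_demo.c -- hypothetical consumer of the header above (sketch only) */
#include <stdio.h>
#include "simde/x86/sse.h"

int main(void) {
  /* simde_mm_set_ps takes lanes high-to-low, so row0 holds {1, 2, 3, 4} in lanes 0..3. */
  simde__m128 row0 = simde_mm_set_ps( 4.0f,  3.0f,  2.0f,  1.0f);
  simde__m128 row1 = simde_mm_set_ps( 8.0f,  7.0f,  6.0f,  5.0f);
  simde__m128 row2 = simde_mm_set_ps(12.0f, 11.0f, 10.0f,  9.0f);
  simde__m128 row3 = simde_mm_set_ps(16.0f, 15.0f, 14.0f, 13.0f);

  /* In-place 4x4 transpose built from the unpack/move helpers defined above;
   * the same code runs whether the backend is SSE, NEON, AltiVec, LSX, WASM or scalar. */
  SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3);

  float out[4];
  simde_mm_storeu_ps(out, row0);                            /* defined above */
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* expected: 1 5 9 13 */
  return 0;
}

Nothing in the example requires an x86 host; it builds wherever the fallback branches above compile, which is presumably the point of vendoring these headers.)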
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
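+/* Editor's note, not part of upstream SIMDE: the fallback paths of
+ * simde_mm_avg_epu8 / simde_mm_avg_epu16 above compute the rounded average
+ * as (a + b + 1) >> 1 in a wider type -- explicitly via SIMDE_CONVERT_VECTOR_
+ * in the vector path, implicitly via integer promotion in the scalar loop --
+ * so the "+ 1" can never wrap in the element width.  A minimal scalar sketch
+ * of the same idea, assuming only <stdint.h> and using an illustrative name;
+ * kept under "#if 0" so it is never compiled into the header: */
+#if 0
+static inline uint8_t simde_example_avg_u8(uint8_t a, uint8_t b) {
+  /* e.g. a = 200, b = 101: (200 + 101 + 1) >> 1 = 151, i.e. the true average
+   * 150.5 rounded up; an unwidened 8-bit addition would wrap to 45 first. */
+  return (uint8_t) (((uint16_t) a + (uint16_t) b + 1) >> 1);
+}
+#endif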
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
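+  /* Gathers the odd-indexed 32-bit lanes of the two operands,
+   *   r = { a[1], a[3], b[1], b[3] },
+   * mirroring the *_deinterleaveeven_* helpers above; this matches the
+   * vuzp2q_s32 / odd-index shuffle paths below. */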
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
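+    /* Emulated path: duplicate the odd-indexed lanes, r = { a[1], a[1], a[3], a[3] }
+     * (simde_mm_moveldup_ps further below does the same with the even lanes). */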
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
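+    /* Both paths below compute the rounded Q15 high half:
+     *   r[i] = (int16_t)(((int32_t)a[i] * b[i] + 0x4000) >> 15)
+     * e.g. a[i] = 16384 (0.5 in Q15) and b[i] = 8192 (0.25) give
+     * (16384*8192 + 0x4000) >> 15 = 4096, i.e. 0.125 in Q15. */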
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ diff --git a/include/simde/x86/ssse3.h b/include/simde/x86/ssse3.h new file mode 100644 index 00000000..41699b47 --- /dev/null +++ b/include/simde/x86/ssse3.h @@ -0,0 +1,24760 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell 
copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ?
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_2) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
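+ *
+ * As an illustrative sketch of that pattern (not from the original
+ * header text; it uses macros defined elsewhere in this file), a
+ * warning is typically silenced for a bounded region like
+ *
+ *   HEDLEY_DIAGNOSTIC_PUSH
+ *   SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_
+ *   ...code that intentionally compares floating-point values...
+ *   HEDLEY_DIAGNOSTIC_POP
+ *
+ * so the including project's own warning settings are restored once
+ * the region ends.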
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
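+ *
+ * (Illustrative example, assuming a GCC- or Clang-style driver rather
+ * than the Intel one mentioned above: the rough equivalent is
+ *
+ *   cc -O2 -fopenmp-simd -DSIMDE_ENABLE_OPENMP -c foo.c
+ *
+ * where `foo.c` is just a placeholder; -fopenmp-simd enables the
+ * `#pragma omp simd` annotations without linking the OpenMP runtime.)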
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
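+ /* Illustrative note, not from upstream simde: this block, like the
+  * others in this header, can be overridden at build time. Defining
+  * SIMDE_X86_XOP_NO_NATIVE forces the portable implementation for this
+  * one extension, and defining SIMDE_NO_NATIVE before including any
+  * SIMDe header (e.g. `cc -DSIMDE_NO_NATIVE ...`) disables every
+  * native code path. */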
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
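+ *
+ * As an illustrative sketch (not part of the original comment), a
+ * consumer could select between code paths with the helper macros
+ * defined just below:
+ *
+ *   #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
+ *     ...128-bit (or wider) vector implementation...
+ *   #else
+ *     ...portable scalar fallback...
+ *   #endif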
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang, IEEE 754 isn't strictly followed, therefore IEEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaguely modern architecture I'm aware of uses IEEE 754, + * so we just assume IEEE 754 for now. There is a test which verifies + * this; if that test fails somewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
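+ * + * As a quick illustration (a minimal sketch, assuming <stdint.h> and <string.h>; sign_bit_f32 is a hypothetical name, not something this header defines), IEEE 754 storage is what makes bit-level shortcuts such as reading a float's sign straight out of a uint32_t view of it safe: + * + * static int sign_bit_f32(float v) { + * uint32_t bits; + * memcpy(&bits, &v, sizeof(bits)); // bits now hold the IEEE 754 binary32 representation + * return (int) (bits >> 31); // the sign occupies the top bit + * }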
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
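+ * + * Concretely, with a denoting the constant above and lnx = log(1 - x*x) for the input x, the code below computes + * + * tt1 = 2 / (pi * a) + 0.5 * lnx + * tt2 = (1 / a) * lnx + * erfinv(x) ~= sgn * sqrt(-tt1 + sqrt(tt1*tt1 - tt2)), with sgn = copysign(1.0, x), + * + * i.e. the nonnegative root of the quadratic obtained by inverting the approximation 1 - erf(y)^2 ~= exp(-y*y * (4/pi + a*y*y) / (1 + a*y*y)) and solving for y*y.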
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
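+ *
+ * As a brief illustrative sketch (the struct and variable names here are
+ * invented for the example and are not part of the API):
+ *
+ *   struct simde_example_i32x4 {
+ *     SIMDE_ALIGN_TO_16 int32_t values[4];
+ *   };
+ *   SIMDE_ALIGN_LIKE_16(struct simde_example_i32x4) int32_t copy[4];
+ *
+ * The first declaration hard-codes a 16-byte alignment in the MSVC-safe
+ * numeric form; the second borrows the alignment of an existing type,
+ * falling back to 16 on compilers where SIMDE_ALIGN_OF cannot be used.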
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
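+ *
+ * A hedged usage sketch, assuming a GCC/Clang-style compiler driver:
+ *
+ *   cc -O2 -DSIMDE_FAST_MATH foo.c                       # all trade-offs on
+ *   cc -O2 -ffast-math foo.c                             # has the same effect
+ *   cc -O2 -DSIMDE_FAST_MATH -DSIMDE_NO_FAST_NANS foo.c  # keep NaNs faithful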
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
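+ * (As with the other SIMDE_FAST_* options above, the check just below also
+ * honours an explicit -DSIMDE_NO_FAST_EXCEPTIONS, which keeps this behaviour
+ * faithful even when SIMDE_FAST_MATH is enabled.)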
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
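+ * One plausible use, shown only as a sketch (r, a and b are placeholders):
+ *
+ *   r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL,
+ *                                      a == b);
+ *
+ * which evaluates a == b with the float-equal warning pushed, disabled, and
+ * popped around just that expression.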
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
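+ * (Once SIMDE_ENDIAN_ORDER is set, code can simply compare it, e.g.
+ * "#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE", as the
+ * simde_endian_bswap64_* helpers further down do.)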
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
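+ * A freestanding build can also pre-define simde_memcpy, simde_memset and
+ * simde_memcmp to its own helpers before including any SIMDe header; the
+ * detection and fallback definitions that follow are then skipped entirely.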
*/
+#if !defined(simde_memcpy)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcpy)
+    #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n)
+  #endif
+#endif
+#if !defined(simde_memset)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memset)
+    #define simde_memset(s, c, n) __builtin_memset(s, c, n)
+  #endif
+#endif
+#if !defined(simde_memcmp)
+  #if HEDLEY_HAS_BUILTIN(__builtin_memcmp)
+    #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n)
+  #endif
+#endif
+
+#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp)
+  #if !defined(SIMDE_NO_STRING_H)
+    #if defined(__has_include)
+      #if !__has_include(<string.h>)
+        #define SIMDE_NO_STRING_H
+      #endif
+    #elif (SIMDE_STDC_HOSTED == 0)
+      #define SIMDE_NO_STRING_H
+    #endif
+  #endif
+
+  #if !defined(SIMDE_NO_STRING_H)
+    #include <string.h>
+    #if !defined(simde_memcpy)
+      #define simde_memcpy(dest, src, n) memcpy(dest, src, n)
+    #endif
+    #if !defined(simde_memset)
+      #define simde_memset(s, c, n) memset(s, c, n)
+    #endif
+    #if !defined(simde_memcmp)
+      #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n)
+    #endif
+  #else
+    /* These are meant to be portable, not fast.  If you're hitting them you
+     * should think about providing your own (by defining the simde_memcpy
+     * macro prior to including any SIMDe files) or submitting a patch to
+     * SIMDe so we can detect your system-provided memcpy/memset, like by
+     * adding your compiler to the checks for __builtin_memcpy and/or
+     * __builtin_memset. */
+    #if !defined(simde_memcpy)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memcpy_(void* dest, const void* src, size_t len) {
+        char* dest_ = HEDLEY_STATIC_CAST(char*, dest);
+        const char* src_ = HEDLEY_STATIC_CAST(const char*, src);
+        for (size_t i = 0 ; i < len ; i++) {
+          dest_[i] = src_[i];
+        }
+      }
+      #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n)
+    #endif
+
+    #if !defined(simde_memset)
+      SIMDE_FUNCTION_ATTRIBUTES
+      void
+      simde_memset_(void* s, int c, size_t len) {
+        char* s_ = HEDLEY_STATIC_CAST(char*, s);
+        char c_ = HEDLEY_STATIC_CAST(char, c);
+        for (size_t i = 0 ; i < len ; i++) {
+          s_[i] = c_;
+        }
+      }
+      #define simde_memset(s, c, n) simde_memset_(s, c, n)
+    #endif
+
+    #if !defined(simde_memcmp)
+      SIMDE_FUNCTION_ATTRIBUTES
+      int
+      simde_memcmp_(const void *s1, const void *s2, size_t n) {
+        const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1);
+        const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2);
+        for (size_t i = 0 ; i < n ; i++) {
+          if (s1_[i] != s2_[i]) {
+            return (int) (s1_[i] - s2_[i]);
+          }
+        }
+        return 0;
+      }
+      #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n)
+    #endif
+  #endif
+#endif
+
+/*** Functions that quiet a signaling NaN ***/
+
+static HEDLEY_INLINE
+double
+simde_math_quiet(double x) {
+  uint64_t tmp, mask;
+  if (!simde_math_isnan(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 8);
+  mask = 0x7ff80000;
+  mask <<= 32;
+  tmp |= mask;
+  simde_memcpy(&x, &tmp, 8);
+  return x;
+}
+
+static HEDLEY_INLINE
+float
+simde_math_quietf(float x) {
+  uint32_t tmp;
+  if (!simde_math_isnanf(x)) {
+    return x;
+  }
+  simde_memcpy(&tmp, &x, 4);
+  tmp |= 0x7fc00000lu;
+  simde_memcpy(&x, &tmp, 4);
+  return x;
+}
+
+#if defined(FE_ALL_EXCEPT)
+  #define SIMDE_HAVE_FENV_H
+#elif defined(__has_include)
+  #if __has_include(<fenv.h>)
+    #include <fenv.h>
+    #define SIMDE_HAVE_FENV_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+  #include <fenv.h>
+  #define SIMDE_HAVE_FENV_H
+#endif
+
+#if defined(EXIT_FAILURE)
+  #define SIMDE_HAVE_STDLIB_H
+#elif defined(__has_include)
+  #if __has_include(<stdlib.h>)
+    #include <stdlib.h>
+    #define SIMDE_HAVE_STDLIB_H
+  #endif
+#elif SIMDE_STDC_HOSTED == 1
+ 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
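+ * For example, where int64_t is a typedef for long (most LP64 Linux targets)
+ * SIMDE_BUILTIN_64_(popcount) below expands to __builtin_popcountl, and where
+ * it is long long (e.g. Windows or macOS) it expands to __builtin_popcountll;
+ * SIMDE_BUILTIN_HAS_64_(popcount) checks for whichever spelling applies.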
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
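+ * For example, SIMDE_BUG_IGNORE_SIGN_CONVERSION(a - b) evaluates a - b with
+ * -Wsign-conversion temporarily suppressed on the affected GCC and Clang
+ * versions, and expands to the bare expression on every other compiler.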
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
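+/* The simde_mm_adds_* family saturates instead of wrapping: for example,
+ * adding 100 to 100 in a signed 8-bit lane yields 127 (INT8_MAX) from
+ * simde_mm_adds_pi8 above, whereas simde_mm_add_pi8 would wrap around to
+ * -56.  The unsigned variants below clamp to UINT8_MAX / UINT16_MAX in the
+ * same way. */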
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
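+      /* Portable fallback: keep only the low 16 bits of each 32-bit product.
+       * For example, 300 * 300 = 90000 = 0x15F90, so the lane becomes 0x5F90,
+       * while simde_mm_mulhi_pi16 above keeps the 0x0001 high half instead. */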
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
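+        // vrndq_f32 maps to FRINTZ (round toward zero), which is exactly
+        // the truncation behaviour SIMDE_MM_FROUND_TO_ZERO asks for.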
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_x_mm_broadcastlow_ps(simde__m128 a) {
+  /* This function broadcasts the first element in the input vector to
+   * all lanes. It is used to avoid generating spurious exceptions in
+   * *_ss functions since there may be garbage in the upper lanes. */
+
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_shuffle_ps(a, a, 0);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a);
+
+    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+      r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_splat(a_.altivec_f32, 0);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]);
+    #elif defined(SIMDE_SHUFFLE_VECTOR_)
+      r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0);
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[0];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ps (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ps(a, b);
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32);
+    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
+      r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128);
+    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
+      r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32);
+    #elif defined(SIMDE_LOONGARCH_LSX_NATIVE)
+      r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32);
+    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
+      r_.f32 = a_.f32 + b_.f32;
+    #else
+      SIMDE_VECTORIZE
+      for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) {
+        r_.f32[i] = a_.f32[i] + b_.f32[i];
+      }
+    #endif
+
+    return simde__m128_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
+# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128
+simde_mm_add_ss (simde__m128 a, simde__m128 b) {
+  #if defined(SIMDE_X86_SSE_NATIVE)
+    return _mm_add_ss(a, b);
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
+    return simde_mm_move_ss(a, simde_mm_add_ps(a, b));
+  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
+    return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b)));
+  #else
+    simde__m128_private
+      r_,
+      a_ = simde__m128_to_private(a),
+      b_ = simde__m128_to_private(b);
+
+    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+      float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0);
+      float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0);
+      // the upper values in the result must be the remnants of a.
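+      // With value = (b0, 0, 0, 0), a single full-width vaddq_f32 updates
+      // only lane 0; the remaining lanes are simply computed as a + 0.0f.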
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
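+/* Illustrative sketch, not part of the imported SIMDe sources: the bitwise
+ * helpers above (simde_mm_and_ps, simde_mm_andnot_ps, simde_mm_xor_ps) are
+ * the usual building blocks for float sign manipulation.  A minimal usage
+ * example, assuming only functions defined earlier in this header; the
+ * helper name example_abs_ps is made up for illustration:
+ *
+ *   static inline simde__m128 example_abs_ps(simde__m128 v) {
+ *     // -0.0f has only the sign bit set; andnot (~mask & v) clears that
+ *     // bit in every lane, giving |v| lane-wise.
+ *     return simde_mm_andnot_ps(simde_mm_set1_ps(-0.0f), v);
+ *   }
+ *
+ * simde_x_mm_abs_ps() and simde_x_mm_copysign_ps() below build on the same
+ * idea, preferring native intrinsics where the target provides them. */
+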
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
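+
+/* Usage sketch: the simde_mm_* functions defined in this file are drop-in
+ * stand-ins for the corresponding x86 SSE intrinsics, so portable code can
+ * call them directly (or use the _mm_* names when
+ * SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES is defined). The scale4() helper below
+ * is hypothetical caller code, not part of SIMDe:
+ *
+ *   static void scale4(simde_float32 out[4], simde_float32 const in[4],
+ *                      simde_float32 s) {
+ *     simde__m128 v = simde_mm_loadu_ps(in);           // unaligned load of 4 floats
+ *     simde__m128 k = simde_mm_set1_ps(s);             // broadcast the scalar
+ *     simde_mm_storeu_ps(out, simde_mm_mul_ps(v, k));  // multiply and store
+ *   }
+ */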
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ENTA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET2 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p,
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
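+/* Editorial usage sketch (not part of the upstream SIMDE sources): the
+ * portable SIMDE_MM_TRANSPOSE4_PS macro defined above is used exactly like
+ * the x86 _MM_TRANSPOSE4_PS, transposing four simde__m128 rows in place.
+ * The function and variable names below are illustrative only, and the
+ * block is guarded out so it is never compiled as part of this header. */
+#if 0
+static void simde_example_transpose4x4(float out[16]) {
+  /* Rows of a 4x4 matrix; simde_mm_set_ps takes lanes high-to-low, so
+   * row0 holds {0, 1, 2, 3} in lanes 0..3. */
+  simde__m128 row0 = simde_mm_set_ps( 3.0f,  2.0f,  1.0f,  0.0f);
+  simde__m128 row1 = simde_mm_set_ps( 7.0f,  6.0f,  5.0f,  4.0f);
+  simde__m128 row2 = simde_mm_set_ps(11.0f, 10.0f,  9.0f,  8.0f);
+  simde__m128 row3 = simde_mm_set_ps(15.0f, 14.0f, 13.0f, 12.0f);
+
+  /* After the macro, row0..row3 hold the columns of the original matrix
+   * (row0 == {0, 4, 8, 12}, and so on). */
+  SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3);
+
+  simde_mm_storeu_ps(&out[0],  row0);
+  simde_mm_storeu_ps(&out[4],  row1);
+  simde_mm_storeu_ps(&out[8],  row2);
+  simde_mm_storeu_ps(&out[12], row3);
+}
+#endif
+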
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + 
#if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || 
defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
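+/* Editorial note, not upstream SIMDE code: the portable fallbacks for
+ * simde_mm_avg_epu8 / simde_mm_avg_epu16 above compute the SSE2 rounded
+ * average (a + b + 1) >> 1 after widening each lane, so the intermediate
+ * sum cannot overflow the 8- or 16-bit element type. A minimal scalar
+ * sketch of the same trick (hypothetical helper name) would be:
+ *
+ *   static inline uint8_t example_avg_round_u8(uint8_t a, uint8_t b) {
+ *     uint16_t wa = a, wb = b;   // widen: a + b + 1 <= 511 fits in 16 bits
+ *     return (uint8_t) ((wa + wb + 1) >> 1);
+ *   }
+ */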
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ diff --git a/include/simde/x86/svml.h b/include/simde/x86/svml.h new file mode 100644 index 00000000..5831eb01 --- /dev/null +++ b/include/simde/x86/svml.h @@ -0,0 +1,60311 @@ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/svml.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_SVML_H) +#define SIMDE_X86_SVML_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/fma.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2019 Evan Nemerson + */ + +#if !defined(SIMDE_X86_FMA_H) +#define SIMDE_X86_FMA_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. 
Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include <stdint.h> +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ?
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include <stddef.h> +#include <stdint.h> + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin
simde/simde-detect-clang.h :: */ +/* Detect Clang Version + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . + * SPDX-License-Identifier: CC0-1.0 + */ + +/* This file was originally part of SIMDe + * (). You're free to do with it as + * you please, but I do have a few small requests: + * + * * If you make improvements, please submit them back to SIMDe + * (at ) so others can + * benefit from them. + * * Please keep a link to SIMDe intact so people know where to submit + * improvements. + * * If you expose it publicly, please change the SIMDE_ prefix to + * something specific to your project. + * + * The version numbers clang exposes (in the ___clang_major__, + * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. + * Vendors such as Apple will define these values to their version + * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but + * __clang_major__ and __clang_minor__ are defined to 4 and 0 + * respectively, instead of 3 and 1. + * + * The solution is *usually* to use clang's feature detection macros + * () + * to determine if the feature you're interested in is available. This + * generally works well, and it should probably be the first thing you + * try. Unfortunately, it's not possible to check for everything. In + * particular, compiler bugs. + * + * This file just uses the feature checking macros to detect features + * added in specific versions of clang to identify which version of + * clang the compiler is based on. + * + * Right now it only goes back to 3.6, but I'm happy to accept patches + * to go back further. And, of course, newer versions are welcome if + * they're not already present, and if you find a way to detect a point + * release that would be great, too! + */ + +#if !defined(SIMDE_DETECT_CLANG_H) +#define SIMDE_DETECT_CLANG_H 1 + +/* Attempt to detect the upstream clang version number. I usually only + * worry about major version numbers (at least for 4.0+), but if you + * need more resolution I'm happy to accept patches that are able to + * detect minor versions as well. That said, you'll probably have a + * hard time with detection since AFAIK most minor releases don't add + * anything we can detect. Updated based on + * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 + * - would welcome patches/updates there as well. 
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
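 *
 * As a minimal usage sketch (the helper function below is hypothetical,
 * not part of SIMDe), the SIMDE_NATURAL_VECTOR_SIZE_GE() macro defined
 * below is what typically decides whether a vectorized path is worth
 * taking at all, with a plain loop as the fallback:
 *
 *   static void add_u8x16(uint8_t r[16], const uint8_t a[16], const uint8_t b[16]) {
 *   #if SIMDE_NATURAL_VECTOR_SIZE_GE(128)
 *     // a single 128-bit load/add/store covers all 16 lanes
 *     simde_vst1q_u8(r, simde_vaddq_u8(simde_vld1q_u8(a), simde_vld1q_u8(b)));
 *   #else
 *     for (int i = 0 ; i < 16 ; i++)  // no useful vectors expected; stay serial
 *       r[i] = HEDLEY_STATIC_CAST(uint8_t, a[i] + b[i]);
 *   #endif
 *   }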
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
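 *
 * As a concrete sketch of what the assumption buys (this mirrors the
 * simde_math_issubnormalf() fallback defined later in this header):
 * with IEEE 754 binary32 storage a float v is 1 sign bit, 8 exponent
 * bits and 23 mantissa bits, so it is subnormal exactly when its
 * exponent field is all zeroes while its mantissa field is not:
 *
 *   uint32_t bits = simde_float32_as_uint32(v);
 *   int is_subnormal =
 *     ((bits & UINT32_C(0x7F800000)) == UINT32_C(0)) &&  // exponent bits clear
 *     ((bits & UINT32_C(0x007FFFFF)) != UINT32_C(0));    // mantissa bits set
 *
 * Without the storage guarantee such bit tests are not portable and the
 * fpclassify()-style paths have to be used instead.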
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
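+     *
+     * For reference, the approximation implemented below is, roughly,
+     * (writing a for the constant chosen above and lnx for log(1 - x*x)):
+     *
+     *   tt1 = 2 / (pi * a) + lnx / 2
+     *   tt2 = lnx / a
+     *   erfinv(x) ~= copysign(1, x) * sqrt(sqrt(tt1 * tt1 - tt2) - tt1)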
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
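+ *
+ * As a quick illustration of the two flavours (the struct and field
+ * names here are only placeholders):
+ *
+ * struct example_v128 {
+ *   SIMDE_ALIGN_TO_16 int32_t by_value[4];
+ *   SIMDE_ALIGN_LIKE_8(int64_t) int64_t by_type[2];
+ * };
+ *
+ * Both members get an explicit alignment; the second form derives it
+ * from int64_t where the compiler can express that, and otherwise
+ * falls back to the numeric value in its suffix.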
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
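+ *
+ * As an example (the file names are just placeholders), a build that
+ * wants the fast paths everywhere except NaN handling could use
+ *
+ *   cc -O2 -DSIMDE_FAST_MATH -DSIMDE_NO_FAST_NANS -c foo.c
+ *
+ * or, equivalently, define the same macros before including any SIMDe
+ * header:
+ *
+ *   #define SIMDE_FAST_MATH
+ *   #define SIMDE_NO_FAST_NANS
+ *   #include "simde/x86/sse2.h"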
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (rounnd + , ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to no generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
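+ *
+ * For instance (only available when SIMDE_STATEMENT_EXPR_ is defined;
+ * the function name below is just a placeholder):
+ *
+ *   SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_,
+ *                                  some_call_that_trips_the_warning())
+ *
+ * evaluates the call between a diagnostic push/pop with just that one
+ * warning suppressed.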
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
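+ *
+ * (Illustrative note, not upstream guidance: because this whole detection
+ * block is guarded by #if !defined(SIMDE_ENDIAN_ORDER), a build on an
+ * unlisted platform can simply pre-define the byte order before including
+ * SIMDe, e.g.
+ *
+ *   #define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE
+ *
+ * using the SIMDE_ENDIAN_LITTLE / SIMDE_ENDIAN_BIG constants defined just
+ * above.)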
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
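+
+   (Illustrative sketch, not upstream API: since the blocks below only supply
+   defaults when these macros are unset, a build that needs different
+   underlying types can pre-define them before including SIMDe, e.g.
+
+     #define SIMDE_FLOAT32_TYPE _Float32
+     #define SIMDE_FLOAT64_TYPE _Float64
+
+   where _Float32/_Float64 are just example types; the assumption can then be
+   checked after inclusion with the HEDLEY_STATIC_ASSERT macro already used
+   elsewhere in this header:
+
+     HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4, "simde_float32 must be 32 bits");
+     HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8, "simde_float64 must be 64 bits");)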
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
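+ *
+ * Illustrative sketch (an assumption about intended usage, echoed by the
+ * fallback comment further below): a freestanding build can route these
+ * helpers to its own routines by defining the macros before including any
+ * SIMDe header, e.g.
+ *
+ *   #define simde_memcpy(dest, src, n) my_memcpy((dest), (src), (n))
+ *   #define simde_memset(s, c, n)      my_memset((s), (c), (n))
+ *   #define simde_memcmp(s1, s2, n)    my_memcmp((s1), (s2), (n))
+ *
+ * where my_memcpy/my_memset/my_memcmp stand for hypothetical project-provided
+ * functions; otherwise the detection below picks up the system string.h
+ * routines or falls back to the portable (slow) loops defined here.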
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_[i]; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t n) { + unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); + unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + 
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether the operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long lonsg are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long. 
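+ *
+ * Illustrative sketch, not upstream code: with the macros defined below, a
+ * 32-bit population count can be routed to whichever builtin matches the
+ * platform (__builtin_popcount, __builtin_popcountl or __builtin_popcountll),
+ * casting through SIMDE_BUILTIN_TYPE_32_ as described above:
+ *
+ *   static int example_popcount32(uint32_t x) {  // hypothetical helper name
+ *   #if SIMDE_BUILTIN_HAS_32_(popcount)
+ *     return HEDLEY_STATIC_CAST(int,
+ *       SIMDE_BUILTIN_32_(popcount)(HEDLEY_STATIC_CAST(SIMDE_BUILTIN_TYPE_32_, x)));
+ *   #else
+ *     int n = 0;
+ *     while (x != 0) { x &= x - 1; n++; }  // portable fallback
+ *     return n;
+ *   #endif
+ *   }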
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
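+ *
+ * Illustrative sketch (not taken from an actual upstream call site): the
+ * wrapper defined below simply re-evaluates the expression with
+ * -Wsign-conversion suppressed on affected compilers, so an assignment the
+ * diagnostic would otherwise flag can be written as
+ *
+ *   r_.u16[i] = SIMDE_BUG_IGNORE_SIGN_CONVERSION(a_.u16[i] << count);
+ *
+ * where r_, a_ and count stand for whatever locals the surrounding function
+ * uses; on unaffected compilers the macro is a no-op around the expression.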
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of <a>.
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
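+ * As an illustration (values chosen here, not taken from the upstream tests):
+ * an input lane of 200.7f is clamped to 127.0f and stored as 127, -3.4f rounds
+ * to -3, and only the low four i8 lanes of the result carry meaningful data.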
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
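+/* Editorial illustration -- not part of the imported SIMDe sources. A minimal
+ * usage sketch of the wrappers declared above, assuming only the public API
+ * already defined at this point (simde_mm_load_ps, simde_mm_div_ps,
+ * simde_mm_cvtss_f32), 16-byte-aligned inputs and non-zero denominators; the
+ * function name is hypothetical. On x86 the native _mm_* branch is taken,
+ * elsewhere the portable fallbacks above compile instead. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32
+simde_example_first_quotient (simde_float32 const num[HEDLEY_ARRAY_PARAM(4)], simde_float32 const den[HEDLEY_ARRAY_PARAM(4)]) {
+  simde__m128 n = simde_mm_load_ps(num);  /* aligned 4-float load */
+  simde__m128 d = simde_mm_load_ps(den);
+  simde__m128 q = simde_mm_div_ps(n, d);  /* four element-wise divisions */
+  return simde_mm_cvtss_f32(q);           /* read back lane 0 */
+}
+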
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
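+ /* Editorial note: movelh keeps the low 64 bits (lanes 0 and 1) of each operand;
+ * combining the two low halves below yields { a0, a1, b0, b1 }, which is exactly
+ * what _mm_movelh_ps returns. */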
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
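+ // After the accumulate, byte 0 of 'paired' holds sign0 | (sign1 << 1) and byte 8
+ // holds sign2 | (sign3 << 1), so OR-ing byte 8 shifted left by two produces the
+ // usual 4-bit movemask (bit i = sign bit of lane i).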
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
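A minimal usage sketch tying together the wrappers declared above (simde_mm_set_ps, SIMDE_MM_TRANSPOSE4_PS, simde_mm_storeu_ps); the helper name simde_example_transpose4 is illustrative only and is not part of the SIMDE API or of this patch:

    /* Illustrative sketch: transpose a row-major 4x4 float matrix with the
     * portable SSE wrappers. On x86 this lowers to the native intrinsics;
     * elsewhere it uses the NEON/AltiVec/WASM/scalar fallbacks defined
     * earlier in this header. */
    static inline void
    simde_example_transpose4 (const simde_float32 in[16], simde_float32 out[16]) {
      /* simde_mm_set_ps takes its arguments from the highest lane down to lane 0. */
      simde__m128 row0 = simde_mm_set_ps(in[ 3], in[ 2], in[ 1], in[ 0]);
      simde__m128 row1 = simde_mm_set_ps(in[ 7], in[ 6], in[ 5], in[ 4]);
      simde__m128 row2 = simde_mm_set_ps(in[11], in[10], in[ 9], in[ 8]);
      simde__m128 row3 = simde_mm_set_ps(in[15], in[14], in[13], in[12]);

      /* In-place 4x4 transpose; the generic path above is built from
       * unpacklo/unpackhi plus movelh/movehl, the A32 NEON path from vtrnq_f32. */
      SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3);

      /* Unaligned stores; simde_mm_store_ps is the 16-byte-aligned variant. */
      simde_mm_storeu_ps(&out[ 0], row0);
      simde_mm_storeu_ps(&out[ 4], row1);
      simde_mm_storeu_ps(&out[ 8], row2);
      simde_mm_storeu_ps(&out[12], row3);
    }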
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] == b_.f64[0]) ?
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
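+/* A brief usage sketch for the blendv helpers above (illustrative only; the
+ * values below are assumptions for the example, not part of the API): a lane
+ * of `mask` whose sign bit is set selects the corresponding lane of `b`,
+ * otherwise the lane of `a`.  With 16-bit lanes, for example:
+ *
+ *   simde__m128i a    = simde_mm_set1_epi16(1);
+ *   simde__m128i b    = simde_mm_set1_epi16(2);
+ *   simde__m128i mask = simde_mm_set_epi16(-1, 0, -1, 0, -1, 0, -1, 0);
+ *   simde__m128i r    = simde_x_mm_blendv_epi16(a, b, mask);
+ *   // r holds { 2, 1, 2, 1, 2, 1, 2, 1 } (highest lane first, matching the
+ *   // argument order of simde_mm_set_epi16).
+ */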
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
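+/* A short usage sketch for the widening conversions above (illustrative
+ * values, assumed for the example): only the low lanes of the source vector
+ * are consumed, and they are sign- or zero-extended depending on the variant.
+ *
+ *   simde__m128i bytes = simde_mm_set1_epi8(-1);          // every byte 0xFF
+ *   simde__m128i s16   = simde_mm_cvtepi8_epi16(bytes);   // eight lanes of -1
+ *   simde__m128i u16   = simde_mm_cvtepu8_epi16(bytes);   // eight lanes of 255
+ */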
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +#if !defined(SIMDE_X86_FMA_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmadd_pd (simde__m128d a, 
simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmadd_pd(a, b, c); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_madd(a_.altivec_f64, b_.altivec_f64, c_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmaq_f64(c_.neon_f64, b_.neon_f64, a_.neon_f64); + #elif defined(simde_math_fma) && (defined(__FP_FAST_FMA) || defined(FP_FAST_FMA)) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fma(a_.f64[i], b_.f64[i], c_.f64[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_pd + #define _mm_fmadd_pd(a, b, c) simde_mm_fmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmadd_pd(a, b, c); + #else + return simde_mm256_add_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmadd_pd + #define _mm256_fmadd_pd(a, b, c) simde_mm256_fmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmadd_ps(a, b, c); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c), + r_; + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32); + #elif defined(simde_math_fmaf) && (defined(__FP_FAST_FMAF) || defined(FP_FAST_FMAF)) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fmaf(a_.f32[i], b_.f32[i], c_.f32[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_ps + #define _mm_fmadd_ps(a, b, c) simde_mm_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmadd_ps(a, b, c); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c), + r_; + + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_fmadd_ps(a_.m128[i], b_.m128[i], c_.m128[i]); + } + + return simde__m256_from_private(r_); + #else + return simde_mm256_add_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmadd_ps + #define _mm256_fmadd_ps(a, b, c) 
simde_mm256_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmadd_sd(a, b, c); + #else + return simde_mm_add_sd(simde_mm_mul_sd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_sd + #define _mm_fmadd_sd(a, b, c) simde_mm_fmadd_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmadd_ss(a, b, c); + #else + return simde_mm_add_ss(simde_mm_mul_ss(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmadd_ss + #define _mm_fmadd_ss(a, b, c) simde_mm_fmadd_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmaddsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmaddsub_pd(a, b, c); + #else + return simde_mm_addsub_pd(simde_mm_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmaddsub_pd + #define _mm_fmaddsub_pd(a, b, c) simde_mm_fmaddsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmaddsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmaddsub_pd(a, b, c); + #else + return simde_mm256_addsub_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmaddsub_pd + #define _mm256_fmaddsub_pd(a, b, c) simde_mm256_fmaddsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmaddsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmaddsub_ps(a, b, c); + #else + return simde_mm_addsub_ps(simde_mm_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmaddsub_ps + #define _mm_fmaddsub_ps(a, b, c) simde_mm_fmaddsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmaddsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmaddsub_ps(a, b, c); + #else + return simde_mm256_addsub_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmaddsub_ps + #define _mm256_fmaddsub_ps(a, b, c) simde_mm256_fmaddsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsub_pd(a, b, c); + #else + return simde_mm_sub_pd(simde_mm_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_pd + #define _mm_fmsub_pd(a, b, c) simde_mm_fmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsub_pd(a, b, c); + #else + return simde_mm256_sub_pd(simde_mm256_mul_pd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsub_pd + #define _mm256_fmsub_pd(a, b, c) simde_mm256_fmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return 
_mm_fmsub_ps(a, b, c); + #else + return simde_mm_sub_ps(simde_mm_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_ps + #define _mm_fmsub_ps(a, b, c) simde_mm_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsub_ps(a, b, c); + #else + return simde_mm256_sub_ps(simde_mm256_mul_ps(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsub_ps + #define _mm256_fmsub_ps(a, b, c) simde_mm256_fmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmsub_sd(a, b, c); + #else + return simde_mm_sub_sd(simde_mm_mul_sd(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_sd + #define _mm_fmsub_sd(a, b, c) simde_mm_fmsub_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fmsub_ss(a, b, c); + #else + return simde_mm_sub_ss(simde_mm_mul_ss(a, b), c); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsub_ss + #define _mm_fmsub_ss(a, b, c) simde_mm_fmsub_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fmsubadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsubadd_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; + r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsubadd_pd + #define _mm_fmsubadd_pd(a, b, c) simde_mm_fmsubadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fmsubadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsubadd_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = (a_.f64[ i ] * b_.f64[ i ]) + c_.f64[ i ]; + r_.f64[i + 1] = (a_.f64[i + 1] * b_.f64[i + 1]) - c_.f64[i + 1]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsubadd_pd + #define _mm256_fmsubadd_pd(a, b, c) simde_mm256_fmsubadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fmsubadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fmsubadd_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; + r_.f32[i + 1] = (a_.f32[i + 
1] * b_.f32[i + 1]) - c_.f32[i + 1]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fmsubadd_ps + #define _mm_fmsubadd_ps(a, b, c) simde_mm_fmsubadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fmsubadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fmsubadd_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = (a_.f32[ i ] * b_.f32[ i ]) + c_.f32[ i ]; + r_.f32[i + 1] = (a_.f32[i + 1] * b_.f32[i + 1]) - c_.f32[i + 1]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fmsubadd_ps + #define _mm256_fmsubadd_ps(a, b, c) simde_mm256_fmsubadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmadd_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmadd_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vfmsq_f64(c_.neon_f64, a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_pd + #define _mm_fnmadd_pd(a, b, c) simde_mm_fnmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fnmadd_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmadd_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmadd_pd + #define _mm256_fnmadd_pd(a, b, c) simde_mm256_fnmadd_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmadd_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA) + r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_ps + #define _mm_fnmadd_ps(a, b, c) simde_mm_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fnmadd_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if 
defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmadd_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmadd_ps + #define _mm256_fnmadd_ps(a, b, c) simde_mm256_fnmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmadd_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmadd_sd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + r_ = a_; + r_.f64[0] = -(a_.f64[0] * b_.f64[0]) + c_.f64[0]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_sd + #define _mm_fnmadd_sd(a, b, c) simde_mm_fnmadd_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmadd_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmadd_ss(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + r_ = a_; + r_.f32[0] = -(a_.f32[0] * b_.f32[0]) + c_.f32[0]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmadd_ss + #define _mm_fnmadd_ss(a, b, c) simde_mm_fnmadd_ss(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmsub_pd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_pd + #define _mm_fnmsub_pd(a, b, c) simde_mm_fnmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_fnmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmsub_pd(a, b, c); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + c_ = simde__m256d_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -(a_.f64[i] * b_.f64[i]) - c_.f64[i]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmsub_pd + #define _mm256_fnmsub_pd(a, b, c) simde_mm256_fnmsub_pd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm_fnmsub_ps(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_ps + #define _mm_fnmsub_ps(a, b, c) simde_mm_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_fnmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c) { + #if defined(SIMDE_X86_FMA_NATIVE) + return _mm256_fnmsub_ps(a, b, c); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + c_ = simde__m256_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -(a_.f32[i] * b_.f32[i]) - c_.f32[i]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm256_fnmsub_ps + #define _mm256_fnmsub_ps(a, b, c) simde_mm256_fnmsub_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_fnmsub_sd (simde__m128d a, simde__m128d b, simde__m128d c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmsub_sd(a, b, c); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + c_ = simde__m128d_to_private(c); + + r_ = a_; + r_.f64[0] = -(a_.f64[0] * b_.f64[0]) - c_.f64[0]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_sd + #define _mm_fnmsub_sd(a, b, c) simde_mm_fnmsub_sd(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_fnmsub_ss (simde__m128 a, simde__m128 b, simde__m128 c) { + #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT) + return _mm_fnmsub_ss(a, b, c); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + c_ = simde__m128_to_private(c); + + r_ = simde__m128_to_private(a); + r_.f32[0] = -(a_.f32[0] * b_.f32[0]) - c_.f32[0]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES) + #undef _mm_fnmsub_ss + #define _mm_fnmsub_ss(a, b, c) simde_mm_fnmsub_ss(a, b, c) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_FMA_H) */ +/* :: End simde/x86/fma.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2019-2020 Michael R. Crusoe + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX2_H) +#define SIMDE_X86_AVX2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi8(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi8 + #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi16(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi16 + #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi32(simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi32(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? 
-a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi32 + #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi8 + #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi16 + #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi16(a, b); + #else + return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi16 + #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi32 + #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi32(a, b); + #else + return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), 
simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi32 + #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi64 + #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm256_setzero_si256(); + + for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.m128i_private[h].i8[i] = 0; + } else if (srcpos > 15) { + r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; + } else { + r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; + } + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) +# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_alignr_epi8(a, b, count) \ + simde_mm256_set_m128i( \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_alignr_epi8 + #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_and_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_si256 + #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256i +simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_andnot_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_si256 + #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi8 + #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi16 + #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadds_epi16(a, b); + #else + return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadds_epi16 + #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu8 + #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu16 + #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu8 + #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu16 + #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) +# define simde_mm_blend_epi32(a, b, imm8) \ + simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi32 + #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((imm8 >> i%8) & 1) ? 
b_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) +#elif defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi16(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi16 + #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi32(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi32 + #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_blendv_epi8(a, b, mask); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + mask_ = simde__m256i_to_private(mask); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); + r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(mask_.i8) tmp = mask_.i8 >> 7; + r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + int8_t tmp = mask_.i8[i] >> 7; + r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_epi8 + #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastb_epi8(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return 
simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastb_epi8 + #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastb_epi8(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastb_epi8 + #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastw_epi16(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastw_epi16 + #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastw_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastw_epi16 + #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastd_epi32(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastd_epi32 + #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastd_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastd_epi32 + #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastq_epi64(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastq_epi64 + #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastq_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastq_epi64 + #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastss_ps(a); + #elif defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_shuffle_ps(a, a, 0); + #else + simde__m128_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastss_ps + #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastss_ps(a); + #else + simde__m256_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + __m128 tmp = _mm_permute_ps(a_.n, 0); + r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); + #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) + r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastss_ps + #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_broadcastsd_pd (simde__m128d a) { + return simde_mm_movedup_pd(a); +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastsd_pd + #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastsd_pd(a); + #else + simde__m256d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsd_pd + #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) 
+ return _mm256_broadcastsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = a_; + r_.m128i_private[1] = a_; + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = a_.i64[1]; + r_.i64[2] = a_.i64[0]; + r_.i64[3] = a_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsi128_si256 + #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) + #undef _mm_broadcastsi128_si256 + #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i - imm8; + if(i >= (ssize/2)) { + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i + imm8; + if(i < (ssize/2)) { + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi8 + #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi16 + #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi32 + #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi64 + #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi8 + #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 > b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi16 + #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi32 + #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi64 + #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi16 + #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi32 + #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi64 + #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi32 + #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi64 + #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_epi64 + #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi16 + #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi32 + #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi64 + #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi32 + #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if 
defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi64 + #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu32_epi64 + #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi8 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31){ + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i8[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi8 + #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi16 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i16[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi16 + #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extracti128_si256 + #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return 
simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi32 + #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi32 + #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi32 + #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi32 + #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi32 + #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { 
+ r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi32 + #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi32 + #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi32 + #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi64 + #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi64 + #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + 
simde__m256i_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi64 + #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi64 + #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi64 + #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi64 + #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi64 + #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi64 + #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_ps + #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_ps + #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_ps + #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + src_ = simde__m256_to_private(src), + mask_ = simde__m256_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const 
uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_ps + #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_ps + #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, 
SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_ps + #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_ps + #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_ps + #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + 
HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_pd + #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_pd + #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m256d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, 
base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_pd + #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_pd + #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_pd + #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = 
simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_pd + #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_pd + #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + 
#define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_pd + #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[ imm8 & 1 ] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_inserti128_si256 + #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_madd_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); + SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); + + SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); + SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); + product = a32x16 * b32x16; + + even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); + odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); + + r_.i32 = even + odd; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_madd_epi16 + #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maddubs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maddubs_epi16 + #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi32(mem_addr, mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi32 + #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi32(mem_addr, mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi32 + #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = mask_shr_.i64[i] ? mem_addr[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi64 + #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : INT64_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi64 + #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi32(mem_addr, mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi32 + #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi32(mem_addr, mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi32 + #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi64 + #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi64 + #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_max_epi8(a, b); + #else + 
simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi8 + #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu8 + #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu16 + #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu32 + #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi16 + #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi32 + #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_min_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi8 + #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi16 + #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi32 + #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu8 + #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu16 + #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu32 + #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_movemask_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_movemask_epi8(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + uint32_t r = 0; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); + } + #else + r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); + } + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_epi8 + #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + const int a_offset1 = imm8 & 4; + const int b_offset1 = (imm8 & 3) << 2; + const int a_offset2 = (imm8 >> 3) & 4; + const int b_offset2 = ((imm8 >> 3) & 3) << 2; + + #if defined(simde_math_abs) + const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; + for (int i = 0 ; i < halfway_point ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); + r_.u16[halfway_point + i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_mpsadbw_epu8 + #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhrs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / 
sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi16(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi16 + #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi32(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi32 + #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_or_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_si256 + #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; + const size_t 
quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); + r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); + r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi16 + #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi32(a, b); + #else + simde__m256i_private + r_, + v_[] = { + simde__m256i_to_private(a), + simde__m256i_to_private(b) + }; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); + r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi32 + #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); + r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); + r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); + r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi16 + #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); + r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); + r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); + r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi32 + #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2x128_si256 + #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; + r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; + r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; + r_.i64[3] = (imm8 & 0x80) ? 
a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_epi64 + #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; + r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; + r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; + r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_pd + #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 7]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_epi32 + #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); + #else + return _mm256_permutevar8x32_ps(a, idx); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[idx_.i32[i] & 7]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_ps + #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sad_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sad_epu8 + #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_shuffle_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { + r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; + r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi8 + #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_shuffle_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 32, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi32 + #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 
3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4, \ + 8, 9, 10, 11, \ + ((((imm8) ) & 3) + 8 + 4), \ + ((((imm8) >> 2) & 3) + 8 + 4), \ + ((((imm8) >> 4) & 3) + 8 + 4), \ + ((((imm8) >> 6) & 3) + 8 + 4) \ + ) }); })) +#else +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflehi_epi16 + #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7, \ + ((((imm8) ) & 3) + 8), \ + ((((imm8) >> 2) & 3) + 8), \ + ((((imm8) >> 4) & 3) + 8), \ + ((((imm8) >> 6) & 3) + 8), \ + 12, 13, 14, 15) }); })) +#else +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflelo_epi16 + #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi8 + #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi16 + #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi32 + #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi16 + #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 31) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi32 + #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = 
simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 63) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi64 + #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* Note: There is no consistency in how compilers handle values outside of + the expected range, hence the discrepancy between what we allow and what + Intel specifies. Some compilers will return 0, others seem to just mask + off everything outside of the range. */ + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { + r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi16 + #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { + r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi32 + #define _mm256_slli_epi32(a, imm8) 
simde_mm256_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi64 + #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) - imm8; + r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_si256 + #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); + r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi32 + #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi32 + #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); + r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi64 + #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi64 + #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi16 + #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi32 + #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi16 + #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi32 + #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_srav_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); + r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srav_epi32 + #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srav_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + count_ = simde__m256i_to_private(count); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); + r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + if (shift > 31) shift = 31; + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srav_epi32 + #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi16 + #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi32 + #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi64 + #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + if (imm8 > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { + r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); + } + #else + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi16 + #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { + r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi32 + #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi64 + #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = imm8 + HEDLEY_STATIC_CAST(int, i); + r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_si256 + #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi32 + #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi32 + #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi64 + #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi64 + #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi8 + #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ 
= simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi16 + #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi16(a, b); + #else + return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi16 + #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi32 + #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi32(a, b); + #else + return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi32 + #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi64 + #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + 
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi8 + #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi16 + #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsubs_epi16(a, b); + #else + return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsubs_epi16 + #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu8 + #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + 
+ #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu16 + #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_x_mm256_test_all_ones (simde__m256i a) { + simde__m256i_private a_ = simde__m256i_to_private(a); + int r; + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi8 + #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi16 + #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); + 
#elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 0, 8, 1, 9, 4, 12, 5, 13); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi32 + #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i]; + r_.i64[2 * i + 1] = b_.i64[2 * i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi64 + #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi8 + #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 4, 20, 5, 21, 6, 22, 7, 23, + 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi16 + #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 2, 10, 3, 11, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi32 + #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i + 1]; + r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi64 + #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_xor_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_si256 + #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX2_H) */ +/* :: End simde/x86/avx2.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/abs.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including 
without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_ABS_H) +#define SIMDE_X86_AVX512_ABS_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/types.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_TYPES_H) +#define SIMDE_X86_AVX512_TYPES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +/* The problem is that Microsoft doesn't support 64-byte aligned parameters, except for + * __m512/__m512i/__m512d. Since our private union has an __m512 member it will be 64-byte + * aligned even if we reduce the alignment requirements of other members. + * + * Even if we're on x86 and use the native AVX-512 types for arguments/return values, the + * to/from private functions will break, and I'm not willing to change their APIs to use + * pointers (which would also require more verbose code on the caller side) just to make + * MSVC happy. + * + * If you want to use AVX-512 in SIMDe, you'll need to either upgrade to MSVC 2017 or later, + * or upgrade to a different compiler (clang-cl, perhaps?). If you have an idea of how to + * fix this without requiring API changes (except transparently through macros), patches + * are welcome. 
+ */ + +# if defined(HEDLEY_MSVC_VERSION) && !HEDLEY_MSVC_VERSION_CHECK(19,10,0) +# if defined(SIMDE_X86_AVX512F_NATIVE) +# undef SIMDE_X86_AVX512F_NATIVE +# pragma message("Native AVX-512 support requires MSVC 2017 or later. See comment above (in code) for details.") +# endif +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_32 +# else +# define SIMDE_AVX512_ALIGN SIMDE_ALIGN_TO_64 +# endif + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_ALIGN_TO_16 __m128bh n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_BF16_NATIVE) + SIMDE_ALIGN_TO_32 __m256bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 
SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512BF16_NATIVE) + SIMDE_AVX512_ALIGN __m512bh n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512bh_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f 
SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128_private m128_private[4]; + SIMDE_AVX512_ALIGN simde__m128 m128[4]; + SIMDE_AVX512_ALIGN simde__m256_private m256_private[2]; + SIMDE_AVX512_ALIGN simde__m256 m256[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + 
SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_AVX512_ALIGN simde__m128d_private m128d_private[4]; + SIMDE_AVX512_ALIGN simde__m128d m128d[4]; + SIMDE_AVX512_ALIGN simde__m256d_private m256d_private[2]; + SIMDE_AVX512_ALIGN simde__m256d m256d[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_AVX512_ALIGN int8_t i8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int16_t i16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int32_t i32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int64_t i64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint8_t u8 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint16_t u16 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint32_t u32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint64_t u64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_uint128 u128 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN simde_float64 f64 SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN int_fast32_t i32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + SIMDE_AVX512_ALIGN uint_fast32_t u32f SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + SIMDE_AVX512_ALIGN int8_t i8[64]; + SIMDE_AVX512_ALIGN int16_t i16[32]; + SIMDE_AVX512_ALIGN int32_t i32[16]; + SIMDE_AVX512_ALIGN int64_t i64[8]; + SIMDE_AVX512_ALIGN uint8_t u8[64]; + SIMDE_AVX512_ALIGN uint16_t u16[32]; + SIMDE_AVX512_ALIGN uint32_t u32[16]; + SIMDE_AVX512_ALIGN uint64_t u64[8]; + SIMDE_AVX512_ALIGN int_fast32_t i32f[64 / sizeof(int_fast32_t)]; + SIMDE_AVX512_ALIGN uint_fast32_t u32f[64 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_AVX512_ALIGN simde_int128 i128[4]; + SIMDE_AVX512_ALIGN simde_uint128 u128[4]; + #endif + SIMDE_AVX512_ALIGN simde_float32 f32[16]; + SIMDE_AVX512_ALIGN simde_float64 f64[8]; + #endif + + SIMDE_AVX512_ALIGN simde__m128i_private m128i_private[4]; + SIMDE_AVX512_ALIGN simde__m128i m128i[4]; + SIMDE_AVX512_ALIGN simde__m256i_private m256i_private[2]; + SIMDE_AVX512_ALIGN simde__m256i m256i[2]; + + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_AVX512_ALIGN __m512i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
int) altivec_u32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[4]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[4]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[4]; + #endif + #endif +} simde__m512i_private; + +/* Intel uses the same header (immintrin.h) for everything AVX and + * later. If native aliases are enabled, and the machine has native + * support for AVX imintrin.h will already have been included, which + * means simde__m512* will already have been defined. So, even + * if the machine doesn't support AVX512F we need to use the native + * type; it has already been defined. + * + * However, we also can't just assume that including immintrin.h does + * actually define these. It could be a compiler which supports AVX + * but not AVX512F, such as GCC < 4.9 or VS < 2017. That's why we + * check to see if _MM_CMPINT_GE is defined; it's part of AVX512F, + * so we assume that if it's present AVX-512F has already been + * declared. + * + * Note that the choice of _MM_CMPINT_GE is deliberate; while GCC + * uses the preprocessor to define all the _MM_CMPINT_* members, + * in most compilers they are simply normal enum members. However, + * all compilers I've looked at use an object-like macro for + * _MM_CMPINT_GE, which is defined to _MM_CMPINT_NLT. _MM_CMPINT_NLT + * is included in case a compiler does the reverse, though I haven't + * run into one which does. + * + * As for the ICC check, unlike other compilers, merely using the + * AVX-512 types causes ICC to generate AVX-512 instructions. 
*/ +#if (defined(_MM_CMPINT_GE) || defined(_MM_CMPINT_NLT)) && (defined(SIMDE_X86_AVX512F_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m512 simde__m512; + typedef __m512i simde__m512i; + typedef __m512d simde__m512d; + + typedef __mmask8 simde__mmask8; + typedef __mmask16 simde__mmask16; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m512 SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m512i SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m512d SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m512_private simde__m512; + typedef simde__m512i_private simde__m512i; + typedef simde__m512d_private simde__m512d; + #endif + + typedef uint8_t simde__mmask8; + typedef uint16_t simde__mmask16; +#endif + +#if (defined(_AVX512BF16INTRIN_H_INCLUDED) || defined(__AVX512BF16INTRIN_H)) && (defined(SIMDE_X86_AVX512BF16_NATIVE) || !defined(HEDLEY_INTEL_VERSION)) + typedef __m128bh simde__m128bh; + typedef __m256bh simde__m256bh; + typedef __m512bh simde__m512bh; +#else + #if defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128bh SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m256bh SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float32 simde__m512bh SIMDE_AVX512_ALIGN SIMDE_VECTOR(64) SIMDE_MAY_ALIAS; + #else + typedef simde__m128bh_private simde__m128bh; + typedef simde__m256bh_private simde__m256bh; + typedef simde__m512bh_private simde__m512bh; + #endif +#endif + +/* These are really part of AVX-512VL / AVX-512BW (in GCC __mmask32 is + * in avx512vlintrin.h and __mmask64 is in avx512bwintrin.h, in clang + * both are in avx512bwintrin.h), not AVX-512F. However, we don't have + * a good (not-compiler-specific) way to detect if these headers have + * been included. In compilers which support AVX-512F but not + * AVX-512BW/VL (e.g., GCC 4.9) we need typedefs since __mmask{32,64) + * won't exist. + * + * AFAICT __mmask{32,64} are always just typedefs to uint{32,64}_t + * in all compilers, so it's safe to use these instead of typedefs to + * __mmask{16,32}. If you run into a problem with this please file an + * issue and we'll try to figure out a work-around. 
*/ +typedef uint32_t simde__mmask32; +typedef uint64_t simde__mmask64; +#if !defined(__mmask32) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef uint32_t __mmask32; + #else + #define __mmask32 uint32_t; + #endif +#endif +#if !defined(__mmask64) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + #if defined(HEDLEY_GCC_VERSION) + typedef unsigned long long __mmask64; + #else + typedef uint64_t __mmask64; + #endif + #else + #define __mmask64 uint64_t; + #endif +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m512 __m512; + typedef simde__m512i __m512i; + typedef simde__m512d __m512d; + #else + #define __m512 simde__m512 + #define __m512i simde__m512i + #define __m512d simde__m512d + #endif +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) + typedef simde__m128bh __m128bh; + typedef simde__m256bh __m256bh; + typedef simde__m512bh __m512bh; + #else + #define __m128bh simde__m128bh + #define __m256bh simde__m256bh + #define __m512bh simde__m512bh + #endif +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh), "simde__m128bh size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128bh_private), "simde__m128bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh), "simde__m256bh size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256bh_private), "simde__m256bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh), "simde__m512bh size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512bh_private), "simde__m512bh_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512), "simde__m512 size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512_private), "simde__m512_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i), "simde__m512i size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512i_private), "simde__m512i_private size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d), "simde__m512d size incorrect"); +HEDLEY_STATIC_ASSERT(64 == sizeof(simde__m512d_private), "simde__m512d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh) == 16, "simde__m128bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128bh_private) == 16, "simde__m128bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh) == 32, "simde__m256bh is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256bh_private) == 32, "simde__m256bh_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh) == 32, "simde__m512bh is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512bh_private) == 32, "simde__m512bh_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512) == 32, "simde__m512 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512_private) == 32, "simde__m512_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i) == 32, "simde__m512i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512i_private) == 32, "simde__m512i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d) == 32, "simde__m512d is not 32-byte aligned"); 
+HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m512d_private) == 32, "simde__m512d_private is not 32-byte aligned"); +#endif + +#define SIMDE_MM_CMPINT_EQ 0 +#define SIMDE_MM_CMPINT_LT 1 +#define SIMDE_MM_CMPINT_LE 2 +#define SIMDE_MM_CMPINT_FALSE 3 +#define SIMDE_MM_CMPINT_NE 4 +#define SIMDE_MM_CMPINT_NLT 5 +#define SIMDE_MM_CMPINT_NLE 6 +#define SIMDE_MM_CMPINT_TRUE 7 +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && !defined(_MM_CMPINT_EQ) +#define _MM_CMPINT_EQ SIMDE_MM_CMPINT_EQ +#define _MM_CMPINT_LT SIMDE_MM_CMPINT_LT +#define _MM_CMPINT_LE SIMDE_MM_CMPINT_LE +#define _MM_CMPINT_FALSE SIMDE_MM_CMPINT_FALSE +#define _MM_CMPINT_NE SIMDE_MM_CMPINT_NE +#define _MM_CMPINT_NLT SIMDE_MM_CMPINT_NLT +#define _MM_CMPINT_NLE SIMDE_MM_CMPINT_NLE +#define _MM_CMPINT_TRUE SIMDE_CMPINT_TRUE +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh +simde__m128bh_from_private(simde__m128bh_private v) { + simde__m128bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128bh_private +simde__m128bh_to_private(simde__m128bh v) { + simde__m128bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh +simde__m256bh_from_private(simde__m256bh_private v) { + simde__m256bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256bh_private +simde__m256bh_to_private(simde__m256bh v) { + simde__m256bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh +simde__m512bh_from_private(simde__m512bh_private v) { + simde__m512bh r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512bh_private +simde__m512bh_to_private(simde__m512bh v) { + simde__m512bh_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde__m512_from_private(simde__m512_private v) { + simde__m512 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512_private +simde__m512_to_private(simde__m512 v) { + simde__m512_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde__m512i_from_private(simde__m512i_private v) { + simde__m512i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i_private +simde__m512i_to_private(simde__m512i v) { + simde__m512i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde__m512d_from_private(simde__m512d_private v) { + simde__m512d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d_private +simde__m512d_to_private(simde__m512d v) { + simde__m512d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_TYPES_H) */ +/* :: End simde/x86/avx512/types.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mov.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_MOV_H) +#define SIMDE_X86_AVX512_MOV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cast.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_CAST_H) +#define SIMDE_X86_AVX512_CAST_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castpd_ps (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_ps + #define _mm512_castpd_ps(a) simde_mm512_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castpd_si512 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd_si512 + #define _mm512_castpd_si512(a) simde_mm512_castpd_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castps_pd (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_pd + #define _mm512_castps_pd(a) simde_mm512_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castps_si512 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps_si512(a); + #else + simde__m512i r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps_si512 + #define _mm512_castps_si512(a) simde_mm512_castps_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castsi512_ps (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_ps(a); + #else + simde__m512 r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_ps + #define _mm512_castsi512_ps(a) simde_mm512_castsi512_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castsi512_pd (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_pd(a); + #else + simde__m512d r; + simde_memcpy(&r, &a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_pd + #define _mm512_castsi512_pd(a) simde_mm512_castsi512_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd128_pd512 (simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd128_pd512(a); + #else + simde__m512d_private r_; + r_.m128d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd128_pd512 + #define _mm512_castpd128_pd512(a) simde_mm512_castpd128_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_castpd256_pd512 (simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd256_pd512(a); + #else + simde__m512d_private r_; + r_.m256d[0] = a; + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd256_pd512 + #define 
_mm512_castpd256_pd512(a) simde_mm512_castpd256_pd512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm512_castpd512_pd128 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd128(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd128 + #define _mm512_castpd512_pd128(a) simde_mm512_castpd512_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm512_castpd512_pd256 (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castpd512_pd256(a); + #else + simde__m512d_private a_ = simde__m512d_to_private(a); + return a_.m256d[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castpd512_pd256 + #define _mm512_castpd512_pd256(a) simde_mm512_castpd512_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps128_ps512 (simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps128_ps512(a); + #else + simde__m512_private r_; + r_.m128[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps128_ps512 + #define _mm512_castps128_ps512(a) simde_mm512_castps128_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_castps256_ps512 (simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps256_ps512(a); + #else + simde__m512_private r_; + r_.m256[0] = a; + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps256_ps512 + #define _mm512_castps256_ps512(a) simde_mm512_castps256_ps512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm512_castps512_ps128 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps128(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps128 + #define _mm512_castps512_ps128(a) simde_mm512_castps512_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm512_castps512_ps256 (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castps512_ps256(a); + #else + simde__m512_private a_ = simde__m512_to_private(a); + return a_.m256[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castps512_ps256 + #define _mm512_castps512_ps256(a) simde_mm512_castps512_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi128_si512 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi128_si512(a); + #else + simde__m512i_private r_; + r_.m128i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi128_si512 + #define _mm512_castsi128_si512(a) simde_mm512_castsi128_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_castsi256_si512 (simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi256_si512(a); + #else + simde__m512i_private r_; + r_.m256i[0] = a; + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi256_si512 + #define _mm512_castsi256_si512(a) simde_mm512_castsi256_si512(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i 
+simde_mm512_castsi512_si128 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si128(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si128 + #define _mm512_castsi512_si128(a) simde_mm512_castsi512_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm512_castsi512_si256 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_castsi512_si256(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + return a_.m256i[0]; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_castsi512_si256 + #define _mm512_castsi512_si256(a) simde_mm512_castsi512_si256(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CAST_H) */ +/* :: End simde/x86/avx512/cast.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SET_H) +#define SIMDE_X86_AVX512_SET_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/load.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_LOAD_H) +#define SIMDE_X86_AVX512_LOAD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_load_pd (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_pd(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d)); + #else + simde__m512d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512d), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_pd + #define _mm512_load_pd(a) simde_mm512_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_load_ps (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_ps(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512)); + #else + simde__m512 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_ps + #define _mm512_load_ps(a) simde_mm512_load_ps(a) +#endif +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_load_si512 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_load_si512(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i)); + #else + simde__m512i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m512i), sizeof(r)); + return r; + #endif +} +#define simde_mm512_load_epi8(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi16(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi32(mem_addr) simde_mm512_load_si512(mem_addr) +#define simde_mm512_load_epi64(mem_addr) simde_mm512_load_si512(mem_addr) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_load_epi8 + #undef _mm512_load_epi16 + #undef _mm512_load_epi32 + #undef _mm512_load_epi64 + #undef _mm512_load_si512 + #define _mm512_load_si512(a) simde_mm512_load_si512(a) + #define _mm512_load_epi8(a) simde_mm512_load_si512(a) + #define _mm512_load_epi16(a) simde_mm512_load_si512(a) + #define _mm512_load_epi32(a) simde_mm512_load_si512(a) + #define _mm512_load_epi64(a) simde_mm512_load_si512(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_LOAD_H) */ +/* :: End simde/x86/avx512/load.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi16 (int16_t e31, int16_t e30, int16_t e29, int16_t e28, int16_t e27, int16_t e26, int16_t e25, int16_t e24, + int16_t e23, int16_t e22, int16_t e21, int16_t e20, int16_t e19, int16_t e18, int16_t e17, int16_t e16, + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + simde__m512i_private r_; + + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + 
r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + r_.i16[16] = e16; + r_.i16[17] = e17; + r_.i16[18] = e18; + r_.i16[19] = e19; + r_.i16[20] = e20; + r_.i16[21] = e21; + r_.i16[22] = e22; + r_.i16[23] = e23; + r_.i16[24] = e24; + r_.i16[25] = e25; + r_.i16[26] = e26; + r_.i16[27] = e27; + r_.i16[28] = e28; + r_.i16[29] = e29; + r_.i16[30] = e30; + r_.i16[31] = e31; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi16 + #define _mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi16(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi32 (int32_t e15, int32_t e14, int32_t e13, int32_t e12, int32_t e11, int32_t e10, int32_t e9, int32_t e8, + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + simde__m512i_private r_; + + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + r_.i32[ 8] = e8; + r_.i32[ 9] = e9; + r_.i32[10] = e10; + r_.i32[11] = e11; + r_.i32[12] = e12; + r_.i32[13] = e13; + r_.i32[14] = e14; + r_.i32[15] = e15; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi32 + #define _mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi32(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi64 (int64_t e7, int64_t e6, int64_t e5, int64_t e4, int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + simde__m512i_private r_; + + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + r_.i64[4] = e4; + r_.i64[5] = e5; + r_.i64[6] = e6; + r_.i64[7] = e7; + + return simde__m512i_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi64 + #define _mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi64(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu8 (uint8_t e63, uint8_t e62, uint8_t e61, uint8_t e60, uint8_t e59, uint8_t e58, uint8_t e57, uint8_t e56, + uint8_t e55, uint8_t e54, uint8_t e53, uint8_t e52, uint8_t e51, uint8_t e50, uint8_t e49, uint8_t e48, + uint8_t e47, uint8_t e46, uint8_t e45, uint8_t e44, uint8_t e43, uint8_t e42, uint8_t e41, uint8_t e40, + uint8_t e39, uint8_t e38, uint8_t e37, uint8_t e36, uint8_t e35, uint8_t e34, uint8_t e33, uint8_t e32, + uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m512i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; + r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + 
r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + r_.u8[32] = e32; + r_.u8[33] = e33; + r_.u8[34] = e34; + r_.u8[35] = e35; + r_.u8[36] = e36; + r_.u8[37] = e37; + r_.u8[38] = e38; + r_.u8[39] = e39; + r_.u8[40] = e40; + r_.u8[41] = e41; + r_.u8[42] = e42; + r_.u8[43] = e43; + r_.u8[44] = e44; + r_.u8[45] = e45; + r_.u8[46] = e46; + r_.u8[47] = e47; + r_.u8[48] = e48; + r_.u8[49] = e49; + r_.u8[50] = e50; + r_.u8[51] = e51; + r_.u8[52] = e52; + r_.u8[53] = e53; + r_.u8[54] = e54; + r_.u8[55] = e55; + r_.u8[56] = e56; + r_.u8[57] = e57; + r_.u8[58] = e58; + r_.u8[59] = e59; + r_.u8[60] = e60; + r_.u8[61] = e61; + r_.u8[62] = e62; + r_.u8[63] = e63; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu16 (uint16_t e31, uint16_t e30, uint16_t e29, uint16_t e28, uint16_t e27, uint16_t e26, uint16_t e25, uint16_t e24, + uint16_t e23, uint16_t e22, uint16_t e21, uint16_t e20, uint16_t e19, uint16_t e18, uint16_t e17, uint16_t e16, + uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m512i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + r_.u16[16] = e16; + r_.u16[17] = e17; + r_.u16[18] = e18; + r_.u16[19] = e19; + r_.u16[20] = e20; + r_.u16[21] = e21; + r_.u16[22] = e22; + r_.u16[23] = e23; + r_.u16[24] = e24; + r_.u16[25] = e25; + r_.u16[26] = e26; + r_.u16[27] = e27; + r_.u16[28] = e28; + r_.u16[29] = e29; + r_.u16[30] = e30; + r_.u16[31] = e31; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu32 (uint32_t e15, uint32_t e14, uint32_t e13, uint32_t e12, uint32_t e11, uint32_t e10, uint32_t e9, uint32_t e8, + uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + simde__m512i_private r_; + + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + r_.u32[ 8] = e8; + r_.u32[ 9] = e9; + r_.u32[10] = e10; + r_.u32[11] = e11; + r_.u32[12] = e12; + r_.u32[13] = e13; + r_.u32[14] = e14; + r_.u32[15] = e15; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_epu64 (uint64_t e7, uint64_t e6, uint64_t e5, uint64_t e4, uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m512i_private r_; + + r_.u64[ 0] = e0; + r_.u64[ 1] = e1; + r_.u64[ 2] = e2; + r_.u64[ 3] = e3; + r_.u64[ 4] = e4; + r_.u64[ 5] = e5; + r_.u64[ 6] = e6; + r_.u64[ 7] = e7; + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set_epi8 (int8_t e63, int8_t e62, int8_t e61, int8_t e60, int8_t e59, int8_t e58, int8_t e57, int8_t e56, + int8_t e55, int8_t e54, int8_t 
e53, int8_t e52, int8_t e51, int8_t e50, int8_t e49, int8_t e48, + int8_t e47, int8_t e46, int8_t e45, int8_t e44, int8_t e43, int8_t e42, int8_t e41, int8_t e40, + int8_t e39, int8_t e38, int8_t e37, int8_t e36, int8_t e35, int8_t e34, int8_t e33, int8_t e32, + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (HEDLEY_GCC_VERSION_CHECK(10,0,0) || SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0)) + return _mm512_set_epi8( + e63, e62, e61, e60, e59, e58, e57, e56, + e55, e54, e53, e52, e51, e50, e49, e48, + e47, e46, e45, e44, e43, e42, e41, e40, + e39, e38, e37, e36, e35, e34, e33, e32, + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0 + ); + #else + simde__m512i_private r_; + + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + r_.i8[32] = e32; + r_.i8[33] = e33; + r_.i8[34] = e34; + r_.i8[35] = e35; + r_.i8[36] = e36; + r_.i8[37] = e37; + r_.i8[38] = e38; + r_.i8[39] = e39; + r_.i8[40] = e40; + r_.i8[41] = e41; + r_.i8[42] = e42; + r_.i8[43] = e43; + r_.i8[44] = e44; + r_.i8[45] = e45; + r_.i8[46] = e46; + r_.i8[47] = e47; + r_.i8[48] = e48; + r_.i8[49] = e49; + r_.i8[50] = e50; + r_.i8[51] = e51; + r_.i8[52] = e52; + r_.i8[53] = e53; + r_.i8[54] = e54; + r_.i8[55] = e55; + r_.i8[56] = e56; + r_.i8[57] = e57; + r_.i8[58] = e58; + r_.i8[59] = e59; + r_.i8[60] = e60; + r_.i8[61] = e61; + r_.i8[62] = e62; + r_.i8[63] = e63; + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_epi8 + #define _mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_epi8(e63, e62, e61, e60, e59, e58, e57, e56, e55, e54, e53, e52, e51, e50, e49, e48, e47, e46, e45, e44, e43, e42, e41, e40, e39, e38, e37, e36, e35, e34, e33, e32, e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m128i (simde__m128i a, simde__m128i b, simde__m128i c, simde__m128i d) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_16(simde__m128i) simde__m128i v[] = { d, c, b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + 
r_.m128i[0] = d; + r_.m128i[1] = c; + r_.m128i[2] = b; + r_.m128i[3] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set_m256i (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + SIMDE_ALIGN_LIKE_32(simde__m256i) simde__m256i v[] = { b, a }; + return simde_mm512_load_si512(HEDLEY_STATIC_CAST(__m512i *, HEDLEY_STATIC_CAST(void *, v))); + #else + simde__m512i_private r_; + + r_.m256i[0] = b; + r_.m256i[1] = a; + + return simde__m512i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set_ps (simde_float32 e15, simde_float32 e14, simde_float32 e13, simde_float32 e12, + simde_float32 e11, simde_float32 e10, simde_float32 e9, simde_float32 e8, + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + simde__m512_private r_; + + r_.f32[ 0] = e0; + r_.f32[ 1] = e1; + r_.f32[ 2] = e2; + r_.f32[ 3] = e3; + r_.f32[ 4] = e4; + r_.f32[ 5] = e5; + r_.f32[ 6] = e6; + r_.f32[ 7] = e7; + r_.f32[ 8] = e8; + r_.f32[ 9] = e9; + r_.f32[10] = e10; + r_.f32[11] = e11; + r_.f32[12] = e12; + r_.f32[13] = e13; + r_.f32[14] = e14; + r_.f32[15] = e15; + + return simde__m512_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_ps + #define _mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_ps(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set_pd (simde_float64 e7, simde_float64 e6, simde_float64 e5, simde_float64 e4, simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + simde__m512d_private r_; + + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + r_.f64[4] = e4; + r_.f64[5] = e5; + r_.f64[6] = e6; + r_.f64[7] = e7; + + return simde__m512d_from_private(r_); +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set_pd + #define _mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm512_set_pd(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET_H) */ +/* :: End simde/x86/avx512/set.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi8 (simde__m128i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi8(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : src_.i8[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi8 + #define _mm_mask_mov_epi8(src, k, a) simde_mm_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi16(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : src_.i16[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi16 + #define _mm_mask_mov_epi16(src, k, a) simde_mm_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi32 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi32(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi32 + #define _mm_mask_mov_epi32(src, k, a) simde_mm_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_mov_epi64 (simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_epi64(src, k, a); + #else + simde__m128i_private + src_ = simde__m128i_to_private(src), + a_ = simde__m128i_to_private(a), + r_; + + /* N.B. CM: No fallbacks as there are only two elements */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_epi64 + #define _mm_mask_mov_epi64(src, k, a) simde_mm_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_mov_pd(simde__m128d src, simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_pd(src, k, a); + #else + return simde_mm_castsi128_pd(simde_mm_mask_mov_epi64(simde_mm_castpd_si128(src), k, simde_mm_castpd_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_pd + #define _mm_mask_mov_pd(src, k, a) simde_mm_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_mov_ps (simde__m128 src, simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_mov_ps(src, k, a); + #else + return simde_mm_castsi128_ps(simde_mm_mask_mov_epi32(simde_mm_castps_si128(src), k, simde_mm_castps_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_mov_ps + #define _mm_mask_mov_ps(src, k, a) simde_mm_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi8 (simde__m256i src, simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi8(src, k, a); + #else + simde__m256i_private + r_, + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi8(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi8(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi8 + #define _mm256_mask_mov_epi8(src, k, a) simde_mm256_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi16(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi16(src_.m128i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi16(src_.m128i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi16 + #define _mm256_mask_mov_epi16(src, k, a) simde_mm256_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi32 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi32(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi32(src_.m128i[0], k , a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi32(src_.m128i[1], k >> 4, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi32 + #define _mm256_mask_mov_epi32(src, k, a) simde_mm256_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_mov_epi64 (simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_epi64(src, k, a); + #else + simde__m256i_private + src_ = simde__m256i_to_private(src), + a_ = simde__m256i_to_private(a), + r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_mask_mov_epi64(src_.m128i[0], k , a_.m128i[0]); + r_.m128i[1] = simde_mm_mask_mov_epi64(src_.m128i[1], k >> 2, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_epi64 + #define _mm256_mask_mov_epi64(src, k, a) simde_mm256_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_mov_pd (simde__m256d src, simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_pd(src, k, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_mask_mov_epi64(simde_mm256_castpd_si256(src), k, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_pd + #define _mm256_mask_mov_pd(src, k, a) simde_mm256_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_mov_ps (simde__m256 src, simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_mov_ps(src, k, a); + #else + return simde_mm256_castsi256_ps(simde_mm256_mask_mov_epi32(simde_mm256_castps_si256(src), k, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_mov_ps + #define _mm256_mask_mov_ps(src, k, a) simde_mm256_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_mov_epi8(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi8(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi8(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : src_.i8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi8 + #define _mm512_mask_mov_epi8(src, k, a) simde_mm512_mask_mov_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_mov_epi16(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi16(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi16(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : src_.i16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi16 + #define _mm512_mask_mov_epi16(src, k, a) simde_mm512_mask_mov_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_epi32(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi32(src_.m256i[0], HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi32(src_.m256i[1], HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : src_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi32 + #define _mm512_mask_mov_epi32(src, k, a) simde_mm512_mask_mov_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mov_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_epi64(src, k, a); + #else + simde__m512i_private + src_ = simde__m512i_to_private(src), + a_ = simde__m512i_to_private(a), + r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_mask_mov_epi64(src_.m256i[0], k , a_.m256i[0]); + r_.m256i[1] = simde_mm256_mask_mov_epi64(src_.m256i[1], k >> 4, a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : src_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_epi64 + #define _mm512_mask_mov_epi64(src, k, a) simde_mm512_mask_mov_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_mov_pd (simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_pd(src, k, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_mask_mov_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_pd + #define _mm512_mask_mov_pd(src, k, a) simde_mm512_mask_mov_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_mov_ps (simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mov_ps(src, k, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_mask_mov_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mov_ps + #define _mm512_mask_mov_ps(src, k, a) simde_mm512_mask_mov_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi8 (simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi8(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi8 + #define _mm_maskz_mov_epi8(k, a) simde_mm_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi16 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi16(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi16 + #define _mm_maskz_mov_epi16(k, a) simde_mm_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi32 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi32(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? 
a_.i32[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi32 + #define _mm_maskz_mov_epi32(k, a) simde_mm_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_mov_epi64 (simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_epi64(k, a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + r_; + + /* N.B. CM: No fallbacks as there are only two elements */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? a_.i64[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_epi64 + #define _mm_maskz_mov_epi64(k, a) simde_mm_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskz_mov_pd (simde__mmask8 k, simde__m128d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_pd(k, a); + #else + return simde_mm_castsi128_pd(simde_mm_maskz_mov_epi64(k, simde_mm_castpd_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_pd + #define _mm_maskz_mov_pd(k, a) simde_mm_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_mov_ps (simde__mmask8 k, simde__m128 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_mov_ps(k, a); + #else + return simde_mm_castsi128_ps(simde_mm_maskz_mov_epi32(k, simde_mm_castps_si128(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_mov_ps + #define _mm_maskz_mov_ps(k, a) simde_mm_maskz_mov_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi8 (simde__mmask32 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi8(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? 
a_.i8[i] : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi8 + #define _mm256_maskz_mov_epi8(k, a) simde_mm256_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi16 (simde__mmask16 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi16(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? a_.i16[i] : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi16 + #define _mm256_maskz_mov_epi16(k, a) simde_mm256_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi32 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi32(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi32(k , a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi32(k >> 4, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi32 + #define _mm256_maskz_mov_epi32(k, a) simde_mm256_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_mov_epi64 (simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_epi64(k, a); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + r_; + + /* N.B. CM: This fallback may not be faster as there are only four elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_maskz_mov_epi64(k , a_.m128i[0]); + r_.m128i[1] = simde_mm_maskz_mov_epi64(k >> 2, a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_epi64 + #define _mm256_maskz_mov_epi64(k, a) simde_mm256_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskz_mov_pd (simde__mmask8 k, simde__m256d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_pd(k, a); + #else + return simde_mm256_castsi256_pd(simde_mm256_maskz_mov_epi64(k, simde_mm256_castpd_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_pd + #define _mm256_maskz_mov_pd(k, a) simde_mm256_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskz_mov_ps (simde__mmask8 k, simde__m256 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_mov_ps(k, a); + #else + return simde_mm256_castsi256_ps(simde_mm256_maskz_mov_epi32(k, simde_mm256_castps_si256(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_mov_ps + #define _mm256_maskz_mov_ps(k, a) simde_mm256_maskz_mov_ps(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi8 (simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_mov_epi8(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSSE3_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi8(HEDLEY_STATIC_CAST(simde__mmask32, k >> 32), a_.m256i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = ((k >> i) & 1) ? a_.i8[i] : INT8_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi8 + #define _mm512_maskz_mov_epi8(k, a) simde_mm512_maskz_mov_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi16 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_mov_epi16(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi16(HEDLEY_STATIC_CAST(simde__mmask16, k >> 16), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((k >> i) & 1) ? 
a_.i16[i] : INT16_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi16 + #define _mm512_maskz_mov_epi16(k, a) simde_mm512_maskz_mov_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi32 (simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_epi32(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k ), a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi32(HEDLEY_STATIC_CAST(simde__mmask8, k >> 8), a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((k >> i) & 1) ? a_.i32[i] : INT32_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi32 + #define _mm512_maskz_mov_epi32(k, a) simde_mm512_maskz_mov_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mov_epi64 (simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_epi64(k, a); + #else + simde__m512i_private + a_ = simde__m512i_to_private(a), + r_; + + /* N.B. CM: Without AVX2 this fallback may not be faster as there are only eight elements */ + #if defined(SIMDE_X86_SSE2_NATIVE) + r_.m256i[0] = simde_mm256_maskz_mov_epi64(k , a_.m256i[0]); + r_.m256i[1] = simde_mm256_maskz_mov_epi64(k >> 4, a_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ((k >> i) & 1) ? 
a_.i64[i] : INT64_C(0); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_epi64 + #define _mm512_maskz_mov_epi64(k, a) simde_mm512_maskz_mov_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_mov_pd (simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_pd(k, a); + #else + return simde_mm512_castsi512_pd(simde_mm512_maskz_mov_epi64(k, simde_mm512_castpd_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_pd + #define _mm512_maskz_mov_pd(k, a) simde_mm512_maskz_mov_pd(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_mov_ps (simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mov_ps(k, a); + #else + return simde_mm512_castsi512_ps(simde_mm512_maskz_mov_epi32(k, simde_mm512_castps_si512(a))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mov_ps + #define _mm512_maskz_mov_ps(k, a) simde_mm512_maskz_mov_ps(k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOV_H) */ +/* :: End simde/x86/avx512/mov.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_abs_epi8(src, k, a); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi8 + #define _mm_mask_abs_epi8(src, k, a) simde_mm_mask_abs_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi8(simde__mmask16 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_abs_epi8(k, a); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi8 + #define _mm_maskz_abs_epi8(k, a) simde_mm_maskz_abs_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_abs_epi16(src, k, a); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi16 + #define _mm_mask_abs_epi16(src, k, a) simde_mm_mask_abs_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi16(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_abs_epi16(k, a); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi16 + #define _mm_maskz_abs_epi16(k, a) simde_mm_maskz_abs_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return 
_mm_mask_abs_epi32(src, k, a); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi32 + #define _mm_mask_abs_epi32(src, k, a) simde_mm_mask_abs_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi32(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_abs_epi32(k, a); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi32 + #define _mm_maskz_abs_epi32(k, a) simde_mm_maskz_abs_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi64(simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_abs_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_srai_epi32(_mm_shuffle_epi32(a, 0xF5), 31); + return _mm_sub_epi64(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vabsq_s64(a_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64x2_t m = vshrq_n_s64(a_.neon_i64, 63); + r_.neon_i64 = vsubq_s64(veorq_s64(a_.neon_i64, m), m); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_abs(a_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_abs(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i64) z = { 0, }; + __typeof__(r_.i64) m = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < z); + r_.i64 = (-a_.i64 & m) | (a_.i64 & ~m); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? -a_.i64[i] : a_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_abs_epi64 + #define _mm_abs_epi64(a) simde_mm_abs_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_abs_epi64(simde__m128i src, simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_abs_epi64(src, k, a); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_abs_epi64 + #define _mm_mask_abs_epi64(src, k, a) simde_mm_mask_abs_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_abs_epi64(simde__mmask8 k, simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_abs_epi64(k, a); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_abs_epi64 + #define _mm_maskz_abs_epi64(k, a) simde_mm_maskz_abs_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi64(simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_abs_epi64(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_abs_epi64(a_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi64 + #define _mm256_abs_epi64(a) simde_mm256_abs_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_abs_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_abs_epi64(src, k, a); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_abs_epi64 + #define _mm256_mask_abs_epi64(src, k, a) simde_mm256_mask_abs_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_abs_epi64(simde__mmask8 k, simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_abs_epi64(k, a); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_abs_epi64 + #define _mm256_maskz_abs_epi64(k, a) simde_mm256_maskz_abs_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi8 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_abs_epi8(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi8(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi8 + #define _mm512_abs_epi8(a) simde_mm512_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_abs_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi8 + #define _mm512_mask_abs_epi8(src, k, a) simde_mm512_mask_abs_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi8 (simde__mmask64 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_abs_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_abs_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi8 + #define _mm512_maskz_abs_epi8(k, a) simde_mm512_maskz_abs_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi16 (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_abs_epi16(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi16(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi16 + #define _mm512_abs_epi16(a) simde_mm512_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_abs_epi16(src, k, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi16 + #define _mm512_mask_abs_epi16(src, k, a) simde_mm512_mask_abs_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi16 (simde__mmask32 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_abs_epi16(k, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_abs_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi16 + #define _mm512_maskz_abs_epi16(k, a) simde_mm512_maskz_abs_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi32(simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_abs_epi32(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi32(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT64_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi32 + #define _mm512_abs_epi32(a) simde_mm512_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_abs_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi32 + #define _mm512_mask_abs_epi32(src, k, a) simde_mm512_mask_abs_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi32(simde__mmask16 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_abs_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_abs_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi32 + #define _mm512_maskz_abs_epi32(k, a) simde_mm512_maskz_abs_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_abs_epi64(simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_abs_epi64(a); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_abs_epi64(a_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { + r_.i64[i] = (a_.i64[i] < INT64_C(0)) ? 
-a_.i64[i] : a_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_epi64 + #define _mm512_abs_epi64(a) simde_mm512_abs_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_abs_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_abs_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_epi64 + #define _mm512_mask_abs_epi64(src, k, a) simde_mm512_mask_abs_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_abs_epi64(simde__mmask8 k, simde__m512i a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_abs_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_abs_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_abs_epi64 + #define _mm512_maskz_abs_epi64(k, a) simde_mm512_maskz_abs_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_abs_ps(simde__m512 v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + return _mm512_abs_ps(v2); + #else + simde__m512_private + r_, + v2_ = simde__m512_to_private(v2); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].neon_f32 = vabsq_f32(v2_.m128_private[i].neon_f32); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128_private) / sizeof(r_.m128_private[0])) ; i++) { + r_.m128_private[i].altivec_f32 = vec_abs(v2_.m128_private[i].altivec_f32); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { + r_.f32[i] = (v2_.f32[i] < INT64_C(0)) ? 
-v2_.f32[i] : v2_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_ps + #define _mm512_abs_ps(v2) simde_mm512_abs_ps(v2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_abs_ps(simde__m512 src, simde__mmask16 k, simde__m512 v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + return _mm512_mask_abs_ps(src, k, v2); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_abs_ps(v2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_ps + #define _mm512_mask_abs_ps(src, k, v2) simde_mm512_mask_abs_ps(src, k, v2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_abs_pd(simde__m512d v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,3,0)) + return _mm512_abs_pd(v2); + #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ + return _mm512_abs_pd(_mm512_castpd_ps(v2)); + #else + simde__m512d_private + r_, + v2_ = simde__m512d_to_private(v2); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].neon_f64 = vabsq_f64(v2_.m128d_private[i].neon_f64); + } + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + for (size_t i = 0 ; i < (sizeof(r_.m128d_private) / sizeof(r_.m128d_private[0])) ; i++) { + r_.m128d_private[i].altivec_f64 = vec_abs(v2_.m128d_private[i].altivec_f64); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { + r_.f64[i] = (v2_.f64[i] < INT64_C(0)) ? 
-v2_.f64[i] : v2_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_abs_pd + #define _mm512_abs_pd(v2) simde_mm512_abs_pd(v2) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_abs_pd(simde__m512d src, simde__mmask8 k, simde__m512d v2) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,3,0)) + return _mm512_mask_abs_pd(src, k, v2); + #elif defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7,0,0)) + /* gcc bug: https://gcc.gnu.org/legacy-ml/gcc-patches/2018-01/msg01962.html */ + return _mm512_mask_abs_pd(src, k, _mm512_castpd_ps(v2)); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_abs_pd(v2)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_abs_pd + #define _mm512_mask_abs_pd(src, k, v2) simde_mm512_mask_abs_pd(src, k, v2) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ABS_H) */ +/* :: End simde/x86/avx512/abs.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/add.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_ADD_H) +#define SIMDE_X86_AVX512_ADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi8(simde__m128i src, simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_add_epi8(src, k, a, b); + #else + return simde_mm_mask_mov_epi8(src, k, simde_mm_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi8 + #define _mm_mask_add_epi8(src, k, a, b) simde_mm_mask_add_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi8(simde__mmask16 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_add_epi8(k, a, b); + #else + return simde_mm_maskz_mov_epi8(k, simde_mm_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi8 + #define _mm_maskz_add_epi8(k, a, b) simde_mm_maskz_add_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi16(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_mask_add_epi16(src, k, a, b); + #else + return simde_mm_mask_mov_epi16(src, k, simde_mm_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi16 + #define _mm_mask_add_epi16(src, k, a, b) simde_mm_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi16(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_maskz_add_epi16(k, a, b); + #else + return simde_mm_maskz_mov_epi16(k, simde_mm_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi16 + #define _mm_maskz_add_epi16(k, a, b) simde_mm_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi32(simde__m128i src, simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_add_epi32(src, k, a, b); + #else + return simde_mm_mask_mov_epi32(src, k, simde_mm_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi32 + #define _mm_mask_add_epi32(src, k, a, b) simde_mm_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi32(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_add_epi32(k, a, b); + #else + return simde_mm_maskz_mov_epi32(k, simde_mm_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi32 + #define _mm_maskz_add_epi32(k, a, b) simde_mm_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_add_epi64(simde__m128i src, simde__mmask8 k, 
simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_mask_add_epi64(src, k, a, b); + #else + return simde_mm_mask_mov_epi64(src, k, simde_mm_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_epi64 + #define _mm_mask_add_epi64(src, k, a, b) simde_mm_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskz_add_epi64(simde__mmask8 k, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_maskz_add_epi64(k, a, b); + #else + return simde_mm_maskz_mov_epi64(k, simde_mm_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_epi64 + #define _mm_maskz_add_epi64(k, a, b) simde_mm_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_add_ss(simde__m128 src, simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return _mm_mask_add_ss(src, k, a, b); + #elif 1 + simde__m128_private + src_ = simde__m128_to_private(src), + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + r_ = simde__m128_to_private(a); + + r_.f32[0] = (k & 1) ? (a_.f32[0] + b_.f32[0]) : src_.f32[0]; + + return simde__m128_from_private(r_); + #else + return simde_mm_move_ss(a, simde_mm_mask_mov_ps(src, k, simde_mm_add_ps(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_add_ss + #define _mm_mask_add_ss(src, k, a, b) simde_mm_mask_add_ss(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskz_add_ss(simde__mmask8 k, simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + return _mm_maskz_add_ss(k, a, b); + #elif 1 + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + r_ = simde__m128_to_private(a); + + r_.f32[0] = (k & 1) ? 
(a_.f32[0] + b_.f32[0]) : 0.0f; + + return simde__m128_from_private(r_); + #else + return simde_mm_move_ss(a, simde_mm_maskz_mov_ps(k, simde_mm_add_ps(a, b))); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm_maskz_add_ss + #define _mm_maskz_add_ss(k, a, b) simde_mm_maskz_add_ss(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi16(simde__m256i src, simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_mask_add_epi16(src, k, a, b); + #else + return simde_mm256_mask_mov_epi16(src, k, simde_mm256_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi16 + #define _mm256_mask_add_epi16(src, k, a, b) simde_mm256_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi16(simde__mmask16 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_maskz_add_epi16(k, a, b); + #else + return simde_mm256_maskz_mov_epi16(k, simde_mm256_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi16 + #define _mm256_maskz_add_epi16(k, a, b) simde_mm256_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi32(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_add_epi32(src, k, a, b); + #else + return simde_mm256_mask_mov_epi32(src, k, simde_mm256_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi32 + #define _mm256_mask_add_epi32(src, k, a, b) simde_mm256_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi32(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_add_epi32(k, a, b); + #else + return simde_mm256_maskz_mov_epi32(k, simde_mm256_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi32 + #define _mm256_maskz_add_epi32(k, a, b) simde_mm256_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_add_epi64(simde__m256i src, simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_mask_add_epi64(src, k, a, b); + #else + return simde_mm256_mask_mov_epi64(src, k, simde_mm256_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_add_epi64 + #define _mm256_mask_add_epi64(src, k, a, b) simde_mm256_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskz_add_epi64(simde__mmask8 k, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_maskz_add_epi64(k, a, b); + #else + return simde_mm256_maskz_mov_epi64(k, simde_mm256_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_add_epi64 + #define _mm256_maskz_add_epi64(k, a, b) simde_mm256_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_add_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = 
simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi8(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi8 + #define _mm512_add_epi8(a, b) simde_mm512_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_add_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi8 + #define _mm512_mask_add_epi8(src, k, a, b) simde_mm512_mask_add_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_add_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_add_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi8 + #define _mm512_maskz_add_epi8(k, a, b) simde_mm512_maskz_add_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_add_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi16(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi16 + #define _mm512_add_epi16(a, b) simde_mm512_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_add_epi16(src, k, a, b); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi16 + #define _mm512_mask_add_epi16(src, k, a, b) simde_mm512_mask_add_epi16(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_add_epi16(k, a, b); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_add_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi16 + #define _mm512_maskz_add_epi16(k, a, b) simde_mm512_maskz_add_epi16(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_ARM_SVE_NATIVE) + const size_t n = sizeof(a_.i32) / 
sizeof(a_.i32[0]); + size_t i = 0; + svbool_t pg = svwhilelt_b32(i, n); + do { + svint32_t + va = svld1_s32(pg, &(a_.i32[i])), + vb = svld1_s32(pg, &(b_.i32[i])); + svst1_s32(pg, &(r_.i32[i]), svadd_s32_x(pg, va, vb)); + i += svcntw(); + pg = svwhilelt_b32(i, n); + } while (svptest_any(svptrue_b32(), pg)); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi32(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi32 + #define _mm512_add_epi32(a, b) simde_mm512_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi32 + #define _mm512_mask_add_epi32(src, k, a, b) simde_mm512_mask_add_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_add_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi32 + #define _mm512_maskz_add_epi32(k, a, b) simde_mm512_maskz_add_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_add_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_add_epi64(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_epi64 + #define _mm512_add_epi64(a, b) simde_mm512_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_add_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_epi64 + #define _mm512_mask_add_epi64(src, k, a, b) simde_mm512_mask_add_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_add_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + 
return _mm512_maskz_add_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_add_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_epi64 + #define _mm512_maskz_add_epi64(k, a, b) simde_mm512_maskz_add_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_add_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_add_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_ps + #define _mm512_add_ps(a, b) simde_mm512_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_add_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_add_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_ps + #define _mm512_mask_add_ps(src, k, a, b) simde_mm512_mask_add_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_add_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_add_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_ps + #define _mm512_maskz_add_ps(k, a, b) simde_mm512_maskz_add_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_add_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_add_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_add_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_add_pd + #define _mm512_add_pd(a, b) simde_mm512_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_add_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_add_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_add_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_add_pd + #define _mm512_mask_add_pd(src, k, a, b) simde_mm512_mask_add_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_add_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_add_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_add_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_add_pd + #define _mm512_maskz_add_pd(k, a, b) 
simde_mm512_maskz_add_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ADD_H) */ +/* :: End simde/x86/avx512/add.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/cmp.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_CMP_H) +#define SIMDE_X86_AVX512_CMP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mov_mask.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_MOV_MASK_H) +#define SIMDE_X86_AVX512_MOV_MASK_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm_movepi8_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movepi8_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movemask_epi8(a)); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask16 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi8_mask + #define _mm_movepi8_mask(a) simde_mm_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi16_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm_movepi16_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* There is no 32-bit _mm_movemask_* function, so we use + * _mm_movemask_epi8 then extract the odd bits. */ + uint_fast16_t r = HEDLEY_STATIC_CAST(uint_fast16_t, simde_mm_movemask_epi8(a)); + r = ( (r >> 1)) & UINT32_C(0x5555); + r = (r | (r >> 1)) & UINT32_C(0x3333); + r = (r | (r >> 2)) & UINT32_C(0x0f0f); + r = (r | (r >> 4)) & UINT32_C(0x00ff); + return HEDLEY_STATIC_CAST(simde__mmask8, r); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi16_mask + #define _mm_movepi16_mask(a) simde_mm_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi32_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movepi32_mask(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_ps(simde_mm_castsi128_ps(a))); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? 
(UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi32_mask + #define _mm_movepi32_mask(a) simde_mm_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm_movepi64_mask (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm_movepi64_mask(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movemask_pd(simde_mm_castsi128_pd(a))); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + simde__mmask8 r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm_movepi64_mask + #define _mm_movepi64_mask(a) simde_mm_movepi64_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm256_movepi8_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_movepi8_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm_movepi8_mask(a_.m128i[i])) << (i * 16); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; + } + #endif + + return HEDLEY_STATIC_CAST(simde__mmask32, r); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi8_mask + #define _mm256_movepi8_mask(a) simde_mm256_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm256_movepi16_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm256_movepi16_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask16 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi16_mask(a_.m128i[i])) << (i * 8); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi16_mask + #define _mm256_movepi16_mask(a) simde_mm256_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_movepi32_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movepi32_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm_movepi32_mask(a_.m128i[i])) << (i * 4); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? 
(UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi32_mask + #define _mm256_movepi32_mask(a) simde_mm256_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm256_movepi64_mask (simde__m256i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm256_movepi64_mask(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask8, simde_mm_movepi64_mask(a_.m128i[i])) << (i * 2); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm256_movepi64_mask + #define _mm256_movepi64_mask(a) simde_mm256_movepi64_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask64 +simde_mm512_movepi8_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movepi8_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask64 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask64, simde_mm256_movepi8_mask(a_.m256i[i])) << (i * 32); + } + #else + r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + r |= (a_.i8[i] < 0) ? (UINT64_C(1) << i) : 0; + } + #endif + + return HEDLEY_STATIC_CAST(simde__mmask64, r); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi8_mask + #define _mm512_movepi8_mask(a) simde_mm512_movepi8_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask32 +simde_mm512_movepi16_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_movepi16_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask32 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask32, simde_mm256_movepi16_mask(a_.m256i[i])) << (i * 16); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i16) / sizeof(a_.i16[0])) ; i++) { + r |= (a_.i16[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi16_mask + #define _mm512_movepi16_mask(a) simde_mm512_movepi16_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_movepi32_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movepi32_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask16 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(simde__mmask16, simde_mm256_movepi32_mask(a_.m256i[i])) << (i * 8); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + r |= (a_.i32[i] < 0) ? 
(UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi32_mask + #define _mm512_movepi32_mask(a) simde_mm512_movepi32_mask(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_movepi64_mask (simde__m512i a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_movepi64_mask(a); + #else + simde__m512i_private a_ = simde__m512i_to_private(a); + simde__mmask8 r = 0; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(a_.m256i) / sizeof(a_.m256i[0])) ; i++) { + r |= simde_mm256_movepi64_mask(a_.m256i[i]) << (i * 4); + } + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + r |= (a_.i64[i] < 0) ? (UINT32_C(1) << i) : 0; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_movepi64_mask + #define _mm512_movepi64_mask(a) simde_mm512_movepi64_mask(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MOV_MASK_H) */ +/* :: End simde/x86/avx512/mov_mask.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setzero.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + * 2020 Christopher Moore + */ + +#if !defined(SIMDE_X86_AVX512_SETZERO_H) +#define SIMDE_X86_AVX512_SETZERO_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_setzero_si512(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_si512(); + #else + simde__m512i r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#define simde_mm512_setzero_epi32() simde_mm512_setzero_si512() +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_si512 + #define _mm512_setzero_si512() simde_mm512_setzero_si512() + #undef _mm512_setzero_epi32 + #define _mm512_setzero_epi32() simde_mm512_setzero_si512() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_setzero_ps(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_ps(); + #else + return simde_mm512_castsi512_ps(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_ps + #define _mm512_setzero_ps() simde_mm512_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_setzero_pd(void) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_setzero_pd(); + #else + return simde_mm512_castsi512_pd(simde_mm512_setzero_si512()); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_setzero_pd + #define _mm512_setzero_pd() simde_mm512_setzero_pd() +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETZERO_H) */ +/* :: End simde/x86/avx512/setzero.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/setone.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_AVX512_SETONE_H) +#define SIMDE_X86_AVX512_SETONE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_setone_si512(void) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } + + return simde__m512i_from_private(r_); +} +#define simde_x_mm512_setone_epi32() simde_x_mm512_setone_si512() + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_setone_ps(void) { + return simde_mm512_castsi512_ps(simde_x_mm512_setone_si512()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_setone_pd(void) { + return simde_mm512_castsi512_pd(simde_x_mm512_setone_si512()); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SETONE_H) */ +/* :: End simde/x86/avx512/setone.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__mmask16 +simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m512_to_private(simde_mm512_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m512_to_private(simde_x_mm512_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_))); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_cmp_ps_mask(a, b, imm8) _mm512_cmp_ps_mask((a), (b), (imm8)) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ + simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m128) / sizeof(simde_mm512_cmp_ps_mask_r_.m128[0])) ; i++) { \ + simde_mm512_cmp_ps_mask_r_.m128[i] = simde_mm_cmp_ps(simde_mm512_cmp_ps_mask_a_.m128[i], simde_mm512_cmp_ps_mask_b_.m128[i], (imm8)); \ + } \ + \ + simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ + })) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) + #define simde_mm512_cmp_ps_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512_private \ + simde_mm512_cmp_ps_mask_r_ = simde__m512_to_private(simde_mm512_setzero_ps()), \ + simde_mm512_cmp_ps_mask_a_ = simde__m512_to_private((a)), \ + simde_mm512_cmp_ps_mask_b_ = simde__m512_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm512_cmp_ps_mask_r_.m256) / sizeof(simde_mm512_cmp_ps_mask_r_.m256[0])) ; i++) { \ + simde_mm512_cmp_ps_mask_r_.m256[i] = simde_mm256_cmp_ps(simde_mm512_cmp_ps_mask_a_.m256[i], simde_mm512_cmp_ps_mask_b_.m256[i], (imm8)); \ + } \ + \ + simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(simde_mm512_cmp_ps_mask_r_))); \ + })) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_ps_mask + #define _mm512_cmp_ps_mask(a, b, imm8) simde_mm512_cmp_ps_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_cmp_ps_mask(a, b, imm8) _mm256_cmp_ps_mask((a), (b), (imm8)) +#else + #define simde_mm256_cmp_ps_mask(a, b, imm8) simde_mm256_movepi32_mask(simde_mm256_castps_si256(simde_mm256_cmp_ps((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps_mask + #define _mm256_cmp_ps_mask(a, b, imm8) simde_mm256_cmp_ps_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_cmp_ps_mask(a, b, imm8) _mm_cmp_ps_mask((a), (b), (imm8)) +#else + #define simde_mm_cmp_ps_mask(a, b, imm8) simde_mm_movepi32_mask(simde_mm_castps_si128(simde_mm_cmp_ps((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps_mask + #define _mm_cmp_ps_mask(a, b, imm8) simde_mm_cmp_ps_mask((a), (b), (imm8)) 
+#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__mmask8 +simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m512d_to_private(simde_mm512_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m512d_to_private(simde_x_mm512_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_))); +} +#if defined(SIMDE_X86_AVX512F_NATIVE) + #define simde_mm512_cmp_pd_mask(a, b, imm8) _mm512_cmp_pd_mask((a), (b), (imm8)) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ + simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ + \ + for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m128d) / sizeof(simde_mm512_cmp_pd_mask_r_.m128d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ + simde_mm512_cmp_pd_mask_r_.m128d[simde_mm512_cmp_pd_mask_i] = simde_mm_cmp_pd(simde_mm512_cmp_pd_mask_a_.m128d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m128d[simde_mm512_cmp_pd_mask_i], (imm8)); \ + } \ + \ + simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ + })) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(256) + #define simde_mm512_cmp_pd_mask(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m512d_private \ + simde_mm512_cmp_pd_mask_r_ = simde__m512d_to_private(simde_mm512_setzero_pd()), \ + simde_mm512_cmp_pd_mask_a_ = simde__m512d_to_private((a)), \ + simde_mm512_cmp_pd_mask_b_ = simde__m512d_to_private((b)); \ + \ + for (size_t simde_mm512_cmp_pd_mask_i = 0 ; simde_mm512_cmp_pd_mask_i < (sizeof(simde_mm512_cmp_pd_mask_r_.m256d) / sizeof(simde_mm512_cmp_pd_mask_r_.m256d[0])) ; simde_mm512_cmp_pd_mask_i++) { \ + simde_mm512_cmp_pd_mask_r_.m256d[simde_mm512_cmp_pd_mask_i] = simde_mm256_cmp_pd(simde_mm512_cmp_pd_mask_a_.m256d[simde_mm512_cmp_pd_mask_i], simde_mm512_cmp_pd_mask_b_.m256d[simde_mm512_cmp_pd_mask_i], (imm8)); \ + } \ + \ + simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(simde_mm512_cmp_pd_mask_r_))); \ + })) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_pd_mask + #define _mm512_cmp_pd_mask(a, b, imm8) simde_mm512_cmp_pd_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm256_cmp_pd_mask(a, b, imm8) _mm256_cmp_pd_mask((a), (b), (imm8)) +#else + #define simde_mm256_cmp_pd_mask(a, b, imm8) simde_mm256_movepi64_mask(simde_mm256_castpd_si256(simde_mm256_cmp_pd((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd_mask + #define _mm256_cmp_pd_mask(a, b, imm8) simde_mm256_cmp_pd_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_cmp_pd_mask(a, b, imm8) _mm_cmp_pd_mask((a), (b), (imm8)) +#else + #define simde_mm_cmp_pd_mask(a, b, imm8) simde_mm_movepi64_mask(simde_mm_castpd_si128(simde_mm_cmp_pd((a), (b), (imm8)))) +#endif +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd_mask + #define _mm_cmp_pd_mask(a, b, imm8) simde_mm_cmp_pd_mask((a), (b), (imm8)) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES 
+simde__mmask32 +simde_mm512_cmp_epu16_mask (simde__m512i a, simde__m512i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + switch (imm8) { + case SIMDE_MM_CMPINT_EQ: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 == b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_LT: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 < b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_LE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 <= b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_FALSE: + r_ = simde__m512i_to_private(simde_mm512_setzero_si512()); + break; + + + case SIMDE_MM_CMPINT_NE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), (a_.u16 != b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_NLT: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 < b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = !(a_.u16[i] < b_.u16[i]) ? ~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_NLE: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), ~(a_.u16 <= b_.u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = !(a_.u16[i] <= b_.u16[i]) ? 
~UINT16_C(0) : UINT16_C(0); + } + #endif + break; + + case SIMDE_MM_CMPINT_TRUE: + r_ = simde__m512i_to_private(simde_x_mm512_setone_si512()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde_mm512_movepi16_mask(simde__m512i_from_private(r_)); +} +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_cmp_epu16_mask(a, b, imm8) _mm512_cmp_epu16_mask((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_cmp_epu16_mask + #define _mm512_cmp_epu16_mask(a, b, imm8) simde_mm512_cmp_epu16_mask((a), (b), (imm8)) +#endif + +#if defined(SIMDE_X86_AVX512BW_NATIVE) + #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) _mm512_mask_cmp_epu16_mask(k1, a, b, imm8) +#else + #define simde_mm512_mask_cmp_epu16_mask(k1, a, b, imm8) (k1) & simde_mm512_cmp_epu16_mask(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cmp_epu16_mask +#define _mm512_mask_cmp_epu16_mask(a, b, imm8) simde_mm512_mask_cmp_epu16_mask((a), (b), (imm8)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_CMP_H) */ +/* :: End simde/x86/avx512/cmp.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/copysign.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_COPYSIGN_H) +#define SIMDE_X86_AVX512_COPYSIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/and.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_AND_H) +#define SIMDE_X86_AVX512_AND_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_and_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_and_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256d[0] = simde_mm256_and_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_and_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_pd + #define _mm512_and_pd(a, b) simde_mm512_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_and_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_and_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256[0] = simde_mm256_and_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_and_ps(a_.m256[1], b_.m256[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_ps + #define _mm512_and_ps(a, b) simde_mm512_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_and_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_and_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_and_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_ps + #define _mm512_mask_and_ps(src, k, a, b) simde_mm512_mask_and_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_and_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_and_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_and_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_ps + #define _mm512_maskz_and_ps(k, a, b) simde_mm512_maskz_and_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_and_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_and_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_and_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_pd + #define _mm512_mask_and_pd(src, k, a, b) simde_mm512_mask_and_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_and_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_and_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_and_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_pd + #define _mm512_maskz_and_pd(k, a, b) simde_mm512_maskz_and_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_epi32 + #define _mm512_and_epi32(a, b) simde_mm512_and_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_and_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_and_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_and_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_epi32 + #define _mm512_mask_and_epi32(src, k, v2, v3) simde_mm512_mask_and_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_and_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_and_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_and_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_epi32 + #define _mm512_maskz_and_epi32(k, a, b) simde_mm512_maskz_and_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_epi64 + #define _mm512_and_epi64(a, b) simde_mm512_and_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_and_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_and_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_and_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_and_epi64 + #define _mm512_mask_and_epi64(src, k, a, b) simde_mm512_mask_and_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_and_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_and_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_and_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_and_epi64 + #define _mm512_maskz_and_epi64(k, a, b) simde_mm512_maskz_and_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_and_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_and_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_and_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_and_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_and_si512 + #define _mm512_and_si512(a, b) simde_mm512_and_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_AND_H) */ +/* :: End simde/x86/avx512/and.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/andnot.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, 
and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_ANDNOT_H) +#define SIMDE_X86_AVX512_ANDNOT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_andnot_ps(a, b) _mm512_andnot_ps(a, b) +#else + #define simde_mm512_andnot_ps(a, b) simde_mm512_castsi512_ps(simde_mm512_andnot_si512(simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_ps + #define _mm512_andnot_ps(a, b) simde_mm512_andnot_ps(a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_andnot_ps(src, k, a, b) _mm512_mask_andnot_ps((src), (k), (a), (b)) +#else + #define simde_mm512_mask_andnot_ps(src, k, a, b) simde_mm512_castsi512_ps(simde_mm512_mask_andnot_epi32(simde_mm512_castps_si512(src), k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_ps + #define _mm512_mask_andnot_ps(src, k, a, b) simde_mm512_mask_andnot_ps(src, k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_andnot_ps(k, a, b) _mm512_maskz_andnot_ps((k), (a), (b)) +#else + #define simde_mm512_maskz_andnot_ps(k, a, b) simde_mm512_castsi512_ps(simde_mm512_maskz_andnot_epi32(k, simde_mm512_castps_si512(a), simde_mm512_castps_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_ps + #define _mm512_maskz_andnot_ps(k, a, b) simde_mm512_maskz_andnot_ps(k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_andnot_pd(a, b) _mm512_andnot_pd(a, b) +#else + #define simde_mm512_andnot_pd(a, b) simde_mm512_castsi512_pd(simde_mm512_andnot_si512(simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_pd + #define _mm512_andnot_pd(a, b) simde_mm512_andnot_pd(a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_mask_andnot_pd(src, k, a, b) _mm512_mask_andnot_pd((src), (k), (a), (b)) +#else + #define simde_mm512_mask_andnot_pd(src, k, a, b) simde_mm512_castsi512_pd(simde_mm512_mask_andnot_epi64(simde_mm512_castpd_si512(src), k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_pd 
+ #define _mm512_mask_andnot_pd(src, k, a, b) simde_mm512_mask_andnot_pd(src, k, a, b) +#endif + +#if defined(SIMDE_X86_AVX512DQ_NATIVE) + #define simde_mm512_maskz_andnot_pd(k, a, b) _mm512_maskz_andnot_pd((k), (a), (b)) +#else + #define simde_mm512_maskz_andnot_pd(k, a, b) simde_mm512_castsi512_pd(simde_mm512_maskz_andnot_epi64(k, simde_mm512_castpd_si512(a), simde_mm512_castpd_si512(b))) +#endif +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_pd + #define _mm512_maskz_andnot_pd(k, a, b) simde_mm512_maskz_andnot_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_andnot_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_andnot_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_andnot_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_andnot_si256(a_.m256i[1], b_.m256i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#define simde_mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) +#define simde_mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_andnot_si512 + #define _mm512_andnot_si512(a, b) simde_mm512_andnot_si512(a, b) + #undef _mm512_andnot_epi32 + #define _mm512_andnot_epi32(a, b) simde_mm512_andnot_si512(a, b) + #undef _mm512_andnot_epi64 + #define _mm512_andnot_epi64(a, b) simde_mm512_andnot_si512(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_andnot_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_andnot_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_andnot_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_epi32 + #define _mm512_mask_andnot_epi32(src, k, a, b) simde_mm512_mask_andnot_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_andnot_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_andnot_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_andnot_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_epi32 + #define _mm512_maskz_andnot_epi32(k, a, b) simde_mm512_maskz_andnot_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_andnot_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_andnot_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_andnot_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_andnot_epi64 + #define _mm512_mask_andnot_epi64(src, k, a, b) simde_mm512_mask_andnot_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_andnot_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_andnot_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_andnot_epi64(a, 
b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_andnot_epi64 + #define _mm512_maskz_andnot_epi64(k, a, b) simde_mm512_maskz_andnot_epi64(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_ANDNOT_H) */ +/* :: End simde/x86/avx512/andnot.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/xor.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_XOR_H) +#define SIMDE_X86_AVX512_XOR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_xor_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_xor_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + /* TODO: generate reduced case to give to Intel */ + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) && !defined(HEDLEY_INTEL_VERSION) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_xor_ps(a_.m256[i], b_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_ps + #define _mm512_xor_ps(a, b) simde_mm512_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_xor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_xor_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_xor_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_ps + 
#define _mm512_mask_xor_ps(src, k, a, b) simde_mm512_mask_xor_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_xor_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_xor_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_xor_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_ps + #define _mm512_maskz_xor_ps(k, a, b) simde_mm512_maskz_xor_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_xor_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_xor_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_xor_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_xor_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_pd + #define _mm512_xor_pd(a, b) simde_mm512_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_xor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_xor_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_xor_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_pd + #define _mm512_mask_xor_pd(src, k, a, b) simde_mm512_mask_xor_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_xor_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_xor_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_xor_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_pd + #define _mm512_maskz_xor_pd(k, a, b) simde_mm512_maskz_xor_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_epi32 + #define _mm512_xor_epi32(a, b) simde_mm512_xor_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_xor_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_xor_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, 
simde_mm512_xor_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_epi32 + #define _mm512_mask_xor_epi32(src, k, v2, v3) simde_mm512_mask_xor_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_xor_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_xor_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_xor_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_epi32 + #define _mm512_maskz_xor_epi32(k, a, b) simde_mm512_maskz_xor_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_xor_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_epi64 + #define _mm512_xor_epi64(a, b) simde_mm512_xor_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_xor_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_xor_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_xor_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_xor_epi64 + #define _mm512_mask_xor_epi64(src, k, a, b) simde_mm512_mask_xor_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_xor_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_xor_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_xor_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_xor_epi64 + #define _mm512_maskz_xor_epi64(k, a, b) simde_mm512_maskz_xor_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_xor_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_xor_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_xor_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_xor_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + r_.m128i[2] = simde_mm_xor_si128(a_.m128i[2], b_.m128i[2]); + r_.m128i[3] = simde_mm_xor_si128(a_.m128i[3], b_.m128i[3]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ 
b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_xor_si512 + #define _mm512_xor_si512(a, b) simde_mm512_xor_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_XOR_H) */ +/* :: End simde/x86/avx512/xor.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_copysign_ps(simde__m512 dest, simde__m512 src) { + simde__m512_private + r_, + dest_ = simde__m512_to_private(dest), + src_ = simde__m512_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m512 sgnbit = simde_mm512_xor_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm512_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm512_xor_ps(simde_mm512_and_ps(sgnbit, src), simde_mm512_andnot_ps(sgnbit, dest)); + #endif + + return simde__m512_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_copysign_pd(simde__m512d dest, simde__m512d src) { + simde__m512d_private + r_, + dest_ = simde__m512d_to_private(dest), + src_ = simde__m512d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m512d sgnbit = simde_mm512_xor_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm512_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm512_xor_pd(simde_mm512_and_pd(sgnbit, src), simde_mm512_andnot_pd(sgnbit, dest)); + #endif + + return simde__m512d_from_private(r_); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_COPYSIGN_H) */ +/* :: End simde/x86/avx512/copysign.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/xorsign.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +/* This is a SIMDe extension which is not part of AVX-512. It exists + * because a lot of numerical methods in SIMDe have algoriths which do + * something like: + * + * float sgn = input < 0 ? -1 : 1; + * ... 
+ * return res * sgn; + * + * Which can be replaced with a much more efficient call to xorsign: + * + * return simde_x_mm512_xorsign_ps(res, input); + * + * While this was originally intended for use in SIMDe, please feel + * free to use it in your code. + */ + +#if !defined(SIMDE_X86_AVX512_XORSIGN_H) +#define SIMDE_X86_AVX512_XORSIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/set1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SET1_H) +#define SIMDE_X86_AVX512_SET1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi8(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi8 + #define _mm512_set1_epi8(a) simde_mm512_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi8(simde__m512i src, simde__mmask64 k, int8_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_set1_epi8(src, k, a); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_set1_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi8 + #define _mm512_mask_set1_epi8(src, k, a) simde_mm512_mask_set1_epi8(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi8(simde__mmask64 k, int8_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_set1_epi8(k, a); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_set1_epi8(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi8 + #define _mm512_maskz_set1_epi8(k, a) simde_mm512_maskz_set1_epi8(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi16(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi16 + #define _mm512_set1_epi16(a) simde_mm512_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi16(simde__m512i src, simde__mmask32 k, int16_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_set1_epi16(src, k, a); + #else + return simde_mm512_mask_mov_epi16(src, k, simde_mm512_set1_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi16 + #define _mm512_mask_set1_epi16(src, k, a) simde_mm512_mask_set1_epi16(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi16(simde__mmask32 k, int16_t a) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_set1_epi16(k, a); + #else + return simde_mm512_maskz_mov_epi16(k, simde_mm512_set1_epi16(a)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi16 + #define _mm512_maskz_set1_epi16(k, a) simde_mm512_maskz_set1_epi16(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi32(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + + return 
simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi32 + #define _mm512_set1_epi32(a) simde_mm512_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi32(simde__m512i src, simde__mmask16 k, int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_set1_epi32(src, k, a); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_set1_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi32 + #define _mm512_mask_set1_epi32(src, k, a) simde_mm512_mask_set1_epi32(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi32(simde__mmask16 k, int32_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_set1_epi32(k, a); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_set1_epi32(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi32 + #define _mm512_maskz_set1_epi32(k, a) simde_mm512_maskz_set1_epi32(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_set1_epi64 (int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_epi64(a); + #else + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_epi64 + #define _mm512_set1_epi64(a) simde_mm512_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_set1_epi64(simde__m512i src, simde__mmask8 k, int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_set1_epi64(src, k, a); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_set1_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_set1_epi64 + #define _mm512_mask_set1_epi64(src, k, a) simde_mm512_mask_set1_epi64(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_set1_epi64(simde__mmask8 k, int64_t a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_set1_epi64(k, a); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_set1_epi64(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_set1_epi64 + #define _mm512_maskz_set1_epi64(k, a) simde_mm512_maskz_set1_epi64(k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu8 (uint8_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu16 (uint16_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu32 (uint32_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a; + } + + return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_x_mm512_set1_epu64 (uint64_t a) { + simde__m512i_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a; + } + + 
return simde__m512i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_ps(a); + #else + simde__m512_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_ps + #define _mm512_set1_ps(a) simde_mm512_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_set1_pd(a); + #else + simde__m512d_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_set1_pd + #define _mm512_set1_pd(a) simde_mm512_set1_pd(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SET1_H) */ +/* :: End simde/x86/avx512/set1.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_xorsign_ps(simde__m512 dest, simde__m512 src) { + return simde_mm512_xor_ps(simde_mm512_and_ps(simde_mm512_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_xorsign_pd(simde__m512d dest, simde__m512d src) { + return simde_mm512_xor_pd(simde_mm512_and_pd(simde_mm512_set1_pd(-0.0), src), dest); +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_XORSIGN_H) */ +/* :: End simde/x86/avx512/xorsign.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/div.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_DIV_H) +#define SIMDE_X86_AVX512_DIV_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_div_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_div_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_ps + #define _mm512_div_ps(a, b) simde_mm512_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_div_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_div_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_ps + #define _mm512_mask_div_ps(src, k, a, b) simde_mm512_mask_div_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_div_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_div_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_div_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_div_ps + #define _mm512_maskz_div_ps(k, a, b) simde_mm512_maskz_div_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_div_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_div_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_pd + #define _mm512_div_pd(a, b) simde_mm512_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_div_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_div_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_pd + 
#define _mm512_mask_div_pd(src, k, a, b) simde_mm512_mask_div_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_div_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_div_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_div_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_div_pd + #define _mm512_maskz_div_pd(k, a, b) simde_mm512_maskz_div_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_DIV_H) */ +/* :: End simde/x86/avx512/div.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/fmadd.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_FMADD_H) +#define SIMDE_X86_AVX512_FMADD_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_fmadd_ps (simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmadd_ps(a, b, c); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b), + c_ = simde__m512_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_fmadd_ps(a_.m256[i], b_.m256[i], c_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = (a_.f32 * b_.f32) + c_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] * b_.f32[i]) + c_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmadd_ps + #define _mm512_fmadd_ps(a, b, c) simde_mm512_fmadd_ps(a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_fmadd_ps(simde__m512 a, simde__mmask16 k, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_fmadd_ps(a, k, b, c); + #else + return simde_mm512_mask_mov_ps(a, k, simde_mm512_fmadd_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_fmadd_ps + #define _mm512_mask_fmadd_ps(a, k, b, c) simde_mm512_mask_fmadd_ps(a, k, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_fmadd_ps(simde__mmask16 k, simde__m512 a, simde__m512 b, simde__m512 c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_fmadd_ps(k, a, b, c); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_fmadd_ps(a, b, c)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_fmadd_ps + #define _mm512_maskz_fmadd_ps(k, a, b, c) simde_mm512_maskz_fmadd_ps(k, a, b, c) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_fmadd_pd (simde__m512d a, simde__m512d b, simde__m512d c) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_fmadd_pd(a, b, c); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b), + c_ = simde__m512d_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_fmadd_pd(a_.m256d[i], b_.m256d[i], c_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = (a_.f64 * b_.f64) + c_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] * b_.f64[i]) + c_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_fmadd_pd + #define _mm512_fmadd_pd(a, b, c) simde_mm512_fmadd_pd(a, b, c) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_FMADD_H) */ +/* :: End 
simde/x86/avx512/fmadd.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/mul.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_MUL_H) +#define SIMDE_X86_AVX512_MUL_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mul_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_mul_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_ps + #define _mm512_mul_ps(a, b) simde_mm512_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_mul_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_mul_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_ps + #define _mm512_mask_mul_ps(src, k, a, b) simde_mm512_mask_mul_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_mul_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_mul_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_ps + #define _mm512_maskz_mul_ps(k, a, b) simde_mm512_maskz_mul_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mul_pd 
(simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_mul_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_pd + #define _mm512_mul_pd(a, b) simde_mm512_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_mul_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_mul_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_pd + #define _mm512_mask_mul_pd(src, k, a, b) simde_mm512_mask_mul_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_mul_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_mul_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_pd + #define _mm512_maskz_mul_pd(k, a, b) simde_mm512_maskz_mul_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mul_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + simde__m512i_private x; + __typeof__(r_.i64) ta, tb; + + /* Get even numbered 32-bit values */ + x.i32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.i32, b_.i32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + /* Cast to 64 bits */ + SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].i32); + SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].i32); + r_.i64 = ta * tb; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[i << 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i << 1]); + } + #endif + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_epi32 + #define _mm512_mul_epi32(a, b) simde_mm512_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mul_epi32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_epi32 + #define _mm512_mask_mul_epi32(src, k, a, b) simde_mm512_mask_mul_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mul_epi32(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epi32(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_epi32 + #define _mm512_maskz_mul_epi32(k, a, b) simde_mm512_maskz_mul_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mul_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mul_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_SHUFFLE_VECTOR_) + simde__m512i_private x; + __typeof__(r_.u64) ta, tb; + + x.u32 = SIMDE_SHUFFLE_VECTOR_(32, 64, a_.u32, b_.u32, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + SIMDE_CONVERT_VECTOR_(ta, x.m256i_private[0].u32); + SIMDE_CONVERT_VECTOR_(tb, x.m256i_private[1].u32); + r_.u64 = ta * tb; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i << 1]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i << 1]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mul_epu32 + #define _mm512_mul_epu32(a, b) simde_mm512_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_mul_epu32(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_mul_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_mul_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_mul_epu32 + #define _mm512_mask_mul_epu32(src, k, a, b) simde_mm512_mask_mul_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_mul_epu32(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_mul_epu32(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_mul_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_mul_epu32 + #define _mm512_maskz_mul_epu32(k, a, b) simde_mm512_maskz_mul_epu32(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_MUL_H) */ +/* :: End simde/x86/avx512/mul.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/negate.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_NEGATE_H) +#define SIMDE_X86_AVX512_NEGATE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_negate_ps(simde__m512 a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return simde_mm512_xor_ps(a,_mm512_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_negate_pd(simde__m512d a) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return simde_mm512_xor_pd(a, _mm512_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_NEGATE_H) */ +/* :: End simde/x86/avx512/negate.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/or.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_OR_H) +#define SIMDE_X86_AVX512_OR_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_or_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_or_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256[0] = simde_mm256_or_ps(a_.m256[0], b_.m256[0]); + r_.m256[1] = simde_mm256_or_ps(a_.m256[1], b_.m256[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_ps + #define _mm512_or_ps(a, b) simde_mm512_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_or_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_or_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_or_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_ps + #define _mm512_mask_or_ps(src, k, a, b) simde_mm512_mask_or_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_or_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_or_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_or_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_ps + #define _mm512_maskz_or_ps(k, a, b) simde_mm512_maskz_or_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_or_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_or_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + r_.m256d[0] = simde_mm256_or_pd(a_.m256d[0], b_.m256d[0]); + r_.m256d[1] = simde_mm256_or_pd(a_.m256d[1], b_.m256d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_pd + #define _mm512_or_pd(a, b) simde_mm512_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_or_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_mask_or_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_or_pd(a, b)); + #endif +} 
+#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_pd + #define _mm512_mask_or_pd(src, k, a, b) simde_mm512_mask_or_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_or_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512DQ_NATIVE) + return _mm512_maskz_or_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_or_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_pd + #define _mm512_maskz_or_pd(k, a, b) simde_mm512_maskz_or_pd(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 | b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] | b_.i32[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_epi32 + #define _mm512_or_epi32(a, b) simde_mm512_or_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_or_epi32(simde__m512i src, simde__mmask16 k, simde__m512i v2, simde__m512i v3) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_or_epi32(src, k, v2, v3); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_or_epi32(v2, v3)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_epi32 + #define _mm512_mask_or_epi32(src, k, v2, v3) simde_mm512_mask_or_epi32(src, k, v2, v3) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_or_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_or_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_or_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_epi32 + #define _mm512_maskz_or_epi32(k, a, b) simde_mm512_maskz_or_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_or_si256(a_.m256i[i], b_.m256i[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_epi64 + #define _mm512_or_epi64(a, b) simde_mm512_or_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_or_epi64(simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_or_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_or_epi64(a, b)); + #endif +} +#if 
defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_or_epi64 + #define _mm512_mask_or_epi64(src, k, a, b) simde_mm512_mask_or_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_or_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_or_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_or_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_or_epi64 + #define _mm512_maskz_or_epi64(k, a, b) simde_mm512_maskz_or_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_or_si512 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_or_si512(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_X86_AVX2_NATIVE) + r_.m256i[0] = simde_mm256_or_si256(a_.m256i[0], b_.m256i[0]); + r_.m256i[1] = simde_mm256_or_si256(a_.m256i[1], b_.m256i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_or_si512 + #define _mm512_or_si512(a, b) simde_mm512_or_si512(a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_OR_H) */ +/* :: End simde/x86/avx512/or.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sqrt.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + */ + +#if !defined(SIMDE_X86_AVX512_SQRT_H) +#define SIMDE_X86_AVX512_SQRT_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sqrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sqrt_ps(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256[0] = simde_mm256_sqrt_ps(a_.m256[0]); + r_.m256[1] = simde_mm256_sqrt_ps(a_.m256[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) +# define _mm512_sqrt_ps(a) simde_mm512_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sqrt_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sqrt_ps + #define _mm512_mask_sqrt_ps(src, k, a) simde_mm512_mask_sqrt_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sqrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sqrt_pd(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + r_.m256d[0] = simde_mm256_sqrt_pd(a_.m256d[0]); + r_.m256d[1] = simde_mm256_sqrt_pd(a_.m256d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) +# define _mm512_sqrt_pd(a) simde_mm512_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sqrt_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sqrt_pd + #define _mm512_mask_sqrt_pd(src, k, a) simde_mm512_mask_sqrt_pd(src, k, a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SQRT_H) */ +/* :: End simde/x86/avx512/sqrt.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx512/sub.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX512_SUB_H) +#define SIMDE_X86_AVX512_SUB_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sub_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi8(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi8 + #define _mm512_sub_epi8(a, b) simde_mm512_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sub_epi8 (simde__m512i src, simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_mask_sub_epi8(src, k, a, b); + #else + return simde_mm512_mask_mov_epi8(src, k, simde_mm512_sub_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_epi8 + #define _mm512_mask_sub_epi8(src, k, a, b) simde_mm512_mask_sub_epi8(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sub_epi8 (simde__mmask64 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_maskz_sub_epi8(k, a, b); + #else + return simde_mm512_maskz_mov_epi8(k, simde_mm512_sub_epi8(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_epi8 + #define _mm512_maskz_sub_epi8(k, a, b) simde_mm512_maskz_sub_epi8(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512BW_NATIVE) + return _mm512_sub_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi16(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_sub_epi16 + #define _mm512_sub_epi16(a, b) simde_mm512_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi32(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi32 + #define _mm512_sub_epi32(a, b) simde_mm512_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sub_epi32 (simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_sub_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_epi32 + #define _mm512_mask_sub_epi32(src, k, a, b) simde_mm512_mask_sub_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sub_epi32(simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_epi32(k, a, b); + #else + return simde_mm512_maskz_mov_epi32(k, simde_mm512_sub_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_epi32 + #define _mm512_maskz_sub_epi32(k, a, b) simde_mm512_maskz_sub_epi32(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_sub_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_sub_epi64(a_.m256i[i], b_.m256i[i]); + } + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_epi64 + #define _mm512_sub_epi64(a, b) simde_mm512_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_sub_epi64 (simde__m512i src, simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_epi64(src, k, a, b); + #else + return simde_mm512_mask_mov_epi64(src, k, simde_mm512_sub_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_epi64 + #define _mm512_mask_sub_epi64(src, k, a, b) simde_mm512_mask_sub_epi64(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_maskz_sub_epi64(simde__mmask8 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_epi64(k, a, b); + #else + return simde_mm512_maskz_mov_epi64(k, simde_mm512_sub_epi64(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_epi64 + #define _mm512_maskz_sub_epi64(k, a, b) simde_mm512_maskz_sub_epi64(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 
+simde_mm512_sub_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_ps(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sub_ps(a_.m256[i], b_.m256[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_ps + #define _mm512_sub_ps(a, b) simde_mm512_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sub_ps (simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sub_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_ps + #define _mm512_mask_sub_ps(src, k, a, b) simde_mm512_mask_sub_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_maskz_sub_ps(simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_ps(k, a, b); + #else + return simde_mm512_maskz_mov_ps(k, simde_mm512_sub_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_ps + #define _mm512_maskz_sub_ps(k, a, b) simde_mm512_maskz_sub_ps(k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sub_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sub_pd(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sub_pd(a_.m256d[i], b_.m256d[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_sub_pd + #define _mm512_sub_pd(a, b) simde_mm512_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sub_pd (simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sub_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sub_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sub_pd + #define _mm512_mask_sub_pd(src, k, a, b) simde_mm512_mask_sub_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_maskz_sub_pd(simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_maskz_sub_pd(k, a, b); + #else + return simde_mm512_maskz_mov_pd(k, simde_mm512_sub_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_sub_pd + #define _mm512_maskz_sub_pd(k, a, b) simde_mm512_maskz_sub_pd(k, a, b) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX512_SUB_H) */ +/* :: End simde/x86/avx512/sub.h :: */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-complex.h :: */ +/* SPDX-License-Identifier: MIT + * + * 
Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020-2021 Evan Nemerson + */ + +/* Support for complex math. + * + * We try to avoid including <complex> (in C++ mode) since it pulls in + * a *lot* of code. Unfortunately this only works for GNU modes (i.e., + * -std=gnu++14 not -std=c++14) unless you pass -fext-numeric-literals, + * but there is no way (AFAICT) to detect that flag so we have to rely + * on __STRICT_ANSI__ to instead detect GNU mode. + * + * This header is separate from simde-math.h since there is a good + * chance it will pull in <complex>, and most of the time we don't need + * complex math (on x86 only SVML uses it). */ + +#if !defined(SIMDE_COMPLEX_H) +#define SIMDE_COMPLEX_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#if ( \ + HEDLEY_HAS_BUILTIN(__builtin_creal) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) \ + ) && (!defined(__cplusplus) && !defined(__STRICT_ANSI__)) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ + typedef __complex__ float simde_cfloat32; + typedef __complex__ double simde_cfloat64; + HEDLEY_DIAGNOSTIC_POP + #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j)) + #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj)) + + #if !defined(simde_math_creal) + #define simde_math_crealf(z) __builtin_crealf(z) + #endif + #if !defined(simde_math_crealf) + #define simde_math_creal(z) __builtin_creal(z) + #endif + #if !defined(simde_math_cimag) + #define simde_math_cimagf(z) __builtin_cimagf(z) + #endif + #if !defined(simde_math_cimagf) + #define simde_math_cimag(z) __builtin_cimag(z) + #endif + #if !defined(simde_math_cexp) + #define simde_math_cexp(z) __builtin_cexp(z) + #endif + #if !defined(simde_math_cexpf) + #define simde_math_cexpf(z) __builtin_cexpf(z) + #endif +#elif !defined(__cplusplus) + #include <complex.h> + + #if !defined(HEDLEY_MSVC_VERSION) + typedef float _Complex simde_cfloat32; + typedef double _Complex simde_cfloat64; + #else + typedef _Fcomplex simde_cfloat32; + typedef _Dcomplex simde_cfloat64; + #endif + + #if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64) { (x), (y) }) + #define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32) { (x), (y) }) + #elif defined(CMPLX) && defined(CMPLXF) + #define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y) + #define 
SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y) + #else + #define SIMDE_MATH_CMPLX(x, y) (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I) + #define SIMDE_MATH_CMPLXF(x, y) (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * I) + #endif + + #if !defined(simde_math_creal) + #define simde_math_creal(z) creal(z) + #endif + #if !defined(simde_math_crealf) + #define simde_math_crealf(z) crealf(z) + #endif + #if !defined(simde_math_cimag) + #define simde_math_cimag(z) cimag(z) + #endif + #if !defined(simde_math_cimagf) + #define simde_math_cimagf(z) cimagf(z) + #endif + #if !defined(simde_math_cexp) + #define simde_math_cexp(z) cexp(z) + #endif + #if !defined(simde_math_cexpf) + #define simde_math_cexpf(z) cexpf(z) + #endif +#else + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + #pragma warning(disable:4530) + #endif + #include <complex> + HEDLEY_DIAGNOSTIC_POP + + typedef std::complex<float> simde_cfloat32; + typedef std::complex<double> simde_cfloat64; + #define SIMDE_MATH_CMPLX(x, y) (std::complex<double>(x, y)) + #define SIMDE_MATH_CMPLXF(x, y) (std::complex<float>(x, y)) + + #if !defined(simde_math_creal) + #define simde_math_creal(z) ((z).real()) + #endif + #if !defined(simde_math_crealf) + #define simde_math_crealf(z) ((z).real()) + #endif + #if !defined(simde_math_cimag) + #define simde_math_cimag(z) ((z).imag()) + #endif + #if !defined(simde_math_cimagf) + #define simde_math_cimagf(z) ((z).imag()) + #endif + #if !defined(simde_math_cexp) + #define simde_math_cexp(z) std::exp(z) + #endif + #if !defined(simde_math_cexpf) + #define simde_math_cexpf(z) std::exp(z) + #endif +#endif + +#endif /* !defined(SIMDE_COMPLEX_H) */ +/* :: End simde/simde-complex.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_acos_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosf4_u10(a); + #else + return Sleef_acosf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acosf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acos_ps + #define _mm_acos_ps(a) simde_mm_acos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_acos_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosd2_u10(a); + #else + return Sleef_acosd2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acos(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acos_pd + #define _mm_acos_pd(a) simde_mm_acos_pd(a) +#endif + 
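[Editor's note, not part of the imported header or the patch: the complex-math shim above only supplies constructor macros (SIMDE_MATH_CMPLX / SIMDE_MATH_CMPLXF) and creal/cimag/cexp wrappers, and the SVML emulation later in this file (simde_mm_cexp_ps) consumes them on vectors that interleave real and imaginary parts. The following stand-alone sketch mirrors that lane layout using plain C99 complex.h instead of the simde_math_* wrappers, so it stays self-contained; it assumes a C99-complex-capable toolchain, not the MSVC _Fcomplex path shown above.]

    /* Illustrative sketch only: two packed complex numbers stored as
     * (re0, im0, re1, im1), exponentiated pairwise -- the same layout
     * the simde_mm_cexp_ps emulation iterates over for its 4 floats. */
    #include <complex.h>
    #include <stdio.h>

    int main(void) {
      float v[4] = { 0.0f, 0.0f, 0.0f, 3.14159265f }; /* z0 = 0, z1 = i*pi */
      float r[4];
      for (int i = 0; i < 4; i += 2) {
        float complex z = cexpf(v[i] + v[i + 1] * I);
        r[i]     = crealf(z);  /* real part of exp(z) */
        r[i + 1] = cimagf(z);  /* imaginary part of exp(z) */
      }
      printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]); /* approx. 1 0 -1 0 */
      return 0;
    }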
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_acos_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosf8_u10(a); + #else + return Sleef_acosf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_acos_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acosf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acos_ps + #define _mm256_acos_ps(a) simde_mm256_acos_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_acos_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosd4_u10(a); + #else + return Sleef_acosd4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_acos_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acos(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acos_pd + #define _mm256_acos_pd(a) simde_mm256_acos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_acos_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosf16_u10(a); + #else + return Sleef_acosf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_acos_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acosf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acos_ps + #define _mm512_acos_ps(a) simde_mm512_acos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_acos_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_acosd8_u10(a); + #else + return Sleef_acosd8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_acos_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / 
sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acos(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acos_pd + #define _mm512_acos_pd(a) simde_mm512_acos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_acos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acos_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_acos_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acos_ps + #define _mm512_mask_acos_ps(src, k, a) simde_mm512_mask_acos_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_acos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acos_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_acos_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acos_pd + #define _mm512_mask_acos_pd(src, k, a) simde_mm512_mask_acos_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_acosh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_acoshf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acoshf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acosh_ps + #define _mm_acosh_ps(a) simde_mm_acosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_acosh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_acosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_acoshd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acosh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_acosh_pd + #define _mm_acosh_pd(a) simde_mm_acosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_acosh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_acoshf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_acosh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acoshf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acosh_ps + #define _mm256_acosh_ps(a) simde_mm256_acosh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_acosh_pd (simde__m256d a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_acosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_acoshd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_acosh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acosh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_acosh_pd + #define _mm256_acosh_pd(a) simde_mm256_acosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_acosh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_acoshf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_acosh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_acoshf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acosh_ps + #define _mm512_acosh_ps(a) simde_mm512_acosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_acosh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_acosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_acoshd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_acosh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_acosh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_acosh_pd + #define _mm512_acosh_pd(a) simde_mm512_acosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_acosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acosh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_acosh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acosh_ps + #define _mm512_mask_acosh_ps(src, k, a) simde_mm512_mask_acosh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_acosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_acosh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_acosh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_acosh_pd + #define _mm512_mask_acosh_pd(src, k, a) simde_mm512_mask_acosh_pd(src, k, a) +#endif + 
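[Editor's note, not part of the imported header or the patch: every _mm512_mask_* / _mm512_maskz_* wrapper in this file follows the same idiom -- compute the full-width result, then blend with simde_mm512_mask_mov_ps/_pd (or zero the unselected lanes for the maskz_ form). The scalar model below documents the semantics being assumed; the helper name mask_acos16_model and its signature are this sketch's invention, not simde API.]

    /* Scalar model of the mask blend used by the _mm512_mask_* wrappers
     * above: bit i of k selects the newly computed acosf(a[i]) for lane i,
     * otherwise the lane keeps src[i]; a maskz_ variant would write 0.0f
     * instead of src[i]. 16 lanes correspond to 512 bits of 32-bit floats. */
    #include <math.h>
    #include <stdint.h>

    static void mask_acos16_model(float dst[16], const float src[16],
                                  uint16_t k, const float a[16]) {
      for (int i = 0; i < 16; i++)
        dst[i] = ((k >> i) & 1) ? acosf(a[i]) : src[i];
    }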
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_asin_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asinf4_u10(a); + #else + return Sleef_asinf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asin_ps + #define _mm_asin_ps(a) simde_mm_asin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_asin_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asind2_u10(a); + #else + return Sleef_asind2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asin(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asin_pd + #define _mm_asin_pd(a) simde_mm_asin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_asin_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asinf8_u10(a); + #else + return Sleef_asinf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_asin_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asin_ps + #define _mm256_asin_ps(a) simde_mm256_asin_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_asin_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asind4_u10(a); + #else + return Sleef_asind4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_asin_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asin(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asin_pd + #define _mm256_asin_pd(a) simde_mm256_asin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_asin_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return 
_mm512_asin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asinf16_u10(a); + #else + return Sleef_asinf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_asin_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asin_ps + #define _mm512_asin_ps(a) simde_mm512_asin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_asin_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_asind8_u10(a); + #else + return Sleef_asind8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_asin_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asin(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asin_pd + #define _mm512_asin_pd(a) simde_mm512_asin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_asin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asin_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_asin_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asin_ps + #define _mm512_mask_asin_ps(src, k, a) simde_mm512_mask_asin_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_asin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asin_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_asin_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asin_pd + #define _mm512_mask_asin_pd(src, k, a) simde_mm512_mask_asin_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_asinh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_asinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_asinhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asinh_ps + #define _mm_asinh_ps(a) simde_mm_asinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_asinh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + 
return _mm_asinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_asinhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asinh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_asinh_pd + #define _mm_asinh_pd(a) simde_mm_asinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_asinh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_asinhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_asinh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asinh_ps + #define _mm256_asinh_ps(a) simde_mm256_asinh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_asinh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_asinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_asinhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_asinh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asinh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_asinh_pd + #define _mm256_asinh_pd(a) simde_mm256_asinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_asinh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_asinhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_asinh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_asinhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asinh_ps + #define _mm512_asinh_ps(a) simde_mm512_asinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_asinh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_asinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_asinhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) 
+ for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_asinh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_asinh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_asinh_pd + #define _mm512_asinh_pd(a) simde_mm512_asinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_asinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asinh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_asinh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asinh_ps + #define _mm512_mask_asinh_ps(src, k, a) simde_mm512_mask_asinh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_asinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_asinh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_asinh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_asinh_pd + #define _mm512_mask_asinh_pd(src, k, a) simde_mm512_mask_asinh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_atan_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atanf4_u10(a); + #else + return Sleef_atanf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan_ps + #define _mm_atan_ps(a) simde_mm_atan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_atan_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atand2_u10(a); + #else + return Sleef_atand2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan_pd + #define _mm_atan_pd(a) simde_mm_atan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_atan_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atanf8_u10(a); + #else + return Sleef_atanf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = 
simde_mm_atan_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan_ps + #define _mm256_atan_ps(a) simde_mm256_atan_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_atan_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atand4_u10(a); + #else + return Sleef_atand4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_atan_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan_pd + #define _mm256_atan_pd(a) simde_mm256_atan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_atan_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atanf16_u10(a); + #else + return Sleef_atanf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_atan_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan_ps + #define _mm512_atan_ps(a) simde_mm512_atan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_atan_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atand8_u10(a); + #else + return Sleef_atand8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_atan_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan_pd + #define _mm512_atan_pd(a) simde_mm512_atan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_atan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan_ps(a)); + 
#endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan_ps + #define _mm512_mask_atan_ps(src, k, a) simde_mm512_mask_atan_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_atan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan_pd + #define _mm512_mask_atan_pd(src, k, a) simde_mm512_mask_atan_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_atan2_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan2_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2f4_u10(a, b); + #else + return Sleef_atan2f4_u35(a, b); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan2_ps + #define _mm_atan2_ps(a, b) simde_mm_atan2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_atan2_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atan2_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2d2_u10(a, b); + #else + return Sleef_atan2d2_u35(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atan2_pd + #define _mm_atan2_pd(a, b) simde_mm_atan2_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_atan2_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atan2_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2f8_u10(a, b); + #else + return Sleef_atan2f8_u35(a, b); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_atan2_ps(a_.m128[i], b_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan2_ps + #define _mm256_atan2_ps(a, b) simde_mm256_atan2_ps(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_atan2_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_atan2_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2d4_u10(a, b); + #else + return Sleef_atan2d4_u35(a, b); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_atan2_pd(a_.m128d[i], b_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atan2_pd + #define _mm256_atan2_pd(a, b) simde_mm256_atan2_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_atan2_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan2_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2f16_u10(a, b); + #else + return Sleef_atan2f16_u35(a, b); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_atan2_ps(a_.m256[i], b_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atan2f(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan2_ps + #define _mm512_atan2_ps(a, b) simde_mm512_atan2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_atan2_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atan2_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_atan2d8_u10(a, b); + #else + return Sleef_atan2d8_u35(a, b); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_atan2_pd(a_.m256d[i], b_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atan2(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atan2_pd + #define _mm512_atan2_pd(a, b) simde_mm512_atan2_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_atan2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan2_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_atan2_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan2_ps + #define _mm512_mask_atan2_ps(src, k, a, b) simde_mm512_mask_atan2_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d 
+simde_mm512_mask_atan2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atan2_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_atan2_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atan2_pd + #define _mm512_mask_atan2_pd(src, k, a, b) simde_mm512_mask_atan2_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_atanh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_atanhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atanh_ps + #define _mm_atanh_ps(a) simde_mm_atanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_atanh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_atanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_atanhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atanh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_atanh_pd + #define _mm_atanh_pd(a) simde_mm_atanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_atanh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_atanhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_atanh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atanh_ps + #define _mm256_atanh_ps(a) simde_mm256_atanh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_atanh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_atanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_atanhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_atanh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atanh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_atanh_pd + #define _mm256_atanh_pd(a) 
simde_mm256_atanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_atanh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_atanhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_atanh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_atanhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atanh_ps + #define _mm512_atanh_ps(a) simde_mm512_atanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_atanh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_atanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_atanhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_atanh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_atanh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_atanh_pd + #define _mm512_atanh_pd(a) simde_mm512_atanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_atanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atanh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_atanh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atanh_ps + #define _mm512_mask_atanh_ps(src, k, a) simde_mm512_mask_atanh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_atanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_atanh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_atanh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_atanh_pd + #define _mm512_mask_atanh_pd(src, k, a) simde_mm512_mask_atanh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cbrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cbrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_cbrtf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cbrtf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cbrt_ps + #define _mm_cbrt_ps(a) simde_mm_cbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cbrt_pd (simde__m128d a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cbrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_cbrtd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cbrt(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cbrt_pd + #define _mm_cbrt_pd(a) simde_mm_cbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cbrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cbrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_cbrtf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cbrt_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cbrtf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cbrt_ps + #define _mm256_cbrt_ps(a) simde_mm256_cbrt_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cbrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cbrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_cbrtd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cbrt_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cbrt(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cbrt_pd + #define _mm256_cbrt_pd(a) simde_mm256_cbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cbrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cbrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_cbrtf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cbrt_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cbrtf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cbrt_ps + #define _mm512_cbrt_ps(a) simde_mm512_cbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cbrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cbrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_cbrtd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cbrt_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cbrt(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cbrt_pd + #define _mm512_cbrt_pd(a) simde_mm512_cbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cbrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cbrt_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cbrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cbrt_ps + #define _mm512_mask_cbrt_ps(src, k, a) simde_mm512_mask_cbrt_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cbrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cbrt_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cbrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cbrt_pd + #define _mm512_mask_cbrt_pd(src, k, a) simde_mm512_mask_cbrt_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cexp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cexp_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); + r_.f32[ i ] = simde_math_crealf(val); + r_.f32[i + 1] = simde_math_cimagf(val); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cexp_ps + #define _mm_cexp_ps(a) simde_mm_cexp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cexp_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cexp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_cfloat32 val = simde_math_cexpf(SIMDE_MATH_CMPLXF(a_.f32[i], a_.f32[i+1])); + r_.f32[ i ] = simde_math_crealf(val); + r_.f32[i + 1] = simde_math_cimagf(val); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cexp_ps + #define _mm256_cexp_ps(a) simde_mm256_cexp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cos_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf4_u10(a); + #else + return Sleef_cosf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cos_ps + 
#define _mm_cos_ps(a) simde_mm_cos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cos_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd2_u10(a); + #else + return Sleef_cosd2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cos_pd + #define _mm_cos_pd(a) simde_mm_cos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cos_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf8_u10(a); + #else + return Sleef_cosf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cos_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cos_ps + #define _mm256_cos_ps(a) simde_mm256_cos_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cos_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd4_u10(a); + #else + return Sleef_cosd4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cos_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cos_pd + #define _mm256_cos_pd(a) simde_mm256_cos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cos_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cos_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf16_u10(a); + #else + return Sleef_cosf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cos_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef 
_mm512_cos_ps + #define _mm512_cos_ps(a) simde_mm512_cos_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cos_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cos_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd8_u10(a); + #else + return Sleef_cosd8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cos_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cos_pd + #define _mm512_cos_pd(a) simde_mm512_cos_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cos_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cos_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cos_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cos_ps + #define _mm512_mask_cos_ps(src, k, a) simde_mm512_mask_cos_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cos_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cos_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cos_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cos_pd + #define _mm512_mask_cos_pd(src, k, a) simde_mm512_mask_cos_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deg2rad_ps(simde__m128 a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_mm_mul_ps(a, simde_mm_set1_ps(SIMDE_MATH_PI_OVER_180F)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_n_f32(a_.neon_i32, SIMDE_MATH_PI_OVER_180F); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f32) tmp = { SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F }; + r_.f32 = a_.f32 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_deg2radf(a_.f32[i]); + } + + #endif + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deg2rad_pd(simde__m128d a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(128) + return simde_mm_mul_pd(a, simde_mm_set1_pd(SIMDE_MATH_PI_OVER_180)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_n_f64(a_.neon_i64, SIMDE_MATH_PI_OVER_180); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; + r_.f64 = a_.f64 * 
tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_deg2rad(a_.f64[i]); + } + + #endif + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deg2rad_ps(simde__m256 a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) + return simde_mm256_mul_ps(a, simde_mm256_set1_ps(SIMDE_MATH_PI_OVER_180F)); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_x_mm_deg2rad_ps(a_.m128[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f32) tmp = { + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F + }; + r_.f32 = a_.f32 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_deg2radf(a_.f32[i]); + } + + #endif + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deg2rad_pd(simde__m256d a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(256) + return simde_mm256_mul_pd(a, simde_mm256_set1_pd(SIMDE_MATH_PI_OVER_180)); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_x_mm_deg2rad_pd(a_.m128d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f64) tmp = { SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 }; + r_.f64 = a_.f64 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_deg2rad(a_.f64[i]); + } + + #endif + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_x_mm512_deg2rad_ps(simde__m512 a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) + return simde_mm512_mul_ps(a, simde_mm512_set1_ps(SIMDE_MATH_PI_OVER_180F)); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_x_mm256_deg2rad_ps(a_.m256[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f32 = a_.f32 * SIMDE_MATH_PI_OVER_180F; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f32) tmp = { + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, + SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F, SIMDE_MATH_PI_OVER_180F + }; + r_.f32 = a_.f32 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_deg2radf(a_.f32[i]); + } + + #endif + return 
simde__m512_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_x_mm512_deg2rad_pd(simde__m512d a) { + #if SIMDE_NATURAL_VECTOR_SIZE_GE(512) + return simde_mm512_mul_pd(a, simde_mm512_set1_pd(SIMDE_MATH_PI_OVER_180)); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_x_mm256_deg2rad_pd(a_.m256d[i]); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_53784) + r_.f64 = a_.f64 * SIMDE_MATH_PI_OVER_180; + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + const __typeof__(r_.f64) tmp = { + SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, + SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180, SIMDE_MATH_PI_OVER_180 + }; + r_.f64 = a_.f64 * tmp; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_deg2rad(a_.f64[i]); + } + + #endif + return simde__m512d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cosd_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosd_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf4_u10(simde_x_mm_deg2rad_ps(a)); + #else + return Sleef_cosf4_u35(simde_x_mm_deg2rad_ps(a)); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosd_ps + #define _mm_cosd_ps(a) simde_mm_cosd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cosd_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosd_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd2_u10(simde_x_mm_deg2rad_pd(a)); + #else + return Sleef_cosd2_u35(simde_x_mm_deg2rad_pd(a)); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosd_pd + #define _mm_cosd_pd(a) simde_mm_cosd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cosd_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosd_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf8_u10(simde_x_mm256_deg2rad_ps(a)); + #else + return Sleef_cosf8_u35(simde_x_mm256_deg2rad_ps(a)); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cosd_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 
simde_math_cosf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosd_ps + #define _mm256_cosd_ps(a) simde_mm256_cosd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cosd_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosd_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd4_u10(simde_x_mm256_deg2rad_pd(a)); + #else + return Sleef_cosd4_u35(simde_x_mm256_deg2rad_pd(a)); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cosd_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosd_pd + #define _mm256_cosd_pd(a) simde_mm256_cosd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cosd_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosd_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosf16_u10(simde_x_mm512_deg2rad_ps(a)); + #else + return Sleef_cosf16_u35(simde_x_mm512_deg2rad_ps(a)); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cosd_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cosf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosd_ps + #define _mm512_cosd_ps(a) simde_mm512_cosd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cosd_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosd_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_cosd8_u10(simde_x_mm512_deg2rad_pd(a)); + #else + return Sleef_cosd8_u35(simde_x_mm512_deg2rad_pd(a)); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cosd_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cos(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosd_pd + #define _mm512_cosd_pd(a) simde_mm512_cosd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cosd_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosd_ps(src, k, 
a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosd_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosd_ps + #define _mm512_mask_cosd_ps(src, k, a) simde_mm512_mask_cosd_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cosd_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosd_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosd_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosd_pd + #define _mm512_mask_cosd_pd(src, k, a) simde_mm512_mask_cosd_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cosh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_coshf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_coshf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosh_ps + #define _mm_cosh_ps(a) simde_mm_cosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cosh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_coshd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cosh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cosh_pd + #define _mm_cosh_pd(a) simde_mm_cosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cosh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_coshf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cosh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_coshf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosh_ps + #define _mm256_cosh_ps(a) simde_mm256_cosh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cosh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_coshd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cosh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; 
i++) { + r_.f64[i] = simde_math_cosh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cosh_pd + #define _mm256_cosh_pd(a) simde_mm256_cosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cosh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_coshf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cosh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_coshf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosh_ps + #define _mm512_cosh_ps(a) simde_mm512_cosh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cosh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cosh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_coshd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cosh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cosh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cosh_pd + #define _mm512_cosh_pd(a) simde_mm512_cosh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cosh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cosh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosh_ps + #define _mm512_mask_cosh_ps(src, k, a) simde_mm512_mask_cosh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cosh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cosh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cosh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cosh_pd + #define _mm512_mask_cosh_pd(src, k, a) simde_mm512_mask_cosh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 / b_.i8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + 
r_.i8[i] = a_.i8[i] / b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi8 + #define _mm_div_epi8(a, b) simde_mm_div_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 / b_.i16; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] / b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi16 + #define _mm_div_epi16(a, b) simde_mm_div_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 / b_.i32; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] / b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi32 + #define _mm_div_epi32(a, b) simde_mm_div_epi32(a, b) + #undef _mm_idiv_epi32 + #define _mm_idiv_epi32(a, b) simde_mm_div_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 / b_.i64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x4_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] / b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epi64 + #define _mm_div_epi64(a, b) simde_mm_div_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 / b_.u8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu8 + #define _mm_div_epu8(a, b) simde_mm_div_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 / b_.u16; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu16 + #define _mm_div_epu16(a, b) simde_mm_div_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 / b_.u32; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu32 + #define _mm_div_epu32(a, b) simde_mm_div_epu32(a, b) + #undef _mm_udiv_epi32 + #define _mm_udiv_epi32(a, b) simde_mm_div_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_div_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 / b_.u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x16_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_div_epu64 + #define _mm_div_epu64(a, b) simde_mm_div_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 / b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] / b_.i8[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi8 + #define _mm256_div_epi8(a, b) simde_mm256_div_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 / b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] / b_.i16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi16 + #define _mm256_div_epi16(a, b) simde_mm256_div_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 / b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] / b_.i32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi32 + #define _mm256_div_epi32(a, b) simde_mm256_div_epi32(a, b) + #undef _mm256_idiv_epi32 + #define _mm256_idiv_epi32(a, b) simde_mm256_div_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 / b_.i64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epi64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] / b_.i64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epi64 + #define _mm256_div_epi64(a, b) simde_mm256_div_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 / b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < 
(sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu8 + #define _mm256_div_epu8(a, b) simde_mm256_div_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 / b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu16 + #define _mm256_div_epu16(a, b) simde_mm256_div_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 / b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu32 + #define _mm256_div_epu32(a, b) simde_mm256_div_epu32(a, b) + #undef _mm256_udiv_epi32 + #define _mm256_udiv_epi32(a, b) simde_mm256_div_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_div_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_epu64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 / b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_div_epu64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_epu64 + #define _mm256_div_epu64(a, b) simde_mm256_div_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi8 (simde__m512i a, simde__m512i b) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 / b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] / b_.i8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi8 + #define _mm512_div_epi8(a, b) simde_mm512_div_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 / b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] / b_.i16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi16 + #define _mm512_div_epi16(a, b) simde_mm512_div_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 / b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] / b_.i32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi32 + #define _mm512_div_epi32(a, b) simde_mm512_div_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_div_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_epi32 + #define _mm512_mask_div_epi32(src, k, a, b) simde_mm512_mask_div_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + 
#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 / b_.i64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epi64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] / b_.i64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epi64 + #define _mm512_div_epi64(a, b) simde_mm512_div_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = a_.u8 / b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] / b_.u8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu8 + #define _mm512_div_epu8(a, b) simde_mm512_div_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = a_.u16 / b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] / b_.u16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu16 + #define _mm512_div_epu16(a, b) simde_mm512_div_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 / b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] / b_.u32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu32 + #define _mm512_div_epu32(a, b) simde_mm512_div_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_div_epu32(simde__m512i src, simde__mmask16 k, simde__m512i a, 
simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_div_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_div_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_div_epu32 + #define _mm512_mask_div_epu32(src, k, a, b) simde_mm512_mask_div_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_div_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_div_epu64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 / b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_div_epu64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] / b_.u64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_div_epu64 + #define _mm512_div_epu64(a, b) simde_mm512_div_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erf_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erf_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erff4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erff(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erf_ps + #define _mm_erf_ps(a) simde_mm_erf_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erf_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erf_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erfd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erf(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erf_pd + #define _mm_erf_pd(a) simde_mm_erf_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erf_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erf_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erff8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_erf_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erff(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erf_ps + #define _mm256_erf_ps(a) simde_mm256_erf_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_erf_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erf_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erfd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_erf_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erf(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erf_pd + #define _mm256_erf_pd(a) simde_mm256_erf_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erf_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erf_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erff16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_erf_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erff(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erf_ps + #define _mm512_erf_ps(a) simde_mm512_erf_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erf_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erf_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erfd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_erf_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erf(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erf_pd + #define _mm512_erf_pd(a) simde_mm512_erf_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erf_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erf_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erf_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erf_ps + #define _mm512_mask_erf_ps(src, k, a) simde_mm512_mask_erf_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erf_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erf_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_erf_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erf_pd + #define _mm512_mask_erf_pd(src, k, a) simde_mm512_mask_erf_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m128 +simde_mm_erfc_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erfcf4_u15(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfc_ps + #define _mm_erfc_ps(a) simde_mm_erfc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erfc_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_erfcd2_u15(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfc(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfc_pd + #define _mm_erfc_pd(a) simde_mm_erfc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erfc_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erfcf8_u15(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_erfc_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfc_ps + #define _mm256_erfc_ps(a) simde_mm256_erfc_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erfc_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_erfcd4_u15(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_erfc_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfc(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfc_pd + #define _mm256_erfc_pd(a) simde_mm256_erfc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erfc_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erfcf16_u15(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = 
simde_mm256_erfc_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfc_ps + #define _mm512_erfc_ps(a) simde_mm512_erfc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erfc_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_erfcd8_u15(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_erfc_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfc(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfc_pd + #define _mm512_erfc_pd(a) simde_mm512_erfc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erfc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfc_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfc_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfc_ps + #define _mm512_mask_erfc_ps(src, k, a) simde_mm512_mask_erfc_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erfc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfc_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfc_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfc_pd + #define _mm512_mask_erfc_pd(src, k, a) simde_mm512_mask_erfc_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_exp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp_ps + #define _mm_exp_ps(a) simde_mm_exp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_exp_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp_pd + #define _mm_exp_pd(a) simde_mm_exp_pd(a) +#endif + 
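+/* Orientation sketch (editorial comment, not from the upstream SIMDE
+ * sources): every wrapper in this header follows the same dispatch
+ * pattern. It calls the native SVML intrinsic when available, then a
+ * Sleef vector routine -- the 1.0-ULP "u10" variant when
+ * SIMDE_ACCURACY_PREFERENCE > 1, otherwise the faster 3.5-ULP "u35"
+ * variant -- and finally falls back to applying the scalar simde_math_*
+ * (libm) function lane by lane through the private union view. The
+ * _mm512_mask_* forms are then built by blending the unmasked result with
+ * simde_mm512_mask_mov_ps/pd, so masked-off lanes keep `src`. A
+ * hypothetical caller only ever uses the simde_-prefixed names, e.g.:
+ *
+ *   simde__m128 x = simde_mm_set_ps(3.0f, 2.0f, 1.0f, 0.0f);
+ *   simde__m128 c = simde_mm_cos_ps(x);   // per-lane cosine
+ */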
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_exp_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_exp_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp_ps + #define _mm256_exp_ps(a) simde_mm256_exp_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_exp_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_exp_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp_pd + #define _mm256_exp_pd(a) simde_mm256_exp_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_exp_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_exp_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp_ps + #define _mm512_exp_ps(a) simde_mm512_exp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_exp_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_exp_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp_pd + #define _mm512_exp_pd(a) simde_mm512_exp_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_exp_ps(simde__m512 src, simde__mmask16 k, 
simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp_ps + #define _mm512_mask_exp_ps(src, k, a) simde_mm512_mask_exp_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_exp_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp_pd + #define _mm512_mask_exp_pd(src, k, a) simde_mm512_mask_exp_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_expm1_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_expm1_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expm1f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expm1f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_expm1_ps + #define _mm_expm1_ps(a) simde_mm_expm1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_expm1_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_expm1_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_expm1d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_expm1(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_expm1_pd + #define _mm_expm1_pd(a) simde_mm_expm1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_expm1_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_expm1_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expm1f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_expm1_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expm1f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_expm1_ps + #define _mm256_expm1_ps(a) simde_mm256_expm1_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_expm1_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_expm1_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_expm1d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + 
r_.m128d[i] = simde_mm_expm1_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_expm1(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_expm1_pd + #define _mm256_expm1_pd(a) simde_mm256_expm1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_expm1_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_expm1_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expm1f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_expm1_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_expm1f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_expm1_ps + #define _mm512_expm1_ps(a) simde_mm512_expm1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_expm1_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_expm1_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_expm1d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_expm1_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_expm1(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_expm1_pd + #define _mm512_expm1_pd(a) simde_mm512_expm1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_expm1_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_expm1_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_expm1_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_expm1_ps + #define _mm512_mask_expm1_ps(src, k, a) simde_mm512_mask_expm1_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_expm1_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_expm1_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_expm1_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_expm1_pd + #define _mm512_mask_expm1_pd(src, k, a) simde_mm512_mask_expm1_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_exp2_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp2f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) 
/ sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp2f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp2_ps + #define _mm_exp2_ps(a) simde_mm_exp2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_exp2_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp2d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp2(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp2_pd + #define _mm_exp2_pd(a) simde_mm_exp2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_exp2_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp2f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_exp2_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp2f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp2_ps + #define _mm256_exp2_ps(a) simde_mm256_exp2_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_exp2_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp2d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_exp2_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp2(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp2_pd + #define _mm256_exp2_pd(a) simde_mm256_exp2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_exp2_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp2f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_exp2_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp2f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp2_ps + #define _mm512_exp2_ps(a) simde_mm512_exp2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m512d +simde_mm512_exp2_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp2d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_exp2_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp2(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp2_pd + #define _mm512_exp2_pd(a) simde_mm512_exp2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_exp2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp2_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp2_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp2_ps + #define _mm512_mask_exp2_ps(src, k, a) simde_mm512_mask_exp2_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_exp2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp2_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp2_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp2_pd + #define _mm512_mask_exp2_pd(src, k, a) simde_mm512_mask_exp2_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_exp10_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp10f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp10f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp10_ps + #define _mm_exp10_ps(a) simde_mm_exp10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_exp10_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_exp10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_exp10d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp10(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_exp10_pd + #define _mm_exp10_pd(a) simde_mm_exp10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_exp10_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp10f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_exp10_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp10f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp10_ps + #define _mm256_exp10_ps(a) simde_mm256_exp10_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_exp10_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_exp10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_exp10d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_exp10_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp10(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_exp10_pd + #define _mm256_exp10_pd(a) simde_mm256_exp10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_exp10_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp10f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_exp10_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_exp10f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp10_ps + #define _mm512_exp10_ps(a) simde_mm512_exp10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_exp10_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_exp10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_exp10d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_exp10_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_exp10(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_exp10_pd + #define _mm512_exp10_pd(a) simde_mm512_exp10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_exp10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp10_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_exp10_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp10_ps + 
#define _mm512_mask_exp10_ps(src, k, a) simde_mm512_mask_exp10_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_exp10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_exp10_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_exp10_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_exp10_pd + #define _mm512_mask_exp10_pd(src, k, a) simde_mm512_mask_exp10_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cdfnorm_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorm_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m128 a1 = simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); + const simde__m128 a2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); + const simde__m128 a3 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.421413741)); + const simde__m128 a4 = simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); + const simde__m128 a5 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.061405429)); + const simde__m128 p = simde_mm_set1_ps(SIMDE_FLOAT32_C(0.3275911)); + const simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); + + /* simde_math_fabsf(x) / sqrtf(2.0) */ + const simde__m128 x = simde_mm_div_ps(simde_x_mm_abs_ps(a), simde_mm_sqrt_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m128 t = simde_mm_div_ps(one, simde_mm_add_ps(one, simde_mm_mul_ps(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m128 y = simde_mm_mul_ps(a5, t); + y = simde_mm_add_ps(y, a4); + y = simde_mm_mul_ps(y, t); + y = simde_mm_add_ps(y, a3); + y = simde_mm_mul_ps(y, t); + y = simde_mm_add_ps(y, a2); + y = simde_mm_mul_ps(y, t); + y = simde_mm_add_ps(y, a1); + y = simde_mm_mul_ps(y, t); + y = simde_mm_mul_ps(y, simde_mm_exp_ps(simde_mm_mul_ps(x, simde_x_mm_negate_ps(x)))); + y = simde_mm_sub_ps(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm_add_ps(one, simde_x_mm_xorsign_ps(y, a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorm_ps + #define _mm_cdfnorm_ps(a) simde_mm_cdfnorm_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cdfnorm_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorm_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m128d a1 = simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); + const simde__m128d a2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); + const simde__m128d a3 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.421413741)); + const simde__m128d a4 = simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); + const simde__m128d a5 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.061405429)); + const simde__m128d p = simde_mm_set1_pd(SIMDE_FLOAT64_C(0.6475911)); + const simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); + + /* simde_math_fabs(x) / sqrt(2.0) */ + const simde__m128d x = simde_mm_div_pd(simde_x_mm_abs_pd(a), simde_mm_sqrt_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m128d t = simde_mm_div_pd(one, simde_mm_add_pd(one, simde_mm_mul_pd(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m128d y = simde_mm_mul_pd(a5, t); + y = simde_mm_add_pd(y, a4); + y = simde_mm_mul_pd(y, t); + y = simde_mm_add_pd(y, a3); + y = simde_mm_mul_pd(y, t); + y = simde_mm_add_pd(y, a2); + y = simde_mm_mul_pd(y, t); + y = simde_mm_add_pd(y, a1); + y = simde_mm_mul_pd(y, t); + y = simde_mm_mul_pd(y, simde_mm_exp_pd(simde_mm_mul_pd(x, simde_x_mm_negate_pd(x)))); + y = simde_mm_sub_pd(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm_add_pd(one, simde_x_mm_xorsign_pd(y, a))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorm_pd + #define _mm_cdfnorm_pd(a) simde_mm_cdfnorm_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cdfnorm_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorm_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m256 a1 = simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); + const simde__m256 a2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); + const simde__m256 a3 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.421413741)); + const simde__m256 a4 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); + const simde__m256 a5 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.061405429)); + const simde__m256 p = simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.3275911)); + const simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); + + /* simde_math_fabsf(x) / sqrtf(2.0) */ + const simde__m256 x = simde_mm256_div_ps(simde_x_mm256_abs_ps(a), simde_mm256_sqrt_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m256 t = simde_mm256_div_ps(one, simde_mm256_add_ps(one, simde_mm256_mul_ps(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m256 y = simde_mm256_mul_ps(a5, t); + y = simde_mm256_add_ps(y, a4); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_add_ps(y, a3); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_add_ps(y, a2); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_add_ps(y, a1); + y = simde_mm256_mul_ps(y, t); + y = simde_mm256_mul_ps(y, simde_mm256_exp_ps(simde_mm256_mul_ps(x, simde_x_mm256_negate_ps(x)))); + y = simde_mm256_sub_ps(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm256_add_ps(one, simde_x_mm256_xorsign_ps(y, a))); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cdfnorm_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorm_ps + #define _mm256_cdfnorm_ps(a) simde_mm256_cdfnorm_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cdfnorm_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorm_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m256d a1 = simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); + const simde__m256d a2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); + const simde__m256d a3 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.421413741)); + const simde__m256d a4 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); + const simde__m256d a5 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.061405429)); + const simde__m256d p = simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.6475911)); + const simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); + + /* simde_math_fabs(x) / sqrt(2.0) */ + const simde__m256d x = simde_mm256_div_pd(simde_x_mm256_abs_pd(a), simde_mm256_sqrt_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m256d t = simde_mm256_div_pd(one, simde_mm256_add_pd(one, simde_mm256_mul_pd(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m256d y = simde_mm256_mul_pd(a5, t); + y = simde_mm256_add_pd(y, a4); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_add_pd(y, a3); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_add_pd(y, a2); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_add_pd(y, a1); + y = simde_mm256_mul_pd(y, t); + y = simde_mm256_mul_pd(y, simde_mm256_exp_pd(simde_mm256_mul_pd(x, simde_x_mm256_negate_pd(x)))); + y = simde_mm256_sub_pd(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm256_add_pd(one, simde_x_mm256_xorsign_pd(y, a))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cdfnorm_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorm_pd + #define _mm256_cdfnorm_pd(a) simde_mm256_cdfnorm_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cdfnorm_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorm_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m512 a1 = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.254829592)); + const simde__m512 a2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.284496736)); + const simde__m512 a3 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.421413741)); + const simde__m512 a4 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.453152027)); + const simde__m512 a5 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.061405429)); + const simde__m512 p = simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.3275911)); + const simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); + + /* simde_math_fabsf(x) / sqrtf(2.0) */ + const simde__m512 x = simde_mm512_div_ps(simde_mm512_abs_ps(a), simde_mm512_sqrt_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m512 t = simde_mm512_div_ps(one, simde_mm512_add_ps(one, simde_mm512_mul_ps(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m512 y = simde_mm512_mul_ps(a5, t); + y = simde_mm512_add_ps(y, a4); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_add_ps(y, a3); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_add_ps(y, a2); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_add_ps(y, a1); + y = simde_mm512_mul_ps(y, t); + y = simde_mm512_mul_ps(y, simde_mm512_exp_ps(simde_mm512_mul_ps(x, simde_x_mm512_negate_ps(x)))); + y = simde_mm512_sub_ps(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), simde_mm512_add_ps(one, simde_x_mm512_xorsign_ps(y, a))); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cdfnorm_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnormf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorm_ps + #define _mm512_cdfnorm_ps(a) simde_mm512_cdfnorm_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cdfnorm_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorm_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://www.johndcook.com/blog/cpp_phi/ */ + const simde__m512d a1 = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.254829592)); + const simde__m512d a2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.284496736)); + const simde__m512d a3 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.421413741)); + const simde__m512d a4 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.453152027)); + const simde__m512d a5 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.061405429)); + const simde__m512d p = simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.6475911)); + const simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); + + /* simde_math_fabs(x) / sqrt(2.0) */ + const simde__m512d x = simde_mm512_div_pd(simde_mm512_abs_pd(a), simde_mm512_sqrt_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)))); + + /* 1.0 / (1.0 + p * x) */ + const simde__m512d t = simde_mm512_div_pd(one, simde_mm512_add_pd(one, simde_mm512_mul_pd(p, x))); + + /* 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-x * x) */ + simde__m512d y = simde_mm512_mul_pd(a5, t); + y = simde_mm512_add_pd(y, a4); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_add_pd(y, a3); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_add_pd(y, a2); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_add_pd(y, a1); + y = simde_mm512_mul_pd(y, t); + y = simde_mm512_mul_pd(y, simde_mm512_exp_pd(simde_mm512_mul_pd(x, simde_x_mm512_negate_pd(x)))); + y = simde_mm512_sub_pd(one, y); + + /* 0.5 * (1.0 + ((a < 0.0) ? 
-y : y)) */ + return simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), simde_mm512_add_pd(one, simde_x_mm512_xorsign_pd(y, a))); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cdfnorm_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorm(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorm_pd + #define _mm512_cdfnorm_pd(a) simde_mm512_cdfnorm_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cdfnorm_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorm_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorm_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorm_ps + #define _mm512_mask_cdfnorm_ps(src, k, a) simde_mm512_mask_cdfnorm_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cdfnorm_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorm_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorm_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorm_pd + #define _mm512_mask_cdfnorm_pd(src, k, a) simde_mm512_mask_cdfnorm_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_idivrem_epi32 (simde__m128i* mem_addr, simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m128i*, mem_addr), a, b); + #else + simde__m128i r; + + r = simde_mm_div_epi32(a, b); + *mem_addr = simde_mm_sub_epi32(a, simde_mm_mullo_epi32(r, b)); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_idivrem_epi32 + #define _mm_idivrem_epi32(mem_addr, a, b) simde_mm_idivrem_epi32((mem_addr),(a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_idivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_idivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b); + #else + simde__m256i r; + + r = simde_mm256_div_epi32(a, b); + *mem_addr = simde_mm256_sub_epi32(a, simde_mm256_mullo_epi32(r, b)); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_idivrem_epi32 + #define _mm256_idivrem_epi32(mem_addr, a, b) simde_mm256_idivrem_epi32((mem_addr),(a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hypot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_hypot_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotf4_u05(a, b); + #else + return Sleef_hypotf4_u35(a, b); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; 
i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_hypot_ps + #define _mm_hypot_ps(a, b) simde_mm_hypot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hypot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_hypot_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotd2_u05(a, b); + #else + return Sleef_hypotd2_u35(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_hypot_pd + #define _mm_hypot_pd(a, b) simde_mm_hypot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hypot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hypot_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotf8_u05(a, b); + #else + return Sleef_hypotf8_u35(a, b); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_hypot_ps(a_.m128[i], b_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_hypot_ps + #define _mm256_hypot_ps(a, b) simde_mm256_hypot_ps(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hypot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hypot_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotd4_u05(a, b); + #else + return Sleef_hypotd4_u35(a, b); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_hypot_pd(a_.m128d[i], b_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_hypot_pd + #define _mm256_hypot_pd(a, b) simde_mm256_hypot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_hypot_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_hypot_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotf16_u05(a, b); + #else + return 
Sleef_hypotf16_u35(a, b); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_hypot_ps(a_.m256[i], b_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_hypotf(a_.f32[i], b_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_hypot_ps + #define _mm512_hypot_ps(a, b) simde_mm512_hypot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_hypot_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_hypot_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_hypotd8_u05(a, b); + #else + return Sleef_hypotd8_u35(a, b); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_hypot_pd(a_.m256d[i], b_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_hypot(a_.f64[i], b_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_hypot_pd + #define _mm512_hypot_pd(a, b) simde_mm512_hypot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_hypot_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_hypot_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_hypot_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_hypot_ps + #define _mm512_mask_hypot_ps(src, k, a, b) simde_mm512_mask_hypot_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_hypot_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_hypot_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_hypot_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_hypot_pd + #define _mm512_mask_hypot_pd(src, k, a, b) simde_mm512_mask_hypot_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_invcbrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invcbrt_ps(a); + #else + return simde_mm_rcp_ps(simde_mm_cbrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invcbrt_ps + #define _mm_invcbrt_ps(a) simde_mm_invcbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_invcbrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invcbrt_pd(a); + #else + return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_cbrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invcbrt_pd + #define _mm_invcbrt_pd(a) 
simde_mm_invcbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_invcbrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invcbrt_ps(a); + #else + return simde_mm256_rcp_ps(simde_mm256_cbrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invcbrt_ps + #define _mm256_invcbrt_ps(a) simde_mm256_invcbrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_invcbrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invcbrt_pd(a); + #else + return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_cbrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invcbrt_pd + #define _mm256_invcbrt_pd(a) simde_mm256_invcbrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_invsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invsqrt_ps(a); + #else + return simde_mm_rcp_ps(simde_mm_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invsqrt_ps + #define _mm_invsqrt_ps(a) simde_mm_invsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_invsqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_invsqrt_pd(a); + #else + return simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_invsqrt_pd + #define _mm_invsqrt_pd(a) simde_mm_invsqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_invsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invsqrt_ps(a); + #else + return simde_mm256_rcp_ps(simde_mm256_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invsqrt_ps + #define _mm256_invsqrt_ps(a) simde_mm256_invsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_invsqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_invsqrt_pd(a); + #else + return simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm256_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_invsqrt_pd + #define _mm256_invsqrt_pd(a) simde_mm256_invsqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_invsqrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_invsqrt_ps(a); + #else + return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), simde_mm512_sqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_invsqrt_ps + #define _mm512_invsqrt_ps(a) simde_mm512_invsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_invsqrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_invsqrt_pd(a); + #else + return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), simde_mm512_sqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_invsqrt_pd + #define _mm512_invsqrt_pd(a) simde_mm512_invsqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_invsqrt_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_invsqrt_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_invsqrt_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_invsqrt_ps + #define _mm512_mask_invsqrt_ps(src, k, a) simde_mm512_mask_invsqrt_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_invsqrt_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_invsqrt_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_invsqrt_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_invsqrt_pd + #define _mm512_mask_invsqrt_pd(src, k, a) simde_mm512_mask_invsqrt_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logf4_u10(a); + #else + return Sleef_logf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log_ps + #define _mm_log_ps(a) simde_mm_log_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logd2_u10(a); + #else + return Sleef_logd2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log_pd + #define _mm_log_pd(a) simde_mm_log_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logf8_u10(a); + #else + return Sleef_logf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log_ps + #define _mm256_log_ps(a) simde_mm256_log_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + 
#if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logd4_u10(a); + #else + return Sleef_logd4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log_pd + #define _mm256_log_pd(a) simde_mm256_log_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logf16_u10(a); + #else + return Sleef_logf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log_ps + #define _mm512_log_ps(a) simde_mm512_log_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_logd8_u10(a); + #else + return Sleef_logd8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log_pd + #define _mm512_log_pd(a) simde_mm512_log_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log_ps + #define _mm512_mask_log_ps(src, k, a) simde_mm512_mask_log_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log_pd + #define _mm512_mask_log_pd(src, k, a) simde_mm512_mask_log_pd(src, k, a) +#endif + 
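/*
 * Illustrative scalar sketch only (hypothetical helper name, not part of the
 * imported simde sources): the simde_mm*_cdfnorm_* fallbacks above implement
 * the normal-CDF approximation from the cpp_phi reference cited in their
 * comments.  The same computation, written out with the coefficients of the
 * single-precision path, looks like this:
 */
#include <math.h>   /* fabsf, sqrtf, expf */

static float cdfnorm_scalar_sketch(float a) {
  const float a1 =  0.254829592f, a2 = -0.284496736f, a3 = 1.421413741f,
              a4 = -1.453152027f, a5 =  1.061405429f, p  = 0.3275911f;
  const float x = fabsf(a) / sqrtf(2.0f);            /* |a| / sqrt(2) */
  const float t = 1.0f / (1.0f + p * x);             /* 1 / (1 + p*x) */
  float y = ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * expf(-x * x);
  y = 1.0f - y;                                      /* ~erf(|a|/sqrt(2)) */
  return 0.5f * (1.0f + ((a < 0.0f) ? -y : y));      /* fold the sign of a back in */
}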
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cdfnorminv_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorminv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128 matched, retval = simde_mm_setzero_ps(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm_or_ps(simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))), simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)))); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. */ + } + + { /* else if (a == 0) */ + simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); + mask = simde_mm_andnot_ps(matched, mask); + matched = simde_mm_or_ps(matched, mask); + + simde__m128 res = simde_mm_set1_ps(-SIMDE_MATH_INFINITYF); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + { /* else if (a == 1) */ + simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); + mask = simde_mm_andnot_ps(matched, mask); + matched = simde_mm_or_ps(matched, mask); + + simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm_blendv_ps which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m128 mask_lo = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.02425))); + /* else if (a > 0.97575) */ + simde__m128 mask_hi = simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.97575))); + + simde__m128 mask = simde_mm_or_ps(mask_lo, mask_hi); + matched = simde_mm_or_ps(matched, mask); + + /* else */ + simde__m128 mask_el = simde_x_mm_not_ps(matched); + mask = simde_mm_or_ps(mask, mask_el); + + /* r = a - 0.5f */ + simde__m128 r = simde_mm_sub_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m128 q = simde_mm_and_ps(mask_lo, a); + q = simde_mm_or_ps(q, simde_mm_and_ps(mask_hi, simde_mm_sub_ps(simde_mm_set1_ps(1.0f), a))); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm_log_ps(q); + q = simde_mm_mul_ps(q, simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.0))); + q = simde_mm_sqrt_ps(q); + + /* el: q = r * r */ + q = simde_x_mm_select_ps(q, simde_mm_mul_ps(r, r), mask_el); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m128 numerator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), 
simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm_fmadd_ps(numerator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); + { + simde__m128 multiplier; + multiplier = simde_mm_and_ps(mask_lo, simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0))); + multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_hi, simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.0)))); + multiplier = simde_mm_or_ps(multiplier, simde_mm_and_ps(mask_el, r)); + numerator = simde_mm_mul_ps(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m128 denominator = simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, q, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), + simde_x_mm_select_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm_fmadd_ps(denominator, q, simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0))); + + /* res = numerator / denominator; */ + simde__m128 res = simde_mm_div_ps(numerator, denominator); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + return retval; + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorminv_ps + #define _mm_cdfnorminv_ps(a) simde_mm_cdfnorminv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cdfnorminv_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_cdfnorminv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128d matched, retval = simde_mm_setzero_pd(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm_or_pd(simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))), simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)))); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. 
*/ + } + + { /* else if (a == 0) */ + simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); + mask = simde_mm_andnot_pd(matched, mask); + matched = simde_mm_or_pd(matched, mask); + + simde__m128d res = simde_mm_set1_pd(-SIMDE_MATH_INFINITY); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + { /* else if (a == 1) */ + simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); + mask = simde_mm_andnot_pd(matched, mask); + matched = simde_mm_or_pd(matched, mask); + + simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm_blendv_pd which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m128d mask_lo = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.02425))); + /* else if (a > 0.97575) */ + simde__m128d mask_hi = simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.97575))); + + simde__m128d mask = simde_mm_or_pd(mask_lo, mask_hi); + matched = simde_mm_or_pd(matched, mask); + + /* else */ + simde__m128d mask_el = simde_x_mm_not_pd(matched); + mask = simde_mm_or_pd(mask, mask_el); + + /* r = a - 0.5 */ + simde__m128d r = simde_mm_sub_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m128d q = simde_mm_and_pd(mask_lo, a); + q = simde_mm_or_pd(q, simde_mm_and_pd(mask_hi, simde_mm_sub_pd(simde_mm_set1_pd(1.0), a))); + + /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ + q = simde_mm_log_pd(q); + q = simde_mm_mul_pd(q, simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.0))); + q = simde_mm_sqrt_pd(q); + + /* el: q = r * r */ + q = simde_x_mm_select_pd(q, simde_mm_mul_pd(r, r), mask_el); + + /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ + /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ + /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m128d numerator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm_fmadd_pd(numerator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 
2.938163982698783e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); + { + simde__m128d multiplier; + multiplier = simde_mm_and_pd(mask_lo, simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0))); + multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_hi, simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.0)))); + multiplier = simde_mm_or_pd(multiplier, simde_mm_and_pd(mask_el, r)); + numerator = simde_mm_mul_pd(numerator, multiplier); + } + + /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m128d denominator = simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, q, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), + simde_x_mm_select_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm_fmadd_pd(denominator, q, simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0))); + + /* res = numerator / denominator; */ + simde__m128d res = simde_mm_div_pd(numerator, denominator); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + return retval; + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_cdfnorminv_pd + #define _mm_cdfnorminv_pd(a) simde_mm_cdfnorminv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_cdfnorminv_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorminv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256 matched, retval = simde_mm256_setzero_ps(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm256_or_ps(simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ)); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. 
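+        *
+        * Each case below builds its own lane mask, ORs its result into
+        * retval, and accumulates the mask into `matched`, so the final
+        * catch-all mask (~matched) only covers lanes that no earlier case
+        * claimed; the result is a branch-free per-lane select.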
*/ + } + + { /* else if (a == 0) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + matched = simde_mm256_or_ps(matched, mask); + + simde__m256 res = simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + { /* else if (a == 1) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + matched = simde_mm256_or_ps(matched, mask); + + simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_ps which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m256 mask_lo = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__m256 mask_hi = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__m256 mask = simde_mm256_or_ps(mask_lo, mask_hi); + matched = simde_mm256_or_ps(matched, mask); + + /* else */ + simde__m256 mask_el = simde_x_mm256_not_ps(matched); + mask = simde_mm256_or_ps(mask, mask_el); + + /* r = a - 0.5f */ + simde__m256 r = simde_mm256_sub_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m256 q = simde_mm256_and_ps(mask_lo, a); + q = simde_mm256_or_ps(q, simde_mm256_and_ps(mask_hi, simde_mm256_sub_ps(simde_mm256_set1_ps(1.0f), a))); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm256_log_ps(q); + q = simde_mm256_mul_ps(q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.0))); + q = simde_mm256_sqrt_ps(q); + + /* el: q = r * r */ + q = simde_x_mm256_select_ps(q, simde_mm256_mul_ps(r, r), mask_el); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m256 numerator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, 
simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm256_fmadd_ps(numerator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)), mask_el)); + { + simde__m256 multiplier; + multiplier = simde_mm256_and_ps(mask_lo, simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0))); + multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_hi, simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.0)))); + multiplier = simde_mm256_or_ps(multiplier, simde_mm256_and_ps(mask_el, r)); + numerator = simde_mm256_mul_ps(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m256 denominator = simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), simde_mm256_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.0)), q, mask_el), + simde_x_mm256_select_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.0)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm256_fmadd_ps(denominator, q, simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0))); + + /* res = numerator / denominator; */ + simde__m256 res = simde_mm256_div_ps(numerator, denominator); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + return retval; + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_cdfnorminv_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_cdfnorminvf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorminv_ps + #define _mm256_cdfnorminv_ps(a) simde_mm256_cdfnorminv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cdfnorminv_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cdfnorminv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256d matched, retval = simde_mm256_setzero_pd(); + + { /* if (a < 0 || a > 1) */ + matched = simde_mm256_or_pd(simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ), simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ)); + + /* We don't actually need to do anything here since we 
initialize + * retval to 0.0. */ + } + + { /* else if (a == 0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + matched = simde_mm256_or_pd(matched, mask); + + simde__m256d res = simde_mm256_set1_pd(-SIMDE_MATH_INFINITY); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + { /* else if (a == 1) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + matched = simde_mm256_or_pd(matched, mask); + + simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + { /* Remaining conditions. + * + * Including the else case in this complicates things a lot, but + * we're using cheap operations to get rid of expensive multiply + * and add functions. This should be a small improvement on SSE + * prior to 4.1. On SSE 4.1 we can use _mm256_blendv_pd which is + * very fast and this becomes a huge win. NEON, AltiVec, and + * WASM also have blend operations, so this should be a big win + * there, too. */ + + /* else if (a < 0.02425) */ + simde__m256d mask_lo = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__m256d mask_hi = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__m256d mask = simde_mm256_or_pd(mask_lo, mask_hi); + matched = simde_mm256_or_pd(matched, mask); + + /* else */ + simde__m256d mask_el = simde_x_mm256_not_pd(matched); + mask = simde_mm256_or_pd(mask, mask_el); + + /* r = a - 0.5 */ + simde__m256d r = simde_mm256_sub_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m256d q = simde_mm256_and_pd(mask_lo, a); + q = simde_mm256_or_pd(q, simde_mm256_and_pd(mask_hi, simde_mm256_sub_pd(simde_mm256_set1_pd(1.0), a))); + + /* q = simde_math_sqrt(-2.0 * simde_math_log(q)) */ + q = simde_mm256_log_pd(q); + q = simde_mm256_mul_pd(q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.0))); + q = simde_mm256_sqrt_pd(q); + + /* el: q = r * r */ + q = simde_x_mm256_select_pd(q, simde_mm256_mul_pd(r, r), mask_el); + + /* lo: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0); */ + /* hi: double numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0); */ + /* el: double numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m256d numerator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01)), mask_el); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, 
simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)), mask_el)); + numerator = simde_mm256_fmadd_pd(numerator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)), mask_el)); + { + simde__m256d multiplier; + multiplier = simde_mm256_and_pd(mask_lo, simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0))); + multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_hi, simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.0)))); + multiplier = simde_mm256_or_pd(multiplier, simde_mm256_and_pd(mask_el, r)); + numerator = simde_mm256_mul_pd(numerator, multiplier); + } + + /* lo/hi: double denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: double denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m256d denominator = simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01)), mask_el); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), simde_mm256_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.0)), q, mask_el), + simde_x_mm256_select_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.0)), simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)), mask_el)); + denominator = simde_mm256_fmadd_pd(denominator, q, simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0))); + + /* res = numerator / denominator; */ + simde__m256d res = simde_mm256_div_pd(numerator, denominator); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + return retval; + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_cdfnorminv_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_cdfnorminv(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_cdfnorminv_pd + #define _mm256_cdfnorminv_pd(a) simde_mm256_cdfnorminv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_cdfnorminv_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorminv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_cdfnorminv_ps(a_.m256[i]); + } + + return simde__m512_from_private(r_); + #else + + simde__m512 retval = simde_mm512_setzero_ps(); + simde__mmask16 matched; + + { /* if (a < 0 
|| a > 1) */ + matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); + matched |= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), SIMDE_CMP_GT_OQ); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. */ + } + + { /* else if (a == 0) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF)); + } + + { /* else if (a == 1) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_ps(retval, mask, simde_mm512_set1_ps(SIMDE_MATH_INFINITYF)); + } + + { /* else if (a < 0.02425) */ + simde__mmask16 mask_lo = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__mmask16 mask_hi = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__mmask16 mask = mask_lo | mask_hi; + matched = matched | mask; + + /* else */ + simde__mmask16 mask_el = ~matched; + + /* r = a - 0.5f */ + simde__m512 r = simde_mm512_sub_ps(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m512 q = simde_mm512_maskz_mov_ps(mask_lo, a); + q = simde_mm512_mask_sub_ps(q, mask_hi, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm512_log_ps(q); + q = simde_mm512_mul_ps(q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.0))); + q = simde_mm512_sqrt_ps(q); + + /* el: q = r * r */ + q = simde_mm512_mask_mul_ps(q, mask_el, r, r); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m512 numerator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.969683028665376e+01))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.209460984245205e+02)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.759285104469687e+02)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.383577518672690e+02)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-3.066479806614716e+01)))); + numerator = simde_mm512_fmadd_ps(numerator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.506628277459239e+00)))); + { + simde__m512 multiplier; + multiplier = simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)); + multiplier = simde_mm512_mask_mov_ps(multiplier, mask_hi, 
simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.0))); + multiplier = simde_mm512_mask_mov_ps(multiplier, mask_el, r); + numerator = simde_mm512_mul_ps(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m512 denominator = simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-5.447609879822406e+01))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.615858368580409e+02)))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.556989798598866e+02)))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C( 6.680131188771972e+01)))); + denominator = simde_mm512_fmadd_ps(denominator, simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.0)), mask_el, q), + simde_mm512_mask_mov_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.0)), mask_el, simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.328068155288572e+01)))); + denominator = simde_mm512_fmadd_ps(denominator, q, simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0))); + + /* res = numerator / denominator; */ + retval = simde_mm512_mask_div_ps(retval, mask_lo | mask_hi | mask_el, numerator, denominator); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorminv_ps + #define _mm512_cdfnorminv_ps(a) simde_mm512_cdfnorminv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_cdfnorminv_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_cdfnorminv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_cdfnorminv_pd(a_.m256d[i]); + } + + return simde__m512d_from_private(r_); + #else + + simde__m512d retval = simde_mm512_setzero_pd(); + simde__mmask8 matched; + + { /* if (a < 0 || a > 1) */ + matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); + matched |= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), SIMDE_CMP_GT_OQ); + + /* We don't actually need to do anything here since we initialize + * retval to 0.0. 
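+        *
+        * In these 512-bit versions the per-lane selection is expressed with
+        * AVX-512 mask registers (simde__mmask8 / simde__mmask16 together
+        * with the _mask_ / _maskz_ forms of mov, sub, mul and div) rather
+        * than the AND/OR blending used in the 128/256-bit versions above;
+        * the branch structure and coefficients are otherwise the same.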
*/ + } + + { /* else if (a == 0) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY)); + } + + { /* else if (a == 1) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + matched |= mask; + + retval = simde_mm512_mask_mov_pd(retval, mask, simde_mm512_set1_pd(SIMDE_MATH_INFINITY)); + } + + { /* else if (a < 0.02425) */ + simde__mmask8 mask_lo = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.02425)), SIMDE_CMP_LT_OQ); + /* else if (a > 0.97575) */ + simde__mmask8 mask_hi = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.97575)), SIMDE_CMP_GT_OQ); + + simde__mmask8 mask = mask_lo | mask_hi; + matched = matched | mask; + + /* else */ + simde__mmask8 mask_el = ~matched; + + /* r = a - 0.5f */ + simde__m512d r = simde_mm512_sub_pd(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5))); + + /* lo: q = a + * hi: q = (1.0 - a) */ + simde__m512d q = a; + q = simde_mm512_mask_sub_pd(q, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); + + /* q = simde_math_sqrtf(-2.0f * simde_math_logf(q)) */ + q = simde_mm512_log_pd(q); + q = simde_mm512_mul_pd(q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.0))); + q = simde_mm512_sqrt_pd(q); + + /* el: q = r * r */ + q = simde_mm512_mask_mul_pd(q, mask_el, r, r); + + /* lo: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * 1.0f); */ + /* hi: float numerator = ((((((c_c[0] * q + c_c[1]) * q + c_c[2]) * q + c_c[3]) * q + c_c[4]) * q + c_c[5]) * -1.0f); */ + /* el: float numerator = ((((((c_a[0] * q + c_a[1]) * q + c_a[2]) * q + c_a[3]) * q + c_a[4]) * q + c_a[5]) * r); */ + simde__m512d numerator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-7.784894002430293e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.969683028665376e+01))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.223964580411365e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.209460984245205e+02)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.400758277161838e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.759285104469687e+02)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(-2.549732539343734e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.383577518672690e+02)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 4.374664141464968e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-3.066479806614716e+01)))); + numerator = simde_mm512_fmadd_pd(numerator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.938163982698783e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.506628277459239e+00)))); + { + simde__m512d multiplier; + multiplier = simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)); + multiplier = simde_mm512_mask_mov_pd(multiplier, mask_hi, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.0))); + multiplier = simde_mm512_mask_mov_pd(multiplier, mask_el, r); + numerator = simde_mm512_mul_pd(numerator, multiplier); + } + + /* lo/hi: float denominator = (((((c_d[0] * q + c_d[1]) * q + c_d[2]) * q + c_d[3]) * 1 + 0.0f) * q + 1); */ + /* el: float denominator = (((((c_b[0] * q + c_b[1]) * q + 
c_b[1]) * q + 
c_b[2]) * q + c_b[3]) * q + c_b[4]) * q + 1); */ + simde__m512d denominator = simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 7.784695709041462e-03)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-5.447609879822406e+01))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.224671290700398e-01)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.615858368580409e+02)))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 2.445134137142996e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.556989798598866e+02)))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 3.754408661907416e+00)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C( 6.680131188771972e+01)))); + denominator = simde_mm512_fmadd_pd(denominator, simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.0)), mask_el, q), + simde_mm512_mask_mov_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.0)), mask_el, simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.328068155288572e+01)))); + denominator = simde_mm512_fmadd_pd(denominator, q, simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0))); + + /* res = numerator / denominator; */ + retval = simde_mm512_mask_div_pd(retval, mask_lo | mask_hi | mask_el, numerator, denominator); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_cdfnorminv_pd + #define _mm512_cdfnorminv_pd(a) simde_mm512_cdfnorminv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_cdfnorminv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorminv_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_cdfnorminv_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorminv_ps + #define _mm512_mask_cdfnorminv_ps(src, k, a) simde_mm512_mask_cdfnorminv_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_cdfnorminv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_cdfnorminv_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_cdfnorminv_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_cdfnorminv_pd + #define _mm512_mask_cdfnorminv_pd(src, k, a) simde_mm512_mask_cdfnorminv_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erfinv_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfinv_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c */ + simde__m128 one = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)); + + simde__m128 lnx = simde_mm_log_ps(simde_mm_mul_ps(simde_mm_sub_ps(one, a), simde_mm_add_ps(one, a))); + + simde__m128 tt1 = simde_mm_mul_ps(simde_mm_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm_set1_ps(SIMDE_FLOAT32_C(0.147))); + tt1 = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); + tt1 = simde_mm_add_ps(tt1, simde_mm_mul_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); + + simde__m128 tt2 = simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); + tt2 = simde_mm_mul_ps(tt2, lnx); + + simde__m128 r = simde_mm_mul_ps(tt1, 
tt1); + r = simde_mm_sub_ps(r, tt2); + r = simde_mm_sqrt_ps(r); + r = simde_mm_add_ps(simde_x_mm_negate_ps(tt1), r); + r = simde_mm_sqrt_ps(r); + + return simde_x_mm_xorsign_ps(r, a); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfinvf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfinv_ps + #define _mm_erfinv_ps(a) simde_mm_erfinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erfinv_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfinv_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m128d one = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)); + + simde__m128d lnx = simde_mm_log_pd(simde_mm_mul_pd(simde_mm_sub_pd(one, a), simde_mm_add_pd(one, a))); + + simde__m128d tt1 = simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_MATH_PI), simde_mm_set1_pd(SIMDE_FLOAT64_C(0.147))); + tt1 = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); + tt1 = simde_mm_add_pd(tt1, simde_mm_mul_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); + + simde__m128d tt2 = simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); + tt2 = simde_mm_mul_pd(tt2, lnx); + + simde__m128d r = simde_mm_mul_pd(tt1, tt1); + r = simde_mm_sub_pd(r, tt2); + r = simde_mm_sqrt_pd(r); + r = simde_mm_add_pd(simde_x_mm_negate_pd(tt1), r); + r = simde_mm_sqrt_pd(r); + + return simde_x_mm_xorsign_pd(r, a); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfinv(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfinv_pd + #define _mm_erfinv_pd(a) simde_mm_erfinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erfinv_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfinv_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m256 one = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)); + simde__m256 sgn = simde_x_mm256_copysign_ps(one, a); + + a = simde_mm256_mul_ps(simde_mm256_sub_ps(one, a), simde_mm256_add_ps(one, a)); + simde__m256 lnx = simde_mm256_log_ps(a); + + simde__m256 tt1 = simde_mm256_mul_ps(simde_mm256_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.147))); + tt1 = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); + tt1 = simde_mm256_add_ps(tt1, simde_mm256_mul_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); + + simde__m256 tt2 = simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); + tt2 = simde_mm256_mul_ps(tt2, lnx); + + simde__m256 r = simde_mm256_mul_ps(tt1, tt1); + r = simde_mm256_sub_ps(r, tt2); + r = simde_mm256_sqrt_ps(r); + r = simde_mm256_add_ps(simde_x_mm256_negate_ps(tt1), r); + r = simde_mm256_sqrt_ps(r); + + return simde_mm256_mul_ps(sgn, r); + #else + simde__m256_private + a_ = simde__m256_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfinvf(a_.f32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfinv_ps + #define _mm256_erfinv_ps(a) 
simde_mm256_erfinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erfinv_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfinv_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m256d one = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)); + simde__m256d sgn = simde_x_mm256_copysign_pd(one, a); + + a = simde_mm256_mul_pd(simde_mm256_sub_pd(one, a), simde_mm256_add_pd(one, a)); + simde__m256d lnx = simde_mm256_log_pd(a); + + simde__m256d tt1 = simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_MATH_PI), simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.147))); + tt1 = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); + tt1 = simde_mm256_add_pd(tt1, simde_mm256_mul_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); + + simde__m256d tt2 = simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); + tt2 = simde_mm256_mul_pd(tt2, lnx); + + simde__m256d r = simde_mm256_mul_pd(tt1, tt1); + r = simde_mm256_sub_pd(r, tt2); + r = simde_mm256_sqrt_pd(r); + r = simde_mm256_add_pd(simde_x_mm256_negate_pd(tt1), r); + r = simde_mm256_sqrt_pd(r); + + return simde_mm256_mul_pd(sgn, r); + #else + simde__m256d_private + a_ = simde__m256d_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfinv(a_.f64[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfinv_pd + #define _mm256_erfinv_pd(a) simde_mm256_erfinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erfinv_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfinv_ps(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m512 one = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)); + simde__m512 sgn = simde_x_mm512_copysign_ps(one, a); + + a = simde_mm512_mul_ps(simde_mm512_sub_ps(one, a), simde_mm512_add_ps(one, a)); + simde__m512 lnx = simde_mm512_log_ps(a); + + simde__m512 tt1 = simde_mm512_mul_ps(simde_mm512_set1_ps(HEDLEY_STATIC_CAST(simde_float32, SIMDE_MATH_PI)), simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.147))); + tt1 = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), tt1); + tt1 = simde_mm512_add_ps(tt1, simde_mm512_mul_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.5)), lnx)); + + simde__m512 tt2 = simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0) / SIMDE_FLOAT32_C(0.147)); + tt2 = simde_mm512_mul_ps(tt2, lnx); + + simde__m512 r = simde_mm512_mul_ps(tt1, tt1); + r = simde_mm512_sub_ps(r, tt2); + r = simde_mm512_sqrt_ps(r); + r = simde_mm512_add_ps(simde_x_mm512_negate_ps(tt1), r); + r = simde_mm512_sqrt_ps(r); + + return simde_mm512_mul_ps(sgn, r); + #else + simde__m512_private + a_ = simde__m512_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfinvf(a_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfinv_ps + #define _mm512_erfinv_ps(a) simde_mm512_erfinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erfinv_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfinv_pd(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + simde__m512d one = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)); + simde__m512d sgn = simde_x_mm512_copysign_pd(one, a); + + a = 
simde_mm512_mul_pd(simde_mm512_sub_pd(one, a), simde_mm512_add_pd(one, a)); + simde__m512d lnx = simde_mm512_log_pd(a); + + simde__m512d tt1 = simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_MATH_PI), simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.147))); + tt1 = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), tt1); + tt1 = simde_mm512_add_pd(tt1, simde_mm512_mul_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.5)), lnx)); + + simde__m512d tt2 = simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0) / SIMDE_FLOAT64_C(0.147)); + tt2 = simde_mm512_mul_pd(tt2, lnx); + + simde__m512d r = simde_mm512_mul_pd(tt1, tt1); + r = simde_mm512_sub_pd(r, tt2); + r = simde_mm512_sqrt_pd(r); + r = simde_mm512_add_pd(simde_x_mm512_negate_pd(tt1), r); + r = simde_mm512_sqrt_pd(r); + + return simde_mm512_mul_pd(sgn, r); + #else + simde__m512d_private + a_ = simde__m512d_to_private(a), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfinv(a_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfinv_pd + #define _mm512_erfinv_pd(a) simde_mm512_erfinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erfinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfinv_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfinv_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfinv_ps + #define _mm512_mask_erfinv_ps(src, k, a) simde_mm512_mask_erfinv_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erfinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfinv_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfinv_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfinv_pd + #define _mm512_mask_erfinv_pd(src, k, a) simde_mm512_mask_erfinv_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_erfcinv_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfcinv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128 matched, retval = simde_mm_setzero_ps(); + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); + matched = simde_mm_and_ps(matched, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625)))); + + if (!simde_mm_test_all_zeros(simde_mm_castps_si128(matched), simde_x_mm_setone_si128())) { + retval = simde_mm_erfinv_ps(simde_mm_sub_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); + } + + if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0625))); + mask = simde_mm_and_ps(mask, simde_mm_cmpgt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0)))); + mask = simde_mm_andnot_ps(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); + t = simde_mm_sqrt_ps(t); + t = 
simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m128 p[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), + simde_mm_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), + simde_mm_set1_ps(SIMDE_FLOAT32_C(-0.164441567910)) + }; + + const simde__m128 q[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m128 numerator = simde_mm_fmadd_ps(p[5], t, p[4]); + numerator = simde_mm_fmadd_ps(numerator, t, p[3]); + numerator = simde_mm_fmadd_ps(numerator, t, p[2]); + numerator = simde_mm_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm_fmadd_ps(denominator, t, q[0]); + + simde__m128 res = simde_mm_div_ps(numerator, denominator); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__m128 mask = simde_mm_cmplt_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); + mask = simde_mm_andnot_ps(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castps_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128 t = simde_x_mm_negate_ps(simde_mm_log_ps(a)); + t = simde_mm_sqrt_ps(t); + t = simde_mm_div_ps(simde_mm_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m128 p[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) + }; + + const simde__m128 q[] = { + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), + simde_mm_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m128 numerator = simde_mm_fmadd_ps(p[3], t, p[2]); + numerator = simde_mm_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm_add_ps(numerator, simde_mm_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128 denominator = simde_mm_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm_fmadd_ps(denominator, t, q[0]); + + simde__m128 res = simde_mm_div_ps(numerator, denominator); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + + if (simde_mm_test_all_ones(simde_mm_castps_si128(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__m128 mask = simde_mm_cmpeq_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(0.0))); + mask = simde_mm_andnot_ps(matched, mask); + matched = simde_mm_or_ps(matched, mask); + + simde__m128 res = simde_mm_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm_or_ps(retval, simde_mm_and_ps(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm_or_ps(retval, simde_mm_andnot_ps(matched, simde_mm_set1_ps(-SIMDE_MATH_INFINITYF))); + } + + return retval; + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / 
sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_erfcinvf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfcinv_ps + #define _mm_erfcinv_ps(a) simde_mm_erfcinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_erfcinv_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_erfcinv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + simde__m128d matched, retval = simde_mm_setzero_pd(); + + { /* if (a < 2.0 && a > 0.0625) */ + matched = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(2.0))); + matched = simde_mm_and_pd(matched, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625)))); + + if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(matched), simde_x_mm_setone_si128())) { + retval = simde_mm_erfinv_pd(simde_mm_sub_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); + } + + if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625 && a > 0.0) */ + simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0625))); + mask = simde_mm_and_pd(mask, simde_mm_cmpgt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0)))); + mask = simde_mm_andnot_pd(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); + t = simde_mm_sqrt_pd(t); + t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m128d p[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), + simde_mm_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), + simde_mm_set1_pd(SIMDE_FLOAT64_C(-0.164441567910)) + }; + + const simde__m128d q[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m128d numerator = simde_mm_fmadd_pd(p[5], t, p[4]); + numerator = simde_mm_fmadd_pd(numerator, t, p[3]); + numerator = simde_mm_fmadd_pd(numerator, t, p[2]); + numerator = simde_mm_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm_fmadd_pd(denominator, t, q[0]); + + simde__m128d res = simde_mm_div_pd(numerator, denominator); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + } + + { /* else if (a < 0.0) */ + simde__m128d mask = simde_mm_cmplt_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); + mask = simde_mm_andnot_pd(matched, mask); + + if (!simde_mm_test_all_zeros(simde_mm_castpd_si128(mask), simde_x_mm_setone_si128())) { + matched = simde_mm_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m128d t = simde_x_mm_negate_pd(simde_mm_log_pd(a)); + t = simde_mm_sqrt_pd(t); + t = simde_mm_div_pd(simde_mm_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m128d p[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 
0.97302949837000)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000)) + }; + + const simde__m128d q[] = { + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), + simde_mm_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m128d numerator = simde_mm_fmadd_pd(p[3], t, p[2]); + numerator = simde_mm_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm_add_pd(numerator, simde_mm_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m128d denominator = simde_mm_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm_fmadd_pd(denominator, t, q[0]); + + simde__m128d res = simde_mm_div_pd(numerator, denominator); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + + if (simde_mm_test_all_ones(simde_mm_castpd_si128(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0) */ + simde__m128d mask = simde_mm_cmpeq_pd(a, simde_mm_set1_pd(SIMDE_FLOAT64_C(0.0))); + mask = simde_mm_andnot_pd(matched, mask); + matched = simde_mm_or_pd(matched, mask); + + simde__m128d res = simde_mm_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm_or_pd(retval, simde_mm_and_pd(mask, res)); + } + + { /* else */ + /* (a >= 2.0) */ + retval = simde_mm_or_pd(retval, simde_mm_andnot_pd(matched, simde_mm_set1_pd(-SIMDE_MATH_INFINITY))); + } + + return retval; + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfcinv(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_erfcinv_pd + #define _mm_erfcinv_pd(a) simde_mm_erfcinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_erfcinv_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfcinv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256 matched, retval = simde_mm256_setzero_ps(); + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ); + matched = simde_mm256_and_ps(matched, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ)); + + if (!simde_mm256_testz_ps(matched, matched)) { + retval = simde_mm256_erfinv_ps(simde_mm256_sub_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); + } + + if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_and_ps(mask, simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ)); + mask = simde_mm256_andnot_ps(matched, mask); + + if (!simde_mm256_testz_ps(mask, mask)) { + matched = simde_mm256_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a)); + t = simde_mm256_sqrt_ps(t); + t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m256 p[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 
0.680544246825)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.16444156791)) + }; + + const simde__m256 q[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m256 numerator = simde_mm256_fmadd_ps(p[5], t, p[4]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[3]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[2]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm256_fmadd_ps(denominator, t, q[0]); + + simde__m256 res = simde_mm256_div_ps(numerator, denominator); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + + if (!simde_mm256_testz_ps(mask, mask)) { + matched = simde_mm256_or_ps(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256 t = simde_x_mm256_negate_ps(simde_mm256_log_ps(a)); + t = simde_mm256_sqrt_ps(t); + t = simde_mm256_div_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m256 p[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C(-0.5374947401000)) + }; + + const simde__m256 q[] = { + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), + simde_mm256_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m256 numerator = simde_mm256_fmadd_ps(p[3], t, p[2]); + numerator = simde_mm256_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm256_add_ps(numerator, simde_mm256_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256 denominator = simde_mm256_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm256_fmadd_ps(denominator, t, q[0]); + + simde__m256 res = simde_mm256_div_ps(numerator, denominator); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + + if (simde_x_mm256_test_all_ones(simde_mm256_castps_si256(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__m256 mask = simde_mm256_cmp_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_ps(matched, mask); + matched = simde_mm256_or_ps(matched, mask); + + simde__m256 res = simde_mm256_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm256_or_ps(retval, simde_mm256_and_ps(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm256_or_ps(retval, simde_mm256_andnot_ps(matched, simde_mm256_set1_ps(-SIMDE_MATH_INFINITYF))); + } + + return retval; + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_erfcinv_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 
simde_math_erfcinvf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfcinv_ps + #define _mm256_erfcinv_ps(a) simde_mm256_erfcinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_erfcinv_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_erfcinv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(256) + simde__m256d matched, retval = simde_mm256_setzero_pd(); + + { /* if (a < 2.0 && a > 0.0625) */ + matched = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ); + matched = simde_mm256_and_pd(matched, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ)); + + if (!simde_mm256_testz_pd(matched, matched)) { + retval = simde_mm256_erfinv_pd(simde_mm256_sub_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); + } + + if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) { + return retval; + } + } + + { /* else if (a < 0.0625 && a > 0.0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_and_pd(mask, simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ)); + mask = simde_mm256_andnot_pd(matched, mask); + + if (!simde_mm256_testz_pd(mask, mask)) { + matched = simde_mm256_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a)); + t = simde_mm256_sqrt_pd(t); + t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m256d p[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.16444156791)) + }; + + const simde__m256d q[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m256d numerator = simde_mm256_fmadd_pd(p[5], t, p[4]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[3]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[2]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm256_fmadd_pd(denominator, t, q[0]); + + simde__m256d res = simde_mm256_div_pd(numerator, denominator); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + } + + { /* else if (a < 0.0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + + if (!simde_mm256_testz_pd(mask, mask)) { + matched = simde_mm256_or_pd(matched, mask); + + /* t = 1/(sqrt(-log(a))) */ + simde__m256d t = simde_x_mm256_negate_pd(simde_mm256_log_pd(a)); + t = simde_mm256_sqrt_pd(t); + t = simde_mm256_div_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m256d p[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), + 
simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C(-0.5374947401000)) + }; + + const simde__m256d q[] = { + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), + simde_mm256_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m256d numerator = simde_mm256_fmadd_pd(p[3], t, p[2]); + numerator = simde_mm256_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm256_add_pd(numerator, simde_mm256_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m256d denominator = simde_mm256_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm256_fmadd_pd(denominator, t, q[0]); + + simde__m256d res = simde_mm256_div_pd(numerator, denominator); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + + if (simde_x_mm256_test_all_ones(simde_mm256_castpd_si256(matched))) { + return retval; + } + } + } + + { /* else if (a == 0.0) */ + simde__m256d mask = simde_mm256_cmp_pd(a, simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = simde_mm256_andnot_pd(matched, mask); + matched = simde_mm256_or_pd(matched, mask); + + simde__m256d res = simde_mm256_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm256_or_pd(retval, simde_mm256_and_pd(mask, res)); + } + + { /* else */ + /* (a >= 2.0) */ + retval = simde_mm256_or_pd(retval, simde_mm256_andnot_pd(matched, simde_mm256_set1_pd(-SIMDE_MATH_INFINITY))); + } + + return retval; + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_erfcinv_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_erfcinv(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_erfcinv_pd + #define _mm256_erfcinv_pd(a) simde_mm256_erfcinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_erfcinv_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfcinv_ps(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && (!defined(SIMDE_ARCH_ARM) || defined(SIMDE_ARCH_AARCH64)) + /* The results on Arm are *slightly* off, which causes problems for + * the edge cases; for example, if you pass 2.0 sqrt will be called + * with a value of -0.0 instead of 0.0, resulting in a NaN. 
*/ + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_erfcinv_ps(a_.m256[i]); + } + return simde__m512_from_private(r_); + #else + simde__m512 retval = simde_mm512_setzero_ps(); + simde__mmask16 matched; + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(2.0)), SIMDE_CMP_LT_OQ); + matched &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_GT_OQ); + + if (matched != 0) { + retval = simde_mm512_erfinv_ps(simde_mm512_sub_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a)); + } + + if (matched == 1) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0625)), SIMDE_CMP_LT_OQ); + mask &= simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_GT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a)); + t = simde_mm512_sqrt_ps(t); + t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m512 p[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.1550470003116)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.382719649631)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.690969348887)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C(-1.128081391617)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.680544246825)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C(-0.16444156791)) + }; + + const simde__m512 q[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.155024849822)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.385228141995)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m512 numerator = simde_mm512_fmadd_ps(p[5], t, p[4]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[3]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[2]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm512_fmadd_ps(denominator, t, q[0]); + + simde__m512 res = simde_mm512_div_ps(numerator, denominator); + + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_LT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512 t = simde_x_mm512_negate_ps(simde_mm512_log_ps(a)); + t = simde_mm512_sqrt_ps(t); + t = simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), t); + + const simde__m512 p[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980456202915)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36366788917100)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.97302949837000)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( -0.5374947401000)) + }; + + const simde__m512 q[] = { + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.00980451277802)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 0.36369997154400)), + simde_mm512_set1_ps(SIMDE_FLOAT32_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * 
p[3])) */ + simde__m512 numerator = simde_mm512_fmadd_ps(p[3], t, p[2]); + numerator = simde_mm512_fmadd_ps(numerator, t, p[1]); + numerator = simde_mm512_add_ps(numerator, simde_mm512_div_ps(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512 denominator = simde_mm512_fmadd_ps(q[2], t, q[1]); + denominator = simde_mm512_fmadd_ps(denominator, t, q[0]); + + simde__m512 res = simde_mm512_div_ps(numerator, denominator); + + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); + + if (matched == 1) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__mmask16 mask = simde_mm512_cmp_ps_mask(a, simde_mm512_set1_ps(SIMDE_FLOAT32_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = ~matched & mask; + matched = matched | mask; + + simde__m512 res = simde_mm512_set1_ps(SIMDE_MATH_INFINITYF); + + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm512_or_ps(retval, simde_mm512_maskz_mov_ps(~matched, simde_mm512_set1_ps(-SIMDE_MATH_INFINITYF))); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfcinv_ps + #define _mm512_erfcinv_ps(a) simde_mm512_erfcinv_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_erfcinv_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_erfcinv_pd(a); + #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_erfcinv_pd(a_.m256d[i]); + } + return simde__m512d_from_private(r_); + #else + simde__m512d retval = simde_mm512_setzero_pd(); + simde__mmask8 matched; + + { /* if (a < 2.0f && a > 0.0625f) */ + matched = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(2.0)), SIMDE_CMP_LT_OQ); + matched &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_GT_OQ); + + if (matched != 0) { + retval = simde_mm512_erfinv_pd(simde_mm512_sub_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a)); + } + + if (matched == 1) { + return retval; + } + } + + { /* else if (a < 0.0625f && a > 0.0f) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0625)), SIMDE_CMP_LT_OQ); + mask &= simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_GT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a)); + t = simde_mm512_sqrt_pd(t); + t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m512d p[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.1550470003116)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.382719649631)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.690969348887)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C(-1.128081391617)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.680544246825)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C(-0.16444156791)) + }; + + const simde__m512d q[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.155024849822)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.385228141995)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.000000000000)) + }; + + /* float numerator = p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) */ + simde__m512d numerator = simde_mm512_fmadd_pd(p[5], t, p[4]); + numerator = 
simde_mm512_fmadd_pd(numerator, t, p[3]); + numerator = simde_mm512_fmadd_pd(numerator, t, p[2]); + numerator = simde_mm512_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm512_fmadd_pd(denominator, t, q[0]); + + simde__m512d res = simde_mm512_div_pd(numerator, denominator); + + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); + } + } + + { /* else if (a < 0.0f) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_LT_OQ); + mask = ~matched & mask; + + if (mask != 0) { + matched = matched | mask; + + /* t = 1/(sqrt(-log(a))) */ + simde__m512d t = simde_x_mm512_negate_pd(simde_mm512_log_pd(a)); + t = simde_mm512_sqrt_pd(t); + t = simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), t); + + const simde__m512d p[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980456202915)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36366788917100)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.97302949837000)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( -0.5374947401000)) + }; + + const simde__m512d q[] = { + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.00980451277802)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 0.36369997154400)), + simde_mm512_set1_pd(SIMDE_FLOAT64_C( 1.00000000000000)) + }; + + /* float numerator = (p[0] / t + p[1] + t * (p[2] + t * p[3])) */ + simde__m512d numerator = simde_mm512_fmadd_pd(p[3], t, p[2]); + numerator = simde_mm512_fmadd_pd(numerator, t, p[1]); + numerator = simde_mm512_add_pd(numerator, simde_mm512_div_pd(p[0], t)); + + /* float denominator = (q[0] + t * (q[1] + t * (q[2]))) */ + simde__m512d denominator = simde_mm512_fmadd_pd(q[2], t, q[1]); + denominator = simde_mm512_fmadd_pd(denominator, t, q[0]); + + simde__m512d res = simde_mm512_div_pd(numerator, denominator); + + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); + + if (matched == 1) { + return retval; + } + } + } + + { /* else if (a == 0.0f) */ + simde__mmask8 mask = simde_mm512_cmp_pd_mask(a, simde_mm512_set1_pd(SIMDE_FLOAT64_C(0.0)), SIMDE_CMP_EQ_OQ); + mask = ~matched & mask; + matched = matched | mask; + + simde__m512d res = simde_mm512_set1_pd(SIMDE_MATH_INFINITY); + + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(mask, res)); + } + + { /* else */ + /* (a >= 2.0f) */ + retval = simde_mm512_or_pd(retval, simde_mm512_maskz_mov_pd(~matched, simde_mm512_set1_pd(-SIMDE_MATH_INFINITY))); + } + + return retval; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_erfcinv_pd + #define _mm512_erfcinv_pd(a) simde_mm512_erfcinv_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_erfcinv_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfcinv_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_erfcinv_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfcinv_ps + #define _mm512_mask_erfcinv_ps(src, k, a) simde_mm512_mask_erfcinv_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_erfcinv_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_erfcinv_pd(src, k, a); + #else + 
return simde_mm512_mask_mov_pd(src, k, simde_mm512_erfcinv_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_erfcinv_pd + #define _mm512_mask_erfcinv_pd(src, k, a) simde_mm512_mask_erfcinv_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_logb_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_logb_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logbf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_logb_ps + #define _mm_logb_ps(a) simde_mm_logb_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_logb_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_logb_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_logb(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_logb_pd + #define _mm_logb_pd(a) simde_mm_logb_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_logb_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_logb_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_logb_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logbf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_logb_ps + #define _mm256_logb_ps(a) simde_mm256_logb_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_logb_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_logb_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_logb_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_logb(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_logb_pd + #define _mm256_logb_pd(a) simde_mm256_logb_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_logb_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_logb_ps(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_logb_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_logbf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_logb_ps + #define _mm512_logb_ps(a) simde_mm512_logb_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_logb_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_logb_pd(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_logb_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_logb(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_logb_pd + #define _mm512_logb_pd(a) simde_mm512_logb_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_logb_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_logb_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_logb_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_logb_ps + #define _mm512_mask_logb_ps(src, k, a) simde_mm512_mask_logb_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_logb_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_logb_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_logb_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_logb_pd + #define _mm512_mask_logb_pd(src, k, a) simde_mm512_mask_logb_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log2_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2f4_u35(a); + #else + return Sleef_log2f4_u10(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log2f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log2_ps + #define _mm_log2_ps(a) simde_mm_log2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log2_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2d2_u35(a); + #else + return Sleef_log2d2_u10(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log2(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log2_pd + #define _mm_log2_pd(a) simde_mm_log2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log2_ps (simde__m256 a) { + #if 
defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2f8_u35(a); + #else + return Sleef_log2f8_u10(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log2_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log2f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log2_ps + #define _mm256_log2_ps(a) simde_mm256_log2_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log2_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2d4_u35(a); + #else + return Sleef_log2d4_u10(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log2_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log2(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log2_pd + #define _mm256_log2_pd(a) simde_mm256_log2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log2_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log2_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2f16_u35(a); + #else + return Sleef_log2f16_u10(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log2_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log2f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log2_ps + #define _mm512_log2_ps(a) simde_mm512_log2_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log2_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log2_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_MATH_SLEEF_VERSION_CHECK(3,4,0) && (SIMDE_ACCURACY_PREFERENCE <= 1) + return Sleef_log2d8_u35(a); + #else + return Sleef_log2d8_u10(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log2_pd(a_.m256d[i]); 
+ } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log2(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log2_pd + #define _mm512_log2_pd(a) simde_mm512_log2_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log2_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log2_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log2_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log2_ps + #define _mm512_mask_log2_ps(src, k, a) simde_mm512_mask_log2_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log2_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log2_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log2_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log2_pd + #define _mm512_mask_log2_pd(src, k, a) simde_mm512_mask_log2_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log1p_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log1p_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log1pf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log1pf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log1p_ps + #define _mm_log1p_ps(a) simde_mm_log1p_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log1p_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log1p_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log1pd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log1p(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log1p_pd + #define _mm_log1p_pd(a) simde_mm_log1p_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log1p_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log1p_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log1pf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log1p_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log1pf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log1p_ps + #define _mm256_log1p_ps(a) simde_mm256_log1p_ps(a) +#endif + + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log1p_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log1p_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log1pd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log1p_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log1p(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log1p_pd + #define _mm256_log1p_pd(a) simde_mm256_log1p_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log1p_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log1p_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log1pf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log1p_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log1pf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log1p_ps + #define _mm512_log1p_ps(a) simde_mm512_log1p_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log1p_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log1p_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log1pd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log1p_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log1p(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log1p_pd + #define _mm512_log1p_pd(a) simde_mm512_log1p_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log1p_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log1p_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log1p_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log1p_ps + #define _mm512_mask_log1p_ps(src, k, a) simde_mm512_mask_log1p_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log1p_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log1p_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log1p_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log1p_pd + #define 
_mm512_mask_log1p_pd(src, k, a) simde_mm512_mask_log1p_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_log10_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log10f4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log10f(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log10_ps + #define _mm_log10_ps(a) simde_mm_log10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_log10_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_log10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_log10d2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log10(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_log10_pd + #define _mm_log10_pd(a) simde_mm_log10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_log10_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log10f8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_log10_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log10f(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log10_ps + #define _mm256_log10_ps(a) simde_mm256_log10_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_log10_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_log10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_log10d4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_log10_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log10(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_log10_pd + #define _mm256_log10_pd(a) simde_mm256_log10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_log10_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log10_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log10f16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if 
SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_log10_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_log10f(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log10_ps + #define _mm512_log10_ps(a) simde_mm512_log10_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_log10_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_log10_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_log10d8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_log10_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_log10(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_log10_pd + #define _mm512_log10_pd(a) simde_mm512_log10_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_log10_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log10_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_log10_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log10_ps + #define _mm512_mask_log10_ps(src, k, a) simde_mm512_mask_log10_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_log10_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_log10_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_log10_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_log10_pd + #define _mm512_mask_log10_pd(src, k, a) simde_mm512_mask_log10_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_nearbyint_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_nearbyint_ps(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_nearbyint_ps + #define _mm512_nearbyint_ps(a) simde_mm512_nearbyint_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_nearbyint_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_nearbyint_pd(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_nearbyint_pd + #define 
_mm512_nearbyint_pd(a) simde_mm512_nearbyint_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_nearbyint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_nearbyint_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_nearbyint_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_nearbyint_ps + #define _mm512_mask_nearbyint_ps(src, k, a) simde_mm512_mask_nearbyint_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_nearbyint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_nearbyint_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_nearbyint_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_nearbyint_pd + #define _mm512_mask_nearbyint_pd(src, k, a) simde_mm512_mask_nearbyint_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_pow_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_pow_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_powf4_u10(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_pow_ps + #define _mm_pow_ps(a, b) simde_mm_pow_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_pow_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_pow_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_powd2_u10(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_pow_pd + #define _mm_pow_pd(a, b) simde_mm_pow_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_pow_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_pow_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_powf8_u10(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_pow_ps + #define _mm256_pow_ps(a, b) simde_mm256_pow_ps(a, b) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_pow_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_pow_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && 
defined(SIMDE_X86_AVX_NATIVE) + return Sleef_powd4_u10(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_pow_pd + #define _mm256_pow_pd(a, b) simde_mm256_pow_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_pow_ps (simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_pow_ps(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_powf16_u10(a, b); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a), + b_ = simde__m512_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_powf(a_.f32[i], b_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_pow_ps + #define _mm512_pow_ps(a, b) simde_mm512_pow_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_pow_pd (simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_pow_pd(a, b); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_powd8_u10(a, b); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a), + b_ = simde__m512d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_pow(a_.f64[i], b_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_pow_pd + #define _mm512_pow_pd(a, b) simde_mm512_pow_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_pow_ps(simde__m512 src, simde__mmask16 k, simde__m512 a, simde__m512 b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_pow_ps(src, k, a, b); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_pow_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_pow_ps + #define _mm512_mask_pow_ps(src, k, a, b) simde_mm512_mask_pow_ps(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_pow_pd(simde__m512d src, simde__mmask8 k, simde__m512d a, simde__m512d b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_pow_pd(src, k, a, b); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_pow_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_pow_pd + #define _mm512_mask_pow_pd(src, k, a, b) simde_mm512_mask_pow_pd(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_clog_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_clog_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + simde__m128_private pow_res_ = simde__m128_to_private(simde_mm_pow_ps(a, simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0)))); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = 
simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1])); + r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_clog_ps + #define _mm_clog_ps(a) simde_mm_clog_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_clog_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_clog_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + simde__m256_private pow_res_ = simde__m256_to_private(simde_mm256_pow_ps(a, simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0)))); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = simde_math_logf(simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i + 1])); + r_.f32[i + 1] = simde_math_atan2f(a_.f32[i + 1], a_.f32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_clog_ps + #define _mm256_clog_ps(a) simde_mm256_clog_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_csqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_csqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + simde__m128 pow_res= simde_mm_pow_ps(a,simde_mm_set1_ps(SIMDE_FLOAT32_C(2.0))); + simde__m128_private pow_res_=simde__m128_to_private(pow_res); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); + simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); + + r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_csqrt_ps + #define _mm_csqrt_ps(a) simde_mm_csqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_csqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_csqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + simde__m256 pow_res= simde_mm256_pow_ps(a,simde_mm256_set1_ps(SIMDE_FLOAT32_C(2.0))); + simde__m256_private pow_res_=simde__m256_to_private(pow_res); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i+=2) { + simde_float32 sign = simde_math_copysignf(SIMDE_FLOAT32_C(1.0), a_.f32[i + 1]); + simde_float32 temp = simde_math_sqrtf(pow_res_.f32[i] + pow_res_.f32[i+1]); + + r_.f32[ i ] = simde_math_sqrtf(( a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + r_.f32[i + 1] = sign * simde_math_sqrtf((-a_.f32[i] + temp) / SIMDE_FLOAT32_C(2.0)); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_csqrt_ps + #define _mm256_csqrt_ps(a) simde_mm256_csqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i8 = a_.i8 % b_.i8; + 
#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi8 + #define _mm_rem_epi8(a, b) simde_mm_rem_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i16 = a_.i16 % b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi16 + #define _mm_rem_epi16(a, b) simde_mm_rem_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i32 = a_.i32 % b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi32 + #define _mm_rem_epi32(a, b) simde_mm_rem_epi32(a, b) + #undef _mm_irem_epi32 + #define _mm_irem_epi32(a, b) simde_mm_rem_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epi64 + #define _mm_rem_epi64(a, b) simde_mm_rem_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u8 = a_.u8 % b_.u8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu8 + #define _mm_rem_epu8(a, b) simde_mm_rem_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu16 (simde__m128i a, 
simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u16 = a_.u16 % b_.u16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu16 + #define _mm_rem_epu16(a, b) simde_mm_rem_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u32 = a_.u32 % b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu32 + #define _mm_rem_epu32(a, b) simde_mm_rem_epu32(a, b) + #undef _mm_urem_epi32 + #define _mm_urem_epi32(a, b) simde_mm_rem_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rem_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE) + return _mm_rem_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u64 = a_.u64 % b_.u64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_rem_epu64 + #define _mm_rem_epu64(a, b) simde_mm_rem_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i8 = a_.i8 % b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi8 + #define _mm256_rem_epi8(a, b) simde_mm256_rem_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = 
simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i16 = a_.i16 % b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi16 + #define _mm256_rem_epi16(a, b) simde_mm256_rem_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i32 = a_.i32 % b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi32 + #define _mm256_rem_epi32(a, b) simde_mm256_rem_epi32(a, b) + #undef _mm256_irem_epi32 + #define _mm256_irem_epi32(a, b) simde_mm256_rem_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epi64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epi64 + #define _mm256_rem_epi64(a, b) simde_mm256_rem_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u8 = a_.u8 % b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu8(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif + #endif + + return 
simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu8 + #define _mm256_rem_epu8(a, b) simde_mm256_rem_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u16 = a_.u16 % b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu16(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu16 + #define _mm256_rem_epu16(a, b) simde_mm256_rem_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u32 = a_.u32 % b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu32(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b) +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu32 + #define _mm256_rem_epu32(a, b) simde_mm256_rem_epu32(a, b) + #undef _mm256_urem_epi32 + #define _mm256_urem_epi32(a, b) simde_mm256_rem_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_rem_epu64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rem_epu64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u64 = a_.u64 % b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_rem_epu64(a_.m128i[i], b_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_rem_epu64 + #define _mm256_rem_epu64(a, b) simde_mm256_rem_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); 
+ + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i8 = a_.i8 % b_.i8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] % b_.i8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi8 + #define _mm512_rem_epi8(a, b) simde_mm512_rem_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i16 = a_.i16 % b_.i16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] % b_.i16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi16 + #define _mm512_rem_epi16(a, b) simde_mm512_rem_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i32 = a_.i32 % b_.i32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] % b_.i32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi32 + #define _mm512_rem_epi32(a, b) simde_mm512_rem_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rem_epi32(simde__m512i src, simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rem_epi32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rem_epi32 + #define _mm512_mask_rem_epi32(src, k, a, b) simde_mm512_mask_rem_epi32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epi64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epi64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else 
+ #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epi64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epi64 + #define _mm512_rem_epi64(a, b) simde_mm512_rem_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu8 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu8(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u8 = a_.u8 % b_.u8; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu8(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = a_.u8[i] % b_.u8[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu8 + #define _mm512_rem_epu8(a, b) simde_mm512_rem_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu16 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu16(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u16 = a_.u16 % b_.u16; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu16(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] % b_.u16[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu16 + #define _mm512_rem_epu16(a, b) simde_mm512_rem_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu32 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu32(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u32 = a_.u32 % b_.u32; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu32(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] % b_.u32[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu32 + #define _mm512_rem_epu32(a, b) simde_mm512_rem_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_mask_rem_epu32(simde__m512i src, 
simde__mmask16 k, simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rem_epu32(src, k, a, b); + #else + return simde_mm512_mask_mov_epi32(src, k, simde_mm512_rem_epu32(a, b)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rem_epu32 + #define _mm512_mask_rem_epu32(src, k, a, b) simde_mm512_mask_rem_epu32(src, k, a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512i +simde_mm512_rem_epu64 (simde__m512i a, simde__m512i b) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rem_epu64(a, b); + #else + simde__m512i_private + r_, + a_ = simde__m512i_to_private(a), + b_ = simde__m512i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.u64 = a_.u64 % b_.u64; + #else + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256i) / sizeof(r_.m256i[0])) ; i++) { + r_.m256i[i] = simde_mm256_rem_epu64(a_.m256i[i], b_.m256i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] % b_.u64[i]; + } + #endif + #endif + + return simde__m512i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rem_epu64 + #define _mm512_rem_epu64(a, b) simde_mm512_rem_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_recip_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_recip_ps(a); + #else + return simde_mm512_div_ps(simde_mm512_set1_ps(SIMDE_FLOAT32_C(1.0)), a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_recip_ps + #define _mm512_recip_ps(a) simde_mm512_recip_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_recip_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_recip_pd(a); + #else + return simde_mm512_div_pd(simde_mm512_set1_pd(SIMDE_FLOAT64_C(1.0)), a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_recip_pd + #define _mm512_recip_pd(a) simde_mm512_recip_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_recip_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_recip_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_recip_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_recip_ps + #define _mm512_mask_recip_ps(src, k, a) simde_mm512_mask_recip_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_recip_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_recip_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_recip_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_recip_pd + #define _mm512_mask_recip_pd(src, k, a) simde_mm512_mask_recip_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_rint_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rint_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_rintf16(a); + #else + 
simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_rintf(a_.f32[i]); + } + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rint_ps + #define _mm512_rint_ps(a) simde_mm512_rint_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_rint_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_rint_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_rintd8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_rint(a_.f64[i]); + } + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_rint_pd + #define _mm512_rint_pd(a) simde_mm512_rint_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_rint_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rint_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_rint_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rint_ps + #define _mm512_mask_rint_ps(src, k, a) simde_mm512_mask_rint_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_rint_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_rint_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_rint_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_rint_pd + #define _mm512_mask_rint_pd(src, k, a) simde_mm512_mask_rint_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sin_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf4_u10(a); + #else + return Sleef_sinf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sin_ps + #define _mm_sin_ps(a) simde_mm_sin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sin_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind2_u10(a); + #else + return Sleef_sind2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sin_pd + #define _mm_sin_pd(a) simde_mm_sin_pd(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sin_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf8_u10(a); + #else + return Sleef_sinf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_sin_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sin_ps + #define _mm256_sin_ps(a) simde_mm256_sin_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sin_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind4_u10(a); + #else + return Sleef_sind4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_sin_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sin_pd + #define _mm256_sin_pd(a) simde_mm256_sin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sin_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sin_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf16_u10(a); + #else + return Sleef_sinf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sin_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sin_ps + #define _mm512_sin_ps(a) simde_mm512_sin_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sin_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sin_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind8_u10(a); + #else + return Sleef_sind8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sin_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = 
simde_math_sin(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sin_pd + #define _mm512_sin_pd(a) simde_mm512_sin_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sin_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sin_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sin_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sin_ps + #define _mm512_mask_sin_ps(src, k, a) simde_mm512_mask_sin_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sin_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sin_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sin_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sin_pd + #define _mm512_mask_sin_pd(src, k, a) simde_mm512_mask_sin_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sincos_ps (simde__m128* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sincos_ps(HEDLEY_REINTERPRET_CAST(__m128*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + Sleef___m128_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosf4_u10(a); + #else + temp = Sleef_sincosf4_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m128 r; + + r = simde_mm_sin_ps(a); + *mem_addr = simde_mm_cos_ps(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sincos_ps + #define _mm_sincos_ps(mem_addr, a) simde_mm_sincos_ps((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sincos_pd (simde__m128d* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sincos_pd(HEDLEY_REINTERPRET_CAST(__m128d*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + Sleef___m128d_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosd2_u10(a); + #else + temp = Sleef_sincosd2_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m128d r; + + r = simde_mm_sin_pd(a); + *mem_addr = simde_mm_cos_pd(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sincos_pd + #define _mm_sincos_pd(mem_addr, a) simde_mm_sincos_pd((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sincos_ps (simde__m256* mem_addr, simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sincos_ps(HEDLEY_REINTERPRET_CAST(__m256*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + Sleef___m256_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosf8_u10(a); + #else + temp = Sleef_sincosf8_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m256 r; + + r = simde_mm256_sin_ps(a); + *mem_addr = simde_mm256_cos_ps(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sincos_ps + #define _mm256_sincos_ps(mem_addr, a) simde_mm256_sincos_ps((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_sincos_pd (simde__m256d* mem_addr, simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sincos_pd(HEDLEY_REINTERPRET_CAST(__m256d*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + Sleef___m256d_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosd4_u10(a); + #else + temp = Sleef_sincosd4_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m256d r; + + r = simde_mm256_sin_pd(a); + *mem_addr = simde_mm256_cos_pd(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sincos_pd + #define _mm256_sincos_pd(mem_addr, a) simde_mm256_sincos_pd((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sincos_ps (simde__m512* mem_addr, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sincos_ps(HEDLEY_REINTERPRET_CAST(__m512*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + Sleef___m512_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosf16_u10(a); + #else + temp = Sleef_sincosf16_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m512 r; + + r = simde_mm512_sin_ps(a); + *mem_addr = simde_mm512_cos_ps(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sincos_ps + #define _mm512_sincos_ps(mem_addr, a) simde_mm512_sincos_ps((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sincos_pd (simde__m512d* mem_addr, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sincos_pd(HEDLEY_REINTERPRET_CAST(__m512d*, mem_addr), a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + Sleef___m512d_2 temp; + + #if SIMDE_ACCURACY_PREFERENCE > 1 + temp = Sleef_sincosd8_u10(a); + #else + temp = Sleef_sincosd8_u35(a); + #endif + + *mem_addr = temp.y; + return temp.x; + #else + simde__m512d r; + + r = simde_mm512_sin_pd(a); + *mem_addr = simde_mm512_cos_pd(a); + + return r; + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sincos_pd + #define _mm512_sincos_pd(mem_addr, a) simde_mm512_sincos_pd((mem_addr),(a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sincos_ps(simde__m512* mem_addr, simde__m512 sin_src, simde__m512 cos_src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a); + #else + simde__m512 cos_res, sin_res; + sin_res = simde_mm512_sincos_ps(&cos_res, a); + *mem_addr = simde_mm512_mask_mov_ps(cos_src, k, cos_res); + return simde_mm512_mask_mov_ps(sin_src, k, sin_res); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sincos_ps + #define _mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_ps(mem_addr, sin_src, cos_src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sincos_pd(simde__m512d* mem_addr, simde__m512d sin_src, simde__m512d cos_src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a); + #else + simde__m512d cos_res, sin_res; + sin_res = simde_mm512_sincos_pd(&cos_res, a); + *mem_addr = 
simde_mm512_mask_mov_pd(cos_src, k, cos_res); + return simde_mm512_mask_mov_pd(sin_src, k, sin_res); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sincos_pd + #define _mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) simde_mm512_mask_sincos_pd(mem_addr, sin_src, cos_src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sind_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sind_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf4_u10(simde_x_mm_deg2rad_ps(a)); + #else + return Sleef_sinf4_u35(simde_x_mm_deg2rad_ps(a)); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sind_ps + #define _mm_sind_ps(a) simde_mm_sind_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sind_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sind_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind2_u10(simde_x_mm_deg2rad_pd(a)); + #else + return Sleef_sind2_u35(simde_x_mm_deg2rad_pd(a)); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sind_pd + #define _mm_sind_pd(a) simde_mm_sind_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sind_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sind_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf8_u10(simde_x_mm256_deg2rad_ps(a)); + #else + return Sleef_sinf8_u35(simde_x_mm256_deg2rad_ps(a)); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_sind_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sind_ps + #define _mm256_sind_ps(a) simde_mm256_sind_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sind_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sind_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind4_u10(simde_x_mm256_deg2rad_pd(a)); + #else + return Sleef_sind4_u35(simde_x_mm256_deg2rad_pd(a)); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / 
sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_sind_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sind_pd + #define _mm256_sind_pd(a) simde_mm256_sind_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sind_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sind_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sinf16_u10(simde_x_mm512_deg2rad_ps(a)); + #else + return Sleef_sinf16_u35(simde_x_mm512_deg2rad_ps(a)); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sind_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sind_ps + #define _mm512_sind_ps(a) simde_mm512_sind_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sind_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sind_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_sind8_u10(simde_x_mm512_deg2rad_pd(a)); + #else + return Sleef_sind8_u35(simde_x_mm512_deg2rad_pd(a)); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sind_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sin(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sind_pd + #define _mm512_sind_pd(a) simde_mm512_sind_pd(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sind_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sind_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sind_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sind_ps + #define _mm512_mask_sind_ps(src, k, a) simde_mm512_mask_sind_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sind_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sind_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sind_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sind_pd + #define _mm512_mask_sind_pd(src, k, a) simde_mm512_mask_sind_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sinh_ps (simde__m128 a) { + 
#if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sinhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sinh_ps + #define _mm_sinh_ps(a) simde_mm_sinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sinh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_sinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sinhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sinh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_sinh_pd + #define _mm_sinh_pd(a) simde_mm_sinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sinh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sinhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_sinh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sinh_ps + #define _mm256_sinh_ps(a) simde_mm256_sinh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sinh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sinhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_sinh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sinh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_sinh_pd + #define _mm256_sinh_pd(a) simde_mm256_sinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_sinh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sinh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sinhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_sinh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 
0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sinhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sinh_ps + #define _mm512_sinh_ps(a) simde_mm512_sinh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_sinh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_sinh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sinhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_sinh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sinh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_sinh_pd + #define _mm512_sinh_pd(a) simde_mm512_sinh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_sinh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sinh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_sinh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sinh_ps + #define _mm512_mask_sinh_ps(src, k, a) simde_mm512_mask_sinh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_sinh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_sinh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_sinh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_sinh_pd + #define _mm512_mask_sinh_pd(src, k, a) simde_mm512_mask_sinh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_ceil_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_ceil_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_ceilf4(a); + #else + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_ceil_ps + #define _mm_svml_ceil_ps(a) simde_mm_svml_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_ceil_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_ceil_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_ceild2(a); + #else + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_ceil_pd + #define _mm_svml_ceil_pd(a) simde_mm_svml_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_ceil_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_ceil_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_ceilf8(a); + #else + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_ceil_ps + #define _mm256_svml_ceil_ps(a) simde_mm256_svml_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_ceil_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_ceil_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_ceild4(a); + #else + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_ceil_pd + #define _mm256_svml_ceil_pd(a) simde_mm256_svml_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_ceil_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_ceil_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_ceilf16(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_ceil_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_ceil_ps + #define _mm512_ceil_ps(a) simde_mm512_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_ceil_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_ceil_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_ceild8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_ceil_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_ceil_pd + #define _mm512_ceil_pd(a) simde_mm512_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_ceil_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_ceil_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_ceil_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ceil_ps + #define _mm512_mask_ceil_ps(src, k, a) simde_mm512_mask_ceil_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_ceil_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_ceil_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_ceil_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_ceil_pd + #define _mm512_mask_ceil_pd(src, k, a) simde_mm512_mask_ceil_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_floor_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return 
_mm_svml_floor_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_floorf4(a); + #else + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_floor_ps + #define _mm_svml_floor_ps(a) simde_mm_svml_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_floor_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_floor_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_floord2(a); + #else + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_floor_pd + #define _mm_svml_floor_pd(a) simde_mm_svml_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_floor_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_floor_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_floorf8(a); + #else + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_floor_ps + #define _mm256_svml_floor_ps(a) simde_mm256_svml_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_floor_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_floor_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_floord4(a); + #else + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_floor_pd + #define _mm256_svml_floor_pd(a) simde_mm256_svml_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_floor_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_floor_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_floorf16(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_floor_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_floor_ps + #define _mm512_floor_ps(a) simde_mm512_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_floor_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_floor_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_floord8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_floor_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_floor_pd + #define _mm512_floor_pd(a) simde_mm512_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_floor_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_floor_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_floor_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_floor_ps + #define _mm512_mask_floor_ps(src, k, a) simde_mm512_mask_floor_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_floor_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_floor_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_floor_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_floor_pd + #define _mm512_mask_floor_pd(src, k, a) simde_mm512_mask_floor_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_round_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_round_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_roundf4(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_round_ps + #define _mm_svml_round_ps(a) simde_mm_svml_round_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_round_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_round_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_roundd2(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_round_pd + #define _mm_svml_round_pd(a) simde_mm_svml_round_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_round_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_round_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_roundf8(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_svml_round_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_round_ps + #define _mm256_svml_round_ps(a) simde_mm256_svml_round_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_round_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_round_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_roundd4(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_svml_round_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_round_pd + #define _mm256_svml_round_pd(a) simde_mm256_svml_round_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_svml_round_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_svml_round_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_roundd8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_svml_round_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_svml_round_pd + #define _mm512_svml_round_pd(a) simde_mm512_svml_round_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_svml_round_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_svml_round_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_svml_round_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_svml_round_pd + #define _mm512_mask_svml_round_pd(src, k, a) simde_mm512_mask_svml_round_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_svml_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_sqrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sqrtf4(a); + #else + return simde_mm_sqrt_ps(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_sqrt_ps + #define _mm_svml_sqrt_ps(a) simde_mm_svml_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_svml_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_svml_sqrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_sqrtd2(a); + #else + return simde_mm_sqrt_pd(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_svml_sqrt_pd + #define _mm_svml_sqrt_pd(a) simde_mm_svml_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_svml_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_sqrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sqrtf8(a); + #else + return simde_mm256_sqrt_ps(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + 
#undef _mm256_svml_sqrt_ps + #define _mm256_svml_sqrt_ps(a) simde_mm256_svml_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_svml_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_svml_sqrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_sqrtd4(a); + #else + return simde_mm256_sqrt_pd(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_svml_sqrt_pd + #define _mm256_svml_sqrt_pd(a) simde_mm256_svml_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_svml_sqrt_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_svml_sqrt_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sqrtf16(a); + #else + return simde_mm512_sqrt_ps(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_svml_sqrt_ps + #define _mm512_svml_sqrt_ps(a) simde_mm512_svml_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_svml_sqrt_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_svml_sqrt_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_sqrtd8(a); + #else + return simde_mm512_sqrt_pd(a); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_svml_sqrt_pd + #define _mm512_svml_sqrt_pd(a) simde_mm512_svml_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_tan_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf4_u10(a); + #else + return Sleef_tanf4_u35(a); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tan_ps + #define _mm_tan_ps(a) simde_mm_tan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_tan_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand2_u10(a); + #else + return Sleef_tand2_u35(a); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tan_pd + #define _mm_tan_pd(a) simde_mm_tan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_tan_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf8_u10(a); + #else + return Sleef_tanf8_u35(a); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for 
(size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_tan_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tan_ps + #define _mm256_tan_ps(a) simde_mm256_tan_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_tan_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand4_u10(a); + #else + return Sleef_tand4_u35(a); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_tan_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tan_pd + #define _mm256_tan_pd(a) simde_mm256_tan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_tan_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tan_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf16_u10(a); + #else + return Sleef_tanf16_u35(a); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_tan_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tan_ps + #define _mm512_tan_ps(a) simde_mm512_tan_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_tan_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tan_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand8_u10(a); + #else + return Sleef_tand8_u35(a); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_tan_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tan_pd + #define _mm512_tan_pd(a) simde_mm512_tan_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_tan_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tan_ps(src, k, a); + #else + return 
simde_mm512_mask_mov_ps(src, k, simde_mm512_tan_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tan_ps + #define _mm512_mask_tan_ps(src, k, a) simde_mm512_mask_tan_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_tan_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tan_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_tan_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tan_pd + #define _mm512_mask_tan_pd(src, k, a) simde_mm512_mask_tan_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_tand_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tand_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf4_u10(simde_x_mm_deg2rad_ps(a)); + #else + return Sleef_tanf4_u35(simde_x_mm_deg2rad_ps(a)); + #endif + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i])); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tand_ps + #define _mm_tand_ps(a) simde_mm_tand_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_tand_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tand_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand2_u10(simde_x_mm_deg2rad_pd(a)); + #else + return Sleef_tand2_u35(simde_x_mm_deg2rad_pd(a)); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i])); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tand_pd + #define _mm_tand_pd(a) simde_mm_tand_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_tand_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tand_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf8_u10(simde_x_mm256_deg2rad_ps(a)); + #else + return Sleef_tanf8_u35(simde_x_mm256_deg2rad_ps(a)); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_tand_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tand_ps + #define _mm256_tand_ps(a) simde_mm256_tand_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_tand_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tand_pd(a); + #elif 
defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand4_u10(simde_x_mm256_deg2rad_pd(a)); + #else + return Sleef_tand4_u35(simde_x_mm256_deg2rad_pd(a)); + #endif + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_tand_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tand_pd + #define _mm256_tand_pd(a) simde_mm256_tand_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_tand_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tand_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tanf16_u10(simde_x_mm512_deg2rad_ps(a)); + #else + return Sleef_tanf16_u35(simde_x_mm512_deg2rad_ps(a)); + #endif + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_tand_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanf(simde_math_deg2radf(a_.f32[i])); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tand_ps + #define _mm512_tand_ps(a) simde_mm512_tand_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_tand_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tand_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + #if SIMDE_ACCURACY_PREFERENCE > 1 + return Sleef_tand8_u10(simde_x_mm512_deg2rad_pd(a)); + #else + return Sleef_tand8_u35(simde_x_mm512_deg2rad_pd(a)); + #endif + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_tand_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tan(simde_math_deg2rad(a_.f64[i])); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tand_pd + #define _mm512_tand_pd(a) simde_mm512_tand_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_tand_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tand_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_tand_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tand_ps + #define _mm512_mask_tand_ps(src, k, a) simde_mm512_mask_tand_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_tand_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && 
defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tand_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_tand_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tand_pd + #define _mm512_mask_tand_pd(src, k, a) simde_mm512_mask_tand_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_tanh_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_tanhf4_u10(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanhf(a_.f32[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tanh_ps + #define _mm_tanh_ps(a) simde_mm_tanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_tanh_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_tanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_tanhd2_u10(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tanh(a_.f64[i]); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_tanh_pd + #define _mm_tanh_pd(a) simde_mm_tanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_tanh_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_tanhf8_u10(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_tanh_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanhf(a_.f32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tanh_ps + #define _mm256_tanh_ps(a) simde_mm256_tanh_ps(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_tanh_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_tanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_tanhd4_u10(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_tanh_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tanh(a_.f64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_tanh_pd + #define _mm256_tanh_pd(a) simde_mm256_tanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_tanh_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return 
_mm512_tanh_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_tanhf16_u10(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_tanh_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_tanhf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tanh_ps + #define _mm512_tanh_ps(a) simde_mm512_tanh_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_tanh_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_tanh_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_tanhd8_u10(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_tanh_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_tanh(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_tanh_pd + #define _mm512_tanh_pd(a) simde_mm512_tanh_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_tanh_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tanh_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_tanh_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tanh_ps + #define _mm512_mask_tanh_ps(src, k, a) simde_mm512_mask_tanh_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_tanh_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_tanh_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_tanh_pd(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_tanh_pd + #define _mm512_mask_tanh_pd(src, k, a) simde_mm512_mask_tanh_pd(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_trunc_ps (simde__m128 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_trunc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_truncf4(a); + #else + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_trunc_ps + #define _mm_trunc_ps(a) simde_mm_trunc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_trunc_pd (simde__m128d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE_NATIVE) + return _mm_trunc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_SSE_NATIVE) + return Sleef_truncd2(a); + #else + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm_trunc_pd + #define _mm_trunc_pd(a) simde_mm_trunc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256 +simde_mm256_trunc_ps (simde__m256 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_trunc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_truncf8(a); + #else + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_trunc_ps + #define _mm256_trunc_ps(a) simde_mm256_trunc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_trunc_pd (simde__m256d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) + return _mm256_trunc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX_NATIVE) + return Sleef_truncd4(a); + #else + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_ZERO); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm256_trunc_pd + #define _mm256_trunc_pd(a) simde_mm256_trunc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_trunc_ps (simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_trunc_ps(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_truncf16(a); + #else + simde__m512_private + r_, + a_ = simde__m512_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256) / sizeof(r_.m256[0])) ; i++) { + r_.m256[i] = simde_mm256_trunc_ps(a_.m256[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #endif + + return simde__m512_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_trunc_ps + #define _mm512_trunc_ps(a) simde_mm512_trunc_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_trunc_pd (simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_trunc_pd(a); + #elif defined(SIMDE_MATH_SLEEF_ENABLE) && defined(SIMDE_X86_AVX512F_NATIVE) + return Sleef_truncd8(a); + #else + simde__m512d_private + r_, + a_ = simde__m512d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(256) + for (size_t i = 0 ; i < (sizeof(r_.m256d) / sizeof(r_.m256d[0])) ; i++) { + r_.m256d[i] = simde_mm256_trunc_pd(a_.m256d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + + return simde__m512d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_trunc_pd + #define _mm512_trunc_pd(a) simde_mm512_trunc_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512 +simde_mm512_mask_trunc_ps(simde__m512 src, simde__mmask16 k, simde__m512 a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_trunc_ps(src, k, a); + #else + return simde_mm512_mask_mov_ps(src, k, simde_mm512_trunc_ps(a)); + #endif +} +#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES) + #undef _mm512_mask_trunc_ps + #define _mm512_mask_trunc_ps(src, k, a) simde_mm512_mask_trunc_ps(src, k, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m512d +simde_mm512_mask_trunc_pd(simde__m512d src, simde__mmask8 k, simde__m512d a) { + #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX512F_NATIVE) + return _mm512_mask_trunc_pd(src, k, a); + #else + return simde_mm512_mask_mov_pd(src, k, simde_mm512_trunc_pd(a)); + #endif +} +#if 
defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
+  #undef _mm512_mask_trunc_pd
+  #define _mm512_mask_trunc_pd(src, k, a) simde_mm512_mask_trunc_pd(src, k, a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m128i
+simde_mm_udivrem_epi32 (simde__m128i * mem_addr, simde__m128i a, simde__m128i b) {
+  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_SSE2_NATIVE)
+    return _mm_udivrem_epi32(mem_addr, a, b);
+  #else
+    simde__m128i r;
+
+    r = simde_mm_div_epu32(a, b);
+    *mem_addr = simde_x_mm_sub_epu32(a, simde_x_mm_mullo_epu32(r, b));
+
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
+  #undef _mm_udivrem_epi32
+  #define _mm_udivrem_epi32(mem_addr, a, b) simde_mm_udivrem_epi32((mem_addr),(a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde__m256i
+simde_mm256_udivrem_epi32 (simde__m256i* mem_addr, simde__m256i a, simde__m256i b) {
+  #if defined(SIMDE_X86_SVML_NATIVE) && defined(SIMDE_X86_AVX_NATIVE)
+    return _mm256_udivrem_epi32(HEDLEY_REINTERPRET_CAST(__m256i*, mem_addr), a, b);
+  #else
+    simde__m256i r;
+
+    r = simde_mm256_div_epu32(a, b);
+    *mem_addr = simde_x_mm256_sub_epu32(a, simde_x_mm256_mullo_epu32(r, b));
+
+    return r;
+  #endif
+}
+#if defined(SIMDE_X86_SVML_ENABLE_NATIVE_ALIASES)
+  #undef _mm256_udivrem_epi32
+  #define _mm256_udivrem_epi32(mem_addr, a, b) simde_mm256_udivrem_epi32((mem_addr),(a), (b))
+#endif
+
+SIMDE_END_DECLS_
+
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_X86_SVML_H) */
+/* :: End simde/x86/svml.h :: */
diff --git a/include/simde/x86/xop.h b/include/simde/x86/xop.h
new file mode 100644
index 00000000..fed6b244
--- /dev/null
+++ b/include/simde/x86/xop.h
@@ -0,0 +1,43267 @@
+/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
+/* e134cc793faf11b84d22d5893637efa919052688 */
+/* :: Begin simde/x86/xop.h :: */
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * Copyright: + * 2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_XOP_H) +#define SIMDE_X86_XOP_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2019-2020 Michael R. Crusoe + * 2020 Himanshi Mathur + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_AVX2_H) +#define SIMDE_X86_AVX2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/avx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2018-2020 Evan Nemerson + * 2020 Michael R. 
Crusoe + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + */ + +#if !defined(SIMDE_X86_SSE_H) +#define SIMDE_X86_SSE_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/mmx.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_MMX_H) +#define SIMDE_X86_MMX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-common.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_COMMON_H) +#define SIMDE_COMMON_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/hedley.h :: */ +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to + * the public domain worldwide. This software is distributed without + * any warranty. + * + * For details, see . 
+ * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) +#if defined(HEDLEY_VERSION) +# undef HEDLEY_VERSION +#endif +#define HEDLEY_VERSION 16 + +#if defined(HEDLEY_STRINGIFY_EX) +# undef HEDLEY_STRINGIFY_EX +#endif +#define HEDLEY_STRINGIFY_EX(x) #x + +#if defined(HEDLEY_STRINGIFY) +# undef HEDLEY_STRINGIFY +#endif +#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) + +#if defined(HEDLEY_CONCAT_EX) +# undef HEDLEY_CONCAT_EX +#endif +#define HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(HEDLEY_CONCAT) +# undef HEDLEY_CONCAT +#endif +#define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) + +#if defined(HEDLEY_CONCAT3_EX) +# undef HEDLEY_CONCAT3_EX +#endif +#define HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(HEDLEY_CONCAT3) +# undef HEDLEY_CONCAT3 +#endif +#define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(HEDLEY_VERSION_ENCODE) +# undef HEDLEY_VERSION_ENCODE +#endif +#define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if defined(HEDLEY_VERSION_DECODE_MAJOR) +# undef HEDLEY_VERSION_DECODE_MAJOR +#endif +#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(HEDLEY_VERSION_DECODE_MINOR) +# undef HEDLEY_VERSION_DECODE_MINOR +#endif +#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(HEDLEY_VERSION_DECODE_REVISION) +# undef HEDLEY_VERSION_DECODE_REVISION +#endif +#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(HEDLEY_GNUC_VERSION) +# undef HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) +# define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(HEDLEY_GNUC_VERSION_CHECK) +# undef HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(HEDLEY_GNUC_VERSION) +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MSVC_VERSION) +# undef HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) +# define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(HEDLEY_MSVC_VERSION_CHECK) +# undef HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(HEDLEY_MSVC_VERSION) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else +# define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(HEDLEY_INTEL_VERSION) +# undef HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && 
defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) +# define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(HEDLEY_INTEL_VERSION_CHECK) +# undef HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION) +# undef HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) +# define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) +# undef HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(HEDLEY_INTEL_CL_VERSION) +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PGI_VERSION) +# undef HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) +# define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(HEDLEY_PGI_VERSION_CHECK) +# undef HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(HEDLEY_PGI_VERSION) +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION) +# undef HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) +# define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(HEDLEY_SUNPRO_VERSION_CHECK) +# undef HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(HEDLEY_SUNPRO_VERSION) +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# undef HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) +# define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) +# undef HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(HEDLEY_EMSCRIPTEN_VERSION) +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) 
(HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_ARM_VERSION) +# undef HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) +# define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(HEDLEY_ARM_VERSION_CHECK) +# undef HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(HEDLEY_ARM_VERSION) +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IBM_VERSION) +# undef HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) +# define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(HEDLEY_IBM_VERSION_CHECK) +# undef HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(HEDLEY_IBM_VERSION) +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_VERSION) +# undef HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +# if (__TI_COMPILER_VERSION__ >= 16000000) +# define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +# endif +#endif + +#if defined(HEDLEY_TI_VERSION_CHECK) +# undef HEDLEY_TI_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_VERSION) +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION) +# undef HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) +# define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) +# undef HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL2000_VERSION) +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION) +# undef HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) +# define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL430_VERSION_CHECK) 
+# undef HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL430_VERSION) +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION) +# undef HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) +# define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) +# undef HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_ARMCL_VERSION) +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION) +# undef HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) +# define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) +# undef HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL6X_VERSION) +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION) +# undef HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) +# define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) +# undef HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CL7X_VERSION) +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION) +# undef HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) +# define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) +# undef HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(HEDLEY_TI_CLPRU_VERSION) +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_CRAY_VERSION) +# undef HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) +# if defined(_RELEASE_PATCHLEVEL) +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) +# else +# define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) +# endif +#endif + +#if defined(HEDLEY_CRAY_VERSION_CHECK) +# undef HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(HEDLEY_CRAY_VERSION) +# define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define 
HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_IAR_VERSION) +# undef HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) +# if __VER__ > 1000 +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) +# else +# define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) +# endif +#endif + +#if defined(HEDLEY_IAR_VERSION_CHECK) +# undef HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(HEDLEY_IAR_VERSION) +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_TINYC_VERSION) +# undef HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) +# define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(HEDLEY_TINYC_VERSION_CHECK) +# undef HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_DMC_VERSION) +# undef HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) +# define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if defined(HEDLEY_DMC_VERSION_CHECK) +# undef HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(HEDLEY_DMC_VERSION) +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION) +# undef HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) +# define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(HEDLEY_COMPCERT_VERSION_CHECK) +# undef HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(HEDLEY_COMPCERT_VERSION) +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_PELLES_VERSION) +# undef HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) +# define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(HEDLEY_PELLES_VERSION_CHECK) +# undef HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(HEDLEY_PELLES_VERSION) +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION) +# undef HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) +# define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(HEDLEY_MCST_LCC_VERSION_CHECK) +# undef HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_GCC_VERSION) +# undef HEDLEY_GCC_VERSION 
+#endif +#if \ + defined(HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(HEDLEY_INTEL_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_ARM_VERSION) && \ + !defined(HEDLEY_CRAY_VERSION) && \ + !defined(HEDLEY_TI_VERSION) && \ + !defined(HEDLEY_TI_ARMCL_VERSION) && \ + !defined(HEDLEY_TI_CL430_VERSION) && \ + !defined(HEDLEY_TI_CL2000_VERSION) && \ + !defined(HEDLEY_TI_CL6X_VERSION) && \ + !defined(HEDLEY_TI_CL7X_VERSION) && \ + !defined(HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(HEDLEY_MCST_LCC_VERSION) +# define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION +#endif + +#if defined(HEDLEY_GCC_VERSION_CHECK) +# undef HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(HEDLEY_GCC_VERSION) +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else +# define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(HEDLEY_HAS_ATTRIBUTE) +# undef HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) +#else +# define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) +# undef HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION) && \ + (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else +# define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) +# define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else +# define 
HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_BUILTIN) +# undef HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else +# define HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_BUILTIN) +# undef HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_BUILTIN) +# undef HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else +# define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_FEATURE) +# undef HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else +# define HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_FEATURE) +# undef HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_FEATURE) +# undef HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else +# define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_EXTENSION) +# undef HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else +# define HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_EXTENSION) +# undef HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_EXTENSION) +# undef HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else +# define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else +# define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) +# define 
HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else +# define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_HAS_WARNING) +# undef HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else +# define HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(HEDLEY_GNUC_HAS_WARNING) +# undef HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_GCC_HAS_WARNING) +# undef HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else +# define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) +# define HEDLEY_PRAGMA(value) _Pragma(#value) +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_PRAGMA(value) __pragma(value) +#else +# define HEDLEY_PRAGMA(value) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_PUSH) +# undef HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(HEDLEY_DIAGNOSTIC_POP) +# undef HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif HEDLEY_ARM_VERSION_CHECK(5,6,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") +# define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else +# 
define HEDLEY_DIAGNOSTIC_PUSH +# define HEDLEY_DIAGNOSTIC_POP +#endif + +/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wc++98-compat") +# if HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) +# define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(HEDLEY_CONST_CAST) +# undef HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + HEDLEY_HAS_WARNING("-Wcast-qual") || \ + HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_REINTERPRET_CAST) +# undef HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else +# define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_STATIC_CAST) +# undef HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) +# define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else +# define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(HEDLEY_CPP_CAST) +# undef HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if HEDLEY_HAS_WARNING("-Wold-style-cast") +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + HEDLEY_DIAGNOSTIC_POP +# elif HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define HEDLEY_CPP_CAST(T, expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) +# undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define 
HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 
161") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-attributes") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif HEDLEY_PGI_VERSION_CHECK(20,7,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif HEDLEY_PGI_VERSION_CHECK(17,10,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) +# undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if HEDLEY_HAS_WARNING("-Wcast-qual") +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) +# undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if HEDLEY_HAS_WARNING("-Wunused-function") +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,4,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else +# define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(HEDLEY_DEPRECATED) +# undef HEDLEY_DEPRECATED +#endif +#if defined(HEDLEY_DEPRECATED_FOR) +# undef HEDLEY_DEPRECATED_FOR +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) +# define 
HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) +# define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) +# define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_DEPRECATED(since) __declspec(deprecated) +# define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_DEPRECATED(since) _Pragma("deprecated") +# define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else +# define HEDLEY_DEPRECATED(since) +# define HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(HEDLEY_UNAVAILABLE) +# undef HEDLEY_UNAVAILABLE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warning) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else +# define HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(HEDLEY_WARN_UNUSED_RESULT) +# undef HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) +# undef HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) 
&& defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) +# define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ +# define HEDLEY_WARN_UNUSED_RESULT _Check_return_ +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else +# define HEDLEY_WARN_UNUSED_RESULT +# define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(HEDLEY_SENTINEL) +# undef HEDLEY_SENTINEL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else +# define HEDLEY_SENTINEL(position) +#endif + +#if defined(HEDLEY_NO_RETURN) +# undef HEDLEY_NO_RETURN +#endif +#if HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NO_RETURN __noreturn +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) +# define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif 
HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NO_RETURN __attribute((noreturn)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NO_RETURN __declspec(noreturn) +#else +# define HEDLEY_NO_RETURN +#endif + +#if defined(HEDLEY_NO_ESCAPE) +# undef HEDLEY_NO_ESCAPE +#endif +#if HEDLEY_HAS_ATTRIBUTE(noescape) +# define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else +# define HEDLEY_NO_ESCAPE +#endif + +#if defined(HEDLEY_UNREACHABLE) +# undef HEDLEY_UNREACHABLE +#endif +#if defined(HEDLEY_UNREACHABLE_RETURN) +# undef HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(HEDLEY_ASSUME) +# undef HEDLEY_ASSUME +#endif +#if \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ASSUME(expr) __assume(expr) +#elif HEDLEY_HAS_BUILTIN(__builtin_assume) +# define HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# if defined(__cplusplus) +# define HEDLEY_ASSUME(expr) std::_nassert(expr) +# else +# define HEDLEY_ASSUME(expr) _nassert(expr) +# endif +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ + HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(HEDLEY_ASSUME) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif +#if !defined(HEDLEY_ASSUME) +# if defined(HEDLEY_UNREACHABLE) +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) +# else +# define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) +# endif +#endif +#if defined(HEDLEY_UNREACHABLE) +# if \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) +# define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) +# else +# define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() +# endif +#else +# define HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(HEDLEY_UNREACHABLE) +# define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wpedantic") +# pragma clang diagnostic ignored "-Wpedantic" +#endif +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) +# if defined(__clang__) +# pragma clang diagnostic ignored "-Wvariadic-macros" +# elif defined(HEDLEY_GCC_VERSION) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif +#endif +#if defined(HEDLEY_NON_NULL) +# undef HEDLEY_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else +# define HEDLEY_NON_NULL(...) 
+#endif +HEDLEY_DIAGNOSTIC_POP + +#if defined(HEDLEY_PRINTF_FORMAT) +# undef HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + HEDLEY_HAS_ATTRIBUTE(format) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else +# define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(HEDLEY_CONSTEXPR) +# undef HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) +# endif +#endif +#if !defined(HEDLEY_CONSTEXPR) +# define HEDLEY_CONSTEXPR +#endif + +#if defined(HEDLEY_PREDICT) +# undef HEDLEY_PREDICT +#endif +#if defined(HEDLEY_LIKELY) +# undef HEDLEY_LIKELY +#endif +#if defined(HEDLEY_UNLIKELY) +# undef HEDLEY_UNLIKELY +#endif +#if defined(HEDLEY_UNPREDICTABLE) +# undef HEDLEY_UNPREDICTABLE +#endif +#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) +# define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + 
HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) +# define HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) +# define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define HEDLEY_LIKELY(expr) (!!(expr)) +# define HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(HEDLEY_UNPREDICTABLE) +# define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(HEDLEY_MALLOC) +# undef HEDLEY_MALLOC +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(malloc) || \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_MALLOC __attribute__((__malloc__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_MALLOC __declspec(restrict) +#else +# define HEDLEY_MALLOC +#endif + +#if defined(HEDLEY_PURE) +# undef HEDLEY_PURE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(pure) || \ + HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + 
HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PURE __attribute__((__pure__)) +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define HEDLEY_PURE +#endif + +#if defined(HEDLEY_CONST) +# undef HEDLEY_CONST +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(const) || \ + HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_CONST __attribute__((__const__)) +#elif \ + HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define HEDLEY_CONST _Pragma("no_side_effect") +#else +# define HEDLEY_CONST HEDLEY_PURE +#endif + +#if defined(HEDLEY_RESTRICT) +# undef HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) +# define HEDLEY_RESTRICT restrict +#elif \ + HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RESTRICT __restrict +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) +# define HEDLEY_RESTRICT _Restrict +#else +# define HEDLEY_RESTRICT +#endif + +#if defined(HEDLEY_INLINE) +# undef HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) +# define HEDLEY_INLINE inline +#elif \ + defined(HEDLEY_GCC_VERSION) || \ + HEDLEY_ARM_VERSION_CHECK(6,2,0) +# define HEDLEY_INLINE __inline__ +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) 
|| \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_INLINE __inline +#else +# define HEDLEY_INLINE +#endif + +#if defined(HEDLEY_ALWAYS_INLINE) +# undef HEDLEY_ALWAYS_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE +#elif \ + HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE +#endif + +#if defined(HEDLEY_NEVER_INLINE) +# undef HEDLEY_NEVER_INLINE +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(noinline) || \ + HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#elif HEDLEY_PGI_VERSION_CHECK(10,2,0) +# define HEDLEY_NEVER_INLINE 
_Pragma("noinline") +#elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) +# define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) +# define HEDLEY_NEVER_INLINE __attribute((noinline)) +#elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) +# define HEDLEY_NEVER_INLINE __declspec(noinline) +#else +# define HEDLEY_NEVER_INLINE +#endif + +#if defined(HEDLEY_PRIVATE) +# undef HEDLEY_PRIVATE +#endif +#if defined(HEDLEY_PUBLIC) +# undef HEDLEY_PUBLIC +#endif +#if defined(HEDLEY_IMPORT) +# undef HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC __declspec(dllexport) +# define HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + HEDLEY_HAS_ATTRIBUTE(visibility) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define HEDLEY_PRIVATE +# define HEDLEY_PUBLIC +# endif +# define HEDLEY_IMPORT extern +#endif + +#if defined(HEDLEY_NO_THROW) +# undef HEDLEY_NO_THROW +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) +# define HEDLEY_NO_THROW __declspec(nothrow) +#else +# define HEDLEY_NO_THROW +#endif + +#if defined(HEDLEY_FALL_THROUGH) +# undef HEDLEY_FALL_THROUGH +#endif +#if defined(HEDLEY_INTEL_VERSION) +# define HEDLEY_FALL_THROUGH +#elif \ + HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) +# define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ +# define HEDLEY_FALL_THROUGH __fallthrough +#else +# define HEDLEY_FALL_THROUGH +#endif + +#if defined(HEDLEY_RETURNS_NON_NULL) +# undef HEDLEY_RETURNS_NON_NULL +#endif +#if \ + HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ +# define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else +# define HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(HEDLEY_ARRAY_PARAM) +# undef HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_TINYC_VERSION) +# define HEDLEY_ARRAY_PARAM(name) (name) +#else +# define 
HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(HEDLEY_IS_CONSTANT) +# undef HEDLEY_IS_CONSTANT +#endif +#if defined(HEDLEY_REQUIRE_CONSTEXPR) +# undef HEDLEY_REQUIRE_CONSTEXPR +#endif +/* HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(HEDLEY_IS_CONSTEXPR_) +# undef HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +# endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(HEDLEY_SUNPRO_VERSION) && \ + !defined(HEDLEY_PGI_VERSION) && \ + !defined(HEDLEY_IAR_VERSION)) || \ + (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) +# if defined(__INTPTR_TYPE__) +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +# else +# include +# define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +# endif +# elif \ + defined(HEDLEY_GCC_VERSION) || \ + defined(HEDLEY_INTEL_VERSION) || \ + defined(HEDLEY_TINYC_VERSION) || \ + defined(HEDLEY_TI_ARMCL_VERSION) || \ + HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(HEDLEY_TI_CL2000_VERSION) || \ + defined(HEDLEY_TI_CL6X_VERSION) || \ + defined(HEDLEY_TI_CL7X_VERSION) || \ + defined(HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ + ((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(HEDLEY_IS_CONSTEXPR_) +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else +# if !defined(HEDLEY_IS_CONSTANT) +# define HEDLEY_IS_CONSTANT(expr) (0) +# endif +# define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(HEDLEY_BEGIN_C_DECLS) +# undef HEDLEY_BEGIN_C_DECLS +#endif +#if defined(HEDLEY_END_C_DECLS) +# undef HEDLEY_END_C_DECLS +#endif +#if defined(HEDLEY_C_DECL) +# undef HEDLEY_C_DECL +#endif +#if defined(__cplusplus) +# define HEDLEY_BEGIN_C_DECLS extern "C" { +# define HEDLEY_END_C_DECLS } +# define HEDLEY_C_DECL extern "C" +#else +# define HEDLEY_BEGIN_C_DECLS +# define HEDLEY_END_C_DECLS +# define HEDLEY_C_DECL +#endif + +#if defined(HEDLEY_STATIC_ASSERT) +# undef HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(HEDLEY_NULL) +# undef HEDLEY_NULL +#endif +#if defined(__cplusplus) +# if __cplusplus >= 201103L +# define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) +# elif defined(NULL) +# define HEDLEY_NULL NULL +# else +# define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) +# endif +#elif defined(NULL) +# define HEDLEY_NULL NULL +#else +# define HEDLEY_NULL ((void*) 0) +#endif + +#if defined(HEDLEY_MESSAGE) +# undef HEDLEY_MESSAGE +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_MESSAGE(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(message msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) +#elif HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_WARNING) +# undef HEDLEY_WARNING +#endif +#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define HEDLEY_WARNING(msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + HEDLEY_PRAGMA(clang warning msg) \ + HEDLEY_DIAGNOSTIC_POP +#elif \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) +#else +# define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) +#endif + +#if defined(HEDLEY_REQUIRE) +# undef HEDLEY_REQUIRE +#endif +#if defined(HEDLEY_REQUIRE_MSG) +# undef HEDLEY_REQUIRE_MSG +#endif +#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if HEDLEY_HAS_WARNING("-Wgcc-compat") +# define HEDLEY_REQUIRE(expr) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# define HEDLEY_REQUIRE_MSG(expr,msg) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define HEDLEY_REQUIRE(expr) +# define HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(HEDLEY_FLAGS) +# undef HEDLEY_FLAGS +#endif +#if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) +# define HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else +# define HEDLEY_FLAGS +#endif + +#if defined(HEDLEY_FLAGS_CAST) +# undef HEDLEY_FLAGS_CAST +#endif +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(HEDLEY_EMPTY_BASES) +# undef HEDLEY_EMPTY_BASES +#endif +#if \ + (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else +# define HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. */ + +#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) +# undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else +# define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_BUILTIN) +# undef HEDLEY_CLANG_HAS_BUILTIN +#endif +#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) + +#if defined(HEDLEY_CLANG_HAS_FEATURE) +# undef HEDLEY_CLANG_HAS_FEATURE +#endif +#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) + +#if defined(HEDLEY_CLANG_HAS_EXTENSION) +# undef HEDLEY_CLANG_HAS_EXTENSION +#endif +#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) + +#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) +# undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(HEDLEY_CLANG_HAS_WARNING) +# undef HEDLEY_CLANG_HAS_WARNING +#endif +#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ +/* :: End simde/hedley.h :: */ + +#define SIMDE_VERSION_MAJOR 0 +#define SIMDE_VERSION_MINOR 7 +#define SIMDE_VERSION_MICRO 6 +#define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) +// Also update meson.build in the root directory of the repository + +#include +#include + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin 
simde/simde-detect-clang.h :: */
+/* Detect Clang Version
+ * Created by Evan Nemerson
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to
+ * the public domain worldwide. This software is distributed without
+ * any warranty.
+ *
+ * For details, see .
+ * SPDX-License-Identifier: CC0-1.0
+ */
+
+/* This file was originally part of SIMDe
+ * (). You're free to do with it as
+ * you please, but I do have a few small requests:
+ *
+ * * If you make improvements, please submit them back to SIMDe
+ * (at ) so others can
+ * benefit from them.
+ * * Please keep a link to SIMDe intact so people know where to submit
+ * improvements.
+ * * If you expose it publicly, please change the SIMDE_ prefix to
+ * something specific to your project.
+ *
+ * The version numbers clang exposes (in the ___clang_major__,
+ * __clang_minor__, and __clang_patchlevel__ macros) are unreliable.
+ * Vendors such as Apple will define these values to their version
+ * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but
+ * __clang_major__ and __clang_minor__ are defined to 4 and 0
+ * respectively, instead of 3 and 1.
+ *
+ * The solution is *usually* to use clang's feature detection macros
+ * ()
+ * to determine if the feature you're interested in is available. This
+ * generally works well, and it should probably be the first thing you
+ * try. Unfortunately, it's not possible to check for everything. In
+ * particular, compiler bugs.
+ *
+ * This file just uses the feature checking macros to detect features
+ * added in specific versions of clang to identify which version of
+ * clang the compiler is based on.
+ *
+ * Right now it only goes back to 3.6, but I'm happy to accept patches
+ * to go back further. And, of course, newer versions are welcome if
+ * they're not already present, and if you find a way to detect a point
+ * release that would be great, too!
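 *
 * A short usage sketch (an illustrative addition, not from upstream SIMDe):
 * once SIMDE_DETECT_CLANG_VERSION is known, the check macros defined below
 * can gate version-specific code and workarounds, e.g.
 *
 *   #if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0)
 *     // fallbacks or extra pragmas for clang-based compilers older than 8.0
 *   #endif
 *   #if SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
 *     // rely on behaviour first available in clang 11 and later
 *   #endif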
+ */ + +#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) +# if __has_attribute(nouwtable) // no new warnings in 16.0 +# define SIMDE_DETECT_CLANG_VERSION 160000 +# elif __has_warning("-Warray-parameter") +# define SIMDE_DETECT_CLANG_VERSION 150000 +# elif __has_warning("-Wbitwise-instead-of-logical") +# define SIMDE_DETECT_CLANG_VERSION 140000 +# elif __has_warning("-Wwaix-compat") +# define SIMDE_DETECT_CLANG_VERSION 130000 +# elif __has_warning("-Wformat-insufficient-args") +# define SIMDE_DETECT_CLANG_VERSION 120000 +# elif __has_warning("-Wimplicit-const-int-float-conversion") +# define SIMDE_DETECT_CLANG_VERSION 110000 +# elif __has_warning("-Wmisleading-indentation") +# define SIMDE_DETECT_CLANG_VERSION 100000 +# elif defined(__FILE_NAME__) +# define SIMDE_DETECT_CLANG_VERSION 90000 +# elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) +# define SIMDE_DETECT_CLANG_VERSION 80000 +// For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently +// based on Clang 7, but does not support the warning we test. +// See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and +// https://trac.macports.org/wiki/XcodeVersionInfo. +# elif __has_warning("-Wc++98-compat-extra-semi") || \ + (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) +# define SIMDE_DETECT_CLANG_VERSION 70000 +# elif __has_warning("-Wpragma-pack") +# define SIMDE_DETECT_CLANG_VERSION 60000 +# elif __has_warning("-Wbitfield-enum-conversion") +# define SIMDE_DETECT_CLANG_VERSION 50000 +# elif __has_attribute(diagnose_if) +# define SIMDE_DETECT_CLANG_VERSION 40000 +# elif __has_warning("-Wcomma") +# define SIMDE_DETECT_CLANG_VERSION 39000 +# elif __has_warning("-Wdouble-promotion") +# define SIMDE_DETECT_CLANG_VERSION 38000 +# elif __has_warning("-Wshift-negative-value") +# define SIMDE_DETECT_CLANG_VERSION 37000 +# elif __has_warning("-Wambiguous-ellipsis") +# define SIMDE_DETECT_CLANG_VERSION 36000 +# else +# define SIMDE_DETECT_CLANG_VERSION 1 +# endif +#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ + +/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty + * straightforward; it returns true if the compiler is a derivative + * of clang >= the specified version. + * + * Since this file is often (primarily?) useful for working around bugs + * it is also helpful to have a macro which returns true if only if the + * compiler is a version of clang *older* than the specified version to + * make it a bit easier to ifdef regions to add code for older versions, + * such as pragmas to disable a specific warning. */ + +#if defined(SIMDE_DETECT_CLANG_VERSION) +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) +#else +# define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) +# define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) +#endif + +#endif /* !defined(SIMDE_DETECT_CLANG_H) */ +/* :: End simde/simde-detect-clang.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-arch.h :: */ +/* Architecture detection + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + * Different compilers define different preprocessor macros for the + * same architecture. This is an attempt to provide a single + * interface which is usable on any compiler. + * + * In general, a macro named SIMDE_ARCH_* is defined for each + * architecture the CPU supports. When there are multiple possible + * versions, we try to define the macro to the target version. For + * example, if you want to check for i586+, you could do something + * like: + * + * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) + * ... + * #endif + * + * You could also just check that SIMDE_ARCH_X86 >= 5 without checking + * if it's defined first, but some compilers may emit a warning about + * an undefined macro being used (e.g., GCC with -Wundef). + * + * This was originally created for SIMDe + * (hence the prefix), but this + * header has no dependencies and may be used anywhere. It is + * originally based on information from + * , though it + * has been enhanced with additional information. + * + * If you improve this file, or find a bug, please file the issue at + * . If you copy this into + * your project, even if you change the prefix, please keep the links + * to SIMDe intact so others know where to report issues, submit + * enhancements, and find the latest version. */ + +#if !defined(SIMDE_ARCH_H) +#define SIMDE_ARCH_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* Alpha + */ +#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) +# if defined(__alpha_ev6__) +# define SIMDE_ARCH_ALPHA 6 +# elif defined(__alpha_ev5__) +# define SIMDE_ARCH_ALPHA 5 +# elif defined(__alpha_ev4__) +# define SIMDE_ARCH_ALPHA 4 +# else +# define SIMDE_ARCH_ALPHA 1 +# endif +#endif +#if defined(SIMDE_ARCH_ALPHA) +# define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) +#else +# define SIMDE_ARCH_ALPHA_CHECK(version) (0) +#endif + +/* Atmel AVR + */ +#if defined(__AVR_ARCH__) +# define SIMDE_ARCH_AVR __AVR_ARCH__ +#endif + +/* AMD64 / x86_64 + */ +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) +# if !defined(_M_ARM64EC) +# define SIMDE_ARCH_AMD64 1000 +# endif +#endif + +/* ARM + */ +#if defined(__ARM_ARCH) +# if __ARM_ARCH > 100 +# define SIMDE_ARCH_ARM (__ARM_ARCH) +# else +# define SIMDE_ARCH_ARM (__ARM_ARCH * 100) +# endif +#elif defined(_M_ARM) +# if _M_ARM > 100 +# define SIMDE_ARCH_ARM (_M_ARM) +# else +# define SIMDE_ARCH_ARM (_M_ARM * 100) +# endif +#elif defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_ARM 800 +#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) +# define SIMDE_ARCH_ARM 1 +#endif +#if defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) +#else +# define SIMDE_ARCH_ARM_CHECK(major, minor) (0) +#endif + +/* AArch64 + */ +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +# define SIMDE_ARCH_AARCH64 1000 +#endif +#if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) +#else +# define SIMDE_ARCH_AARCH64_CHECK(version) (0) +#endif + +/* ARM SIMD ISA extensions */ +#if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_ARCH_ARM_NEON 
SIMDE_ARCH_AARCH64 +# elif defined(SIMDE_ARCH_ARM) +# define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM +# endif +#endif +#if defined(__ARM_FEATURE_SVE) +# define SIMDE_ARCH_ARM_SVE +#endif + +/* Blackfin + */ +#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) +# define SIMDE_ARCH_BLACKFIN 1 +#endif + +/* CRIS + */ +#if defined(__CRIS_arch_version) +# define SIMDE_ARCH_CRIS __CRIS_arch_version +#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) +# define SIMDE_ARCH_CRIS 1 +#endif + +/* Convex + */ +#if defined(__convex_c38__) +# define SIMDE_ARCH_CONVEX 38 +#elif defined(__convex_c34__) +# define SIMDE_ARCH_CONVEX 34 +#elif defined(__convex_c32__) +# define SIMDE_ARCH_CONVEX 32 +#elif defined(__convex_c2__) +# define SIMDE_ARCH_CONVEX 2 +#elif defined(__convex__) +# define SIMDE_ARCH_CONVEX 1 +#endif +#if defined(SIMDE_ARCH_CONVEX) +# define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) +#else +# define SIMDE_ARCH_CONVEX_CHECK(version) (0) +#endif + +/* Adapteva Epiphany + */ +#if defined(__epiphany__) +# define SIMDE_ARCH_EPIPHANY 1 +#endif + +/* Fujitsu FR-V + */ +#if defined(__frv__) +# define SIMDE_ARCH_FRV 1 +#endif + +/* H8/300 + */ +#if defined(__H8300__) +# define SIMDE_ARCH_H8300 +#endif + +/* Elbrus (8S, 8SV and successors) + */ +#if defined(__e2k__) +# define SIMDE_ARCH_E2K +#endif + +/* HP/PA / PA-RISC + */ +#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) +# define SIMDE_ARCH_HPPA 20 +#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) +# define SIMDE_ARCH_HPPA 11 +#elif defined(_PA_RISC1_0) +# define SIMDE_ARCH_HPPA 10 +#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) +# define SIMDE_ARCH_HPPA 1 +#endif +#if defined(SIMDE_ARCH_HPPA) +# define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) +#else +# define SIMDE_ARCH_HPPA_CHECK(version) (0) +#endif + +/* x86 + */ +#if defined(_M_IX86) +# define SIMDE_ARCH_X86 (_M_IX86 / 100) +#elif defined(__I86__) +# define SIMDE_ARCH_X86 __I86__ +#elif defined(i686) || defined(__i686) || defined(__i686__) +# define SIMDE_ARCH_X86 6 +#elif defined(i586) || defined(__i586) || defined(__i586__) +# define SIMDE_ARCH_X86 5 +#elif defined(i486) || defined(__i486) || defined(__i486__) +# define SIMDE_ARCH_X86 4 +#elif defined(i386) || defined(__i386) || defined(__i386__) +# define SIMDE_ARCH_X86 3 +#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) +# define SIMDE_ARCH_X86 3 +#endif +#if defined(SIMDE_ARCH_X86) +# define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) +#else +# define SIMDE_ARCH_X86_CHECK(version) (0) +#endif + +/* SIMD ISA extensions for x86/x86_64 and Elbrus */ +#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) +# if defined(_M_IX86_FP) +# define SIMDE_ARCH_X86_MMX +# if (_M_IX86_FP >= 1) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if (_M_IX86_FP >= 2) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# elif defined(_M_X64) +# define SIMDE_ARCH_X86_SSE 1 +# define SIMDE_ARCH_X86_SSE2 1 +# else +# if defined(__MMX__) +# define SIMDE_ARCH_X86_MMX 1 +# endif +# if defined(__SSE__) +# define SIMDE_ARCH_X86_SSE 1 +# endif +# if defined(__SSE2__) +# define SIMDE_ARCH_X86_SSE2 1 +# endif +# endif +# if defined(__SSE3__) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if defined(__SSSE3__) +# define SIMDE_ARCH_X86_SSSE3 1 +# endif +# if defined(__SSE4_1__) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if defined(__SSE4_2__) +# define 
SIMDE_ARCH_X86_SSE4_2 1 +# endif +# if defined(__XOP__) +# define SIMDE_ARCH_X86_XOP 1 +# endif +# if defined(__AVX__) +# define SIMDE_ARCH_X86_AVX 1 +# if !defined(SIMDE_ARCH_X86_SSE3) +# define SIMDE_ARCH_X86_SSE3 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_1 1 +# endif +# if !defined(SIMDE_ARCH_X86_SSE4_1) +# define SIMDE_ARCH_X86_SSE4_2 1 +# endif +# endif +# if defined(__AVX2__) +# define SIMDE_ARCH_X86_AVX2 1 +# if defined(_MSC_VER) +# define SIMDE_ARCH_X86_FMA 1 +# endif +# endif +# if defined(__FMA__) +# define SIMDE_ARCH_X86_FMA 1 +# if !defined(SIMDE_ARCH_X86_AVX) +# define SIMDE_ARCH_X86_AVX 1 +# endif +# endif +# if defined(__AVX512VP2INTERSECT__) +# define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 +# endif +# if defined(__AVX512BITALG__) +# define SIMDE_ARCH_X86_AVX512BITALG 1 +# endif +# if defined(__AVX512VPOPCNTDQ__) +# define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 +# endif +# if defined(__AVX512VBMI__) +# define SIMDE_ARCH_X86_AVX512VBMI 1 +# endif +# if defined(__AVX512VBMI2__) +# define SIMDE_ARCH_X86_AVX512VBMI2 1 +# endif +# if defined(__AVX512VNNI__) +# define SIMDE_ARCH_X86_AVX512VNNI 1 +# endif +# if defined(__AVX5124VNNIW__) +# define SIMDE_ARCH_X86_AVX5124VNNIW 1 +# endif +# if defined(__AVX512BW__) +# define SIMDE_ARCH_X86_AVX512BW 1 +# endif +# if defined(__AVX512BF16__) +# define SIMDE_ARCH_X86_AVX512BF16 1 +# endif +# if defined(__AVX512CD__) +# define SIMDE_ARCH_X86_AVX512CD 1 +# endif +# if defined(__AVX512DQ__) +# define SIMDE_ARCH_X86_AVX512DQ 1 +# endif +# if defined(__AVX512F__) +# define SIMDE_ARCH_X86_AVX512F 1 +# endif +# if defined(__AVX512VL__) +# define SIMDE_ARCH_X86_AVX512VL 1 +# endif +# if defined(__GFNI__) +# define SIMDE_ARCH_X86_GFNI 1 +# endif +# if defined(__PCLMUL__) +# define SIMDE_ARCH_X86_PCLMUL 1 +# endif +# if defined(__VPCLMULQDQ__) +# define SIMDE_ARCH_X86_VPCLMULQDQ 1 +# endif +# if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) +# define SIMDE_ARCH_X86_F16C 1 +# endif +#endif + +/* Itanium + */ +#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) +# define SIMDE_ARCH_IA64 1 +#endif + +/* Renesas M32R + */ +#if defined(__m32r__) || defined(__M32R__) +# define SIMDE_ARCH_M32R +#endif + +/* Motorola 68000 + */ +#if defined(__mc68060__) || defined(__MC68060__) +# define SIMDE_ARCH_M68K 68060 +#elif defined(__mc68040__) || defined(__MC68040__) +# define SIMDE_ARCH_M68K 68040 +#elif defined(__mc68030__) || defined(__MC68030__) +# define SIMDE_ARCH_M68K 68030 +#elif defined(__mc68020__) || defined(__MC68020__) +# define SIMDE_ARCH_M68K 68020 +#elif defined(__mc68010__) || defined(__MC68010__) +# define SIMDE_ARCH_M68K 68010 +#elif defined(__mc68000__) || defined(__MC68000__) +# define SIMDE_ARCH_M68K 68000 +#endif +#if defined(SIMDE_ARCH_M68K) +# define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) +#else +# define SIMDE_ARCH_M68K_CHECK(version) (0) +#endif + +/* Xilinx MicroBlaze + */ +#if defined(__MICROBLAZE__) || defined(__microblaze__) +# define SIMDE_ARCH_MICROBLAZE +#endif + +/* MIPS + */ +#if defined(_MIPS_ISA_MIPS64R2) +# define SIMDE_ARCH_MIPS 642 +#elif defined(_MIPS_ISA_MIPS64) +# define SIMDE_ARCH_MIPS 640 +#elif defined(_MIPS_ISA_MIPS32R2) +# define SIMDE_ARCH_MIPS 322 +#elif defined(_MIPS_ISA_MIPS32) +# define SIMDE_ARCH_MIPS 320 +#elif defined(_MIPS_ISA_MIPS4) +# define SIMDE_ARCH_MIPS 4 +#elif defined(_MIPS_ISA_MIPS3) +# define 
SIMDE_ARCH_MIPS 3 +#elif defined(_MIPS_ISA_MIPS2) +# define SIMDE_ARCH_MIPS 2 +#elif defined(_MIPS_ISA_MIPS1) +# define SIMDE_ARCH_MIPS 1 +#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) +# define SIMDE_ARCH_MIPS 1 +#endif +#if defined(SIMDE_ARCH_MIPS) +# define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) +#else +# define SIMDE_ARCH_MIPS_CHECK(version) (0) +#endif + +#if defined(__mips_loongson_mmi) +# define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 +#endif + +#if defined(__mips_msa) +# define SIMDE_ARCH_MIPS_MSA 1 +#endif + +/* Matsushita MN10300 + */ +#if defined(__MN10300__) || defined(__mn10300__) +# define SIMDE_ARCH_MN10300 1 +#endif + +/* POWER + */ +#if defined(_M_PPC) +# define SIMDE_ARCH_POWER _M_PPC +#elif defined(_ARCH_PWR9) +# define SIMDE_ARCH_POWER 900 +#elif defined(_ARCH_PWR8) +# define SIMDE_ARCH_POWER 800 +#elif defined(_ARCH_PWR7) +# define SIMDE_ARCH_POWER 700 +#elif defined(_ARCH_PWR6) +# define SIMDE_ARCH_POWER 600 +#elif defined(_ARCH_PWR5) +# define SIMDE_ARCH_POWER 500 +#elif defined(_ARCH_PWR4) +# define SIMDE_ARCH_POWER 400 +#elif defined(_ARCH_440) || defined(__ppc440__) +# define SIMDE_ARCH_POWER 440 +#elif defined(_ARCH_450) || defined(__ppc450__) +# define SIMDE_ARCH_POWER 450 +#elif defined(_ARCH_601) || defined(__ppc601__) +# define SIMDE_ARCH_POWER 601 +#elif defined(_ARCH_603) || defined(__ppc603__) +# define SIMDE_ARCH_POWER 603 +#elif defined(_ARCH_604) || defined(__ppc604__) +# define SIMDE_ARCH_POWER 604 +#elif defined(_ARCH_605) || defined(__ppc605__) +# define SIMDE_ARCH_POWER 605 +#elif defined(_ARCH_620) || defined(__ppc620__) +# define SIMDE_ARCH_POWER 620 +#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) +# define SIMDE_ARCH_POWER 1 +#endif +#if defined(SIMDE_ARCH_POWER) + #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_CHECK(version) (0) +#endif + +#if defined(__ALTIVEC__) +# define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) +#else + #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) +#endif + +/* SPARC + */ +#if defined(__sparc_v9__) || defined(__sparcv9) +# define SIMDE_ARCH_SPARC 9 +#elif defined(__sparc_v8__) || defined(__sparcv8) +# define SIMDE_ARCH_SPARC 8 +#elif defined(__sparc_v7__) || defined(__sparcv7) +# define SIMDE_ARCH_SPARC 7 +#elif defined(__sparc_v6__) || defined(__sparcv6) +# define SIMDE_ARCH_SPARC 6 +#elif defined(__sparc_v5__) || defined(__sparcv5) +# define SIMDE_ARCH_SPARC 5 +#elif defined(__sparc_v4__) || defined(__sparcv4) +# define SIMDE_ARCH_SPARC 4 +#elif defined(__sparc_v3__) || defined(__sparcv3) +# define SIMDE_ARCH_SPARC 3 +#elif defined(__sparc_v2__) || defined(__sparcv2) +# define SIMDE_ARCH_SPARC 2 +#elif defined(__sparc_v1__) || defined(__sparcv1) +# define SIMDE_ARCH_SPARC 1 +#elif defined(__sparc__) || defined(__sparc) +# define SIMDE_ARCH_SPARC 1 +#endif +#if defined(SIMDE_ARCH_SPARC) + #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) +#else + #define SIMDE_ARCH_SPARC_CHECK(version) (0) +#endif + +/* SuperH + */ +#if defined(__sh5__) || defined(__SH5__) +# define SIMDE_ARCH_SUPERH 5 +#elif defined(__sh4__) || defined(__SH4__) +# define SIMDE_ARCH_SUPERH 4 +#elif defined(__sh3__) || defined(__SH3__) +# define SIMDE_ARCH_SUPERH 3 +#elif defined(__sh2__) || defined(__SH2__) +# define SIMDE_ARCH_SUPERH 2 +#elif 
defined(__sh1__) || defined(__SH1__) +# define SIMDE_ARCH_SUPERH 1 +#elif defined(__sh__) || defined(__SH__) +# define SIMDE_ARCH_SUPERH 1 +#endif + +/* IBM System z + */ +#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) +# define SIMDE_ARCH_ZARCH __ARCH__ +#endif +#if defined(SIMDE_ARCH_ZARCH) + #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) +#else + #define SIMDE_ARCH_ZARCH_CHECK(version) (0) +#endif + +#if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) + #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH +#endif + +/* TMS320 DSP + */ +#if defined(_TMS320C6740) || defined(__TMS320C6740__) +# define SIMDE_ARCH_TMS320 6740 +#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) +# define SIMDE_ARCH_TMS320 6701 +#elif defined(_TMS320C6700) || defined(__TMS320C6700__) +# define SIMDE_ARCH_TMS320 6700 +#elif defined(_TMS320C6600) || defined(__TMS320C6600__) +# define SIMDE_ARCH_TMS320 6600 +#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) +# define SIMDE_ARCH_TMS320 6401 +#elif defined(_TMS320C6400) || defined(__TMS320C6400__) +# define SIMDE_ARCH_TMS320 6400 +#elif defined(_TMS320C6200) || defined(__TMS320C6200__) +# define SIMDE_ARCH_TMS320 6200 +#elif defined(_TMS320C55X) || defined(__TMS320C55X__) +# define SIMDE_ARCH_TMS320 550 +#elif defined(_TMS320C54X) || defined(__TMS320C54X__) +# define SIMDE_ARCH_TMS320 540 +#elif defined(_TMS320C28X) || defined(__TMS320C28X__) +# define SIMDE_ARCH_TMS320 280 +#endif +#if defined(SIMDE_ARCH_TMS320) + #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) +#else + #define SIMDE_ARCH_TMS320_CHECK(version) (0) +#endif + +/* WebAssembly */ +#if defined(__wasm__) +# define SIMDE_ARCH_WASM 1 +#endif + +#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) +# define SIMDE_ARCH_WASM_SIMD128 +#endif + +/* Xtensa + */ +#if defined(__xtensa__) || defined(__XTENSA__) +# define SIMDE_ARCH_XTENSA 1 +#endif + +/* Availability of 16-bit floating-point arithmetic intrinsics */ +#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) +# define SIMDE_ARCH_ARM_NEON_FP16 +#endif + +/* LoongArch + */ +#if defined(__loongarch32) +# define SIMDE_ARCH_LOONGARCH 1 +#elif defined(__loongarch64) +# define SIMDE_ARCH_LOONGARCH 2 +#endif + +/* LSX: LoongArch 128-bits SIMD extension */ +#if defined(__loongarch_sx) +# define SIMDE_ARCH_LOONGARCH_LSX 1 +#endif + +/* LASX: LoongArch 256-bits SIMD extension */ +#if defined(__loongarch_asx) +# define SIMDE_ARCH_LOONGARCH_LASX 2 +#endif + +#endif /* !defined(SIMDE_ARCH_H) */ +/* :: End simde/simde-arch.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-features.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
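To make the intent of the *_CHECK() helpers concrete, here is a small, hedged sketch of how portable code can branch on them; the function name and thresholds are illustrative assumptions. Note the helpers expand to 0 whenever the corresponding architecture macro is undefined, so no extra defined() guard is needed.

    /* Sketch: pick a wide code path on ARMv8+, i686+, or POWER7+ AltiVec. */
    static int example_prefer_wide_path(void) {
    #if SIMDE_ARCH_ARM_CHECK(8, 0) || SIMDE_ARCH_X86_CHECK(6) || SIMDE_ARCH_POWER_ALTIVEC_CHECK(700)
      return 1;   /* SIMD-friendly target */
    #else
      return 0;   /* scalar fallback */
    #endif
    }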
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* simde-arch.h is used to determine which features are available according + to the compiler. However, we want to make it possible to forcibly enable + or disable APIs */ + +#if !defined(SIMDE_FEATURES_H) +#define SIMDE_FEATURES_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-diagnostic.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* SIMDe targets a very wide range of standards and compilers, and our + * goal is to compile cleanly even with extremely aggressive warnings + * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) + * treated as errors. + * + * While our preference is to resolve the underlying issue a given + * diagnostic is warning us about, sometimes that's not possible. + * Fixing a warning in one compiler may cause problems in another. + * Sometimes a warning doesn't really apply to us (false positives), + * and sometimes adhering to a warning would mean dropping a feature + * we *know* the compiler supports since we have tested specifically + * for the compiler or feature. + * + * When practical, warnings are only disabled for specific code. For + * a list of warnings which are enabled by default in all SIMDe code, + * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the + * warning stack when SIMDe is done parsing, so code which includes + * SIMDe is not deprived of these warnings. 
+ */ + +#if !defined(SIMDE_DIAGNOSTIC_H) +#define SIMDE_DIAGNOSTIC_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* This is only to help us implement functions like _mm_undefined_ps. */ +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif +#if HEDLEY_HAS_WARNING("-Wuninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,2,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") +#elif HEDLEY_PGI_VERSION_CHECK(19,10,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") +#elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") +#elif \ + HEDLEY_TI_VERSION_CHECK(16,9,9) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) + #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) +#endif + +/* GCC emits a lot of "notes" about the ABI being different for things + * in newer versions of GCC. We don't really care because all our + * functions are inlined and don't generate ABI. */ +#if HEDLEY_GCC_VERSION_CHECK(7,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ +#endif + +/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() + * after each MMX function before any floating point instructions. + * Some compilers warn about functions which use MMX functions but + * don't call _mm_empty(). However, since SIMDe is implementyng the + * MMX API we shouldn't be calling _mm_empty(); we leave it to the + * caller to invoke simde_mm_empty(). */ +#if HEDLEY_INTEL_VERSION_CHECK(19,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") +#elif defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ +#endif + +/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they + * emit a diagnostic if you use #pragma simd instead of + * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to + * compile with -qopenmp or -qopenmp-simd and define + * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. 
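The disable macros in this header are meant to be sandwiched between Hedley's diagnostic push/pop so the suppression stays local. A minimal sketch, assuming the Hedley macros are already in scope (they are used the same way later in this header set); the function name is hypothetical and the pattern mirrors what helpers like _mm_undefined_ps need:

    /* Sketch: read an intentionally uninitialized value without a warning. */
    static int example_undefined_value(void) {
      HEDLEY_DIAGNOSTIC_PUSH
      SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_
      int x;        /* deliberately left uninitialized */
      int r = x;    /* the read that would normally trip -Wuninitialized */
      HEDLEY_DIAGNOSTIC_POP
      return r;
    }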
*/ +#if HEDLEY_INTEL_VERSION_CHECK(18,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ +#endif + +/* MSVC emits a diagnostic when we call a function (like + * simde_mm_set_epi32) while initializing a struct. We currently do + * this a *lot* in the tests. */ +#if \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ +#endif + +/* This warning needs a lot of work. It is triggered if all you do is + * pass the value to memcpy/__builtin_memcpy, or if you initialize a + * member of the union, even if that member takes up the entire union. + * Last tested with clang-10, hopefully things will improve in the + * future; if clang fixes this I'd love to enable it. */ +#if \ + HEDLEY_HAS_WARNING("-Wconditional-uninitialized") + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ +#endif + +/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which + * will is false. However, SIMDe uses these operations exclusively + * for things like _mm_cmpeq_ps, for which we really do want to check + * for equality (or inequality). + * + * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro + * which just wraps a check in some code do disable this diagnostic I'd + * be happy to accept it. */ +#if \ + HEDLEY_HAS_WARNING("-Wfloat-equal") || \ + HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ +#endif + +/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. + * If Hedley can't find an implementation it will preprocess to + * nothing, which means there will be a trailing semi-colon. */ +#if HEDLEY_HAS_WARNING("-Wextra-semi") + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") +#elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ +#endif + +/* We do use a few variadic macros, which technically aren't available + * until C99 and C++11, but every compiler I'm aware of has supported + * them for much longer. That said, usage is isolated to the test + * suite and compilers known to support them. */ +#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) + #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ + _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +#endif + +/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro + * before we can access certain SIMD intrinsics, but this diagnostic + * warns about it being a reserved name. It is a reserved name, but + * it's reserved for the compiler and we are using it to convey + * information to the compiler. 
+ * + * This is also used when enabling native aliases since we don't get to + * choose the macro names. */ +#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#endif + +/* Similar to above; types like simde__m128i are reserved due to the + * double underscore, but we didn't choose them, Intel did. */ +#if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ +#endif + +/* clang 3.8 warns about the packed attribute being unnecessary when + * used in the _mm_loadu_* functions. That *may* be true for version + * 3.8, but for later versions it is crucial in order to make unaligned + * access safe. */ +#if HEDLEY_HAS_WARNING("-Wpacked") + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ +#endif + +/* Triggered when assigning a float to a double implicitly. We use + * explicit casts in SIMDe, this is only used in the test suite. */ +#if HEDLEY_HAS_WARNING("-Wdouble-promotion") + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ +#endif + +/* Several compilers treat conformant array parameters as VLAs. We + * test to make sure we're in C mode (C++ doesn't support CAPs), and + * that the version of the standard supports CAPs. We also reject + * some buggy compilers like MSVC (the logic is in Hedley if you want + * to take a look), but with certain warnings enabled some compilers + * still like to emit a diagnostic. */ +#if HEDLEY_HAS_WARNING("-Wvla") + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ +#endif + +/* If you add an unused attribute to a function and don't use it, clang + * may emit this. 
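Circling back to the -Wfloat-equal note a little earlier: the SIMDE_FLOAT_EQUAL(a, op, b) wrapper it invites could plausibly be built from the same push/disable/pop machinery. The sketch below is hypothetical (it is not part of this import) and leans on the GCC/clang statement-expression extension, which, as noted further down, clang only handles reliably for diagnostics from 5.0 onward.

    /* Hypothetical wrapper: exact FP comparison with -Wfloat-equal silenced. */
    #define EXAMPLE_FLOAT_EQUAL(a, op, b) (__extension__ ({ \
        HEDLEY_DIAGNOSTIC_PUSH \
        SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \
        const int example_r_ = ((a) op (b)); \
        HEDLEY_DIAGNOSTIC_POP \
        example_r_; \
      }))

    /* usage: if (EXAMPLE_FLOAT_EQUAL(x, ==, 0.5f)) { ... } */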
*/ +#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpass-failed") + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wpadded") + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") +#elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ +#endif + +#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ +#endif + +#if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ +#endif + +#if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ +#endif + +/* clang will emit this warning when we use C99 extensions whan not in + * C99 mode, even though it does support this. In such cases we check + * the compiler and version first, so we know it's not a problem. */ +#if HEDLEY_HAS_WARNING("-Wc99-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ +#endif + +/* Similar problm as above; we rely on some basic C99 support, but clang + * has started warning obut this even in C17 mode with -Weverything. */ +#if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ +#endif + +/* https://github.com/simd-everywhere/simde/issues/277 */ +#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ +#endif + +/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS + * to silence, but you have to do that before including anything and + * that would require reordering includes. */ +#if defined(_MSC_VER) + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) +#else + #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ +#endif + +/* Some compilers, such as clang, may use `long long` for 64-bit + * integers, but `long long` triggers a diagnostic with + * -Wc++98-compat-pedantic which says 'long long' is incompatible with + * C++98. 
*/ +#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") + #if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ + _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") + #else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ +#endif + +/* Some problem as above */ +#if HEDLEY_HAS_WARNING("-Wc++11-long-long") + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ +#endif + +/* emscripten emits this whenever stdin/stdout/stderr is used in a + * macro. */ +#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ +#endif + +/* Clang uses C11 generic selections to implement some AltiVec + * functions, which triggers this diagnostic when not compiling + * in C11 mode */ +#if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ +#endif + +/* Clang sometimes triggers this warning in macros in the AltiVec and + * NEON headers, or due to missing functions. */ +#if HEDLEY_HAS_WARNING("-Wvector-conversion") + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") + /* For NEON, the situation with -Wvector-conversion in clang < 10 is + * bad enough that we just disable the warning altogether. On x86, + * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ + #if \ + (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ + SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + #endif +#else + #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ +#endif +#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ +#endif + +/* Prior to 5.0, clang didn't support disabling diagnostics in + * statement exprs. As a result, some macros we use don't + * properly silence warnings. 
*/ +#if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ +#endif + +/* SLEEF triggers this a *lot* in their headers */ +#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") +#elif HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ +#endif + +/* GCC emits this under some circumstances when using __int128 */ +#if HEDLEY_GCC_VERSION_CHECK(4,8,0) + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +#endif + +/* MSVC doesn't like (__assume(0), code) and will warn about code being + * unreachable, but we want it there because not all compilers + * understand the unreachable macro and will complain if it is missing. + * I'm planning on adding a new macro to Hedley to handle this a bit + * more elegantly, but until then... */ +#if defined(HEDLEY_MSVC_VERSION) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) +#elif defined(__clang__) + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +#endif + +/* This is a false positive from GCC in a few places. */ +#if HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +#else + #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ +#endif + +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ +#else + #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ +#endif + +/* Some native functions on E2K with instruction set < v6 are declared + * as deprecated due to inefficiency. Still they are more efficient + * than SIMDe implementation. So we're using them, and switching off + * these deprecation warnings. 
*/ +#if defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") +#else +# define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS +# define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS +#endif + +#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ + HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ + SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ + SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ + SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ + SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ + SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ + SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ + SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ + SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ + SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ + SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ + SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ + SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ + +#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ +/* :: End simde/simde-diagnostic.h :: */ + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SVML) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) + #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) + #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BITALG) + #define SIMDE_X86_AVX512BITALG_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI) + #define SIMDE_X86_AVX512VBMI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VBMI2) + #define SIMDE_X86_AVX512VBMI2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && 
!defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VNNI) + #define SIMDE_X86_AVX512VNNI_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) + #define SIMDE_X86_AVX5124VNNIW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512CD) + #define SIMDE_X86_AVX512CD_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512DQ) + #define SIMDE_X86_AVX512DQ_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512VL) + #define SIMDE_X86_AVX512VL_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BW) + #define SIMDE_X86_AVX512BW_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512BF16) + #define SIMDE_X86_AVX512BF16_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX512F) + #define SIMDE_X86_AVX512F_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_NATIVE +#endif + +#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_FMA) + #define SIMDE_X86_FMA_NATIVE + #endif +#endif +#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX2) + #define SIMDE_X86_AVX2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_NATIVE +#endif + +#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_AVX) + #define SIMDE_X86_AVX_NATIVE + #endif +#endif +#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + 
#if defined(SIMDE_ARCH_X86_XOP) + #define SIMDE_X86_XOP_NATIVE + #endif +#endif +#if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_2) + #define SIMDE_X86_SSE4_2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE4_1) + #define SIMDE_X86_SSE4_1_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSSE3) + #define SIMDE_X86_SSSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE3) + #define SIMDE_X86_SSE3_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE2) + #define SIMDE_X86_SSE2_NATIVE + #endif +#endif +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_NATIVE +#endif + +#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_SSE) + #define SIMDE_X86_SSE_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_MMX) + #define SIMDE_X86_MMX_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_GFNI) + #define SIMDE_X86_GFNI_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_PCLMUL) + #define SIMDE_X86_PCLMUL_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) + #define SIMDE_X86_VPCLMULQDQ_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_X86_F16C) + #define SIMDE_X86_F16C_NATIVE + #endif +#endif + +#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(__INTEL_COMPILER) + #define SIMDE_X86_SVML_NATIVE + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(push) + #pragma warning(disable:4799) +#endif + +#if \ + defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE4_1_NATIVE) + #include +#elif defined(SIMDE_X86_SSSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE3_NATIVE) + #include +#elif defined(SIMDE_X86_SSE2_NATIVE) + #include +#elif defined(SIMDE_X86_SSE_NATIVE) + 
#include +#elif defined(SIMDE_X86_MMX_NATIVE) + #include +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(_MSC_VER) + #include + #else + #include + #endif +#endif + +#if defined(HEDLEY_MSVC_VERSION) + #pragma warning(pop) +#endif + +#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) + #define SIMDE_ARM_NEON_A64V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) + #define SIMDE_ARM_NEON_A32V8_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_NATIVE +#endif + +#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) + #define SIMDE_ARM_NEON_A32V7_NATIVE + #endif +#endif +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + #include + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_ARM_SVE) + #define SIMDE_ARM_SVE_NATIVE + #include + #endif +#endif + +#if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_WASM_SIMD128) + #define SIMDE_WASM_SIMD128_NATIVE + #endif +#endif +#if defined(SIMDE_WASM_SIMD128_NATIVE) + #include +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) + #define SIMDE_POWER_ALTIVEC_P9_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) + #define SIMDE_POWER_ALTIVEC_P8_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) + #define SIMDE_POWER_ALTIVEC_P7_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) + #define SIMDE_POWER_ALTIVEC_P6_NATIVE + #endif +#endif +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE +#endif + +#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) + #define SIMDE_POWER_ALTIVEC_P5_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && 
!defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_15_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_14_NATIVE + #endif +#endif + +#if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) + #define SIMDE_ZARCH_ZVECTOR_13_NATIVE + #endif +#endif + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + /* AltiVec conflicts with lots of stuff. The bool keyword conflicts + * with the bool keyword in C++ and the bool macro in C99+ (defined + * in stdbool.h). The vector keyword conflicts with std::vector in + * C++ if you are `using std;`. + * + * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` + * instead, but altivec.h will unconditionally define + * `vector`/`bool`/`pixel` so we need to work around that. + * + * Unfortunately this means that if your code uses AltiVec directly + * it may break. If this is the case you'll want to define + * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even + * better, port your code to use the double-underscore versions. */ + #if defined(bool) + #undef bool + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #include + + #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #if defined(vector) + #undef vector + #endif + #if defined(pixel) + #undef pixel + #endif + #if defined(bool) + #undef bool + #endif + #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #include + #endif + + /* Use these intsead of vector/pixel/bool in SIMDe. */ + #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T + #define SIMDE_POWER_ALTIVEC_PIXEL __pixel + #define SIMDE_POWER_ALTIVEC_BOOL __bool + + /* Re-define bool if we're using stdbool.h */ + #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) + #define bool _Bool + #endif +#endif + +#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) + #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_MIPS_MSA) + #define SIMDE_MIPS_MSA_NATIVE 1 + #endif +#endif +#if defined(SIMDE_MIPS_MSA_NATIVE) + #include +#endif + +/* This is used to determine whether or not to fall back on a vector + * function in an earlier ISA extensions, as well as whether + * we expected any attempts at vectorization to be fruitful or if we + * expect to always be running serial code. + * + * Note that, for some architectures (okay, *one* architecture) there + * can be a split where some types are supported for one vector length + * but others only for a shorter length. Therefore, it is possible to + * provide separate values for float/int/double types. 
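Before the natural-vector-size block that follows, a quick sketch of how the _GE()/_LE() helpers it defines are meant to steer dispatch decisions; EXAMPLE_F32_LANES is an illustrative name, not something defined by these headers.

    /* Sketch: choose a lane count from the natural float vector width;
     * a width of 0 means no usable SIMD, i.e. expect serial code. */
    #if SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(256)
      #define EXAMPLE_F32_LANES 8
    #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(128)
      #define EXAMPLE_F32_LANES 4
    #else
      #define EXAMPLE_F32_LANES 1
    #endif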
*/ + +#if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (512) + #elif defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (256) + #elif defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) + #elif \ + defined(SIMDE_X86_SSE2_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ + defined(SIMDE_WASM_SIMD128_NATIVE) || \ + defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ + defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ + defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_NATURAL_VECTOR_SIZE (128) + #elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) + #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) + #endif + + #if !defined(SIMDE_NATURAL_VECTOR_SIZE) + #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE + #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE + #else + #define SIMDE_NATURAL_VECTOR_SIZE (0) + #endif + #endif + + #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) + #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) + #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif + #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) + #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #endif +#endif + +#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) +#define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) + +/* Native aliases */ +#if defined(SIMDE_ENABLE_NATIVE_ALIASES) + #if !defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE2_NATIVE) + #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE3_NATIVE) + #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSSE3_NATIVE) + #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_1_NATIVE) + #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX_NATIVE) + #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES + #endif + #if 
!defined(SIMDE_X86_AVX2_NATIVE) + #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_FMA_NATIVE) + #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512F_NATIVE) + #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VL_NATIVE) + #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) + #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) + #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BW_NATIVE) + #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) + #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) + #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BF16_NATIVE) + #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) + #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) + #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512DQ_NATIVE) + #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_AVX512CD_NATIVE) + #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_GFNI_NATIVE) + #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_PCLMUL_NATIVE) + #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) + #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_X86_F16C_NATIVE) + #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_ARM_SVE_NATIVE) + #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES + #endif + + #if !defined(SIMDE_WASM_SIMD128_NATIVE) + #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES + #endif +#endif + +/* Are floating point values stored using IEEE 754? Knowing + * this at during preprocessing is a bit tricky, mostly because what + * we're curious about is how values are stored and not whether the + * implementation is fully conformant in terms of rounding, NaN + * handling, etc. + * + * For example, if you use -ffast-math or -Ofast on + * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 + * support is not advertised (by defining __STDC_IEC_559__). + * + * However, what we care about is whether it is safe to assume that + * floating point values are stored in IEEE 754 format, in which case + * we can provide faster implementations of some functions. + * + * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- + * so we just assume IEEE 754 for now. There is a test which verifies + * this, if that test fails sowewhere please let us know and we'll add + * an exception for that platform. Meanwhile, you can define + * SIMDE_NO_IEEE754_STORAGE. 
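As a brief sketch of the native-alias workflow described above: defining SIMDE_ENABLE_NATIVE_ALIASES before including a simde header lets existing intrinsic-based code compile unchanged on other targets. The include path and the tiny kernel below are illustrative assumptions.

    #define SIMDE_ENABLE_NATIVE_ALIASES
    #include "simde/x86/sse2.h"   /* assumed install/include layout */

    /* Sketch: plain SSE names, backed by simde_mm_* on non-x86 targets. */
    static void example_add4(float *r, const float *a, const float *b) {
      __m128 va = _mm_loadu_ps(a);
      __m128 vb = _mm_loadu_ps(b);
      _mm_storeu_ps(r, _mm_add_ps(va, vb));
    }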
*/ +#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) + #define SIMDE_IEEE754_STORAGE +#endif + +#if defined(SIMDE_ARCH_ARM_NEON_FP16) + #define SIMDE_ARM_NEON_FP16 +#endif + +#if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LASX) + #define SIMDE_LOONGARCH_LASX_NATIVE + #endif +#endif + +#if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) + #if defined(SIMDE_ARCH_LOONGARCH_LSX) + #define SIMDE_LOONGARCH_LSX_NATIVE + #endif +#endif + +#if defined(SIMDE_LOONGARCH_LASX_NATIVE) + #include +#endif +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + #include +#endif + +#endif /* !defined(SIMDE_FEATURES_H) */ +/* :: End simde/simde-features.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-math.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* Attempt to find math functions. Functions may be in , + * , compiler built-ins/intrinsics, or platform/architecture + * specific headers. In some cases, especially those not built in to + * libm, we may need to define our own implementations. */ + +#if !defined(SIMDE_MATH_H) +#define SIMDE_MATH_H 1 + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +#include +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +/* SLEEF support + * https://sleef.org/ + * + * If you include prior to including SIMDe, SIMDe will use + * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to + * including SIMDe to force the issue. + * + * Note that SLEEF does requires linking to libsleef. + * + * By default, SIMDe will use the 1 ULP functions, but if you use + * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is + * only the case for the simde_math_* functions; for code in other + * SIMDe headers which calls SLEEF directly we may use functions with + * greater error if the API we're implementing is less precise (for + * example, SVML guarantees 4 ULP, so we will generally use the 3.5 + * ULP functions from SLEEF). */ +#if !defined(SIMDE_MATH_SLEEF_DISABLE) + #if defined(__SLEEF_H__) + #define SIMDE_MATH_SLEEF_ENABLE + #endif +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ + #include + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) + #if defined(SLEEF_VERSION_MAJOR) + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) + #endif +#else + #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) +#endif + +#if defined(__has_builtin) + #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) +#elif \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(4,4,0) + #define SIMDE_MATH_BUILTIN_LIBM(func) (1) +#else + #define SIMDE_MATH_BUILTIN_LIBM(func) (0) +#endif + +#if defined(HUGE_VAL) + /* Looks like or has already been included. */ + + /* The math.h from libc++ (yes, the C header from the C++ standard + * library) will define an isnan function, but not an isnan macro + * like the C standard requires. So we detect the header guards + * macro libc++ uses. */ + #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) + #define SIMDE_MATH_HAVE_MATH_H + #elif defined(__cplusplus) + #define SIMDE_MATH_HAVE_CMATH + #endif +#elif defined(__has_include) + #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() + #define SIMDE_MATH_HAVE_CMATH + #include + #elif __has_include() + #define SIMDE_MATH_HAVE_MATH_H + #include + #elif !defined(SIMDE_MATH_NO_LIBM) + #define SIMDE_MATH_NO_LIBM + #endif +#elif !defined(SIMDE_MATH_NO_LIBM) + #if defined(__cplusplus) && (__cplusplus >= 201103L) + #define SIMDE_MATH_HAVE_CMATH + HEDLEY_DIAGNOSTIC_PUSH + #if defined(HEDLEY_MSVC_VERSION) + /* VS 14 emits this diagnostic about noexcept being used on a + * function, which we can't do anything about. 
*/ + #pragma warning(disable:4996) + #endif + #include + HEDLEY_DIAGNOSTIC_POP + #else + #define SIMDE_MATH_HAVE_MATH_H + #include + #endif +#endif + +#if !defined(SIMDE_MATH_INFINITY) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_INFINITY (__builtin_inf()) + #elif defined(INFINITY) + #define SIMDE_MATH_INFINITY INFINITY + #endif +#endif + +#if !defined(SIMDE_INFINITYF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_inff) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_INFINITYF (__builtin_inff()) + #elif defined(INFINITYF) + #define SIMDE_MATH_INFINITYF INFINITYF + #elif defined(SIMDE_MATH_INFINITY) + #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) + #endif +#endif + +#if !defined(SIMDE_MATH_NAN) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nan) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) + #define SIMDE_MATH_NAN (__builtin_nan("")) + #elif defined(NAN) + #define SIMDE_MATH_NAN NAN + #endif +#endif + +#if !defined(SIMDE_NANF) + #if \ + HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) + #define SIMDE_MATH_NANF (__builtin_nanf("")) + #elif defined(NANF) + #define SIMDE_MATH_NANF NANF + #elif defined(SIMDE_MATH_NAN) + #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) + #endif +#endif + +#if !defined(SIMDE_MATH_PI) + #if defined(M_PI) + #define SIMDE_MATH_PI M_PI + #else + #define SIMDE_MATH_PI 3.14159265358979323846 + #endif +#endif + +#if !defined(SIMDE_MATH_PIF) + #if defined(M_PI) + #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) + #else + #define SIMDE_MATH_PIF 3.14159265358979323846f + #endif +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180) + #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 +#endif + +#if !defined(SIMDE_MATH_PI_OVER_180F) + #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f +#endif + +#if !defined(SIMDE_MATH_180_OVER_PI) + #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 +#endif + +#if !defined(SIMDE_MATH_180_OVER_PIF) + #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f +#endif + +#if !defined(SIMDE_MATH_FLT_MIN) + #if defined(__FLT_MIN__) + #define SIMDE_MATH_FLT_MIN __FLT_MIN__ + #else + #if !defined(FLT_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MIN FLT_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_FLT_MAX) + #if defined(__FLT_MAX__) + #define SIMDE_MATH_FLT_MAX __FLT_MAX__ + #else + #if !defined(FLT_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_FLT_MAX FLT_MAX + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MIN) + #if defined(__DBL_MIN__) + #define SIMDE_MATH_DBL_MIN __DBL_MIN__ + #else + #if !defined(DBL_MIN) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MIN 
DBL_MIN + #endif +#endif + +#if !defined(SIMDE_MATH_DBL_MAX) + #if defined(__DBL_MAX__) + #define SIMDE_MATH_DBL_MAX __DBL_MAX__ + #else + #if !defined(DBL_MAX) + #if defined(__cplusplus) + #include + #else + #include + #endif + #endif + #define SIMDE_MATH_DBL_MAX DBL_MAX + #endif +#endif + +/*** Classification macros from C99 ***/ + +#if !defined(simde_math_isinf) + #if SIMDE_MATH_BUILTIN_LIBM(isinf) + #define simde_math_isinf(v) __builtin_isinf(v) + #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isinf(v) isinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinf(v) std::isinf(v) + #endif +#endif + +#if !defined(simde_math_isinff) + #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define simde_math_isinff(v) __builtin_isinff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isinff(v) std::isinf(v) + #elif defined(simde_math_isinf) + #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnan) + #if SIMDE_MATH_BUILTIN_LIBM(isnan) + #define simde_math_isnan(v) __builtin_isnan(v) + #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnan(v) isnan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnan(v) std::isnan(v) + #endif +#endif + +#if !defined(simde_math_isnanf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) + /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ + #define simde_math_isnanf(v) __builtin_isnanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnanf(v) std::isnan(v) + #elif defined(simde_math_isnan) + #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) + #endif +#endif + +#if !defined(simde_math_isnormal) + #if SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormal(v) __builtin_isnormal(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormal(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormal(v) std::isnormal(v) + #endif +#endif + +#if !defined(simde_math_isnormalf) + #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) + #define simde_math_isnormalf(v) __builtin_isnormalf(v) + #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) + #define simde_math_isnormalf(v) __builtin_isnormal(v) + #elif defined(isnormalf) + #define simde_math_isnormalf(v) isnormalf(v) + #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_isnormalf(v) isnormal(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_isnormalf(v) std::isnormal(v) + #elif defined(simde_math_isnormal) + #define simde_math_isnormalf(v) simde_math_isnormal(v) + #endif +#endif + +#if !defined(simde_math_issubnormalf) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) + #endif +#endif + +#if !defined(simde_math_issubnormal) + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) + #elif defined(fpclassify) + #define simde_math_issubnormal(v) (fpclassify(v) == 
FP_SUBNORMAL) + #elif defined(SIMDE_IEEE754_STORAGE) + #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) + #endif +#endif + +#if defined(FP_NAN) + #define SIMDE_MATH_FP_NAN FP_NAN +#else + #define SIMDE_MATH_FP_NAN 0 +#endif +#if defined(FP_INFINITE) + #define SIMDE_MATH_FP_INFINITE FP_INFINITE +#else + #define SIMDE_MATH_FP_INFINITE 1 +#endif +#if defined(FP_ZERO) + #define SIMDE_MATH_FP_ZERO FP_ZERO +#else + #define SIMDE_MATH_FP_ZERO 2 +#endif +#if defined(FP_SUBNORMAL) + #define SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL +#else + #define SIMDE_MATH_FP_SUBNORMAL 3 +#endif +#if defined(FP_NORMAL) + #define SIMDE_MATH_FP_NORMAL FP_NORMAL +#else + #define SIMDE_MATH_FP_NORMAL 4 +#endif + +static HEDLEY_INLINE +int +simde_math_fpclassifyf(float v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0f) ? SIMDE_MATH_FP_ZERO : + simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +static HEDLEY_INLINE +int +simde_math_fpclassify(double v) { + #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) + return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); + #elif defined(fpclassify) + return fpclassify(v); + #else + return + simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : + (v == 0.0) ? SIMDE_MATH_FP_ZERO : + simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : + simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : + SIMDE_MATH_FP_SUBNORMAL; + #endif +} + +/*** Manipulation functions ***/ + +#if !defined(simde_math_nextafter) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafter(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafter(x, y) nextafter(x, y) + #endif +#endif + +#if !defined(simde_math_nextafterf) + #if \ + (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nextafterf(x, y) std::nextafter(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nextafterf(x, y) nextafterf(x, y) + #endif +#endif + +/*** Functions from C99 ***/ + +#if !defined(simde_math_abs) + #if SIMDE_MATH_BUILTIN_LIBM(abs) + #define simde_math_abs(v) __builtin_abs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_abs(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_abs(v) abs(v) + #endif +#endif + +#if !defined(simde_math_labs) + #if SIMDE_MATH_BUILTIN_LIBM(labs) + #define simde_math_labs(v) __builtin_labs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_labs(v) std::labs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_labs(v) labs(v) + #endif +#endif + +#if !defined(simde_math_llabs) + #if SIMDE_MATH_BUILTIN_LIBM(llabs) + #define simde_math_llabs(v) __builtin_llabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_llabs(v) std::llabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_llabs(v) llabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::abs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_acos) + #if SIMDE_MATH_BUILTIN_LIBM(acos) + #define simde_math_acos(v) __builtin_acos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acos(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acos(v) acos(v) + #endif +#endif + +#if !defined(simde_math_acosf) + #if SIMDE_MATH_BUILTIN_LIBM(acosf) + #define simde_math_acosf(v) __builtin_acosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosf(v) std::acos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosf(v) acosf(v) + #endif +#endif + +#if !defined(simde_math_acosh) + #if SIMDE_MATH_BUILTIN_LIBM(acosh) + #define simde_math_acosh(v) __builtin_acosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acosh(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acosh(v) acosh(v) + #endif +#endif + +#if !defined(simde_math_acoshf) + #if SIMDE_MATH_BUILTIN_LIBM(acoshf) + #define simde_math_acoshf(v) __builtin_acoshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_acoshf(v) std::acosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_acoshf(v) acoshf(v) + #endif +#endif + +#if 
!defined(simde_math_asin) + #if SIMDE_MATH_BUILTIN_LIBM(asin) + #define simde_math_asin(v) __builtin_asin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asin(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asin(v) asin(v) + #endif +#endif + +#if !defined(simde_math_asinf) + #if SIMDE_MATH_BUILTIN_LIBM(asinf) + #define simde_math_asinf(v) __builtin_asinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinf(v) std::asin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinf(v) asinf(v) + #endif +#endif + +#if !defined(simde_math_asinh) + #if SIMDE_MATH_BUILTIN_LIBM(asinh) + #define simde_math_asinh(v) __builtin_asinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinh(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinh(v) asinh(v) + #endif +#endif + +#if !defined(simde_math_asinhf) + #if SIMDE_MATH_BUILTIN_LIBM(asinhf) + #define simde_math_asinhf(v) __builtin_asinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_asinhf(v) std::asinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_asinhf(v) asinhf(v) + #endif +#endif + +#if !defined(simde_math_atan) + #if SIMDE_MATH_BUILTIN_LIBM(atan) + #define simde_math_atan(v) __builtin_atan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan(v) atan(v) + #endif +#endif + +#if !defined(simde_math_atan2) + #if SIMDE_MATH_BUILTIN_LIBM(atan2) + #define simde_math_atan2(y, x) __builtin_atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2(y, x) atan2(y, x) + #endif +#endif + +#if !defined(simde_math_atan2f) + #if SIMDE_MATH_BUILTIN_LIBM(atan2f) + #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atan2f(y, x) std::atan2(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atan2f(y, x) atan2f(y, x) + #endif +#endif + +#if !defined(simde_math_atanf) + #if SIMDE_MATH_BUILTIN_LIBM(atanf) + #define simde_math_atanf(v) __builtin_atanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanf(v) std::atan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanf(v) atanf(v) + #endif +#endif + +#if !defined(simde_math_atanh) + #if SIMDE_MATH_BUILTIN_LIBM(atanh) + #define simde_math_atanh(v) __builtin_atanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanh(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanh(v) atanh(v) + #endif +#endif + +#if !defined(simde_math_atanhf) + #if SIMDE_MATH_BUILTIN_LIBM(atanhf) + #define simde_math_atanhf(v) __builtin_atanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_atanhf(v) std::atanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_atanhf(v) atanhf(v) + #endif +#endif + +#if !defined(simde_math_cbrt) + #if SIMDE_MATH_BUILTIN_LIBM(cbrt) + #define simde_math_cbrt(v) __builtin_cbrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrt(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cbrt(v) cbrt(v) + #endif +#endif + +#if !defined(simde_math_cbrtf) + #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) + #define simde_math_cbrtf(v) __builtin_cbrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cbrtf(v) std::cbrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) 
+ #define simde_math_cbrtf(v) cbrtf(v) + #endif +#endif + +#if !defined(simde_math_ceil) + #if SIMDE_MATH_BUILTIN_LIBM(ceil) + #define simde_math_ceil(v) __builtin_ceil(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceil(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceil(v) ceil(v) + #endif +#endif + +#if !defined(simde_math_ceilf) + #if SIMDE_MATH_BUILTIN_LIBM(ceilf) + #define simde_math_ceilf(v) __builtin_ceilf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_ceilf(v) std::ceil(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_ceilf(v) ceilf(v) + #endif +#endif + +#if !defined(simde_math_copysign) + #if SIMDE_MATH_BUILTIN_LIBM(copysign) + #define simde_math_copysign(x, y) __builtin_copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysign(x, y) std::copysign(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysign(x, y) copysign(x, y) + #endif +#endif + +#if !defined(simde_math_copysignf) + #if SIMDE_MATH_BUILTIN_LIBM(copysignf) + #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_copysignf(x, y) std::copysignf(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_copysignf(x, y) copysignf(x, y) + #endif +#endif + +#if !defined(simde_math_signbit) + #if SIMDE_MATH_BUILTIN_LIBM(signbit) + #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) + #define simde_math_signbit(x) __builtin_signbit(x) + #else + #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) + #endif + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_signbit(x) std::signbit(x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_signbit(x) signbit(x) + #endif +#endif + +#if !defined(simde_math_cos) + #if SIMDE_MATH_BUILTIN_LIBM(cos) + #define simde_math_cos(v) __builtin_cos(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cos(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cos(v) cos(v) + #endif +#endif + +#if !defined(simde_math_cosf) + #if defined(SIMDE_MATH_SLEEF_ENABLE) + #if SIMDE_ACCURACY_PREFERENCE < 1 + #define simde_math_cosf(v) Sleef_cosf_u35(v) + #else + #define simde_math_cosf(v) Sleef_cosf_u10(v) + #endif + #elif SIMDE_MATH_BUILTIN_LIBM(cosf) + #define simde_math_cosf(v) __builtin_cosf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosf(v) std::cos(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosf(v) cosf(v) + #endif +#endif + +#if !defined(simde_math_cosh) + #if SIMDE_MATH_BUILTIN_LIBM(cosh) + #define simde_math_cosh(v) __builtin_cosh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_cosh(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_cosh(v) cosh(v) + #endif +#endif + +#if !defined(simde_math_coshf) + #if SIMDE_MATH_BUILTIN_LIBM(coshf) + #define simde_math_coshf(v) __builtin_coshf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_coshf(v) std::cosh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_coshf(v) coshf(v) + #endif +#endif + +#if !defined(simde_math_erf) + #if SIMDE_MATH_BUILTIN_LIBM(erf) + #define simde_math_erf(v) __builtin_erf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erf(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erf(v) erf(v) + #endif +#endif + +#if !defined(simde_math_erff) + #if SIMDE_MATH_BUILTIN_LIBM(erff) + #define simde_math_erff(v) 
__builtin_erff(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erff(v) std::erf(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erff(v) erff(v) + #endif +#endif + +#if !defined(simde_math_erfc) + #if SIMDE_MATH_BUILTIN_LIBM(erfc) + #define simde_math_erfc(v) __builtin_erfc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfc(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfc(v) erfc(v) + #endif +#endif + +#if !defined(simde_math_erfcf) + #if SIMDE_MATH_BUILTIN_LIBM(erfcf) + #define simde_math_erfcf(v) __builtin_erfcf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_erfcf(v) std::erfc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_erfcf(v) erfcf(v) + #endif +#endif + +#if !defined(simde_math_exp) + #if SIMDE_MATH_BUILTIN_LIBM(exp) + #define simde_math_exp(v) __builtin_exp(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp(v) exp(v) + #endif +#endif + +#if !defined(simde_math_expf) + #if SIMDE_MATH_BUILTIN_LIBM(expf) + #define simde_math_expf(v) __builtin_expf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expf(v) std::exp(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expf(v) expf(v) + #endif +#endif + +#if !defined(simde_math_expm1) + #if SIMDE_MATH_BUILTIN_LIBM(expm1) + #define simde_math_expm1(v) __builtin_expm1(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1(v) expm1(v) + #endif +#endif + +#if !defined(simde_math_expm1f) + #if SIMDE_MATH_BUILTIN_LIBM(expm1f) + #define simde_math_expm1f(v) __builtin_expm1f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_expm1f(v) std::expm1(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_expm1f(v) expm1f(v) + #endif +#endif + +#if !defined(simde_math_exp2) + #if SIMDE_MATH_BUILTIN_LIBM(exp2) + #define simde_math_exp2(v) __builtin_exp2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2(v) exp2(v) + #endif +#endif + +#if !defined(simde_math_exp2f) + #if SIMDE_MATH_BUILTIN_LIBM(exp2f) + #define simde_math_exp2f(v) __builtin_exp2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_exp2f(v) std::exp2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_exp2f(v) exp2f(v) + #endif +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10(v) __builtin_exp10(v) +#else +# define simde_math_exp10(v) pow(10.0, (v)) +#endif + +#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + # define simde_math_exp10f(v) __builtin_exp10f(v) +#else +# define simde_math_exp10f(v) powf(10.0f, (v)) +#endif + +#if !defined(simde_math_fabs) + #if SIMDE_MATH_BUILTIN_LIBM(fabs) + #define simde_math_fabs(v) __builtin_fabs(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabs(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabs(v) fabs(v) + #endif +#endif + +#if !defined(simde_math_fabsf) + #if SIMDE_MATH_BUILTIN_LIBM(fabsf) + #define simde_math_fabsf(v) __builtin_fabsf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fabsf(v) std::fabs(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fabsf(v) fabsf(v) + #endif +#endif + +#if !defined(simde_math_floor) + 
#if SIMDE_MATH_BUILTIN_LIBM(floor) + #define simde_math_floor(v) __builtin_floor(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floor(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floor(v) floor(v) + #endif +#endif + +#if !defined(simde_math_floorf) + #if SIMDE_MATH_BUILTIN_LIBM(floorf) + #define simde_math_floorf(v) __builtin_floorf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_floorf(v) std::floor(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_floorf(v) floorf(v) + #endif +#endif + +#if !defined(simde_math_fma) + #if SIMDE_MATH_BUILTIN_LIBM(fma) + #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fma(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fma(x, y, z) fma(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmaf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaf) + #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaf(x, y, z) std::fma(x, y, z) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaf(x, y, z) fmaf(x, y, z) + #endif +#endif + +#if !defined(simde_math_fmax) + #if SIMDE_MATH_BUILTIN_LIBM(fmax) + #define simde_math_fmax(x, y) __builtin_fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmax(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmax(x, y) fmax(x, y) + #endif +#endif + +#if !defined(simde_math_fmaxf) + #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) + #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_fmaxf(x, y) std::fmax(x, y) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_fmaxf(x, y) fmaxf(x, y) + #endif +#endif + +#if !defined(simde_math_hypot) + #if SIMDE_MATH_BUILTIN_LIBM(hypot) + #define simde_math_hypot(y, x) __builtin_hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypot(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypot(y, x) hypot(y, x) + #endif +#endif + +#if !defined(simde_math_hypotf) + #if SIMDE_MATH_BUILTIN_LIBM(hypotf) + #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_hypotf(y, x) std::hypot(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_hypotf(y, x) hypotf(y, x) + #endif +#endif + +#if !defined(simde_math_log) + #if SIMDE_MATH_BUILTIN_LIBM(log) + #define simde_math_log(v) __builtin_log(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log(v) log(v) + #endif +#endif + +#if !defined(simde_math_logf) + #if SIMDE_MATH_BUILTIN_LIBM(logf) + #define simde_math_logf(v) __builtin_logf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logf(v) std::log(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logf(v) logf(v) + #endif +#endif + +#if !defined(simde_math_logb) + #if SIMDE_MATH_BUILTIN_LIBM(logb) + #define simde_math_logb(v) __builtin_logb(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logb(v) std::logb(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logb(v) logb(v) + #endif +#endif + +#if !defined(simde_math_logbf) + #if SIMDE_MATH_BUILTIN_LIBM(logbf) + #define simde_math_logbf(v) __builtin_logbf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_logbf(v) std::logb(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_logbf(v) logbf(v) + #endif +#endif + +#if !defined(simde_math_log1p) + #if SIMDE_MATH_BUILTIN_LIBM(log1p) + #define simde_math_log1p(v) __builtin_log1p(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1p(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1p(v) log1p(v) + #endif +#endif + +#if !defined(simde_math_log1pf) + #if SIMDE_MATH_BUILTIN_LIBM(log1pf) + #define simde_math_log1pf(v) __builtin_log1pf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log1pf(v) std::log1p(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log1pf(v) log1pf(v) + #endif +#endif + +#if !defined(simde_math_log2) + #if SIMDE_MATH_BUILTIN_LIBM(log2) + #define simde_math_log2(v) __builtin_log2(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2(v) log2(v) + #endif +#endif + +#if !defined(simde_math_log2f) + #if SIMDE_MATH_BUILTIN_LIBM(log2f) + #define simde_math_log2f(v) __builtin_log2f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log2f(v) std::log2(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log2f(v) log2f(v) + #endif +#endif + +#if !defined(simde_math_log10) + #if SIMDE_MATH_BUILTIN_LIBM(log10) + #define simde_math_log10(v) __builtin_log10(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10(v) log10(v) + #endif +#endif + +#if !defined(simde_math_log10f) + #if SIMDE_MATH_BUILTIN_LIBM(log10f) + #define simde_math_log10f(v) __builtin_log10f(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_log10f(v) std::log10(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_log10f(v) log10f(v) + #endif +#endif + +#if !defined(simde_math_modf) + #if SIMDE_MATH_BUILTIN_LIBM(modf) + #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modf(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modf(x, iptr) modf(x, iptr) + #endif +#endif + +#if !defined(simde_math_modff) + #if SIMDE_MATH_BUILTIN_LIBM(modff) + #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_modff(x, iptr) std::modf(x, iptr) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_modff(x, iptr) modff(x, iptr) + #endif +#endif + +#if !defined(simde_math_nearbyint) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) + #define simde_math_nearbyint(v) __builtin_nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyint(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyint(v) nearbyint(v) + #endif +#endif + +#if !defined(simde_math_nearbyintf) + #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) + #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_nearbyintf(v) std::nearbyint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_nearbyintf(v) nearbyintf(v) + #endif +#endif + +#if !defined(simde_math_pow) + #if SIMDE_MATH_BUILTIN_LIBM(pow) + #define simde_math_pow(y, x) __builtin_pow(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_pow(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_pow(y, x) pow(y, x) + #endif +#endif + +#if !defined(simde_math_powf) + 
#if SIMDE_MATH_BUILTIN_LIBM(powf) + #define simde_math_powf(y, x) __builtin_powf(y, x) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_powf(y, x) std::pow(y, x) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_powf(y, x) powf(y, x) + #endif +#endif + +#if !defined(simde_math_rint) + #if SIMDE_MATH_BUILTIN_LIBM(rint) + #define simde_math_rint(v) __builtin_rint(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rint(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rint(v) rint(v) + #endif +#endif + +#if !defined(simde_math_rintf) + #if SIMDE_MATH_BUILTIN_LIBM(rintf) + #define simde_math_rintf(v) __builtin_rintf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_rintf(v) std::rint(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_rintf(v) rintf(v) + #endif +#endif + +#if !defined(simde_math_round) + #if SIMDE_MATH_BUILTIN_LIBM(round) + #define simde_math_round(v) __builtin_round(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_round(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_round(v) round(v) + #endif +#endif + +#if !defined(simde_math_roundf) + #if SIMDE_MATH_BUILTIN_LIBM(roundf) + #define simde_math_roundf(v) __builtin_roundf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_roundf(v) std::round(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_roundf(v) roundf(v) + #endif +#endif + +#if !defined(simde_math_roundeven) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundeven(v) __builtin_roundeven(v) + #elif defined(simde_math_round) && defined(simde_math_fabs) + static HEDLEY_INLINE + double + simde_math_roundeven(double v) { + double rounded = simde_math_round(v); + double diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundeven simde_math_roundeven + #endif +#endif + +#if !defined(simde_math_roundevenf) + #if \ + (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ + HEDLEY_GCC_VERSION_CHECK(10,0,0) + #define simde_math_roundevenf(v) __builtin_roundevenf(v) + #elif defined(simde_math_roundf) && defined(simde_math_fabsf) + static HEDLEY_INLINE + float + simde_math_roundevenf(float v) { + float rounded = simde_math_roundf(v); + float diff = rounded - v; + if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { + rounded = v - diff; + } + return rounded; + } + #define simde_math_roundevenf simde_math_roundevenf + #endif +#endif + +#if !defined(simde_math_sin) + #if SIMDE_MATH_BUILTIN_LIBM(sin) + #define simde_math_sin(v) __builtin_sin(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sin(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sin(v) sin(v) + #endif +#endif + +#if !defined(simde_math_sinf) + #if SIMDE_MATH_BUILTIN_LIBM(sinf) + #define simde_math_sinf(v) __builtin_sinf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinf(v) std::sin(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinf(v) sinf(v) + #endif +#endif + +#if !defined(simde_math_sinh) + #if SIMDE_MATH_BUILTIN_LIBM(sinh) + #define simde_math_sinh(v) __builtin_sinh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinh(v) std::sinh(v) + #elif 
defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinh(v) sinh(v) + #endif +#endif + +#if !defined(simde_math_sinhf) + #if SIMDE_MATH_BUILTIN_LIBM(sinhf) + #define simde_math_sinhf(v) __builtin_sinhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sinhf(v) std::sinh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sinhf(v) sinhf(v) + #endif +#endif + +#if !defined(simde_math_sqrt) + #if SIMDE_MATH_BUILTIN_LIBM(sqrt) + #define simde_math_sqrt(v) __builtin_sqrt(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrt(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrt(v) sqrt(v) + #endif +#endif + +#if !defined(simde_math_sqrtf) + #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) + #define simde_math_sqrtf(v) __builtin_sqrtf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_sqrtf(v) std::sqrt(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_sqrtf(v) sqrtf(v) + #endif +#endif + +#if !defined(simde_math_tan) + #if SIMDE_MATH_BUILTIN_LIBM(tan) + #define simde_math_tan(v) __builtin_tan(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tan(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tan(v) tan(v) + #endif +#endif + +#if !defined(simde_math_tanf) + #if SIMDE_MATH_BUILTIN_LIBM(tanf) + #define simde_math_tanf(v) __builtin_tanf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanf(v) std::tan(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanf(v) tanf(v) + #endif +#endif + +#if !defined(simde_math_tanh) + #if SIMDE_MATH_BUILTIN_LIBM(tanh) + #define simde_math_tanh(v) __builtin_tanh(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanh(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanh(v) tanh(v) + #endif +#endif + +#if !defined(simde_math_tanhf) + #if SIMDE_MATH_BUILTIN_LIBM(tanhf) + #define simde_math_tanhf(v) __builtin_tanhf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_tanhf(v) std::tanh(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_tanhf(v) tanhf(v) + #endif +#endif + +#if !defined(simde_math_trunc) + #if SIMDE_MATH_BUILTIN_LIBM(trunc) + #define simde_math_trunc(v) __builtin_trunc(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_trunc(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_trunc(v) trunc(v) + #endif +#endif + +#if !defined(simde_math_truncf) + #if SIMDE_MATH_BUILTIN_LIBM(truncf) + #define simde_math_truncf(v) __builtin_truncf(v) + #elif defined(SIMDE_MATH_HAVE_CMATH) + #define simde_math_truncf(v) std::trunc(v) + #elif defined(SIMDE_MATH_HAVE_MATH_H) + #define simde_math_truncf(v) truncf(v) + #endif +#endif + +/*** Comparison macros (which don't raise invalid errors) ***/ + +#if defined(isunordered) + #define simde_math_isunordered(x, y) isunordered(x, y) +#elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) + #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) +#else + static HEDLEY_INLINE + int simde_math_isunordered(double x, double y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunordered simde_math_isunordered + + static HEDLEY_INLINE + int simde_math_isunorderedf(float x, float y) { + return (x != y) && (x != x || y != y); + } + #define simde_math_isunorderedf simde_math_isunorderedf +#endif +#if !defined(simde_math_isunorderedf) + #define simde_math_isunorderedf simde_math_isunordered +#endif + +/*** Additional functions not in libm ***/ + +#if 
defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) + static HEDLEY_INLINE + double + simde_math_cdfnorm(double x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const double a1 = 0.254829592; + static const double a2 = -0.284496736; + static const double a3 = 1.421413741; + static const double a4 = -1.453152027; + static const double a5 = 1.061405429; + static const double p = 0.3275911; + + const int sign = x < 0; + x = simde_math_fabs(x) / simde_math_sqrt(2.0); + + /* A&S formula 7.1.26 */ + double t = 1.0 / (1.0 + p * x); + double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); + + return 0.5 * (1.0 + (sign ? -y : y)); + } + #define simde_math_cdfnorm simde_math_cdfnorm +#endif + +#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) + static HEDLEY_INLINE + float + simde_math_cdfnormf(float x) { + /* https://www.johndcook.com/blog/cpp_phi/ + * Public Domain */ + static const float a1 = 0.254829592f; + static const float a2 = -0.284496736f; + static const float a3 = 1.421413741f; + static const float a4 = -1.453152027f; + static const float a5 = 1.061405429f; + static const float p = 0.3275911f; + + const int sign = x < 0; + x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); + + /* A&S formula 7.1.26 */ + float t = 1.0f / (1.0f + p * x); + float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); + + return 0.5f * (1.0f + (sign ? -y : y)); + } + #define simde_math_cdfnormf simde_math_cdfnormf +#endif + +#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) + /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ + static HEDLEY_INLINE + double + simde_math_cdfnorminv(double p) { + static const double a[] = { + -3.969683028665376e+01, + 2.209460984245205e+02, + -2.759285104469687e+02, + 1.383577518672690e+02, + -3.066479806614716e+01, + 2.506628277459239e+00 + }; + + static const double b[] = { + -5.447609879822406e+01, + 1.615858368580409e+02, + -1.556989798598866e+02, + 6.680131188771972e+01, + -1.328068155288572e+01 + }; + + static const double c[] = { + -7.784894002430293e-03, + -3.223964580411365e-01, + -2.400758277161838e+00, + -2.549732539343734e+00, + 4.374664141464968e+00, + 2.938163982698783e+00 + }; + + static const double d[] = { + 7.784695709041462e-03, + 3.224671290700398e-01, + 2.445134137142996e+00, + 3.754408661907416e+00 + }; + + static const double low = 0.02425; + static const double high = 0.97575; + double q, r; + + if (p < 0 || p > 1) { + return 0.0; + } else if (p == 0) { + return -SIMDE_MATH_INFINITY; + } else if (p == 1) { + return SIMDE_MATH_INFINITY; + } else if (p < low) { + q = simde_math_sqrt(-2.0 * simde_math_log(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } +} +#define simde_math_cdfnorminv simde_math_cdfnorminv +#endif + +#if !defined(simde_math_cdfnorminvf) && 
defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_cdfnorminvf(float p) { + static const float a[] = { + -3.969683028665376e+01f, + 2.209460984245205e+02f, + -2.759285104469687e+02f, + 1.383577518672690e+02f, + -3.066479806614716e+01f, + 2.506628277459239e+00f + }; + static const float b[] = { + -5.447609879822406e+01f, + 1.615858368580409e+02f, + -1.556989798598866e+02f, + 6.680131188771972e+01f, + -1.328068155288572e+01f + }; + static const float c[] = { + -7.784894002430293e-03f, + -3.223964580411365e-01f, + -2.400758277161838e+00f, + -2.549732539343734e+00f, + 4.374664141464968e+00f, + 2.938163982698783e+00f + }; + static const float d[] = { + 7.784695709041462e-03f, + 3.224671290700398e-01f, + 2.445134137142996e+00f, + 3.754408661907416e+00f + }; + static const float low = 0.02425f; + static const float high = 0.97575f; + float q, r; + + if (p < 0 || p > 1) { + return 0.0f; + } else if (p == 0) { + return -SIMDE_MATH_INFINITYF; + } else if (p == 1) { + return SIMDE_MATH_INFINITYF; + } else if (p < low) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); + return + (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else if (p > high) { + q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); + return + -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / + (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); + } else { + q = p - 0.5f; + r = q * q; + return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * + q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); + } + } + #define simde_math_cdfnorminvf simde_math_cdfnorminvf +#endif + +#if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfinv(double x) { + /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c + * + * The original answer on SO uses a constant of 0.147, but in my + * testing 0.14829094707965850830078125 gives a lower average absolute error + * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). + * That said, if your goal is to minimize the *maximum* absolute + * error, 0.15449436008930206298828125 provides significantly better + * results; 0.0009250640869140625000000000 vs ~ 0.005. 
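 *
 * [Editorial note, not part of the upstream comment: written out, with a
 * denoting the constant chosen above and L = ln(1 - x*x), the code below
 * evaluates the closed-form approximation (often attributed to Winitzki)
 *
 *   erfinv(x) ~= sgn(x) * sqrt( sqrt(tt1*tt1 - tt2) - tt1 ),
 *   where tt1 = 2/(pi*a) + L/2 and tt2 = L/a. ]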
*/ + double tt1, tt2, lnx; + double sgn = simde_math_copysign(1.0, x); + + x = (1.0 - x) * (1.0 + x); + lnx = simde_math_log(x); + + tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; + tt2 = (1.0 / 0.14829094707965850830078125) * lnx; + + return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); + } + #define simde_math_erfinv simde_math_erfinv +#endif + +#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfinvf(float x) { + float tt1, tt2, lnx; + float sgn = simde_math_copysignf(1.0f, x); + + x = (1.0f - x) * (1.0f + x); + lnx = simde_math_logf(x); + + tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; + tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; + + return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); + } + #define simde_math_erfinvf simde_math_erfinvf +#endif + +#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) + static HEDLEY_INLINE + double + simde_math_erfcinv(double x) { + if(x >= 0.0625 && x < 2.0) { + return simde_math_erfinv(1.0 - x); + } else if (x < 0.0625 && x >= 1.0e-100) { + static const double p[6] = { + 0.1550470003116, + 1.382719649631, + 0.690969348887, + -1.128081391617, + 0.680544246825, + -0.16444156791 + }; + static const double q[3] = { + 0.155024849822, + 1.385228141995, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { + static const double p[4] = { + 0.00980456202915, + 0.363667889171, + 0.97302949837, + -0.5374947401 + }; + static const double q[3] = { + 0.00980451277802, + 0.363699971544, + 1.000000000000 + }; + + const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (!simde_math_isnormal(x)) { + return SIMDE_MATH_INFINITY; + } else { + return -SIMDE_MATH_INFINITY; + } + } + + #define simde_math_erfcinv simde_math_erfcinv +#endif + +#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) + static HEDLEY_INLINE + float + simde_math_erfcinvf(float x) { + if(x >= 0.0625f && x < 2.0f) { + return simde_math_erfinvf(1.0f - x); + } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { + static const float p[6] = { + 0.1550470003116f, + 1.382719649631f, + 0.690969348887f, + -1.128081391617f, + 0.680544246825f, + -0.164441567910f + }; + static const float q[3] = { + 0.155024849822f, + 1.385228141995f, + 1.000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / + (q[0] + t * (q[1] + t * (q[2]))); + } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { + static const float p[4] = { + 0.00980456202915f, + 0.36366788917100f, + 0.97302949837000f, + -0.5374947401000f + }; + static const float q[3] = { + 0.00980451277802f, + 0.36369997154400f, + 1.00000000000000f + }; + + const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); + return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / + (q[0] + t * (q[1] + t * (q[2]))); + } else { + return simde_math_isnormalf(x) ? 
-SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; + } + } + + #define simde_math_erfcinvf simde_math_erfcinvf +#endif + +static HEDLEY_INLINE +double +simde_math_rad2deg(double radians) { + return radians * SIMDE_MATH_180_OVER_PI; +} + +static HEDLEY_INLINE +float +simde_math_rad2degf(float radians) { + return radians * SIMDE_MATH_180_OVER_PIF; +} + +static HEDLEY_INLINE +double +simde_math_deg2rad(double degrees) { + return degrees * SIMDE_MATH_PI_OVER_180; +} + +static HEDLEY_INLINE +float +simde_math_deg2radf(float degrees) { + return degrees * (SIMDE_MATH_PI_OVER_180F); +} + +/*** Saturated arithmetic ***/ + +static HEDLEY_INLINE +int8_t +simde_math_adds_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_s8(a, b); + #else + uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; + if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_adds_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; + if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_adds_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; + if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_adds_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ + b_; + + a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; + if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_adds_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddb_u8(a, b); + #else + uint8_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_adds_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddh_u16(a, b); + #else + uint16_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_adds_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqadds_u32(a, b); + #else + uint32_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_adds_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqaddd_u64(a, b); + #else + uint64_t r = a + b; + r |= -(r < a); + return r; + #endif +} + +static HEDLEY_INLINE +int8_t +simde_math_subs_i8(int8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_s8(a, b); + #else + uint8_t a_ = 
HEDLEY_STATIC_CAST(uint8_t, a); + uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); + uint8_t r_ = a_ - b_; + + a_ = (a_ >> 7) + INT8_MAX; + + if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int8_t, r_); + #endif +} + +static HEDLEY_INLINE +int16_t +simde_math_subs_i16(int16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_s16(a, b); + #else + uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); + uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); + uint16_t r_ = a_ - b_; + + a_ = (a_ >> 15) + INT16_MAX; + + if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int16_t, r_); + #endif +} + +static HEDLEY_INLINE +int32_t +simde_math_subs_i32(int32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_s32(a, b); + #else + uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); + uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); + uint32_t r_ = a_ - b_; + + a_ = (a_ >> 31) + INT32_MAX; + + if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int32_t, r_); + #endif +} + +static HEDLEY_INLINE +int64_t +simde_math_subs_i64(int64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_s64(a, b); + #else + uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); + uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); + uint64_t r_ = a_ - b_; + + a_ = (a_ >> 63) + INT64_MAX; + + if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { + r_ = a_; + } + + return HEDLEY_STATIC_CAST(int64_t, r_); + #endif +} + +static HEDLEY_INLINE +uint8_t +simde_math_subs_u8(uint8_t a, uint8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubb_u8(a, b); + #else + uint8_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint16_t +simde_math_subs_u16(uint16_t a, uint16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubh_u16(a, b); + #else + uint16_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint32_t +simde_math_subs_u32(uint32_t a, uint32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubs_u32(a, b); + #else + uint32_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +static HEDLEY_INLINE +uint64_t +simde_math_subs_u64(uint64_t a, uint64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vqsubd_u64(a, b); + #else + uint64_t res = a - b; + res &= -(res <= a); + return res; + #endif +} + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_MATH_H) */ +/* :: End simde/simde-math.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-constify.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2020 Evan Nemerson + */ + +/* Constify macros. For internal use only. + * + * These are used to make it possible to call a function which takes + * an Integer Constant Expression (ICE) using a compile time constant. + * Technically it would also be possible to use a value not trivially + * known by the compiler, but there would be a siginficant performance + * hit (a switch switch is used). + * + * The basic idea is pretty simple; we just emit a do while loop which + * contains a switch with a case for every possible value of the + * constant. + * + * As long as the value you pass to the function in constant, pretty + * much any copmiler shouldn't have a problem generating exactly the + * same code as if you had used an ICE. + * + * This is intended to be used in the SIMDe implementations of + * functions the compilers require to be an ICE, but the other benefit + * is that if we also disable the warnings from + * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests + * to use non-ICE parameters + */ + +#if !defined(SIMDE_CONSTIFY_H) +#define SIMDE_CONSTIFY_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: result = func_name(__VA_ARGS__, 0); break; \ + case 1: result = func_name(__VA_ARGS__, 1); break; \ + case 2: result = func_name(__VA_ARGS__, 2); break; \ + case 3: result = func_name(__VA_ARGS__, 3); break; \ + case 4: result = func_name(__VA_ARGS__, 4); break; \ + case 5: result = func_name(__VA_ARGS__, 5); break; \ + case 6: result = func_name(__VA_ARGS__, 6); break; \ + case 7: result = func_name(__VA_ARGS__, 7); break; \ + case 8: result = func_name(__VA_ARGS__, 8); break; \ + case 9: result = func_name(__VA_ARGS__, 9); break; \ + case 10: result = func_name(__VA_ARGS__, 10); break; \ + case 11: result = func_name(__VA_ARGS__, 11); break; \ + case 12: result = func_name(__VA_ARGS__, 12); break; \ + case 13: result = func_name(__VA_ARGS__, 13); break; \ + case 14: result = func_name(__VA_ARGS__, 14); break; \ + case 15: result = func_name(__VA_ARGS__, 15); break; \ + case 16: result = func_name(__VA_ARGS__, 16); break; \ + case 17: result = func_name(__VA_ARGS__, 17); break; \ + case 18: result = func_name(__VA_ARGS__, 18); break; \ + case 19: result = func_name(__VA_ARGS__, 19); break; \ + case 20: result = func_name(__VA_ARGS__, 20); break; \ + case 21: result = func_name(__VA_ARGS__, 21); break; \ + case 22: result = func_name(__VA_ARGS__, 22); break; \ + case 23: result = func_name(__VA_ARGS__, 23); break; \ + case 24: result = func_name(__VA_ARGS__, 24); break; \ + case 25: result = func_name(__VA_ARGS__, 25); break; \ + case 26: result = func_name(__VA_ARGS__, 26); break; \ + case 27: result = func_name(__VA_ARGS__, 27); break; \ + case 28: result = func_name(__VA_ARGS__, 28); break; \ + case 29: result = func_name(__VA_ARGS__, 29); break; \ + case 30: result = func_name(__VA_ARGS__, 30); break; \ + case 31: result = func_name(__VA_ARGS__, 31); break; \ + case 32: result = func_name(__VA_ARGS__, 32); break; \ + case 33: result = func_name(__VA_ARGS__, 33); break; \ + case 34: result = func_name(__VA_ARGS__, 34); break; \ + case 35: result = func_name(__VA_ARGS__, 35); break; \ + case 36: result = func_name(__VA_ARGS__, 36); break; \ + case 37: result = func_name(__VA_ARGS__, 37); break; \ + case 38: result = func_name(__VA_ARGS__, 38); break; \ + case 39: result = func_name(__VA_ARGS__, 39); break; \ + case 40: result = func_name(__VA_ARGS__, 40); break; \ + case 41: result = func_name(__VA_ARGS__, 41); break; \ + case 42: result = func_name(__VA_ARGS__, 42); break; \ + case 43: result = func_name(__VA_ARGS__, 43); break; \ + case 44: result = func_name(__VA_ARGS__, 44); break; \ + case 45: result = func_name(__VA_ARGS__, 45); break; \ + case 46: result = func_name(__VA_ARGS__, 46); break; \ + case 47: result = func_name(__VA_ARGS__, 47); break; \ + case 48: result = func_name(__VA_ARGS__, 48); break; \ + case 49: result = func_name(__VA_ARGS__, 49); break; \ + case 50: result = func_name(__VA_ARGS__, 50); break; \ + case 51: result = func_name(__VA_ARGS__, 51); break; \ + case 52: result = func_name(__VA_ARGS__, 52); break; \ + case 53: result = func_name(__VA_ARGS__, 53); break; \ + case 54: result = func_name(__VA_ARGS__, 54); break; \ + case 55: result = func_name(__VA_ARGS__, 55); break; \ + case 56: result = func_name(__VA_ARGS__, 56); break; \ + case 57: result = func_name(__VA_ARGS__, 57); break; \ + case 58: result = func_name(__VA_ARGS__, 58); break; \ + case 59: result = func_name(__VA_ARGS__, 59); break; \ + case 60: result = func_name(__VA_ARGS__, 60); break; \ + case 61: result = func_name(__VA_ARGS__, 61); break; \ + case 62: 
result = func_name(__VA_ARGS__, 62); break; \ + case 63: result = func_name(__VA_ARGS__, 63); break; \ + default: result = default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + default: default_case; break; \ + } \ + } while (0) + +#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ + do { \ + switch(imm) { \ + case 0: func_name(__VA_ARGS__, 0); break; \ + case 1: func_name(__VA_ARGS__, 1); break; \ + case 2: func_name(__VA_ARGS__, 2); break; \ + case 3: func_name(__VA_ARGS__, 3); break; \ + case 4: func_name(__VA_ARGS__, 4); break; \ + case 5: func_name(__VA_ARGS__, 5); break; \ + case 6: func_name(__VA_ARGS__, 6); break; \ + case 7: func_name(__VA_ARGS__, 7); break; \ + case 8: func_name(__VA_ARGS__, 8); break; \ + case 9: func_name(__VA_ARGS__, 9); break; \ + case 10: func_name(__VA_ARGS__, 10); break; \ + case 11: func_name(__VA_ARGS__, 11); break; \ + case 12: func_name(__VA_ARGS__, 12); break; \ + case 13: func_name(__VA_ARGS__, 13); break; \ + case 14: func_name(__VA_ARGS__, 14); break; \ + case 15: func_name(__VA_ARGS__, 15); break; \ + case 16: func_name(__VA_ARGS__, 16); break; \ + case 17: func_name(__VA_ARGS__, 17); break; \ + case 18: func_name(__VA_ARGS__, 18); break; \ + case 19: func_name(__VA_ARGS__, 19); break; \ + case 20: func_name(__VA_ARGS__, 20); break; \ + case 21: func_name(__VA_ARGS__, 21); break; \ + case 22: func_name(__VA_ARGS__, 22); break; \ + case 23: func_name(__VA_ARGS__, 23); break; \ + case 24: func_name(__VA_ARGS__, 24); break; \ + case 25: func_name(__VA_ARGS__, 25); break; \ + case 26: func_name(__VA_ARGS__, 26); break; \ + case 27: func_name(__VA_ARGS__, 27); break; \ + case 28: func_name(__VA_ARGS__, 28); break; \ + case 29: func_name(__VA_ARGS__, 29); break; \ + case 30: func_name(__VA_ARGS__, 30); break; \ + case 31: func_name(__VA_ARGS__, 31); break; \ + case 32: func_name(__VA_ARGS__, 32); break; \ + case 33: func_name(__VA_ARGS__, 33); break; \ + case 34: func_name(__VA_ARGS__, 34); break; \ + case 35: func_name(__VA_ARGS__, 35); break; \ + case 36: func_name(__VA_ARGS__, 36); break; \ + case 37: func_name(__VA_ARGS__, 37); break; \ + case 38: func_name(__VA_ARGS__, 38); break; \ 
+ case 39: func_name(__VA_ARGS__, 39); break; \ + case 40: func_name(__VA_ARGS__, 40); break; \ + case 41: func_name(__VA_ARGS__, 41); break; \ + case 42: func_name(__VA_ARGS__, 42); break; \ + case 43: func_name(__VA_ARGS__, 43); break; \ + case 44: func_name(__VA_ARGS__, 44); break; \ + case 45: func_name(__VA_ARGS__, 45); break; \ + case 46: func_name(__VA_ARGS__, 46); break; \ + case 47: func_name(__VA_ARGS__, 47); break; \ + case 48: func_name(__VA_ARGS__, 48); break; \ + case 49: func_name(__VA_ARGS__, 49); break; \ + case 50: func_name(__VA_ARGS__, 50); break; \ + case 51: func_name(__VA_ARGS__, 51); break; \ + case 52: func_name(__VA_ARGS__, 52); break; \ + case 53: func_name(__VA_ARGS__, 53); break; \ + case 54: func_name(__VA_ARGS__, 54); break; \ + case 55: func_name(__VA_ARGS__, 55); break; \ + case 56: func_name(__VA_ARGS__, 56); break; \ + case 57: func_name(__VA_ARGS__, 57); break; \ + case 58: func_name(__VA_ARGS__, 58); break; \ + case 59: func_name(__VA_ARGS__, 59); break; \ + case 60: func_name(__VA_ARGS__, 60); break; \ + case 61: func_name(__VA_ARGS__, 61); break; \ + case 62: func_name(__VA_ARGS__, 62); break; \ + case 63: func_name(__VA_ARGS__, 63); break; \ + default: default_case; break; \ + } \ + } while (0) + +HEDLEY_DIAGNOSTIC_POP + +#endif +/* :: End simde/simde-constify.h :: */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/simde-align.h :: */ +/* Alignment + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * + * + * SPDX-License-Identifier: CC0-1.0 + * + ********************************************************************** + * + * This is portability layer which should help iron out some + * differences across various compilers, as well as various verisons of + * C and C++. + * + * It was originally developed for SIMD Everywhere + * (), but since its only + * dependency is Hedley (, also CC0) + * it can easily be used in other projects, so please feel free to do + * so. + * + * If you do use this in your project, please keep a link to SIMDe in + * your code to remind you where to report any bugs and/or check for + * updated versions. + * + * # API Overview + * + * The API has several parts, and most macros have a few variations. + * There are APIs for declaring aligned fields/variables, optimization + * hints, and run-time alignment checks. + * + * Briefly, macros ending with "_TO" take numeric values and are great + * when you know the value you would like to use. Macros ending with + * "_LIKE", on the other hand, accept a type and are used when you want + * to use the alignment of a type instead of hardcoding a value. + * + * Documentation for each section of the API is inline. + * + * True to form, MSVC is the main problem and imposes several + * limitations on the effectiveness of the APIs. Detailed descriptions + * of the limitations of each macro are inline, but in general: + * + * * On C11+ or C++11+ code written using this API will work. The + * ASSUME macros may or may not generate a hint to the compiler, but + * that is only an optimization issue and will not actually cause + * failures. + * * If you're using pretty much any compiler other than MSVC, + * everything should basically work as well as in C11/C++11. 
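+ * + * A minimal illustration of the two flavors (the variable names below + * are just examples, not part of the API): + * + * SIMDE_ALIGN_TO_16 int32_t lanes[4]; // hard-coded numeric alignment + * SIMDE_ALIGN_LIKE(double) unsigned char buf[32]; // alignment taken from a type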
+ */ + +#if !defined(SIMDE_ALIGN_H) +#define SIMDE_ALIGN_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +/* I know this seems a little silly, but some non-hosted compilers + * don't have stddef.h, so we try to accomodate them. */ +#if !defined(SIMDE_ALIGN_SIZE_T_) + #if defined(__SIZE_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__SIZE_T_TYPE__) + #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #else + #include + #define SIMDE_ALIGN_SIZE_T_ size_t + #endif +#endif + +#if !defined(SIMDE_ALIGN_INTPTR_T_) + #if defined(__INTPTR_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ + #elif defined(__PTRDIFF_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ + #elif defined(__PTRDIFF_T_TYPE__) + #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ + #elif defined(__cplusplus) + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #else + #include + #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t + #endif +#endif + +#if defined(SIMDE_ALIGN_DEBUG) + #if defined(__cplusplus) + #include + #else + #include + #endif +#endif + +/* SIMDE_ALIGN_OF(Type) + * + * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or + * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. + * It isn't defined everywhere (only when the compiler has some alignof- + * like feature we can use to implement it), but it should work in most + * modern compilers, as well as C11 and C++11. + * + * If we can't find an implementation for SIMDE_ALIGN_OF then the macro + * will not be defined, so if you can handle that situation sensibly + * you may need to sprinkle some ifdefs into your code. + */ +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (0 && HEDLEY_HAS_FEATURE(c_alignof)) + #define SIMDE_ALIGN_OF(Type) _Alignof(Type) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) + #define SIMDE_ALIGN_OF(Type) alignof(Type) +#elif \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ + HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + defined(__IBM__ALIGNOF__) || \ + defined(__clang__) + #define SIMDE_ALIGN_OF(Type) __alignof__(Type) +#elif \ + HEDLEY_IAR_VERSION_CHECK(8,40,0) + #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) +#elif \ + HEDLEY_MSVC_VERSION_CHECK(19,0,0) + /* Probably goes back much further, but MS takes down their old docs. + * If you can verify that this works in earlier versions please let + * me know! */ + #define SIMDE_ALIGN_OF(Type) __alignof(Type) +#endif + +/* SIMDE_ALIGN_MAXIMUM: + * + * This is the maximum alignment that the compiler supports. You can + * define the value prior to including SIMDe if necessary, but in that + * case *please* submit an issue so we can add the platform to the + * detection code. + * + * Most compilers are okay with types which are aligned beyond what + * they think is the maximum, as long as the alignment is a power + * of two. 
Older versions of MSVC is the exception, so we need to cap + * the alignment requests at values that the implementation supports. + * + * XL C/C++ will accept values larger than 16 (which is the alignment + * of an AltiVec vector), but will not reliably align to the larger + * value, so so we cap the value at 16 there. + * + * If the compiler accepts any power-of-two value within reason then + * this macro should be left undefined, and the SIMDE_ALIGN_CAP + * macro will just return the value passed to it. */ +#if !defined(SIMDE_ALIGN_MAXIMUM) + #if defined(HEDLEY_MSVC_VERSION) + #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) + // Visual studio 2017 and newer does not need a max + #else + #if defined(_M_IX86) || defined(_M_AMD64) + #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 + #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) + /* VS 2010 is really a guess based on Wikipedia; if anyone can + * test with old VS versions I'd really appreciate it. */ + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 + #else + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif + #elif defined(_M_ARM) || defined(_M_ARM64) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 + #endif + #endif + #elif defined(HEDLEY_IBM_VERSION) + #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 + #endif +#endif + +/* You can mostly ignore these; they're intended for internal use. + * If you do need to use them please let me know; if they fulfill + * a common use case I'll probably drop the trailing underscore + * and make them part of the public API. */ +#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) + #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 + #define SIMDE_ALIGN_64_ 32 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 + #define SIMDE_ALIGN_64_ 16 + #define SIMDE_ALIGN_32_ 16 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 + #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 + #define SIMDE_ALIGN_64_ 8 + #define SIMDE_ALIGN_32_ 8 + #define SIMDE_ALIGN_16_ 8 + #define SIMDE_ALIGN_8_ 8 + #else + #error Max alignment expected to be >= 8 + #endif +#else + #define SIMDE_ALIGN_64_ 64 + #define SIMDE_ALIGN_32_ 32 + #define SIMDE_ALIGN_16_ 16 + #define SIMDE_ALIGN_8_ 8 +#endif + +/** + * SIMDE_ALIGN_CAP(Alignment) + * + * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. + */ +#if defined(SIMDE_ALIGN_MAXIMUM) + #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) +#else + #define SIMDE_ALIGN_CAP(Alignment) (Alignment) +#endif + +/* SIMDE_ALIGN_TO(Alignment) + * + * SIMDE_ALIGN_TO is used to declare types or variables. It basically + * maps to the align attribute in most compilers, the align declspec + * in MSVC, or _Alignas/alignas in C11/C++11. + * + * Example: + * + * struct i32x4 { + * SIMDE_ALIGN_TO(16) int32_t values[4]; + * } + * + * Limitations: + * + * MSVC requires that the Alignment parameter be numeric; you can't do + * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is + * unfortunate because that's really how the LIKE macros are + * implemented, and I am not aware of a way to get anything like this + * to work without using the C11/C++11 keywords. 
+ * + * It also means that we can't use SIMDE_ALIGN_CAP to limit the + * alignment to the value specified, which MSVC also requires, so on + * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. + * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, + * but should be safe to use on MSVC. + * + * All this is to say that, if you want your code to work on MSVC, you + * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of + * SIMDE_ALIGN_TO(8/16/32/64). + */ +#if \ + HEDLEY_HAS_ATTRIBUTE(aligned) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ + HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ + HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ + HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) + #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) +#elif \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) + #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) + #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) +#elif \ + defined(HEDLEY_MSVC_VERSION) + #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) + /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); + * the alignment passed to the declspec has to be an integer. */ + #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE +#endif +#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) +#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) +#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) +#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) + +/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) + * + * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's + * std::assume_aligned, or __builtin_assume_aligned. It tells the + * compiler to assume that the provided pointer is aligned to an + * `Alignment`-byte boundary. + * + * If you define SIMDE_ALIGN_DEBUG prior to including this header then + * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. 
We don't + * integrate with NDEBUG in this header, but it may be a good idea to + * put something like this in your code: + * + * #if !defined(NDEBUG) + * #define SIMDE_ALIGN_DEBUG + * #endif + * #include <.../simde-align.h> + */ +#if \ + HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ + HEDLEY_GCC_VERSION_CHECK(4,7,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ + HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) +#elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ + __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ + __assume_aligned(simde_assume_aligned_t_, Alignment); \ + simde_assume_aligned_t_; \ + })) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) +#else + #if defined(__cplusplus) + template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) + #else + HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) + #endif + { + HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); + return ptr; + } + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) + #else + #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) + #endif +#endif + +#if !defined(SIMDE_ALIGN_DEBUG) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) +#else + #include + #if defined(__cplusplus) + template + static HEDLEY_ALWAYS_INLINE + T* + simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #else + static HEDLEY_ALWAYS_INLINE + void* + simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) + #endif + { + if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { + fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", + file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), + HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), + HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); + } + + return ptr; + } + + #if defined(__cplusplus) + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) + #else + #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) + #endif +#endif + +/* SIMDE_ALIGN_LIKE(Type) + * SIMDE_ALIGN_LIKE_#(Type) + * + * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros + * except instead of an integer they take a type; basically, it's just + * a more convenient way to do something like: + * + * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + * + * The versions with a numeric suffix will fall back 
on using a numeric + * value in the event we can't use SIMDE_ALIGN_OF(Type). This is + * mainly for MSVC, where __declspec(align()) can't handle anything + * other than hard-coded numeric values. + */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) + #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) +#else + #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 + #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 + #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 + #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 +#endif + +/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) + * + * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a + * type instead of a numeric value. */ +#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) + #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) +#endif + +/* SIMDE_ALIGN_CAST(Type, Pointer) + * + * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try + * to silence warnings that some compilers may produce if you try + * to assign to a type with increased alignment requirements. + * + * Note that it does *not* actually attempt to tell the compiler that + * the pointer is aligned like the destination should be; that's the + * job of the next macro. This macro is necessary for stupid APIs + * like _mm_loadu_si128 where the input is a __m128i* but the function + * is specifically for data which isn't necessarily aligned to + * _Alignof(__m128i). + */ +#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ + Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_r_; \ + })) +#else + #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) +#endif + +/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) + * + * This is sort of like a combination of a reinterpret_cast and a + * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell + * the compiler that the pointer is aligned like the specified type + * and casts the pointer to the specified type while suppressing any + * warnings from the compiler about casting to a type with greater + * alignment requirements. + */ +#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) + +#endif /* !defined(SIMDE_ALIGN_H) */ +/* :: End simde/simde-align.h :: */ + +/* In some situations, SIMDe has to make large performance sacrifices + * for small increases in how faithfully it reproduces an API, but + * only a relatively small number of users will actually need the API + * to be completely accurate. The SIMDE_FAST_* options can be used to + * disable these trade-offs. + * + * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or + * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to + * enable some optimizations. Using -ffast-math and/or + * -ffinite-math-only will also enable the relevant options. If you + * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. 
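+ * + * For example, a hypothetical translation unit that wants the fast-math + * trade-offs but strict NaN handling could define, before including any + * SIMDe header: + * + * #define SIMDE_FAST_MATH + * #define SIMDE_NO_FAST_NANS + * #include "simde/arm/neon.h"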
*/ + +/* Most programs avoid NaNs by never passing values which can result in + * a NaN; for example, if you only pass non-negative values to the sqrt + * functions, it won't generate a NaN. On some platforms, similar + * functions handle NaNs differently; for example, the _mm_min_ps SSE + * function will return 0.0 if you pass it (0.0, NaN), but the NEON + * vminq_f32 function will return NaN. Making them behave like one + * another is expensive; it requires generating a mask of all lanes + * with NaNs, then performing the operation (e.g., vminq_f32), then + * blending together the result with another vector using the mask. + * + * If you don't want SIMDe to worry about the differences between how + * NaNs are handled on the two platforms, define this (or pass + * -ffinite-math-only) */ +#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) + #define SIMDE_FAST_MATH +#endif + +#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) + #if defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_NANS + #elif defined(__FINITE_MATH_ONLY__) + #if __FINITE_MATH_ONLY__ + #define SIMDE_FAST_NANS + #endif + #endif +#endif + +/* Many functions are defined as using the current rounding mode + * (i.e., the SIMD version of fegetround()) when converting to + * an integer. For example, _mm_cvtpd_epi32. Unfortunately, + * on some platforms (such as ARMv8+ where round-to-nearest is + * always used, regardless of the FPSCR register) this means we + * have to first query the current rounding mode, then choose + * the proper function (round, ceil, floor, etc.) */ +#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_MODE +#endif + +/* This controls how ties are rounded. For example, does 10.5 round to + * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for + * example) doesn't support it and it must be emulated (which is rather + * slow). If you're okay with just using the default for whatever arch + * you're on, you should definitely define this. + * + * Note that we don't use this macro to avoid correct implementations + * in functions which are explicitly about rounding (such as vrnd* on + * NEON, _mm_round_* on x86, etc.); it is only used for code where + * rounding is a component in another function, and even then it isn't + * usually a problem since such functions will use the current rounding + * mode. */ +#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_ROUND_TIES +#endif + +/* For functions which convert from one type to another (mostly from + * floating point to integer types), sometimes we need to do a range + * check and potentially return a different result if the value + * falls outside that range. Skipping this check can provide a + * performance boost, at the expense of faithfulness to the API we're + * emulating. */ +#if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_CONVERSION_RANGE +#endif + +/* Due to differences across platforms, sometimes it can be much + * faster for us to allow spurious floating point exceptions, + * or to not generate them when we should. 
*/ +#if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) + #define SIMDE_FAST_EXCEPTIONS +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) +#elif defined(__cplusplus) && (__cplusplus > 201703L) + #include + #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) +#endif + +#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) + #if defined(SIMDE_CHECK_CONSTANT_) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ + (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) + #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") + #else + #define SIMDE_REQUIRE_CONSTANT(arg) + #endif +#else + #define SIMDE_REQUIRE_CONSTANT(arg) +#endif + +#define SIMDE_REQUIRE_RANGE(arg, min, max) \ + HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") + +#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ + SIMDE_REQUIRE_CONSTANT(arg) \ + SIMDE_REQUIRE_RANGE(arg, min, max) + +/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty + * fallback if we can't find an implementation; instead we have to + * check if SIMDE_STATIC_ASSERT is defined before using it. */ +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + HEDLEY_HAS_FEATURE(c_static_assert) || \ + HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) + /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which + * starts with a double-underscore. This is a system header so we have no + * control over it, but since it's a macro it will emit a diagnostic which + * prevents compilation with -Werror. */ + #if HEDLEY_HAS_WARNING("-Wreserved-identifier") + #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ + _Static_assert(expr, message); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) + #endif +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + HEDLEY_MSVC_VERSION_CHECK(16,0,0) + #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#endif + +/* Statement exprs */ +#if \ + HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ + HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ + HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ + HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ + HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) +#endif + +/* This is just a convenience macro to make it easy to call a single + * function with a specific diagnostic disabled. 
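+ * + * A sketch of the intended use (a and b here are hypothetical operands; + * the comparison is evaluated with -Wfloat-equal silenced where the + * compiler supports it): + * + * r = SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL, a == b);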
*/ +#if defined(SIMDE_STATEMENT_EXPR_) + #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ + SIMDE_STATEMENT_EXPR_(({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + diagnostic \ + (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + })) +#endif + +#if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) + #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") +#endif + +#if \ + (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ + HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) +# define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) +#else +# define SIMDE_MAY_ALIAS +#endif + +/* Lots of compilers support GCC-style vector extensions, but many + don't support all the features. Define different macros depending + on support for + + * SIMDE_VECTOR - Declaring a vector. + * SIMDE_VECTOR_OPS - basic operations (binary and unary). + * SIMDE_VECTOR_NEGATE - negating a vector + * SIMDE_VECTOR_SCALAR - For binary operators, the second argument + can be a scalar, in which case the result is as if that scalar + had been broadcast to all lanes of a vector. + * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for + extracting/inserting a single element.= + + SIMDE_VECTOR can be assumed if any others are defined, the + others are independent. */ +#if !defined(SIMDE_NO_VECTOR) +# if \ + HEDLEY_GCC_VERSION_CHECK(4,8,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SCALAR +# define SIMDE_VECTOR_SUBSCRIPT +# elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ +# define SIMDE_VECTOR_SUBSCRIPT +# elif \ + HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# elif HEDLEY_HAS_ATTRIBUTE(vector_size) +# define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) +# define SIMDE_VECTOR_OPS +# define SIMDE_VECTOR_NEGATE +# define SIMDE_VECTOR_SUBSCRIPT +# if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) +# define SIMDE_VECTOR_SCALAR +# endif +# endif + +/* GCC and clang have built-in functions to handle shuffling and + converting of vectors, but the implementations are slightly + different. This macro is just an abstraction over them. Note that + elem_size is in bits but vec_size is in bytes. */ +# if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) + HEDLEY_DIAGNOSTIC_PUSH + /* We don't care about -Wvariadic-macros; all compilers that support + * shufflevector/shuffle support them. */ +# if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") +# pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +# endif +# if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) +# pragma GCC diagnostic ignored "-Wvariadic-macros" +# endif + +# if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) +# elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) +# define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ + int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ + __builtin_shuffle(a, b, simde_shuffle_); \ + })) +# endif + HEDLEY_DIAGNOSTIC_POP +# endif + +/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT + but the code needs to be refactored a bit to take advantage. */ +# if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) +# if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) +# if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) + /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ + __typeof__(from) from_ = (from); \ + ((void) from_); \ + __builtin_convertvector(from_, __typeof__(to)); \ + }))) +# else +# define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) +# endif +# endif +# endif +#endif + +/* Since we currently require SUBSCRIPT before using a vector in a + union, we define these as dependencies of SUBSCRIPT. They are + likely to disappear in the future, once SIMDe learns how to make + use of vectors without using the union members. Do not use them + in your code unless you're okay with it breaking when SIMDe + changes. */ +#if defined(SIMDE_VECTOR_SUBSCRIPT) +# if defined(SIMDE_VECTOR_OPS) +# define SIMDE_VECTOR_SUBSCRIPT_OPS +# endif +# if defined(SIMDE_VECTOR_SCALAR) +# define SIMDE_VECTOR_SUBSCRIPT_SCALAR +# endif +#endif + +#if !defined(SIMDE_DISABLE_OPENMP) + #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) + #define SIMDE_ENABLE_OPENMP + #endif +#endif + +#if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) +# define SIMDE_ENABLE_CILKPLUS +#endif + +#if defined(SIMDE_ENABLE_OPENMP) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) +# if defined(__clang__) +# define SIMDE_VECTORIZE_REDUCTION(r) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ + HEDLEY_PRAGMA(omp simd reduction(r)) \ + HEDLEY_DIAGNOSTIC_POP +# else +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) +# endif +# if !defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) +# else +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) +# endif +#elif defined(SIMDE_ENABLE_CILKPLUS) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) +# define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) +#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) +# define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# 
define SIMDE_VECTORIZE_ALIGNED(a) +#elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) +# define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_ALIGNED(a) +#else +# define SIMDE_VECTORIZE +# define SIMDE_VECTORIZE_SAFELEN(l) +# define SIMDE_VECTORIZE_REDUCTION(r) +# define SIMDE_VECTORIZE_ALIGNED(a) +#endif + +#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) + +/* Intended for checking coverage, you should never use this in + production. */ +#if defined(SIMDE_NO_INLINE) +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#else +# define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if defined(SIMDE_NO_INLINE) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static +#elif defined(SIMDE_CONSTRAINED_COMPILATION) +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES static +#else +# define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static +#endif + +#if \ + HEDLEY_HAS_ATTRIBUTE(unused) || \ + HEDLEY_GCC_VERSION_CHECK(2,95,0) +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) +#else +# define SIMDE_FUNCTION_POSSIBLY_UNUSED_ +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ + +#if defined(_MSC_VER) +# define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS +#else +# define SIMDE_BEGIN_DECLS_ \ + HEDLEY_DIAGNOSTIC_PUSH \ + SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ + HEDLEY_BEGIN_C_DECLS +# define SIMDE_END_DECLS_ \ + HEDLEY_END_C_DECLS \ + HEDLEY_DIAGNOSTIC_POP +#endif + +#if defined(__SIZEOF_INT128__) +# define SIMDE_HAVE_INT128_ +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ +typedef __int128 simde_int128; +typedef unsigned __int128 simde_uint128; +HEDLEY_DIAGNOSTIC_POP +#endif + +#if !defined(SIMDE_ENDIAN_LITTLE) +# define SIMDE_ENDIAN_LITTLE 1234 +#endif +#if !defined(SIMDE_ENDIAN_BIG) +# define SIMDE_ENDIAN_BIG 4321 +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ +# if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# elif defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +/* We know the endianness of some common architectures. Common + * architectures not listed (ARM, POWER, MIPS, etc.) here are + * bi-endian. */ +# elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__s390x__) || defined(__zarch__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +/* Looks like we'll have to rely on the platform. If we're missing a + * platform, please let us know. 
*/ +# elif defined(_WIN32) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(sun) || defined(__sun) /* Solaris */ +# include +# if defined(_LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(_BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__APPLE__) +# include +# if defined(__LITTLE_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BIG_ENDIAN__) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) +# include +# if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) +# include +# if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE +# elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) +# define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG +# endif +# endif +#endif + +#if \ + HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define simde_bswap64(v) __builtin_bswap64(v) +#elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) + #define simde_bswap64(v) _byteswap_uint64(v) +#else + SIMDE_FUNCTION_ATTRIBUTES + uint64_t + simde_bswap64(uint64_t v) { + return + ((v & (((uint64_t) 0xff) << 56)) >> 56) | + ((v & (((uint64_t) 0xff) << 48)) >> 40) | + ((v & (((uint64_t) 0xff) << 40)) >> 24) | + ((v & (((uint64_t) 0xff) << 32)) >> 8) | + ((v & (((uint64_t) 0xff) << 24)) << 8) | + ((v & (((uint64_t) 0xff) << 16)) << 24) | + ((v & (((uint64_t) 0xff) << 8)) << 40) | + ((v & (((uint64_t) 0xff) )) << 56); + } +#endif + +#if !defined(SIMDE_ENDIAN_ORDER) +# error Unknown byte order; please file a bug +#else +# if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE +# define simde_endian_bswap64_be(value) simde_bswap64(value) +# define simde_endian_bswap64_le(value) (value) +# elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG +# define simde_endian_bswap64_be(value) (value) +# define simde_endian_bswap64_le(value) simde_bswap64(value) +# endif +#endif + +/* TODO: we should at least make an attempt to detect the correct + types for simde_float32/float64 instead of just assuming float and + double. 
*/ + +#if !defined(SIMDE_FLOAT32_TYPE) +# define SIMDE_FLOAT32_TYPE float +# define SIMDE_FLOAT32_C(value) value##f +#else +# define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) +#endif +typedef SIMDE_FLOAT32_TYPE simde_float32; + +#if !defined(SIMDE_FLOAT64_TYPE) +# define SIMDE_FLOAT64_TYPE double +# define SIMDE_FLOAT64_C(value) value +#else +# define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) +#endif +typedef SIMDE_FLOAT64_TYPE simde_float64; + +#if defined(__cplusplus) + typedef bool simde_bool; +#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + typedef _Bool simde_bool; +#elif defined(bool) + typedef bool simde_bool; +#else + #include + typedef bool simde_bool; +#endif + +#if HEDLEY_HAS_WARNING("-Wbad-function-cast") +# define SIMDE_CONVERT_FTOI(T,v) \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ + HEDLEY_STATIC_CAST(T, (v)) \ + HEDLEY_DIAGNOSTIC_POP +#else +# define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) +#endif + +/* TODO: detect compilers which support this outside of C11 mode */ +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) +#else + #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) + #define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) +#endif + +#if HEDLEY_HAS_WARNING("-Wfloat-equal") +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") +#elif HEDLEY_GCC_VERSION_CHECK(3,0,0) +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#else +# define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL +#endif + +/* Some functions can trade accuracy for speed. For those functions + you can control the trade-off using this macro. Possible values: + + 0: prefer speed + 1: reasonable trade-offs + 2: prefer accuracy */ +#if !defined(SIMDE_ACCURACY_PREFERENCE) +# define SIMDE_ACCURACY_PREFERENCE 1 +#endif + +#if defined(__STDC_HOSTED__) +# define SIMDE_STDC_HOSTED __STDC_HOSTED__ +#else +# if \ + defined(HEDLEY_PGI_VERSION) || \ + defined(HEDLEY_MSVC_VERSION) +# define SIMDE_STDC_HOSTED 1 +# else +# define SIMDE_STDC_HOSTED 0 +# endif +#endif + +/* Try to deal with environments without a standard library. 
*/ +#if !defined(simde_memcpy) + #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) + #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) + #endif +#endif +#if !defined(simde_memset) + #if HEDLEY_HAS_BUILTIN(__builtin_memset) + #define simde_memset(s, c, n) __builtin_memset(s, c, n) + #endif +#endif +#if !defined(simde_memcmp) + #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) + #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) + #endif +#endif + +#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) + #if !defined(SIMDE_NO_STRING_H) + #if defined(__has_include) + #if !__has_include() + #define SIMDE_NO_STRING_H + #endif + #elif (SIMDE_STDC_HOSTED == 0) + #define SIMDE_NO_STRING_H + #endif + #endif + + #if !defined(SIMDE_NO_STRING_H) + #include + #if !defined(simde_memcpy) + #define simde_memcpy(dest, src, n) memcpy(dest, src, n) + #endif + #if !defined(simde_memset) + #define simde_memset(s, c, n) memset(s, c, n) + #endif + #if !defined(simde_memcmp) + #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) + #endif + #else + /* These are meant to be portable, not fast. If you're hitting them you + * should think about providing your own (by defining the simde_memcpy + * macro prior to including any SIMDe files) or submitting a patch to + * SIMDe so we can detect your system-provided memcpy/memset, like by + * adding your compiler to the checks for __builtin_memcpy and/or + * __builtin_memset. */ + #if !defined(simde_memcpy) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memcpy_(void* dest, const void* src, size_t len) { + char* dest_ = HEDLEY_STATIC_CAST(char*, dest); + const char* src_ = HEDLEY_STATIC_CAST(const char*, src); + for (size_t i = 0 ; i < len ; i++) { + dest_[i] = src_[i]; + } + } + #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) + #endif + + #if !defined(simde_memset) + SIMDE_FUNCTION_ATTRIBUTES + void + simde_memset_(void* s, int c, size_t len) { + char* s_ = HEDLEY_STATIC_CAST(char*, s); + char c_ = HEDLEY_STATIC_CAST(char, c); + for (size_t i = 0 ; i < len ; i++) { + s_[i] = c_; + } + } + #define simde_memset(s, c, n) simde_memset_(s, c, n) + #endif + + #if !defined(simde_memcmp) + SIMDE_FUNCTION_ATTRIBUTES + int + simde_memcmp_(const void *s1, const void *s2, size_t len) { + const unsigned char* s1_ = HEDLEY_STATIC_CAST(const unsigned char*, s1); + const unsigned char* s2_ = HEDLEY_STATIC_CAST(const unsigned char*, s2); + for (size_t i = 0 ; i < len ; i++) { + if (s1_[i] != s2_[i]) { + return (int) (s1_[i] - s2_[i]); + } + } + return 0; + } + #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) + #endif + #endif +#endif + +/*** Functions that quiet a signaling NaN ***/ + +static HEDLEY_INLINE +double +simde_math_quiet(double x) { + uint64_t tmp, mask; + if (!simde_math_isnan(x)) { + return x; + } + simde_memcpy(&tmp, &x, 8); + mask = 0x7ff80000; + mask <<= 32; + tmp |= mask; + simde_memcpy(&x, &tmp, 8); + return x; +} + +static HEDLEY_INLINE +float +simde_math_quietf(float x) { + uint32_t tmp; + if (!simde_math_isnanf(x)) { + return x; + } + simde_memcpy(&tmp, &x, 4); + tmp |= 0x7fc00000lu; + simde_memcpy(&x, &tmp, 4); + return x; +} + +#if defined(FE_ALL_EXCEPT) + #define SIMDE_HAVE_FENV_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_FENV_H + #endif +#elif SIMDE_STDC_HOSTED == 1 + #include + #define SIMDE_HAVE_FENV_H +#endif + +#if defined(EXIT_FAILURE) + #define SIMDE_HAVE_STDLIB_H +#elif defined(__has_include) + #if __has_include() + #include + #define SIMDE_HAVE_STDLIB_H + #endif +#elif SIMDE_STDC_HOSTED == 1 +
#include + #define SIMDE_HAVE_STDLIB_H +#endif + +#if defined(__has_include) +# if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() +# include +# elif __has_include() +# include +# endif +# if __has_include() +# include +# endif +#elif SIMDE_STDC_HOSTED == 1 +# include +# include +#endif + +#define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ + static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ + T_To \ + Name (T_From value) { \ + T_To r; \ + simde_memcpy(&r, &value, sizeof(r)); \ + return r; \ + } + +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) +SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/check.h :: */ +/* Check (assertions) + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_CHECK_H) +#define SIMDE_CHECK_H + +#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#include + +#if !defined(_WIN32) +# define SIMDE_SIZE_MODIFIER "z" +# define SIMDE_CHAR_MODIFIER "hh" +# define SIMDE_SHORT_MODIFIER "h" +#else +# if defined(_M_X64) || defined(__amd64__) +# define SIMDE_SIZE_MODIFIER "I64" +# else +# define SIMDE_SIZE_MODIFIER "" +# endif +# define SIMDE_CHAR_MODIFIER "" +# define SIMDE_SHORT_MODIFIER "" +#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1500) +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) +# define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) +#else +# define SIMDE_PUSH_DISABLE_MSVC_C4127_ +# define SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#if !defined(simde_errorf) +# if defined(__has_include) +# if __has_include() +# include +# endif +# elif defined(SIMDE_STDC_HOSTED) +# if SIMDE_STDC_HOSTED == 1 +# include +# endif +# elif defined(__STDC_HOSTED__) +# if __STDC_HOSTETD__ == 1 +# include +# endif +# endif + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/debug-trap.h :: */ +/* Debugging assertions and traps + * Portable Snippets - https://github.com/nemequ/portable-snippets + * Created by Evan Nemerson + * + * To the extent possible under law, the authors have waived all + * copyright and related or neighboring rights to this code. 
For + * details, see the Creative Commons Zero 1.0 Universal license at + * https://creativecommons.org/publicdomain/zero/1.0/ + * + * SPDX-License-Identifier: CC0-1.0 + */ + +#if !defined(SIMDE_DEBUG_TRAP_H) +#define SIMDE_DEBUG_TRAP_H + +#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) +# define SIMDE_NDEBUG 1 +#endif + +#if defined(__has_builtin) && !defined(__ibmxl__) +# if __has_builtin(__builtin_debugtrap) +# define simde_trap() __builtin_debugtrap() +# elif __has_builtin(__debugbreak) +# define simde_trap() __debugbreak() +# endif +#endif +#if !defined(simde_trap) +# if defined(_MSC_VER) || defined(__INTEL_COMPILER) +# define simde_trap() __debugbreak() +# elif defined(__ARMCC_VERSION) +# define simde_trap() __breakpoint(42) +# elif defined(__ibmxl__) || defined(__xlC__) +# include +# define simde_trap() __trap(42) +# elif defined(__DMC__) && defined(_M_IX86) + static inline void simde_trap(void) { __asm int 3h; } +# elif defined(__i386__) || defined(__x86_64__) + static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } +# elif defined(__thumb__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } +# elif defined(__aarch64__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } +# elif defined(__arm__) + static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } +# elif defined (__alpha__) && !defined(__osf__) + static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } +# elif defined(_54_) + static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } +# elif defined(_55_) + static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } +# elif defined(_64P_) + static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } +# elif defined(_6x_) + static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } +# elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) +# define simde_trap() __builtin_trap() +# else +# include +# if defined(SIGTRAP) +# define simde_trap() raise(SIGTRAP) +# else +# define simde_trap() raise(SIGABRT) +# endif +# endif +#endif + +#if defined(HEDLEY_LIKELY) +# define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) +#elif defined(__GNUC__) && (__GNUC__ >= 3) +# define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) +#else +# define SIMDE_DBG_LIKELY(expr) (!!(expr)) +#endif + +#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) +# define simde_dbg_assert(expr) do { \ + if (!SIMDE_DBG_LIKELY(expr)) { \ + simde_trap(); \ + } \ + } while (0) +#else +# define simde_dbg_assert(expr) +#endif + +#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ +/* :: End simde/debug-trap.h :: */ + + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ +# if defined(EOF) +# define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) +# else +# define simde_errorf(format, ...) (simde_trap()) +# endif + HEDLEY_DIAGNOSTIC_POP +#endif + +#define simde_error(msg) simde_errorf("%s", msg) + +#if defined(SIMDE_NDEBUG) || \ + (defined(__cplusplus) && (__cplusplus < 201103L)) || \ + (defined(__STDC__) && (__STDC__ < 199901L)) +# if defined(SIMDE_CHECK_FAIL_DEFINED) +# define simde_assert(expr) +# else +# if defined(HEDLEY_ASSUME) +# define simde_assert(expr) HEDLEY_ASSUME(expr) +# elif HEDLEY_GCC_VERSION_CHECK(4,5,0) +# define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) +# elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) +# define simde_assert(expr) __assume(expr) +# else +# define simde_assert(expr) +# endif +# endif +# define simde_assert_true(expr) simde_assert(expr) +# define simde_assert_false(expr) simde_assert(!(expr)) +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) +# define simde_assert_double_equal(a, b, precision) +# define simde_assert_string_equal(a, b) +# define simde_assert_string_not_equal(a, b) +# define simde_assert_memory_equal(size, a, b) +# define simde_assert_memory_not_equal(size, a, b) +#else +# define simde_assert(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr "\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_true(expr) \ + do { \ + if (!HEDLEY_LIKELY(expr)) { \ + simde_error("assertion failed: " #expr " is not true\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_false(expr) \ + do { \ + if (!HEDLEY_LIKELY(!(expr))) { \ + simde_error("assertion failed: " #expr " is not false\n"); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ + do { \ + T simde_tmp_a_ = (a); \ + T simde_tmp_b_ = (b); \ + if (!(simde_tmp_a_ op simde_tmp_b_)) { \ + simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ + #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_double_equal(a, b, precision) \ + do { \ + const double simde_tmp_a_ = (a); \ + const double simde_tmp_b_ = (b); \ + const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? \ + -(simde_tmp_a_ - simde_tmp_b_) : \ + (simde_tmp_a_ - simde_tmp_b_); \ + if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ + simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." 
#precision "g)\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# include +# define simde_assert_string_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ + simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_string_not_equal(a, b) \ + do { \ + const char* simde_tmp_a_ = a; \ + const char* simde_tmp_b_ = b; \ + if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ + simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ + #a, #b, simde_tmp_a_, simde_tmp_b_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ + size_t simde_tmp_pos_; \ + for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ + if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ + simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ + #a, #b, simde_tmp_pos_); \ + break; \ + } \ + } \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ + +# define simde_assert_memory_not_equal(size, a, b) \ + do { \ + const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ + const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ + const size_t simde_tmp_size_ = (size); \ + if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ + simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ + #a, #b, simde_tmp_size_); \ + } \ + SIMDE_PUSH_DISABLE_MSVC_C4127_ \ + } while (0) \ + SIMDE_POP_DISABLE_MSVC_C4127_ +#endif + +#define simde_assert_type(T, fmt, a, op, b) \ + simde_assert_type_full("", "", T, fmt, a, op, b) + +#define simde_assert_char(a, op, b) \ + simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_uchar(a, op, b) \ + simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) +#define simde_assert_short(a, op, b) \ + simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) +#define simde_assert_ushort(a, op, b) \ + simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) +#define simde_assert_int(a, op, b) \ + simde_assert_type(int, "d", a, op, b) +#define simde_assert_uint(a, op, b) \ + simde_assert_type(unsigned int, "u", a, op, b) +#define simde_assert_long(a, op, b) \ + simde_assert_type(long int, "ld", a, op, b) +#define simde_assert_ulong(a, op, b) \ + simde_assert_type(unsigned long int, "lu", a, op, b) +#define simde_assert_llong(a, op, b) \ + simde_assert_type(long long int, "lld", a, op, b) +#define simde_assert_ullong(a, op, b) \ + simde_assert_type(unsigned long long int, "llu", a, op, b) + +#define simde_assert_size(a, op, b) \ + simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) + +#define simde_assert_float(a, op, b) \ + simde_assert_type(float, "f", a, op, b) 
+#define simde_assert_double(a, op, b) \ + simde_assert_type(double, "g", a, op, b) +#define simde_assert_ptr(a, op, b) \ + simde_assert_type(const void*, "p", a, op, b) + +#define simde_assert_int8(a, op, b) \ + simde_assert_type(int8_t, PRIi8, a, op, b) +#define simde_assert_uint8(a, op, b) \ + simde_assert_type(uint8_t, PRIu8, a, op, b) +#define simde_assert_int16(a, op, b) \ + simde_assert_type(int16_t, PRIi16, a, op, b) +#define simde_assert_uint16(a, op, b) \ + simde_assert_type(uint16_t, PRIu16, a, op, b) +#define simde_assert_int32(a, op, b) \ + simde_assert_type(int32_t, PRIi32, a, op, b) +#define simde_assert_uint32(a, op, b) \ + simde_assert_type(uint32_t, PRIu32, a, op, b) +#define simde_assert_int64(a, op, b) \ + simde_assert_type(int64_t, PRIi64, a, op, b) +#define simde_assert_uint64(a, op, b) \ + simde_assert_type(uint64_t, PRIu64, a, op, b) + +#define simde_assert_ptr_equal(a, b) \ + simde_assert_ptr(a, ==, b) +#define simde_assert_ptr_not_equal(a, b) \ + simde_assert_ptr(a, !=, b) +#define simde_assert_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) +#define simde_assert_ptr_null(ptr) \ + simde_assert_ptr(ptr, ==, NULL) +#define simde_assert_ptr_not_null(ptr) \ + simde_assert_ptr(ptr, !=, NULL) + +#endif /* !defined(SIMDE_CHECK_H) */ +/* :: End simde/check.h :: */ + +/* GCC/clang have a bunch of functionality in builtins which we would + * like to access, but the suffixes indicate whether they operate on + * int, long, or long long, not fixed width types (e.g., int32_t). + * we use these macros to attempt to map from fixed-width to the + * names GCC uses. Note that you should still cast the input(s) and + * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if + * types are the same size they may not be compatible according to the + * compiler. For example, on x86 long and long long are generally + * both 64 bits, but platforms vary on whether an int64_t is mapped + * to a long or long long.
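+ * + * For example (illustrative only, assuming an LP64 target where + * INT64_MAX == LONG_MAX): there SIMDE_BUILTIN_SUFFIX_64_ is defined to l + * and SIMDE_BUILTIN_TYPE_64_ to long, so SIMDE_BUILTIN_64_(popcount) + * expands to __builtin_popcountl and SIMDE_BUILTIN_HAS_64_(popcount) + * checks whether that builtin exists; callers still cast arguments and + * results to/from the fixed-width int64_t/uint64_t types.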
*/ + +#include + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ + +#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ + #define SIMDE_BUILTIN_TYPE_8_ int +#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ l + #define SIMDE_BUILTIN_TYPE_8_ long +#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_8_ ll + #define SIMDE_BUILTIN_TYPE_8_ long long +#endif + +#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ + #define SIMDE_BUILTIN_TYPE_16_ int +#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ l + #define SIMDE_BUILTIN_TYPE_16_ long +#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_16_ ll + #define SIMDE_BUILTIN_TYPE_16_ long long +#endif + +#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ + #define SIMDE_BUILTIN_TYPE_32_ int +#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ l + #define SIMDE_BUILTIN_TYPE_32_ long +#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_32_ ll + #define SIMDE_BUILTIN_TYPE_32_ long long +#endif + +#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ + #define SIMDE_BUILTIN_TYPE_64_ int +#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ l + #define SIMDE_BUILTIN_TYPE_64_ long +#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) + #define SIMDE_BUILTIN_SUFFIX_64_ ll + #define SIMDE_BUILTIN_TYPE_64_ long long +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ +HEDLEY_DIAGNOSTIC_POP + +#if defined(SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) + #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) +#else + #define SIMDE_BUILTIN_HAS_8_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) + #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) +#else + #define SIMDE_BUILTIN_HAS_16_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) + #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) +#else + #define SIMDE_BUILTIN_HAS_32_(name) 0 +#endif +#if defined(SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) + #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) +#else + #define SIMDE_BUILTIN_HAS_64_(name) 0 +#endif + +#if !defined(__cplusplus) + #if defined(__clang__) + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #define SIMDE_GENERIC_(...) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") + #define SIMDE_GENERIC_(...) 
(__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ + _Generic(__VA_ARGS__); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #endif + #elif \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ + HEDLEY_HAS_EXTENSION(c_generic_selections) || \ + HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + HEDLEY_ARM_VERSION_CHECK(5,3,0) + #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) + #endif +#endif + +/* Sometimes we run into problems with specific versions of compilers + which make the native versions unusable for us. Often this is due + to missing functions, sometimes buggy implementations, etc. These + macros are how we check for specific bugs. As they are fixed we'll + start only defining them for problematic compiler versions. */ + +#if !defined(SIMDE_IGNORE_COMPILER_BUGS) +# if defined(HEDLEY_GCC_VERSION) +# if !HEDLEY_GCC_VERSION_CHECK(4,9,0) +# define SIMDE_BUG_GCC_REV_208793 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(5,0,0) +# define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(6,0,0) +# define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE +# endif +# if !HEDLEY_GCC_VERSION_CHECK(4,6,0) +# define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ +# endif +# if !HEDLEY_GCC_VERSION_CHECK(8,0,0) +# define SIMDE_BUG_GCC_REV_247851 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_REV_274313 +# define SIMDE_BUG_GCC_91341 +# define SIMDE_BUG_GCC_92035 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_BAD_VEXT_REV32 +# endif +# if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) +# define SIMDE_BUG_GCC_94482 +# endif +# if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_53784 +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ +# define SIMDE_BUG_GCC_95144 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(11,2,0) +# define SIMDE_BUG_GCC_95483 +# endif +# if defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_100927 +# endif +# define SIMDE_BUG_GCC_98521 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_94488 +# endif +# if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_GCC_REV_264019 +# endif +# if defined(SIMDE_ARCH_ARM) +# define SIMDE_BUG_GCC_95399 +# define SIMDE_BUG_GCC_95471 +# elif defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_GCC_95227 +# define SIMDE_BUG_GCC_95782 +# if !HEDLEY_GCC_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_96174 +# endif +# elif defined(SIMDE_ARCH_ZARCH) +# define SIMDE_BUG_GCC_95782 +# if HEDLEY_GCC_VERSION_CHECK(10,0,0) +# define SIMDE_BUG_GCC_101614 +# endif +# endif +# if defined(SIMDE_ARCH_MIPS_MSA) +# define SIMDE_BUG_GCC_97248 +# if !HEDLEY_GCC_VERSION_CHECK(12,1,0) +# define SIMDE_BUG_GCC_100760 +# define SIMDE_BUG_GCC_100761 +# define SIMDE_BUG_GCC_100762 +# endif +# endif +# define SIMDE_BUG_GCC_95399 +# if !defined(__OPTIMIZE__) +# define SIMDE_BUG_GCC_105339 +# endif 
+# elif defined(__clang__) +# if defined(SIMDE_ARCH_AARCH64) +# define SIMDE_BUG_CLANG_45541 +# define SIMDE_BUG_CLANG_48257 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46840 +# define SIMDE_BUG_CLANG_46844 +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VI64_OPS +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) +# define SIMDE_BUG_CLANG_GIT_4EC445B8 +# define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ +# endif +# endif +# if defined(SIMDE_ARCH_ARM) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) +# define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES +# endif +# endif +# if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_CLANG_46770 +# endif +# if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) +# define SIMDE_BUG_CLANG_50893 +# define SIMDE_BUG_CLANG_50901 +# endif +# endif +# if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) +# define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT +# endif +# if defined(SIMDE_ARCH_POWER) +# define SIMDE_BUG_CLANG_50932 +# if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) +# define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS +# endif +# endif +# if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) +# if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) +# define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_BAD_MADD +# endif +# if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) +# define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ +# endif +# if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) +# define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ +# endif +# if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_45931 +# endif +# if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) +# define SIMDE_BUG_CLANG_44589 +# endif +# define SIMDE_BUG_CLANG_48673 +# endif +# define SIMDE_BUG_CLANG_45959 +# if defined(SIMDE_ARCH_WASM_SIMD128) +# define SIMDE_BUG_CLANG_60655 +# endif +# elif defined(HEDLEY_MSVC_VERSION) +# if defined(SIMDE_ARCH_X86) +# define SIMDE_BUG_MSVC_ROUND_EXTRACT +# endif +# elif defined(HEDLEY_INTEL_VERSION) +# define SIMDE_BUG_INTEL_857088 +# elif defined(HEDLEY_MCST_LCC_VERSION) +# define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS +# define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 +# define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT +# elif defined(HEDLEY_PGI_VERSION) +# define SIMDE_BUG_PGI_30104 +# define SIMDE_BUG_PGI_30107 +# define SIMDE_BUG_PGI_30106 +# endif +#endif + +/* GCC and Clang both have the same issue: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 + * https://bugs.llvm.org/show_bug.cgi?id=45931 + * This is just an easy way to work around it. 
+ */ +#if \ + (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ + HEDLEY_GCC_VERSION_CHECK(4,3,0) +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ + __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ + HEDLEY_DIAGNOSTIC_POP \ + simde_bug_ignore_sign_conversion_v_; \ + })) +#else +# define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) +#endif + +/* Usually the shift count is signed (for example, NEON or SSE). + * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. + * Further info: https://github.com/simd-everywhere/simde/pull/700 + */ +#if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) +#else + #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) +#endif + +/* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_COMMON_H) */ +/* :: End simde/simde-common.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS + +#if defined(SIMDE_X86_MMX_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#elif defined(SIMDE_X86_SSE_NATIVE) + #define SIMDE_X86_MMX_USE_NATIVE_TYPE +#endif + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + #include +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #include +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + #include +#endif + +#include +#include + +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_8 int8_t i8[8]; + SIMDE_ALIGN_TO_8 int16_t i16[4]; + SIMDE_ALIGN_TO_8 int32_t i32[2]; + SIMDE_ALIGN_TO_8 int64_t i64[1]; + SIMDE_ALIGN_TO_8 uint8_t u8[8]; + SIMDE_ALIGN_TO_8 uint16_t u16[4]; + SIMDE_ALIGN_TO_8 uint32_t u32[2]; + SIMDE_ALIGN_TO_8 uint64_t u64[1]; + SIMDE_ALIGN_TO_8 simde_float32 f32[2]; + SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; + #endif + + #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + __m64 n; + #endif + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t neon_i8; + int16x4_t neon_i16; + int32x2_t neon_i32; + int64x1_t neon_i64; + uint8x8_t neon_u8; + uint16x4_t neon_u16; + uint32x2_t neon_u32; + uint64x1_t neon_u64; + float32x2_t neon_f32; + #endif + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + int8x8_t mmi_i8; + int16x4_t mmi_i16; + int32x2_t mmi_i32; + int64_t mmi_i64; + uint8x8_t mmi_u8; + uint16x4_t mmi_u16; + uint32x2_t mmi_u32; + uint64_t mmi_u64; + #endif +} simde__m64_private; + +#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) + typedef __m64 simde__m64; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef 
int32x2_t simde__m64; +#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + typedef int32x2_t simde__m64; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; +#else + typedef simde__m64_private simde__m64; +#endif + +#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) + #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES + typedef simde__m64 __m64; +#endif + +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); +HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde__m64_from_private(simde__m64_private v) { + simde__m64 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64_private +simde__m64_to_private(simde__m64 v) { + simde__m64_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ + SIMDE_FUNCTION_ATTRIBUTES \ + simde__##simde_type \ + simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ + simde__##simde_type##_private r_; \ + r_.isax##_##fragment = value; \ + return simde__##simde_type##_from_private(r_); \ + } \ + \ + SIMDE_FUNCTION_ATTRIBUTES \ + source_type \ + simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ + simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ + return r_.isax##_##fragment; \ + } + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) +#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) 
+ r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) +# define _m_paddb(a, b) simde_m_paddb(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) +# define _m_paddw(a, b) simde_mm_add_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) +# define _m_paddd(a, b) simde_mm_add_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { + r_.i8[i] = INT8_MAX; + } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { + r_.i8[i] = INT8_MIN; + } else { + r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) +# define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m64 +simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); + if (x > UINT8_MAX) + r_.u8[i] = UINT8_MAX; + else + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) +# define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { + r_.i16[i] = INT16_MAX; + } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { + r_.i16[i] = SHRT_MIN; + } else { + r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) +# define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_adds_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const uint32_t x = a_.u16[i] + b_.u16[i]; + if (x > UINT16_MAX) + r_.u16[i] = UINT16_MAX; + else + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) +# define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_and_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_and_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = 
vand_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 & b_.i64; + #else + r_.i64[0] = a_.i64[0] & b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pand(a, b) simde_mm_and_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_and_si64(a, b) simde_mm_and_si64(a, b) +# define _m_pand(a, b) simde_mm_and_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_andnot_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) +# define _m_pandn(a, b) simde_mm_andnot_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) +# define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) +# define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpeq_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) +# define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) +# define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) +# define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cmpgt_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) +# define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtm64_si64 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtm64_si64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s64(a_.neon_i64, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i64[0]; + #endif + #endif +} +#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) +# define _m_to_int64(a) simde_mm_cvtm64_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi32_si64 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtsi32_si64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t av[2] = { a, 0 }; + r_.neon_i32 = vld1_s32(av); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) +# define _m_from_int(a) simde_mm_cvtsi32_si64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtsi64_m64 (int64_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) + return _mm_cvtsi64_m64(a); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1_s64(&a); + #else + r_.i64[0] = a; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) +# define _m_from_int64(a) simde_mm_cvtsi64_m64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi64_si32 (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtsi64_si32(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_empty (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + _mm_empty(); + #else + /* noop */ + #endif +} +#define simde_m_empty() simde_mm_empty() +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_empty() simde_mm_empty() +# define _m_empty() simde_mm_empty() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_madd_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) +# define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) +# define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_mullo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); + const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); + r_.neon_u16 = t2; + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) +# define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_or_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_or_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 | b_.i64; + #else + r_.i64[0] = a_.i64[0] | b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_por(a, b) simde_mm_or_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_or_si64(a, b) simde_mm_or_si64(a, b) +# define _m_por(a, b) simde_mm_or_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] < INT8_MIN) { + r_.i8[i] = INT8_MIN; + } else if (a_.i16[i] > INT8_MAX) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < INT8_MIN) { + r_.i8[i + 4] = INT8_MIN; + } else if (b_.i16[i] > INT8_MAX) { + r_.i8[i + 4] = INT8_MAX; + } else { + r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) +# define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { + if (a_.i32[i] < SHRT_MIN) { + r_.i16[i] = SHRT_MIN; + } else if (a_.i32[i] > INT16_MAX) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { + if (b_.i32[i] < SHRT_MIN) { + r_.i16[i + 2] = SHRT_MIN; + } else if (b_.i32[i] > INT16_MAX) { + 
r_.i16[i + 2] = INT16_MAX; + } else { + r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) +# define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_packs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); + + /* Set elements which are < 0 to 0 */ + const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); + + /* Vector with all s16 elements set to UINT8_MAX */ + const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); + + /* Elements which are within the acceptable range */ + const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); + const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); + + /* Final values as 16-bit integers */ + const int16x8_t values = vorrq_s16(le_max, gt_max); + + r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (a_.i16[i] > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else if (a_.i16[i] < 0) { + r_.u8[i] = 0; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); + } + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] > UINT8_MAX) { + r_.u8[i + 4] = UINT8_MAX; + } else if (b_.i16[i] < 0) { + r_.u8[i + 4] = 0; + } else { + r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) +# define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i8 = vld1_s8(v); + #else + r_.i8[0] = e0; + r_.i8[1] = e1; + r_.i8[2] = e2; + r_.i8[3] = e3; + r_.i8[4] = e4; + r_.i8[5] = e5; + r_.i8[6] = e6; + r_.i8[7] = e7; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m64_private r_; + + #if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi8( + HEDLEY_STATIC_CAST(int8_t, e7), + HEDLEY_STATIC_CAST(int8_t, e6), + HEDLEY_STATIC_CAST(int8_t, e5), + HEDLEY_STATIC_CAST(int8_t, e4), + 
HEDLEY_STATIC_CAST(int8_t, e3), + HEDLEY_STATIC_CAST(int8_t, e2), + HEDLEY_STATIC_CAST(int8_t, e1), + HEDLEY_STATIC_CAST(int8_t, e0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u8 = vld1_u8(v); + #else + r_.u8[0] = e0; + r_.u8[1] = e1; + r_.u8[2] = e2; + r_.u8[3] = e3; + r_.u8[4] = e4; + r_.u8[5] = e5; + r_.u8[6] = e6; + r_.u8[7] = e7; + #endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_pi16(e3, e2, e1, e0); + #else + simde__m64_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; + r_.neon_i16 = vld1_s16(v); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi16( + HEDLEY_STATIC_CAST(int16_t, e3), + HEDLEY_STATIC_CAST(int16_t, e2), + HEDLEY_STATIC_CAST(int16_t, e1), + HEDLEY_STATIC_CAST(int16_t, e0) + ); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; + r_.neon_u16 = vld1_u16(v); +#else + r_.u16[0] = e0; + r_.u16[1] = e1; + r_.u16[2] = e2; + r_.u16[3] = e3; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32( + HEDLEY_STATIC_CAST(int32_t, e1), + HEDLEY_STATIC_CAST(int32_t, e0)); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; + r_.neon_u32 = vld1_u32(v); +#else + r_.u32[0] = e0; + r_.u32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set_pi32 (int32_t e1, int32_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_X86_MMX_NATIVE) + r_.n = _mm_set_pi32(e1, e0); +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; + r_.neon_i32 = vld1_s32(v); +#else + r_.i32[0] = e0; + r_.i32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_pi64 (int64_t e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; + r_.neon_i64 = vld1_s64(v); +#else + r_.i64[0] = e0; +#endif + + return simde__m64_from_private(r_); +} + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { + simde__m64_private r_; + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; + r_.neon_f32 = vld1_f32(v); +#else + r_.f32[0] = e0; + r_.f32[1] = e1; +#endif + + return simde__m64_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi8 (int8_t a) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi8(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i8 = vmov_n_s8(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi8(a, a, a, a, a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi8(a) simde_mm_set1_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi16 (int16_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi16(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i16 = vmov_n_s16(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi16(a, a, a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi16(a) simde_mm_set1_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_set1_pi32 (int32_t a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_pi32(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_i32 = vmov_n_s32(a); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(a, a); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_set1_pi32(a) simde_mm_set1_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi16(e3, e2, e1, e0); + #else + return simde_mm_set_pi16(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setr_pi32 (int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_pi32(e1, e0); + #else + return simde_mm_set_pi32(e0, e1); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_setzero_si64 (void) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_setzero_si64(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + simde__m64_private r_; + r_.neon_u32 = vmov_n_u32(0); + return simde__m64_from_private(r_); + #else + return simde_mm_set_pi32(0, 0); + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_si64() simde_mm_setzero_si64() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_load_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_loadu_si64 (const void* mem_addr) { + simde__m64 r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { + simde_memcpy(mem_addr, 
&value, sizeof(value)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_x_mm_setone_si64 (void) { + return simde_mm_set1_pi32(~INT32_C(0)); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) +# define _m_psllw(a, count) simde_mm_sll_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count_.u64[0]; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) +# define _m_pslld(a, count) simde_mm_sll_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psllh_s(a_.mmi_i16, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count > 15)) + 
return simde_mm_setzero_si64(); + + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) +# define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_slli_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] << count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) +# define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_slli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_slli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); + #else + r_.u64[0] = a_.u64[0] << count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) +# define _m_psllqi(a, count) simde_mm_slli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sll_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 << count_.i64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] << count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) +# define _m_psllq(a, count) simde_mm_sll_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi16 (simde__m64 a, simde__m64 
count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) + return simde_mm_setzero_si64(); + + r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { + r_.u16[i] = a_.u16[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) +# define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count_.u64[0]; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { + r_.u32[i] = a_.u32[i] >> count_.u64[0]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) +# define _m_psrld(a, count) simde_mm_srl_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) +# define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_pi32(a, count); + #else + simde__m64_private 
r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> count; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> count; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) +# define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srli_si64 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srli_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> count; + #else + r_.u64[0] = a_.u64[0] >> count; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) +# define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_srl_si64(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = a_.u64 >> count_.u64; + #else + if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { + simde_memset(&r_, 0, sizeof(r_)); + return simde__m64_from_private(r_); + } + + r_.u64[0] = a_.u64[0] >> count_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) +# define _m_psrlq(a, count) simde_mm_srl_si64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi16 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psrah_s(a_.mmi_i16, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) +# define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_srai_pi32 (simde__m64 a, int count) { + #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_srai_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> (count & 0xff); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psraw_s(a_.mmi_i32, count); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> (count & 0xff); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) +# define _m_psradi(a, count) simde_mm_srai_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi16(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) +# define _m_psraw(a, count) simde_mm_sra_pi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sra_pi32(a, count); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private count_ = simde__m64_to_private(count); + const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> cnt; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) +# define _m_psrad(a, count) simde_mm_sra_pi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) +# define _m_psubb(a, b) simde_mm_sub_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) +# define _m_psubw(a, b) simde_mm_sub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) +# define 
_m_psubd(a, b) simde_mm_sub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { + r_.i8[i] = INT8_MIN; + } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { + r_.i8[i] = INT8_MAX; + } else { + r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) +# define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + const int32_t x = a_.u8[i] - b_.u8[i]; + if (x < 0) { + r_.u8[i] = 0; + } else if (x > UINT8_MAX) { + r_.u8[i] = UINT8_MAX; + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) +# define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { + r_.i16[i] = SHRT_MIN; + } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { + r_.i16[i] = INT16_MAX; + } else { + r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) +# define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_subs_pu16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + 
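+    /* Per-lane unsigned saturating subtraction: results that would go below
+     * zero clamp to 0 instead of wrapping, e.g. 5 - 9 gives 0 rather than 65532. */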
simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + const int x = a_.u16[i] - b_.u16[i]; + if (x < 0) { + r_.u16[i] = 0; + } else if (x > UINT16_MAX) { + r_.u16[i] = UINT16_MAX; + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); + } + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) +# define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); + #else + r_.i8[0] = a_.i8[4]; + r_.i8[1] = b_.i8[4]; + r_.i8[2] = a_.i8[5]; + r_.i8[3] = b_.i8[5]; + r_.i8[4] = a_.i8[6]; + r_.i8[5] = b_.i8[6]; + r_.i8[6] = a_.i8[7]; + r_.i8[7] = b_.i8[7]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) +# define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); + #else + r_.i16[0] = a_.i16[2]; + r_.i16[1] = b_.i16[2]; + r_.i16[2] = a_.i16[3]; + r_.i16[3] = b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) +# define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpackhi_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + 
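+      /* Scalar fallback: interleave the upper 32-bit lanes of the two inputs,
+       * producing { a[1], b[1] }, the same result as the shuffle path above. */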
r_.i32[0] = a_.i32[1]; + r_.i32[1] = b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) +# define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi8(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); + #else + r_.i8[0] = a_.i8[0]; + r_.i8[1] = b_.i8[0]; + r_.i8[2] = a_.i8[1]; + r_.i8[3] = b_.i8[1]; + r_.i8[4] = a_.i8[2]; + r_.i8[5] = b_.i8[2]; + r_.i8[6] = a_.i8[3]; + r_.i8[7] = b_.i8[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) +# define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi16(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); + #else + r_.i16[0] = a_.i16[0]; + r_.i16[1] = b_.i16[0]; + r_.i16[2] = a_.i16[1]; + r_.i16[3] = b_.i16[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) +# define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _mm_unpacklo_pi32(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) + r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); + #else + r_.i32[0] = a_.i32[0]; + r_.i32[1] = b_.i32[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) +# define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { + #if 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_xor_si64(a, b); + #else + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + r_.u64[0] = a_.u64[0] ^ b_.u64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) +# define _m_pxor(a, b) simde_mm_xor_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_m_to_int (simde__m64 a) { + #if defined(SIMDE_X86_MMX_NATIVE) + return _m_to_int(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) + #pragma clang diagnostic ignored "-Wvector-conversion" + #endif + return vget_lane_s32(a_.neon_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) +# define _m_to_int(a) simde_m_to_int(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_MMX_H) */ +/* :: End simde/x86/mmx.h :: */ + +#if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) + #define NOMINMAX + #include +#endif + +#if defined(__ARM_ACLE) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_ALIGN_TO_16 __m128 n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 
int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v16i8 lsx_i8; + v8i16 lsx_i16; + v4i32 lsx_i32; + v2i64 lsx_i64; + v16u8 lsx_u8; + v8u16 lsx_u16; + v4u32 lsx_u32; + v2u64 lsx_u64; + v4f32 lsx_f32; + v2f64 lsx_f64; + #endif +} simde__m128_private; + +#if defined(SIMDE_X86_SSE_NATIVE) + typedef __m128 simde__m128; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef float32x4_t simde__m128; +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; +#elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + typedef v4f32 simde__m128; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128_private simde__m128; +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + typedef simde__m128 __m128; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde__m128_from_private(simde__m128_private v) { + simde__m128 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128_private +simde__m128_to_private(simde__m128 v) { + simde__m128_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, 
neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(float) + simde__m128_to_altivec_f32(simde__m128 value) { + simde__m128_private r_ = simde__m128_to_private(value); + return r_.altivec_f32; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128 + simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { + simde__m128_private r_; + r_.altivec_f32 = value; + return simde__m128_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); +#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ + +#if defined(SIMDE_LOONGARCH_LSX_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) +#endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ + +enum { + #if defined(SIMDE_X86_SSE_NATIVE) + SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, + SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, + SIMDE_MM_ROUND_UP = _MM_ROUND_UP, + SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO + #else + SIMDE_MM_ROUND_NEAREST = 0x0000, + SIMDE_MM_ROUND_DOWN = 0x2000, + SIMDE_MM_ROUND_UP = 0x4000, + SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 + #endif +}; + +#if defined(_MM_FROUND_TO_NEAREST_INT) +# define SIMDE_MM_FROUND_TO_NEAREST_INT 
_MM_FROUND_TO_NEAREST_INT +# define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF +# define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF +# define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO +# define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION + +# define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC +# define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC +#else +# define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 +# define SIMDE_MM_FROUND_TO_NEG_INF 0x01 +# define SIMDE_MM_FROUND_TO_POS_INF 0x02 +# define SIMDE_MM_FROUND_TO_ZERO 0x03 +# define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 + +# define SIMDE_MM_FROUND_RAISE_EXC 0x00 +# define SIMDE_MM_FROUND_NO_EXC 0x08 +#endif + +#define SIMDE_MM_FROUND_NINT \ + (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_FLOOR \ + (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_CEIL \ + (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_TRUNC \ + (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_RINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) +#define SIMDE_MM_FROUND_NEARBYINT \ + (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) + +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) +# define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT +# define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF +# define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF +# define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO +# define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION +# define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC +# define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT +# define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR +# define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL +# define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC +# define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT +# define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT +#endif + +#if defined(_MM_EXCEPT_INVALID) +# define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID +#else +# define SIMDE_MM_EXCEPT_INVALID (0x0001) +#endif +#if defined(_MM_EXCEPT_DENORM) +# define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM +#else +# define SIMDE_MM_EXCEPT_DENORM (0x0002) +#endif +#if defined(_MM_EXCEPT_DIV_ZERO) +# define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO +#else +# define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) +#endif +#if defined(_MM_EXCEPT_OVERFLOW) +# define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW +#else +# define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) +#endif +#if defined(_MM_EXCEPT_UNDERFLOW) +# define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW +#else +# define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) +#endif +#if defined(_MM_EXCEPT_INEXACT) +# define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT +#else +# define SIMDE_MM_EXCEPT_INEXACT (0x0020) +#endif +#if defined(_MM_EXCEPT_MASK) +# define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK +#else +# define SIMDE_MM_EXCEPT_MASK \ + (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ + SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ + SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID + #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM + #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO + #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW + #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW + #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT + #define 
_MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK +#endif + +#if defined(_MM_MASK_INVALID) +# define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID +#else +# define SIMDE_MM_MASK_INVALID (0x0080) +#endif +#if defined(_MM_MASK_DENORM) +# define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM +#else +# define SIMDE_MM_MASK_DENORM (0x0100) +#endif +#if defined(_MM_MASK_DIV_ZERO) +# define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO +#else +# define SIMDE_MM_MASK_DIV_ZERO (0x0200) +#endif +#if defined(_MM_MASK_OVERFLOW) +# define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW +#else +# define SIMDE_MM_MASK_OVERFLOW (0x0400) +#endif +#if defined(_MM_MASK_UNDERFLOW) +# define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW +#else +# define SIMDE_MM_MASK_UNDERFLOW (0x0800) +#endif +#if defined(_MM_MASK_INEXACT) +# define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT +#else +# define SIMDE_MM_MASK_INEXACT (0x1000) +#endif +#if defined(_MM_MASK_MASK) +# define SIMDE_MM_MASK_MASK _MM_MASK_MASK +#else +# define SIMDE_MM_MASK_MASK \ + (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ + SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ + SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID + #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM + #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO + #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW + #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW + #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT + #define _MM_MASK_MASK SIMDE_MM_MASK_MASK +#endif + +#if defined(_MM_FLUSH_ZERO_MASK) +# define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK +#else +# define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_ON) +# define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON +#else +# define SIMDE_MM_FLUSH_ZERO_ON (0x8000) +#endif +#if defined(_MM_FLUSH_ZERO_OFF) +# define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF +#else +# define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK + #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF +#endif + +SIMDE_FUNCTION_ATTRIBUTES +unsigned int +SIMDE_MM_GET_ROUNDING_MODE(void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _MM_GET_ROUNDING_MODE(); + #elif defined(SIMDE_HAVE_FENV_H) + unsigned int vfe_mode; + + switch (fegetround()) { + #if defined(FE_TONEAREST) + case FE_TONEAREST: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case FE_TOWARDZERO: + vfe_mode = SIMDE_MM_ROUND_DOWN; + break; + #endif + + #if defined(FE_UPWARD) + case FE_UPWARD: + vfe_mode = SIMDE_MM_ROUND_UP; + break; + #endif + + #if defined(FE_DOWNWARD) + case FE_DOWNWARD: + vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; + break; + #endif + + default: + vfe_mode = SIMDE_MM_ROUND_NEAREST; + break; + } + + return vfe_mode; + #else + return SIMDE_MM_ROUND_NEAREST; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_ROUNDING_MODE(a); + #elif defined(SIMDE_HAVE_FENV_H) + int fe_mode = FE_TONEAREST; + + switch (a) { + #if defined(FE_TONEAREST) + case SIMDE_MM_ROUND_NEAREST: + fe_mode = FE_TONEAREST; + break; + #endif + + #if defined(FE_TOWARDZERO) + case SIMDE_MM_ROUND_TOWARD_ZERO: + fe_mode = FE_TOWARDZERO; + break; + #endif + + 
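+      /* SIMDE_MM_ROUND_DOWN selects FE_DOWNWARD (round toward -infinity) and
+       * SIMDE_MM_ROUND_UP selects FE_UPWARD (round toward +infinity); any other
+       * value hits the default case below and returns without calling fesetround(). */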
#if defined(FE_DOWNWARD) + case SIMDE_MM_ROUND_DOWN: + fe_mode = FE_DOWNWARD; + break; + #endif + + #if defined(FE_UPWARD) + case SIMDE_MM_ROUND_UP: + fe_mode = FE_UPWARD; + break; + #endif + + default: + return; + } + + fesetround(fe_mode); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; + #else + return SIMDE_MM_FLUSH_ZERO_OFF; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _MM_SET_FLUSH_ZERO_MODE(a); + #else + (void) a; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_getcsr (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_getcsr(); + #else + return SIMDE_MM_GET_ROUNDING_MODE(); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_getcsr() simde_mm_getcsr() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_setcsr (uint32_t a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_setcsr(a); + #else + SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_setcsr(a) simde_mm_setcsr(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + (void) lax_rounding; + + /* For architectures which lack a current direction SIMD instruction. + * + * Note that NEON actually has a current rounding mode instruction, + * but in ARMv8+ the rounding mode is ignored and nearest is always + * used, so we treat ARMv7 as having a rounding mode but ARMv8 as + * not. 
*/ + #if \ + defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ + defined(SIMDE_ARM_NEON_A32V8) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_f32 = vrndiq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndnq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); + #elif defined(simde_math_roundevenf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundevenf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndmq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); + #elif defined(simde_math_floorf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndpq_f32(a_.neon_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); + #elif defined(simde_math_ceilf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) + r_.neon_f32 = vrndq_f32(a_.neon_f32); 
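        /* vrndq_f32 maps to the FRINTZ instruction, i.e. round toward zero
         * (truncation), which is exactly what SIMDE_MM_FROUND_TO_ZERO asks for. */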
+ #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); + #elif defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) +#else + #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps(e3, e2, e1, e0); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; + r_.neon_f32 = vld1q_f32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ps1 (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ps1(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + (void) a; + return vec_splats(a); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + return (simde__m128)__lsx_vldrepl_w(&a, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_splat(a); + #else + return simde_mm_set_ps(a, a, a, a); + #endif +} +#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ps1(a) simde_mm_set_ps1(a) +# define _mm_set1_ps(a) simde_mm_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_move_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_move_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; + r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); + #else + r_.f32[0] = b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_broadcastlow_ps(simde__m128 a) { + /* This function broadcasts the first element in the inpu vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_ss functions since there may be garbage in the upper lanes. */ + + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_shuffle_ps(a, a, 0); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_add_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_add_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); + float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); + // the upper values in the result must be the remnants of . 
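      // (i.e. lanes 1..3 of the result are taken unchanged from a, and only
      //  lane 0 becomes a[0] + b[0], matching the scalar _mm_add_ss semantics)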
+ r_.neon_f32 = vaddq_f32(a_.neon_f32, value); + #else + r_.f32[0] = a_.f32[0] + b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_and_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_and_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 & b_.i32; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_andnot_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32 & b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_xor_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_xor_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) +#endif + 
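/* A minimal usage sketch (illustrative only; `v` stands for any simde__m128
 * value and is not defined in this header): the float bitwise helpers above
 * are most often combined with a sign-bit mask, e.g.
 *
 *   simde__m128 sign = simde_mm_set1_ps(-0.0f);      // 0x80000000 in each lane
 *   simde__m128 absv = simde_mm_andnot_ps(sign, v);  // clear sign bits -> |v|
 *   simde__m128 negv = simde_mm_xor_ps(sign, v);     // flip sign bits  -> -v
 *
 * simde_x_mm_abs_ps() and simde_x_mm_copysign_ps() further below apply the
 * same idea, with architecture-specific fast paths where available.
 */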
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_or_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_or_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_not_ps(simde__m128 a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* Note: we use ints instead of floats because we don't want cmpeq + * to return false for (NaN, NaN) */ + __m128i ai = _mm_castps_si128(a); + return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
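   *
   * For example (illustrative only; a and b are any two vectors), a
   * branch-free lane-wise maximum can be built from a compare plus this
   * helper, because the compare intrinsics return exactly such
   * all-zeros/all-ones masks:
   *
   *   simde__m128 mask = simde_mm_cmplt_ps(a, b);           // ~0 where a < b
   *   simde__m128 max  = simde_x_mm_select_ps(a, b, mask);  // b where a < b, else a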
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b), + mask_ = simde__m128_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint32_t wa SIMDE_VECTOR(16); + uint32_t wb SIMDE_VECTOR(16); + uint32_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) +# define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_avg_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) + uint16_t wa SIMDE_VECTOR(16); + uint16_t wb SIMDE_VECTOR(16); + uint16_t wr SIMDE_VECTOR(16); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) +# define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_abs_ps(simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + simde_float32 mask_; + uint32_t u32_ = UINT32_C(0x7FFFFFFF); + simde_memcpy(&mask_, &u32_, sizeof(u32_)); + return _mm_and_ps(_mm_set1_ps(mask_), a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vabsq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_abs(a_.altivec_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpeq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpge_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpge_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpgt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpgt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmple_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmplt_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpneq_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmplt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmple_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpgt_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ps(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { + return simde_mm_cmpge_ss(a, b); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vandq_u32(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, 
r_.lsx_i64); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpunord_ps(a, b); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); + r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); + r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(simde_math_isnanf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) + return _mm_cmpunord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] == b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] >= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] > b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] <= b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] < b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_comineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #else + return a_.f32[0] != b_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { + simde__m128_private + r_, + dest_ = simde__m128_to_private(dest), + src_ = simde__m128_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t sign_pos = vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); + r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t sign_pos = wasm_f32x4_splat(-0.0f); + r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); + #else + r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); + r_.altivec_f32 = vec_sel(dest_.altivec_f32, 
src_.altivec_f32, sign_pos); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; + r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); + #elif defined(SIMDE_IEEE754_STORAGE) + (void) src_; + (void) dest_; + simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); + r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { + return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_pi2ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) + a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); + #else + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_si2ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + r_.i32[1] = a_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvt_ss2si (simde__m128 a) { + #if 
defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvt_ss2si(a); + #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); + #else + simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && + (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.i16[i]; + r_.f32[i] = v; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m64_private b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); + r_.m64_private[1] = a_.m64_private[1]; + #else + r_.f32[0] = (simde_float32) b_.i32[0]; + r_.f32[1] = (simde_float32) b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32x2_ps(a, b); + #else + simde__m128_private r_; + simde__m64_private + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); + SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); + #else + r_.f32[0] = (simde_float32) a_.i32[0]; + r_.f32[1] = (simde_float32) a_.i32[1]; + r_.f32[2] = (simde_float32) b_.i32[0]; + r_.f32[3] = (simde_float32) b_.i32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpi8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return 
_mm_cvtpi8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); + r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); + r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi16 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi16(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi32(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtps_pi8 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtps_pi8(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) + /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to + * i16, combine with an all-zero vector of i16 (which will become the upper + * half), narrow to i8. 
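       *
       * As a worked example, an input of { 300.0f, -5.5f, 3.4f, -200.0f } is
       * clamped to { 127.0f, -5.5f, 3.4f, -128.0f }, rounded (nearest, ties
       * to even) to { 127, -6, 3, -128 }, and lands in the low four i8 lanes.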
*/ + float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); + float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); + float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); + r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) + r_.i8[i] = INT8_MAX; + else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) + r_.i8[i] = INT8_MIN; + else + r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); + } + /* Note: the upper half is undefined */ + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu16_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu16_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.u16[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpu8_ps (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpu8_ps(a); + #else + simde__m128_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtsi32_ss(a, b); + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_ss(a, b); + #else + return _mm_cvtsi64x_ss(a, b); + #endif + #else + simde__m128_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); + #else + r_ = a_; + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); + #endif + + 
return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm_cvtss_f32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtss_f32(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_f32(a_.neon_f32, 0); + #else + return a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtss_si32 (simde__m128 a) { + return simde_mm_cvt_ss2si(a); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtss_si64(a); + #else + return _mm_cvtss_si64x(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); + #else + return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtt_ps2pi (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtt_ps2pi(a); + #else + simde__m64_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) +# define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtt_ss2si (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cvtt_ss2si(a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + simde_float32 v = a_.f32[0]; + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #else + return SIMDE_CONVERT_FTOI(int32_t, v); + #endif + #endif + #endif +} +#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) +# define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttss_si64 (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) + #if defined(__PGI) + return _mm_cvttss_si64x(a); + #else + return _mm_cvttss_si64(a); + #endif + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); + #else + return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) +# define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_cmpord_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(simde_math_isnanf) + r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? UINT32_C(0) : ~UINT32_C(0); + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.u32[i] = a_.u32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); + float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); + r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) + r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_div_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_div_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = a_.f32[0] / b_.f32[0]; + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_mm_extract_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private a_ = simde__m64_to_private(a); + return a_.i16[imm8]; +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) +#endif +#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) +# define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m64_private + a_ = simde__m64_to_private(a); + + a_.i16[imm8] = i; + + return simde__m64_from_private(a_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) + #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) +#endif +#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +# define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { +#if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps(mem_addr); +#else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); + #endif + + return simde__m128_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) +#endif + 
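+/* Illustrative usage sketch (not part of the upstream SIMDE sources; the
+ * buffer name is an assumption).  simde_mm_load_ps keeps the 16-byte
+ * alignment contract of _mm_load_ps, while simde_mm_loadu_ps, defined later
+ * in this file, has no alignment requirement:
+ *
+ *   simde_float32 buf[4] = { 1.0f, 2.0f, 3.0f, 4.0f };  // storage must be 16-byte aligned for load_ps
+ *   simde__m128 v  = simde_mm_load_ps(buf);
+ *   simde__m128 v2 = simde_mm_loadu_ps(buf);
+ */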
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load1_ps (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ps1(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_dup_f32(mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); + #else + r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) +# define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_load_ss (simde_float32 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_load_ss(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); + #else + r_.f32[0] = *mem_addr; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); + #else + simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +/* The SSE documentation says that there are no alignment requirements + for mem_addr. Unfortunately they used the __m64 type for the argument + which is supposed to be 8-byte aligned, so some compilers (like clang + with -Wcast-align) will generate a warning if you try to cast, say, + a simde_float32* to a simde__m64* for this function. + + I think the choice of argument type is unfortunate, but I do think we + need to stick to it here. 
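+   As an illustration (hypothetical caller code, not something this header
+   provides), a call site holding two floats typically ends up writing
+
+     simde_float32 lo2[2] = { 1.0f, 2.0f };
+     v = simde_mm_loadl_pi(v, HEDLEY_REINTERPRET_CAST(simde__m64 const*, lo2));
+
+   which is exactly the cast the warning complains about.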
If there is demand I can always add something + like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcombine_f32(vld1_f32( + HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); + #else + simde__m64_private b_; + simde_memcpy(&b_, mem_addr, sizeof(b_)); + r_.i32[0] = b_.i32[0]; + r_.i32[1] = b_.i32[1]; + r_.i32[2] = a_.i32[2]; + r_.i32[3] = a_.i32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #if HEDLEY_HAS_WARNING("-Wold-style-cast") + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) + #else + #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadr_ps(mem_addr); + #else + simde__m128_private + r_, + v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrev64q_f32(v_.neon_f32); + r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_reve(v_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); + #else + r_.f32[0] = v_.f32[3]; + r_.f32[1] = v_.f32[2]; + r_.f32[2] = v_.f32[1]; + r_.f32[3] = v_.f32[0]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_loadu_ps(mem_addr); + #else + simde__m128_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) + r_.altivec_f32 = vec_vsx_ld(0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vld(mem_addr, 0); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m64_private + a_ = simde__m64_to_private(a), + mask_ = simde__m64_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i8) / 
sizeof(a_.i8[0])) ; i++) + if (mask_.i8[i] < 0) + mem_addr[i] = a_.i8[i]; + #endif +} +#define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +# define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) +# define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) + r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) + r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); + #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) + r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_max_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) +# define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_max_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_max_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vmaxq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) +# define _m_pminsw(a, b) simde_mm_min_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + #if defined(SIMDE_FAST_NANS) + r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); + #else + r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); + #endif + #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + uint32_t SIMDE_VECTOR(16) m = HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); + r_.f32 = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.f32), + ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | + (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) + ) + ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ?
a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_min_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) +# define _m_pminub(a, b) simde_mm_min_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_min_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_min_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #else + r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movehl_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a32 = vget_high_f32(a_.neon_f32); + float32x2_t b32 = vget_high_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(b32, a32); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergel(b_.altivec_i64, a_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); + #else + r_.f32[0] = b_.f32[2]; + r_.f32[1] = b_.f32[3]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movelh_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a10 = vget_low_f32(a_.neon_f32); + 
float32x2_t b10 = vget_low_f32(b_.neon_f32); + r_.neon_f32 = vcombine_f32(a10, b10); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), + vec_mergeh(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = b_.f32[0]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movemask_pi8(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + int r = 0; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint8x8_t input = a_.neon_u8; + const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; + const uint8x8_t mask_and = vdup_n_u8(0x80); + const int8x8_t mask_shift = vld1_s8(xr); + const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); + uint8x8_t lo = mask_result; + r = vaddv_u8(lo); + #else + const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < nmemb ; i++) { + r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); + } + #endif + + return r; + #endif +} +#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) +# define _m_pmovmskb(a) simde_mm_movemask_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_movemask_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_movemask_ps(a); + #else + int r = 0; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // Shift out everything but the sign bits with a 32-bit unsigned shift right. + uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); + // Merge the two pairs together with a 64-bit unsigned shift right + add. + uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); + // Extract the result. 
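+      // Byte 0 of 'paired' holds the sign bits of lanes 0-1 in its low two
+      // bits, and byte 8 holds lanes 2-3, so shifting the latter left by two
+      // assembles the final 4-bit mask.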
+ return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + static const uint32_t md[4] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3 + }; + + uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); + uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); + #else + uint64x2_t t64 = vpaddlq_u32(masked); + return + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); + r = __lsx_vpickve2gr_wu(t64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { + r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mul_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_mul_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), 
simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] * b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhi_pu16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); + const uint32x4_t t2 = vshrq_n_u32(t1, 16); + const uint16x4_t t3 = vmovn_u32(t2); + r_.neon_u16 = t3; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) +# define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) + #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) + #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) + #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) + #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) + #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) + #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) + #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) + #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) +#else + #define SIMDE_MM_HINT_NTA 0 + #define SIMDE_MM_HINT_T0 1 + #define SIMDE_MM_HINT_T1 2 + #define SIMDE_MM_HINT_T2 3 + #define SIMDE_MM_HINT_ENTA 4 + #define SIMDE_MM_HINT_ET0 5 + #define SIMDE_MM_HINT_ET1 6 + #define SIMDE_MM_HINT_ET2 7 +#endif + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") + _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") + #endif + #undef _MM_HINT_NTA + #define _MM_HINT_NTA SIMDE_MM_HINT_NTA + #undef _MM_HINT_T0 + #define _MM_HINT_T0 SIMDE_MM_HINT_T0 + #undef _MM_HINT_T1 + #define _MM_HINT_T1 SIMDE_MM_HINT_T1 + #undef _MM_HINT_T2 + #define _MM_HINT_T2 SIMDE_MM_HINT_T2 + #undef _MM_HINT_ENTA + #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA + #undef _MM_HINT_ET0 + #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 + #undef _MM_HINT_ET1 + #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_prefetch (const void* p, int i) { + #if \ + HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ + HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __builtin_prefetch(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __builtin_prefetch(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __builtin_prefetch(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __builtin_prefetch(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __builtin_prefetch(p, 
1, 0); + break; + case SIMDE_MM_HINT_ET0: + __builtin_prefetch(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __builtin_prefetch(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __builtin_prefetch(p, 0, 1); + break; + } + #elif defined(__ARM_ACLE) + #if (__ARM_ACLE >= 101) + switch(i) { + case SIMDE_MM_HINT_NTA: + __pldx(0, 0, 1, p); + break; + case SIMDE_MM_HINT_T0: + __pldx(0, 0, 0, p); + break; + case SIMDE_MM_HINT_T1: + __pldx(0, 1, 0, p); + break; + case SIMDE_MM_HINT_T2: + __pldx(0, 2, 0, p); + break; + case SIMDE_MM_HINT_ENTA: + __pldx(1, 0, 1, p); + break; + case SIMDE_MM_HINT_ET0: + __pldx(1, 0, 0, p); + break; + case SIMDE_MM_HINT_ET1: + __pldx(1, 1, 0, p); + break; + case SIMDE_MM_HINT_ET2: + __pldx(1, 2, 0, p); + break; + } + #else + (void) i; + __pld(p) + #endif + #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) + (void) i; + #pragma mem prefetch p + #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) + switch (i) { + case SIMDE_MM_HINT_NTA: + #pragma _CRI prefetch (nt) p + break; + case SIMDE_MM_HINT_T0: + case SIMDE_MM_HINT_T1: + case SIMDE_MM_HINT_T2: + #pragma _CRI prefetch p + break; + case SIMDE_MM_HINT_ENTA: + #pragma _CRI prefetch (write, nt) p + break; + case SIMDE_MM_HINT_ET0: + case SIMDE_MM_HINT_ET1: + case SIMDE_MM_HINT_ET2: + #pragma _CRI prefetch (write) p + break; + } + #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) + switch(i) { + case SIMDE_MM_HINT_NTA: + __prefetch_by_load(p, 0, 0); + break; + case SIMDE_MM_HINT_T0: + __prefetch_by_load(p, 0, 3); + break; + case SIMDE_MM_HINT_T1: + __prefetch_by_load(p, 0, 2); + break; + case SIMDE_MM_HINT_T2: + __prefetch_by_load(p, 0, 1); + break; + case SIMDE_MM_HINT_ENTA: + __prefetch_by_load(p, 1, 0); + break; + case SIMDE_MM_HINT_ET0: + __prefetch_by_load(p, 1, 3); + break; + case SIMDE_MM_HINT_ET1: + __prefetch_by_load(p, 1, 2); + break; + case SIMDE_MM_HINT_ET2: + __prefetch_by_load(p, 0, 1); + break; + } + #elif HEDLEY_MSVC_VERSION + (void) i; + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ + #define simde_mm_prefetch(p, i) \ + (__extension__({ \ + HEDLEY_DIAGNOSTIC_PUSH \ + HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + _mm_prefetch((p), (i)); \ + HEDLEY_DIAGNOSTIC_POP \ + })) + #else + #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) + #endif +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) + #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_negate_ps(simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vnegq_f32(a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_f32 = vec_neg(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; + r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ps(a); + #else + simde__m128_private + r_, + a_ = 
simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t recip = vrecpeq_f32(a_.neon_f32); + + #if SIMDE_ACCURACY_PREFERENCE > 0 + for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { + recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); + } + #endif + + r_.neon_f32 = recip; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_re(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.f32 = 1.0f / a_.f32; + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + int32_t ix; + simde_float32 fx = a_.f32[i]; + simde_memcpy(&ix, &fx, sizeof(ix)); + int32_t x = INT32_C(0x7EF311C3) - ix; + simde_float32 temp; + simde_memcpy(&temp, &x, sizeof(temp)); + r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / a_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rcp_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rcp_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + r_.f32[0] = 1.0f / a_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); + #elif defined(SIMDE_IEEE754_STORAGE) + /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf + Pages 100 - 103 */ + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); + #else + simde_float32 x = a_.f32[i]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * 
x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[i] = x; + #endif + } + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_rsqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_rsqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); + #elif defined(SIMDE_IEEE754_STORAGE) + { + #if SIMDE_ACCURACY_PREFERENCE <= 0 + r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); + #else + simde_float32 x = a_.f32[0]; + simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; + int32_t ix; + + simde_memcpy(&ix, &x, sizeof(ix)); + + #if SIMDE_ACCURACY_PREFERENCE == 1 + ix = INT32_C(0x5F375A82) - (ix >> 1); + #else + ix = INT32_C(0x5F37599E) - (ix >> 1); + #endif + + simde_memcpy(&x, &ix, sizeof(x)); + + #if SIMDE_ACCURACY_PREFERENCE >= 2 + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + #endif + x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); + + r_.f32[0] = x; + #endif + } + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #elif defined(simde_math_sqrtf) + r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sad_pu8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); + r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); + #else + uint16_t sum = 0; + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); + } + + r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + #endif + + return simde__m64_from_private(r_); + #endif +} +#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) +# define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_set_ss (simde_float32 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_set_ss(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); + #else + return 
simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_set_ss(a) simde_mm_set_ss(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setr_ps(e3, e2, e1, e0); + #else + return simde_mm_set_ps(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_setzero_ps (void) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_setzero_ps(); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return vec_splats(SIMDE_FLOAT32_C(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); + #else + simde__m128 r; + simde_memset(&r, 0, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_setzero_ps() simde_mm_setzero_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_undefined_ps (void) { + simde__m128_private r_; + + #if defined(SIMDE_HAVE_UNDEFINED128) + r_.n = _mm_undefined_ps(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_undefined_ps() simde_mm_undefined_ps() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_setone_ps (void) { + simde__m128 t = simde_mm_setzero_ps(); + return simde_mm_cmpeq_ps(t, t); +} + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_sfence (void) { + /* TODO: Use Hedley. 
*/ + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_sfence(); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) + #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) + __atomic_thread_fence(__ATOMIC_SEQ_CST); + #else + atomic_thread_fence(memory_order_seq_cst); + #endif + #elif defined(_MSC_VER) + MemoryBarrier(); + #elif HEDLEY_HAS_EXTENSION(c_atomic) + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) + __sync_synchronize(); + #elif defined(_OPENMP) + #pragma omp critical(simde_mm_sfence_) + { } + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sfence() simde_mm_sfence() +#endif + +#define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ + const simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ + simde__m64_from_private((simde__m64_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 8, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3)) }); })) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m64_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { + r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; + } + +HEDLEY_DIAGNOSTIC_PUSH +#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") +# pragma clang diagnostic ignored "-Wconditional-uninitialized" +#endif + return simde__m64_from_private(r_); +HEDLEY_DIAGNOSTIC_POP +} +#endif +#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) +# define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) +#else +# define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) +# define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) +# define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ + wasm_i32x4_shuffle( \ + simde__m128_to_private(a).wasm_v128, \ + simde__m128_to_private(b).wasm_v128, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) 
& 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_ps(a, b, imm8) \ + (__extension__({ \ + float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ + float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ + float32x4_t simde_mm_shuffle_ps_r_; \ + \ + simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ + simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ + vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ + simde__m128_from_private((simde__m128_private) { .f32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + simde__m128_to_private(a).f32, \ + simde__m128_to_private(b).f32, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsqrtq_f32(a_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t est = vrsqrteq_f32(a_.neon_f32); + for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { + est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); + } + r_.neon_f32 = vmulq_f32(a_.neon_f32, est); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) + r_.altivec_f32 = vec_sqrt(a_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sqrt_ss (simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sqrt_ss(a); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32_t value = + vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); + r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); + #elif defined(simde_math_sqrtf) + r_.f32[0] = simde_math_sqrtf(a_.f32[0]); + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #else + simde_memcpy(mem_addr, &a_, sizeof(a)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { + simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); + + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ps1(mem_addr_, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + simde__m128_private tmp_; + tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + simde_mm_store_ps(mem_addr_, tmp_.f32); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr_[i] = a_.f32[0]; + } + #endif + #endif +} +#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +# define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_store_ss(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_lane_f32(mem_addr, a_.neon_f32, 0); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + *mem_addr = a_.f32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), 
vget_high_f32(a_.neon_f32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128_private a_ = simde__m128_to_private(a); + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest_->neon_f32 = vget_low_f32(a_.neon_f32); + #else + dest_->f32[0] = a_.f32[0]; + dest_->f32[1] = a_.f32[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storer_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t tmp = vrev64q_f32(a_.neon_f32); + vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); + simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE_ALIGNED(mem_addr:16) + for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { + mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_storeu_ps(mem_addr, a); + #else + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_f32(mem_addr, a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + vec_vsx_st(a_.altivec_f32, 0, mem_addr); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + __lsx_vst(a_.lsx_f32, mem_addr, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store(mem_addr, a_.wasm_v128); + #else + simde_memcpy(mem_addr, &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_sub_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_sub_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + r_.f32[0] = a_.f32[0] - b_.f32[0]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[3]; + + return simde__m128_from_private(r_); + #endif +} + +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomieq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] == b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] == b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomige_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] >= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] 
>= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomigt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] > b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] > b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomile_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] <= b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] <= b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomilt_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); + uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); + r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] < b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] < b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_ss 
(simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_ucomineq_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); + uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); + uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); + uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); + r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f32[0] != b_.f32[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f32[0] != b_.f32[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) +#endif + +#if defined(SIMDE_X86_SSE_NATIVE) +# if defined(__has_builtin) +# if __has_builtin(__builtin_ia32_undef128) +# define SIMDE_HAVE_UNDEFINED128 +# endif +# elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) +# define SIMDE_HAVE_UNDEFINED128 +# endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpackhi_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_high_f32(a_.neon_f32); + float32x2_t b1 = vget_high_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE_NATIVE) + return _mm_unpacklo_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); + #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) + r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x2_t a1 = vget_low_f32(a_.neon_f32); + float32x2_t b1 = vget_low_f32(b_.neon_f32); + float32x2x2_t result = vzip_f32(a1, b1); + r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); + #elif 
defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { + #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); + #else + simde__m64_private* + dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); + #else + dest->i64[0] = a_.i64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { + #if defined(SIMDE_X86_SSE_NATIVE) + _mm_stream_ps(mem_addr, a); + #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + simde__m128_private a_ = simde__m128_to_private(a); + __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); + #else + simde_mm_store_ps(mem_addr, a); + #endif +} +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \ + float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \ + row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \ + row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \ + vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \ + } while (0) +#else + #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ + do { \ + simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \ + SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ + SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ + row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \ + row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \ + row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \ + row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \ + } while (0) +#endif +#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) +# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) +#endif + 
+SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE_H) */ +/* :: End simde/x86/sse.h :: */ +#if !defined(SIMDE_X86_AVX_H) +#define SIMDE_X86_AVX_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017 Evan Nemerson + * 2020 Hidayat Khan + */ + +#if !defined(SIMDE_X86_SSE4_2_H) +#define SIMDE_X86_SSE4_2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse4.1.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +#if !defined(SIMDE_X86_SSE4_1_H) +#define SIMDE_X86_SSE4_1_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/ssse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSSE3_H) +#define SIMDE_X86_SSSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse3.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2017-2020 Evan Nemerson + */ + +#if !defined(SIMDE_X86_SSE3_H) +#define SIMDE_X86_SSE3_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ +/* :: Begin simde/x86/sse2.h :: */ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2017-2020 Evan Nemerson + * 2015-2017 John W. Ratcliff + * 2015 Brandon Rowlett + * 2015 Ken Fast + * 2017 Hasindu Gamaarachchi + * 2018 Jeff Daily + */ + +#if !defined(SIMDE_X86_SSE2_H) +#define SIMDE_X86_SSE2_H + +/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ +/* e134cc793faf11b84d22d5893637efa919052688 */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_16 simde_int128 i128[1]; + SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; + #endif + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 
simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128i n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + #if defined(__ARM_FP16_FORMAT_IEEE) + SIMDE_ALIGN_TO_16 float16x8_t neon_f16; + #endif + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128i_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_16 int8_t i8[16]; + SIMDE_ALIGN_TO_16 int16_t i16[8]; + SIMDE_ALIGN_TO_16 int32_t i32[4]; + SIMDE_ALIGN_TO_16 int64_t 
i64[2]; + SIMDE_ALIGN_TO_16 uint8_t u8[16]; + SIMDE_ALIGN_TO_16 uint16_t u16[8]; + SIMDE_ALIGN_TO_16 uint32_t u32[4]; + SIMDE_ALIGN_TO_16 uint64_t u64[2]; + SIMDE_ALIGN_TO_16 simde_float32 f32[4]; + SIMDE_ALIGN_TO_16 simde_float64 f64[2]; + SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; + SIMDE_ALIGN_TO_16 simde__m64 m64[2]; + + #if defined(SIMDE_X86_SSE2_NATIVE) + SIMDE_ALIGN_TO_16 __m128d n; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_TO_16 int8x16_t neon_i8; + SIMDE_ALIGN_TO_16 int16x8_t neon_i16; + SIMDE_ALIGN_TO_16 int32x4_t neon_i32; + SIMDE_ALIGN_TO_16 int64x2_t neon_i64; + SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; + SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; + SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; + SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; + SIMDE_ALIGN_TO_16 float32x4_t neon_f32; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 float64x2_t neon_f64; + #endif + #elif defined(SIMDE_MIPS_MSA_NATIVE) + v16i8 msa_i8; + v8i16 msa_i16; + v4i32 msa_i32; + v2i64 msa_i64; + v16u8 msa_u8; + v8u16 msa_u16; + v4u32 msa_u32; + v2u64 msa_u64; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_ALIGN_TO_16 v128_t wasm_v128; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; + #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; + #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; + #else + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; + #endif + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; + #endif + #endif +} simde__m128d_private; + +#if defined(SIMDE_X86_SSE2_NATIVE) + typedef __m128i simde__m128i; + typedef __m128d simde__m128d; +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + typedef int64x2_t simde__m128i; +# if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + typedef float64x2_t simde__m128d; +# elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +# else + typedef simde__m128d_private simde__m128d; +# endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + typedef v128_t simde__m128i; + typedef v128_t simde__m128d; +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; + #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; + #else + typedef simde__m128d_private simde__m128d; + #endif +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; +#else + typedef simde__m128i_private simde__m128i; + typedef simde__m128d_private simde__m128d; +#endif + +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + typedef simde__m128i __m128i; + typedef simde__m128d __m128d; +#endif + +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); +HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde__m128i_from_private(simde__m128i_private v) { + simde__m128i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i_private +simde__m128i_to_private(simde__m128i v) { + simde__m128i_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde__m128d_from_private(simde__m128d_private v) { + simde__m128d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d_private +simde__m128d_to_private(simde__m128d v) { + simde__m128d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned 
short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #endif +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) + #endif +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) + #if defined(SIMDE_BUG_GCC_95782) + SIMDE_FUNCTION_ATTRIBUTES + SIMDE_POWER_ALTIVEC_VECTOR(double) + simde__m128d_to_altivec_f64(simde__m128d value) { + simde__m128d_private r_ = simde__m128d_to_private(value); + return r_.altivec_f64; + } + + SIMDE_FUNCTION_ATTRIBUTES + simde__m128d + simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) { + simde__m128d_private r_; + r_.altivec_f64 = value; + return simde__m128d_from_private(r_); + } + #else + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64) + #endif + #endif +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); + SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); +#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_pd(e1, e0); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make(e0, e1); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 }; + r_.neon_f64 = vld1q_f64(data); + #else + 
r_.f64[0] = e0; + r_.f64[1] = e1; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_pd(a); + #else + simde__m128d_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_pd(a) simde_mm_set1_pd(a) + #define _mm_set_pd1(a) simde_mm_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_abs_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + simde_float64 mask_; + uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF); + simde_memcpy(&mask_, &u64_, sizeof(u64_)); + return _mm_and_pd(_mm_set1_pd(mask_), a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vabsq_f64(a_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_abs(a_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_not_pd(simde__m128d a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + __m128i ai = _mm_castpd_si128(a); + return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
*/ + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b), + mask_ = simde__m128d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_move_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(HEDLEY_IBM_VERSION) + r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); + #else + r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); + #else + r_.f64[0] = b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_broadcastlow_pd(simde__m128d a) { + /* This function broadcasts the first element in the input vector to + * all lanes. It is used to avoid generating spurious exceptions in + * *_sd functions since there may be garbage in the upper lanes. 
*/ + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_add_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_add_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] + b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_add_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_add_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] + b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_adds_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_and_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_pd(a, b) simde_mm_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_and_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_and_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_and_si128(a, b) simde_mm_and_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_andnot_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = ~a_.u64[i] & b_.u64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_andnot_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_xor_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + 
#endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint16_t wa SIMDE_VECTOR(32); + uint16_t wb SIMDE_VECTOR(32); + uint16_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u8); + SIMDE_CONVERT_VECTOR_(wb, b_.u8); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u8, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_avg_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) + uint32_t wa SIMDE_VECTOR(32); + uint32_t wb SIMDE_VECTOR(32); + uint32_t wr SIMDE_VECTOR(32); + SIMDE_CONVERT_VECTOR_(wa, a_.u16); + SIMDE_CONVERT_VECTOR_(wb, b_.u16); + wr = (wa + wb + 1) >> 1; + SIMDE_CONVERT_VECTOR_(r_.u16, wr); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setzero_si128 (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_si128(); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(0); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT) + r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} 
+#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_si128() (simde_mm_setzero_si128()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bslli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_slo + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_sro + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.u128[0] = a_.u128[0] << (imm8 * 8); + #else + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i - imm8]; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bslli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ + simde__m128i_from_wasm_v128( \ + wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ + simde__m128i_to_wasm_v128((a)), \ + ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ + ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_; \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) + #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_bsrli_si128 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & ~15))) { + return simde_mm_setzero_si128(); + } + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) + r_.altivec_i8 = + #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + vec_sro + #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ + vec_slo + #endif + (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) + imm8; + r_.i8[i] = (e < 16) ? a_.i8[e] : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) + #define simde_mm_bsrli_si128(a, imm8) \ + simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.wasm_v128 = \ + wasm_i8x16_shuffle( \ + simde_tmp_z_.wasm_v128, \ + simde_tmp_a_.wasm_v128, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ + if (HEDLEY_UNLIKELY(imm8 > 15)) { \ + simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ + } else { \ + simde_tmp_r_.i8 = \ + SIMDE_SHUFFLE_VECTOR_(8, 16, \ + simde_tmp_z_.i8, \ + (simde_tmp_a_).i8, \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ + HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ + } \ + simde__m128i_from_private(simde_tmp_r_); })) +#endif +#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) + #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_clflush (void const* p) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_clflush(p); + #else + (void) p; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_clflush(p) simde_mm_clflush(p) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comieq_sd (simde__m128d a, 
simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] == b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] >= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] > b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] <= b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] < b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_comineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_comineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), 
+ b_ = simde__m128d_to_private(b); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #else + return a_.f64[0] != b_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) { + simde__m128d_private + r_, + dest_ = simde__m128d_to_private(dest), + src_ = simde__m128d_to_private(src); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); + #else + simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); + uint64_t u64_nz; + simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); + uint64x2_t sign_pos = vdupq_n_u64(u64_nz); + #endif + r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) + r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); + #else + r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); + #endif + #elif defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); + return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest)); + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) { + return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_ps(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f32_f64(a); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_ps(a) simde_mm_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castpd_si128 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castpd_si128(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_s64_f64(a); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castpd_si128(a) simde_mm_castpd_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_f32(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_pd(a) simde_mm_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_castps_si128 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castps_si128(a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); + #else + simde__m128i r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif 
+} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castps_si128(a) simde_mm_castps_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_castsi128_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_pd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vreinterpretq_f64_s64(a); + #else + simde__m128d r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_castsi128_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_castsi128_ps(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); + #else + simde__m128 r; + simde_memcpy(&r, &a, sizeof(a)); + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = (a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_MIPS_MSA_NATIVE) + r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpeq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpneq_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmplt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmple_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmple_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpgt_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpgt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpge_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? 
~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpge_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpngt_pd(a, b); + #else + return simde_mm_cmple_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpngt_sd(a, b); + #else + return simde_mm_cmple_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnge_pd(a, b); + #else + return simde_mm_cmplt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cmpnge_sd(a, b); + #else + return simde_mm_cmplt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_pd(a, b); + #else + return simde_mm_cmpge_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnlt_sd(a, b); + #else + return simde_mm_cmpge_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpnle_pd(a, b); + #else + return simde_mm_cmpgt_pd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_cmpnle_sd(a, b); + #else + return simde_mm_cmpgt_sd(a, b); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + /* Note: NEON does not have ordered compare builtin + Need to compare a eq a and b eq b to check for NaN + Do AND of results to get final */ + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vandq_u64(ceqaa, ceqbb); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm_cvtsd_f64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_cvtsd_f64(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); + #else + return a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); + r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), + wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); + #elif defined(simde_math_isnan) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cmpunord_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_isnan) + r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); + r_.u64[1] = a_.u64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_pd(a); + #else + simde__m128d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtepi32_ps (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtepi32_ps(a); + #else + simde__m128_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + #if HEDLEY_HAS_WARNING("-Wc11-extensions") + #pragma clang diagnostic ignored "-Wc11-extensions" + #endif + r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (simde_float32) a_.i32[i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvtpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = simde_math_round(a_.f64[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) + return _mm_cvtpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvtpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtpd_ps (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) + float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; + r_.f32 = + __builtin_shufflevector( + __builtin_convertvector(__builtin_shufflevector(a_.f64, a_.f64, 0, 1), __typeof__(z)), z, + 0, 1, 2, 3 + ); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); + r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); + r_.f32[2] = SIMDE_FLOAT32_C(0.0); + r_.f32[3] = SIMDE_FLOAT32_C(0.0); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtpi32_pd (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvtpi32_pd(a); + #else + simde__m128d_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (simde_float64) a_.i32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_; + + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) + a_ = simde__m128_to_private(a); + r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) + a_ = simde__m128_to_private(a); + r_.wasm_v128 = 
wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + #else + a_ = simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = simde_math_roundf(a_.f32[i]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtps_pd(a); + #else + simde__m128d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f32[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 v = simde_math_round(a_.f64[0]); + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsd_si64x(a); + #else + return _mm_cvtsd_si64(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); + #endif +} +#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) + #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsd_ss(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); + #else + r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); + + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i]; + } + #endif + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_mm_cvtsi128_si16 (simde__m128i a) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s16(a_.neon_i16, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i16, 0); + #else + return a_.i16[0]; + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvtsi128_si32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi128_si32(a); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s32(a_.neon_i32, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + return vec_extract(a_.altivec_i32, 0); + #else + return a_.i32[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvtsi128_si64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if defined(__PGI) + return _mm_cvtsi128_si64x(a); + #else + return _mm_cvtsi128_si64(a); + #endif + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) + return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); + #endif + return a_.i64[0]; + #endif +} +#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) + #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_sd(a, b); + #else + simde__m128d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.i64[1] = a_.i64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_cvtsi16_si128 (int16_t a) { + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); + #else + r_.i16[0] = a; + r_.i16[1] = 0; + r_.i16[2] = 0; + r_.i16[3] = 0; + r_.i16[4] = 0; + r_.i16[5] = 0; + r_.i16[6] = 0; + r_.i16[7] = 0; + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi32_si128 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtsi32_si128(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); + #else + r_.i32[0] = a; + r_.i32[1] = 0; + r_.i32[2] = 0; + r_.i32[3] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_sd(a, b); + #else + return _mm_cvtsi64x_sd(a, b); + #endif + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); + #else + r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) + #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtsi64_si128 (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvtsi64_si128(a); + #else + return _mm_cvtsi64x_si128(a); + #endif + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
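+ /* Build the vector {a, 0}: a goes into the low 64-bit lane, the high lane is zeroed. */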
r_.wasm_v128 = wasm_i64x2_make(a, 0); + #else + r_.i64[0] = a; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) + #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvtss_sd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); + return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_cvttpd_pi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_cvttpd_pi32(a); + #else + simde__m64_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float64 v = a_.f64[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttpd_epi32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttpd_epi32(a); + #else + simde__m128i_private r_; + + r_.m64[0] = simde_mm_cvttpd_pi32(a); + r_.m64[1] = simde_mm_setzero_si64(); + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvttps_epi32 (simde__m128 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttps_epi32(a); + #else + simde__m128i_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + /* Values below INT32_MIN saturate anyways, so we don't need to + * test for that. 
*/ + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = + vandq_u32( + vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), + vceqq_f32(a_.neon_f32, a_.neon_f32) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); + #endif + + r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) + v128_t valid_input = + wasm_v128_and( + wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), + wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) + ); + #elif !defined(SIMDE_FAST_CONVERSION_RANGE) + v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); + #elif !defined(SIMDE_FAST_NANS) + v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); + #endif + + r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); + #endif + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); + + #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; + + __typeof__(r_.i32) valid_input = + HEDLEY_REINTERPRET_CAST( + __typeof__(r_.i32), + (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) + ); + #elif !defined(SIMDE_FAST_NANS) + __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); + #endif + + __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN }; + r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); + #endif + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + simde_float32 v = a_.f32[i]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); + #else + r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? + SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_cvttsd_si32 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_cvttsd_si32(a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + simde_float64 v = a_.f64[0]; + #if defined(SIMDE_FAST_CONVERSION_RANGE) + return SIMDE_CONVERT_FTOI(int32_t, v); + #else + return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
+ SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_cvttsd_si64 (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(__PGI) + return _mm_cvttsd_si64(a); + #else + return _mm_cvttsd_si64x(a); + #endif + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); + #endif +} +#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) + #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_div_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_div_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] / b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + uint16_t r; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); + #else + r = a_.u16[imm8 & 7]; + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) + #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
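+ /* _mm_extract_epi16 zero-extends the selected 16-bit lane into a 32-bit result, hence the unsigned (u16x8) extract here. */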
#define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + simde__m128i_private a_ = simde__m128i_to_private(a); + a_.i16[imm8 & 7] = i; + return simde__m128i_from_private(a_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load(mem_addr); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load1_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load1_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); + #else + return simde_mm_set1_pd(*mem_addr); + #endif +} +#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) + #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_load_sd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_sd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = UINT64_C(0); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_load_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private 
r_; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadh_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); + #else + simde_float64 t; + + simde_memcpy(&t, mem_addr, sizeof(t)); + r_.f64[0] = a_.f64[0]; + r_.f64[1] = t; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_epi64(mem_addr); + #else + simde__m128i_private r_; + + int64_t value; + simde_memcpy(&value, mem_addr, sizeof(value)); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); + #else + r_.i64[0] = value; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadl_pd(a, mem_addr); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vcombine_f64(vld1_f64( + HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); + #else + r_.f64[0] = *mem_addr; + r_.u64[1] = a_.u64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadr_pd(mem_addr); + #else + simde__m128d_private + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vld1q_f64(mem_addr); + r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); + r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t tmp = wasm_v128_load(mem_addr); + 
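+ /* Swap the two 64-bit lanes so the loaded doubles end up in reversed order. */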
r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); + #else + r_.f64[0] = mem_addr[1]; + r_.f64[1] = mem_addr[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_pd(mem_addr); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vld1q_f64(mem_addr); + #else + simde__m128d_private r_; + + simde_memcpy(&r_, mem_addr, sizeof(r_)); + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi8 + #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi16 + #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi32 + #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#endif +#define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm_loadu_epi64 + #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si128 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); + #else + simde__m128i_private r_; + + #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_PACKED_ + struct simde_mm_loadu_si128_s { + __typeof__(r_) v; + } __attribute__((__packed__, __may_alias__)); + r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vpaddq_s32(pl, ph); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); + int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); + int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); + r_.neon_i32 = 
vcombine_s32(rl, rh); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); + #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) a32, b32, p32; + SIMDE_CONVERT_VECTOR_(a32, a_.i16); + SIMDE_CONVERT_VECTOR_(b32, b_.i16); + p32 = a32 * b32; + r_.i32 = + __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { + if (mask_.u8[i] & 0x80) { + mem_addr[i] = a_.i8[i]; + } + } + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) + /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ + return _mm_movemask_epi8(a); + #else + int32_t r = 0; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ + static const uint8_t md[16] = { + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1 << 7, + }; + + /* Extend sign bit over entire lane */ + uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); + /* Clear all but the bit we're interested in. 
*/ + uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); + /* Alternate bytes from low half and high half */ + uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vaddvq_u16(x); + #else + uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); + #endif + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) + static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= (a_.u8[15 - i] >> 7) << (15 - i); + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_movemask_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_movemask_pd(a); + #else + int32_t r = 0; + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ + uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); + r = + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + + (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); + HEDLEY_DIAGNOSTIC_POP + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); + r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 
+simde_mm_movepi64_pi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movepi64_pi64(a); + #else + simde__m64_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i64 = vget_low_s64(a_.neon_i64); + #else + r_.i64[0] = a_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_movpi64_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_movpi64_epi64(a); + #else + simde__m128i_private r_; + simde__m64_private a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_min_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_max_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? 
a_.f64[0] : b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_move_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_move_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = 0; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_move_epi64(a) simde_mm_move_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t a_lo = vmovn_u64(a_.neon_u64); + uint32x2_t b_lo = vmovn_u64(b_.neon_u64); + r_.neon_u64 = vmull_u32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( + wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), + wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(a_.u32) z = { 0, }; + a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); + b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * + HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 * b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] * b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) + r_.i64 = a_.i64 % b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] % b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_pd(a, b); + #else + simde__m128d_private + r_, 
+ a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mul_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mul_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); + r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); + #else + r_.f64[0] = a_.f64[0] * b_.f64[0]; + r_.f64[1] = a_.f64[1]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) + return _mm_mul_su32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); + #else + r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mulhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a3210 = vget_low_s16(a_.neon_i16); + int16x4_t b3210 = vget_low_s16(b_.neon_i16); + int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); + #else + int16x4_t a7654 = vget_high_s16(a_.neon_i16); + int16x4_t b7654 = vget_high_s16(b_.neon_i16); + int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); + r_.neon_u16 = rv.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); + 
r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + return _mm_mulhi_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t a3210 = vget_low_u16(a_.neon_u16); + uint16x4_t b3210 = vget_low_u16(b_.neon_u16); + uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); + r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + #else + uint16x4_t a7654 = vget_high_u16(a_.neon_u16); + uint16x4_t b7654 = vget_high_u16(b_.neon_u16); + uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ + uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); + r_.neon_u16 = neon_r.val[1]; + #endif + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); + const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); + r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_mullo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_or_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + 
r_.i32f = a_.i32f | b_.i32f; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_or_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; + const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; + + int16_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = v > max; + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? 
INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packs_epi32(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_X86_SSE2_NATIVE) + r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; + const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; + + int32_t m SIMDE_VECTOR(32); + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); + v = (v & ~m) | (min & m); + + m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); + v = (v & ~m) | (max & m); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? 
INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_packus_epi16(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); + #else + r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = + vcombine_u8( + vqmovun_s16(a_.neon_i16), + vqmovun_s16(b_.neon_i16) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + + v &= ~(v >> 15); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i8, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; + r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_pause (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_pause(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_pause() (simde_mm_pause()) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sad_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); + r_.neon_u64 = vcombine_u64( + vpaddl_u32(vpaddl_u16(vget_low_u16(t))), + vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_make( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_i8 = vld1q_s8(data); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_i16 = vld1q_s16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.i16[0] = e0; + r_.i16[1] = e1; + r_.i16[2] = e2; + r_.i16[3] = e3; + r_.i16[4] = e4; + r_.i16[5] = e5; + r_.i16[6] = e6; + r_.i16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si16 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si16(mem_addr); + #else + int16_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_x_mm_cvtsi16_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32(e3, e2, e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_i32 = vld1q_s32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + 
r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); + #else + r_.i32[0] = e0; + r_.i32[1] = e1; + r_.i32[2] = e2; + r_.i32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si32 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ + HEDLEY_GCC_VERSION_CHECK(12,1,0)) + return _mm_loadu_si32(mem_addr); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); + #else + int32_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi32_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_set_epi64(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); + #else + r_.m64[0] = e0; + r_.m64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set_epi64x (int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(e1, e0); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; + r_.neon_i64 = vld1q_s64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make(e0, e1); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_loadu_si64 (void const* mem_addr) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + return _mm_loadu_si64(mem_addr); + #else + int64_t val; + simde_memcpy(&val, mem_addr, sizeof(val)); + return simde_mm_cvtsi64_si128(val); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi8( + HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), + HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), + HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), + HEDLEY_STATIC_CAST(char, 
e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { + e0, e1, e2, e3, + e4, e5, e6, e7, + e8, e9, e10, e11, + e12, e13, e14, e15}; + r_.neon_u8 = vld1q_u8(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); + #else + r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; + r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; + r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; + r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi16( + HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), + HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; + r_.neon_u16 = vld1q_u16(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); + #else + r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; + r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_epi32( + HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; + r_.neon_u32 = vld1q_u32(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); + #else + r_.u32[0] = e0; + r_.u32[1] = e1; + r_.u32[2] = e2; + r_.u32[3] = e3; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; + r_.neon_u64 = vld1q_u64(data); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_make(e0, e1); + #else + r_.u64[0] = e0; + r_.u64[1] = e1; + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_set_sd (simde_float64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set_sd(a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return 
simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); + #else + return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set_sd(a) simde_mm_set_sd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi8(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vdupq_n_s8(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi16(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vdupq_n_s16(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_set1_epi32(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vdupq_n_s32(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) + return _mm_set1_epi64x(a); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_splat(a); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_set1_epi64 (simde__m64 a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && 
defined(SIMDE_X86_MMX_NATIVE) + return _mm_set1_epi64(a); + #else + simde__m64_private a_ = simde__m64_to_private(a); + return simde_mm_set1_epi64x(a_.i64[0]); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu8 (uint8_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); + #else + return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu16 (uint16_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); + #else + return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu32 (uint32_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); + #else + return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_set1_epu64 (uint64_t value) { + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); + #else + return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_epi32(e3, e2, e1, e0); + #else + return simde_mm_set_epi32(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi32(e3, e2, 
e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_setr_epi64(e1, e0); + #else + return simde_mm_set_epi64(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setr_pd(e1, e0); + #else + return simde_mm_set_pd(e0, e1); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_setzero_pd (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_setzero_pd(); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); + #else + return simde_mm_castsi128_pd(simde_mm_setzero_si128()); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_setzero_pd() simde_mm_setzero_pd() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_undefined_pd (void) { + simde__m128d_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_pd(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_pd() simde_mm_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_undefined_si128 (void) { + simde__m128i_private r_; + + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) + r_.n = _mm_undefined_si128(); + #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_undefined_si128() (simde_mm_undefined_si128()) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) +HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_setone_pd (void) { + return simde_mm_castps_pd(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_setone_si128 (void) { + return simde_mm_castps_si128(simde_x_mm_setone_ps()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_wasm_v128( \ + wasm_i32x4_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3)); })) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shuffle_epi32(a, imm8) \ + (__extension__ ({ \ + const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ + int32x4_t simde_mm_shuffle_epi32_r_; \ + simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ + simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ + vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 16, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; + r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) + #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d_from_private((simde__m128d_private) { .f64 = \ + SIMDE_SHUFFLE_VECTOR_(64, 16, \ + simde__m128d_to_private(a).f64, \ + simde__m128d_to_private(b).f64, \ + (((imm8) ) & 1), \ + (((imm8) >> 1) & 1) + 2) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[i]; + } + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflehi_epi16(a, imm8) \ + (__extension__ ({ \ + int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ + simde_mm_shufflehi_epi16_r_ = 
vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ + simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ + simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ + })) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ + wasm_i16x8_shuffle( \ + (simde_tmp_a_).wasm_v128, \ + (simde_tmp_a_).wasm_v128, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; + } + SIMDE_VECTORIZE + for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_shufflelo_epi16(a, imm8) \ + simde__m128i_from_wasm_v128( \ + wasm_i16x8_shuffle( \ + simde__m128i_to_wasm_v128((a)), \ + wasm_i16x8_splat(0), \ + (((imm8) & 0x03) ), \ + (((imm8) & 0x0c) >> 2), \ + (((imm8) & 0x30) >> 4), \ + (((imm8) & 0xc0) >> 6), \ + 4, 5, 6, 7)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm_shufflelo_epi16(a, imm8) \ + (__extension__({ \ + int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ + int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ + simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ + simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ + })) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define 
simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ + simde__m128i_from_private((simde__m128i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 16, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7) }); })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 15) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = (a_.u16 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 31) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = (a_.u32 << count_.u64[0]); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sll_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + if (count_.u64[0] > 63) + return simde_mm_setzero_si128(); + + const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] << s; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsqrtq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = vec_sqrt(a_.altivec_f64); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sqrt_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_sqrt) + r_.f64[0] = simde_math_sqrt(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srl_epi64(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + #if !defined(SIMDE_BUG_GCC_94488) + SIMDE_VECTORIZE + #endif + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~15) ? 15 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srai_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* MSVC requires a range of (0, 255). */ + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + const int cnt = (imm8 & ~31) ? 
31 : imm8; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sra_epi16(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) + return _mm_sra_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> cnt; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 0 : imm8; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + (((imm8) <= 0) ? 
\ + (a) : \ + simde__m128i_from_neon_i16( \ + ((imm8) > 15) ? \ + vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ + vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i32( \ + ((imm8) > 31) ? \ + vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ + vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_slli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sl(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_slli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 63))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << imm8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_i64( \ + ((imm8) > 63) ? \ + vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ + vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_slli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi16 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 15))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u16( \ + ((imm8) > 15) ? \ + vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ + vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi16(a, imm8) \ + ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + if (HEDLEY_UNLIKELY((imm8 > 31))) { + return simde_mm_setzero_si128(); + } + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u32( \ + ((imm8) > 31) ? \ + vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ + vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #define simde_mm_srli_epi32(a, imm8) \ + (__extension__ ({ \ + simde__m128i ret; \ + if ((imm8) <= 0) { \ + ret = a; \ + } else if ((imm8) > 31) { \ + ret = simde_mm_setzero_si128(); \ + } else { \ + ret = simde__m128i_from_altivec_i32( \ + vec_sr(simde__m128i_to_altivec_i32(a), \ + vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ + } \ + ret; \ + })) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srli_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) + return simde_mm_setzero_si128(); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); + #else + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } + #endif + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE2_NATIVE) + #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + (((imm8) <= 0) ? \ + (a) : \ + simde__m128i_from_neon_u64( \ + ((imm8) > 63) ? \ + vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ + vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_srli_epi64(a, imm8) \ + ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) +#endif +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store1_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); + #else + mem_addr[0] = a_.f64[0]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) + #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_sd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); + simde_memcpy(mem_addr, &v, sizeof(v)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); + simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); + #else + simde_float64 v = a_.f64[0]; + simde_memcpy(mem_addr, &v, sizeof(simde_float64)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void + simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeh_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); + #else + *mem_addr = a_.f64[1]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int64_t tmp; + + /* memcpy to prevent aliasing, tmp because we can't take the + * address of a vector element. */ + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + tmp = vgetq_lane_s64(a_.neon_i64, 0); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + #endif + tmp = vec_extract(a_.altivec_i64, 0); + #else + tmp = a_.i64[0]; + #endif + + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storel_pd(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + simde_float64 tmp; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + tmp = vgetq_lane_f64(a_.neon_f64, 0); + #else + tmp = a_.f64[0]; + #endif + simde_memcpy(mem_addr, &tmp, sizeof(tmp)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storer_pd(mem_addr, a); + #else + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); + simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); + #else + mem_addr[0] = a_.f64[1]; + mem_addr[1] = a_.f64[0]; + #endif + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_pd(mem_addr, a); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { + #if 
defined(SIMDE_X86_SSE2_NATIVE) + _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si16(mem_addr, a); + #else + int16_t val = simde_x_mm_cvtsi128_si16(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si32(mem_addr, a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); + #else + int32_t val = simde_mm_cvtsi128_si32(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ + HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(20,21,1)) + _mm_storeu_si64(mem_addr, a); + #else + int64_t val = simde_mm_cvtsi128_si64(a); + simde_memcpy(mem_addr, &val, sizeof(val)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) + _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_stream_si32(mem_addr, a); + #else + *mem_addr = a; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) + _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, int64_t*, mem_addr), a); + #else + *mem_addr = a; + #endif +} +#define simde_mm_stream_si64x(mem_addr, a) 
simde_mm_stream_si64(mem_addr, a) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) + #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_sub_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_sub_sd(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + r_.f64[0] = a_.f64[0] - b_.f64[0]; + r_.f64[1] = a_.f64[1]; + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sub_si64(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); + #else + r_.i64[0] = a_.i64[0] - b_.i64[0]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); + #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_subs_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomieq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = 
vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] == b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] == b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomige_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] >= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] >= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomigt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] > b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] > b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomile_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); + r = 
!!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] <= b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] <= b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomilt_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); + uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); + r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] < b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] < b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_ucomineq_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + int r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); + uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); + uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); + uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); + r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); + #elif defined(SIMDE_HAVE_FENV_H) + fenv_t envp; + int x = feholdexcept(&envp); + r = a_.f64[0] != b_.f64[0]; + if (HEDLEY_LIKELY(x == 0)) + fesetenv(&envp); + #else + r = a_.f64[0] != b_.f64[0]; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_lfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_lfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_lfence() simde_mm_lfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_mfence (void) { + #if defined(SIMDE_X86_SSE2_NATIVE) + _mm_mfence(); + #else + simde_mm_sfence(); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_mfence() simde_mm_mfence() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return 
_mm_unpackhi_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_high_s16(a_.neon_i16); + int16x4_t b1 = vget_high_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_high_s32(a_.neon_i32); + int32x2_t b1 = vget_high_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i + 
((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_h = vget_high_s64(a_.neon_i64); + int64x1_t b_h = vget_high_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_h, b_h); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpackhi_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); + int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); + int8x8x2_t result = vzip_s8(a1, b1); + r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { + r_.i8[(i * 2)] = a_.i8[i]; + r_.i8[(i * 2) + 1] 
= b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4_t a1 = vget_low_s16(a_.neon_i16); + int16x4_t b1 = vget_low_s16(b_.neon_i16); + int16x4x2_t result = vzip_s16(a1, b1); + r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[(i * 2)] = a_.i16[i]; + r_.i16[(i * 2) + 1] = b_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2_t a1 = vget_low_s32(a_.neon_i32); + int32x2_t b1 = vget_low_s32(b_.neon_i32); + int32x2x2_t result = vzip_s32(a1, b1); + r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[(i * 2)] = a_.i32[i]; + r_.i32[(i * 2) + 1] = b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x1_t a_l = vget_low_s64(a_.neon_i64); + int64x1_t b_l = vget_low_s64(b_.neon_i64); + r_.neon_i64 = vcombine_s64(a_l, b_l); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { + r_.i64[(i * 2)] = a_.i64[i]; + r_.i64[(i * 2) + 1] = b_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { + r_.f64[(i * 2)] = a_.f64[i]; + r_.f64[(i * 2) + 1] = b_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_negate_pd(simde__m128d a) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) + r_.altivec_f64 = vec_neg(a_.altivec_f64); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vnegq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE2_NATIVE) + return _mm_xor_si128(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_not_si128 (simde__m128i a) { + #if defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, a, a, 0x55); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmvnq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / 
sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) +#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) + #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/sse2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6, 8, 10, 12, 14); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 2, 4, 6, 8, 10, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + halfway_point] = b_.i16[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi16 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vuzp2q_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7, 9, 11, 13, 15); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 1, 3, 5, 7, 9, 11, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + halfway_point] = b_.i16[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveeven_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + halfway_point] = b_.i32[2 * i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_deinterleaveodd_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); 
+ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + halfway_point] = b_.i32[2 * i + 1]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveeven_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp1q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[0]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2, 4, 6); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 2, 4, 6); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + halfway_point] = b_.f32[2 * i]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_x_mm_deinterleaveodd_ps (simde__m128 a, simde__m128 b) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vuzp2q_f32(a_.neon_f32, b_.neon_f32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(a_.neon_f32, b_.neon_f32); + r_.neon_f32 = t.val[1]; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3, 5, 7); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 1, 3, 5, 7); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + halfway_point] = b_.f32[2 * i + 1]; + } + #endif + + return simde__m128_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveeven_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp1q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + halfway_point] = b_.f64[2 * i]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_x_mm_deinterleaveodd_pd (simde__m128d a, simde__m128d b) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = 
simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vuzp2q_f64(a_.neon_f64, b_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + for(size_t i = 0 ; i < halfway_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + halfway_point] = b_.f64[2 * i + 1]; + } + #endif + + return simde__m128d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_addsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t rs = vsubq_f64(a_.neon_f64, b_.neon_f64); + float64x2_t ra = vaddq_f64(a_.neon_f64, b_.neon_f64); + return vcombine_f64(vget_low_f64(rs), vget_high_f64(ra)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64 - b_.f64, a_.f64 + b_.f64, 0, 3); + #else + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[1 + i] = a_.f64[1 + i] + b_.f64[1 + i]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_pd(a, b) simde_mm_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_addsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_addsub_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float32x4_t rs = vsubq_f32(a_.neon_f32, b_.neon_f32); + float32x4_t ra = vaddq_f32(a_.neon_f32, b_.neon_f32); + return vtrn2q_f32(vreinterpretq_f32_s32(vrev64q_s32(vreinterpretq_s32_f32(rs))), ra); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32 - b_.f32, a_.f32 + b_.f32, 0, 5, 2, 7); + #else + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[1 + i] = a_.f32[1 + i] + b_.f32[1 + i]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_addsub_ps(a, b) simde_mm_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hadd_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_pd(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128d_from_neon_f64(vpaddq_f64(simde__m128d_to_neon_f64(a), simde__m128d_to_neon_f64(b))); + #else + return simde_mm_add_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pd(a, b) simde_mm_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hadd_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hadd_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128_from_neon_f32(vpaddq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t 
t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], t.val[1])); + #else + return simde_mm_add_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_ps(a, b) simde_mm_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_hsub_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_pd(a, b); + #else + return simde_mm_sub_pd(simde_x_mm_deinterleaveeven_pd(a, b), simde_x_mm_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pd(a, b) simde_mm_hsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_hsub_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_hsub_ps(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4x2_t t = vuzpq_f32(simde__m128_to_neon_f32(a), simde__m128_to_neon_f32(b)); + return simde__m128_from_neon_f32(vaddq_f32(t.val[0], vnegq_f32(t.val[1]))); + #else + return simde_mm_sub_ps(simde_x_mm_deinterleaveeven_ps(a, b), simde_x_mm_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_ps(a, b) simde_mm_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_lddqu_si128 (simde__m128i const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_lddqu_si128(mem_addr); + #else + simde__m128i_private r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); + #else + simde_memcpy(&r_, mem_addr, sizeof(r_)); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_lddqu_si128(mem_addr) simde_mm_lddqu_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_loaddup_pd (simde_float64 const* mem_addr) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_loaddup_pd(mem_addr); + #else + simde__m128d_private r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_n_f64(*mem_addr); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vdupq_n_s64(*HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr)); + #else + r_.f64[0] = *mem_addr; + r_.f64[1] = *mem_addr; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_loaddup_pd(mem_addr) simde_mm_loaddup_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_movedup_pd (simde__m128d a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movedup_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); + #else + r_.f64[0] = a_.f64[0]; + r_.f64[1] = a_.f64[0]; + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movedup_pd(a) simde_mm_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_movehdup_ps (simde__m128 a) { + #if defined(SIMDE_X86_SSE3_NATIVE) + return _mm_movehdup_ps(a); + #else + 
simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn2q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 1, 3, 3); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 1, 1, 3, 3); + #else + r_.f32[0] = a_.f32[1]; + r_.f32[1] = a_.f32[1]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = a_.f32[3]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_movehdup_ps(a) simde_mm_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_moveldup_ps (simde__m128 a) { + #if defined(SIMDE__SSE3_NATIVE) + return _mm_moveldup_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vtrn1q_f32(a_.neon_f32, a_.neon_f32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 2, 2); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 2, 2); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = a_.f32[0]; + r_.f32[2] = a_.f32[2]; + r_.f32[3] = a_.f32[2]; + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES) +# define _mm_moveldup_ps(a) simde_mm_moveldup_ps(a) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE3_H) */ +/* :: End simde/x86/sse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi8(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_min_epu8(a, _mm_sub_epi8(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabsq_s8(a_.neon_i8); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_abs(a_.altivec_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi8(a) simde_mm_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_max_epi16(a, _mm_sub_epi16(_mm_setzero_si128(), a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabsq_s16(a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_abs(a_.altivec_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? 
(- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi16(a) simde_mm_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_abs_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_abs_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i m = _mm_cmpgt_epi32(_mm_setzero_si128(), a); + return _mm_sub_epi32(_mm_xor_si128(a, m), m); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabsq_s32(a_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_abs(a_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_abs(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_PUSH + #pragma warning(disable:4146) + #endif + r_.u32[i] = (a_.i32[i] < 0) ? (- HEDLEY_STATIC_CAST(uint32_t, a_.i32[i])) : HEDLEY_STATIC_CAST(uint32_t, a_.i32[i]); + #if defined(_MSC_VER) + HEDLEY_DIAGNOSTIC_POP + #endif + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_epi32(a) simde_mm_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi8 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi8(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vabs_s8(a_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, (a_.i8[i] < 0) ? (- a_.i8[i]) : a_.i8[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi8(a) simde_mm_abs_pi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi16 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi16(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vabs_s16(a_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.i16[i] < 0) ? (- a_.i16[i]) : a_.i16[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi16(a) simde_mm_abs_pi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_abs_pi32 (simde__m64 a) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_abs_pi32(a); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vabs_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.i32[i] < 0) ? 
(- a_.i32[i]) : a_.i32[i]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_abs_pi32(a) simde_mm_abs_pi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_alignr_epi8 (simde__m128i a, simde__m128i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm_setzero_si128(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.i8[i] = 0; + } else if (srcpos > 15) { + r_.i8[i] = a_.i8[(srcpos) & 15]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) _mm_alignr_epi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_epi8(a, b, count) \ + ( \ + ((count) > 31) \ + ? simde__m128i_from_neon_i8(vdupq_n_s8(0)) \ + : ( \ + ((count) > 15) \ + ? (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(a), vdupq_n_s8(0), (count) & 15))) \ + : (simde__m128i_from_neon_i8(vextq_s8(simde__m128i_to_neon_i8(b), simde__m128i_to_neon_i8(a), ((count) & 15)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) + #define _mm_alignr_epi8(a, b, count) simde_mm_alignr_epi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_alignr_pi8 (simde__m64 a, simde__m64 b, const int count) + SIMDE_REQUIRE_CONSTANT(count) { + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + if (HEDLEY_UNLIKELY(count > 15)) + return simde_mm_setzero_si64(); + + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 15) { + r_.i8[i] = 0; + } else if (srcpos > 7) { + r_.i8[i] = a_.i8[(srcpos) & 7]; + } else { + r_.i8[i] = b_.i8[srcpos]; + } + } + + return simde__m64_from_private(r_); +} +#if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) +# define simde_mm_alignr_pi8(a, b, count) _mm_alignr_pi8(a, b, count) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_alignr_pi8(a, b, count) \ + ( \ + ((count) > 15) \ + ? simde__m64_from_neon_i8(vdup_n_s8(0)) \ + : ( \ + ((count) > 7) \ + ? (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(a), vdup_n_s8(0), (count) & 7))) \ + : (simde__m64_from_neon_i8(vext_s8(simde__m64_to_neon_i8(b), simde__m64_to_neon_i8(a), ((count) & 7)))))) +#endif +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_alignr_pi8(a, b, count) simde_mm_alignr_pi8(a, b, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shuffle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_shuffle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i8 = vqtbl1q_s8(a_.neon_i8, vandq_u8(b_.neon_u8, vdupq_n_u8(0x8F))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Mask out the bits we're not interested in. vtbl will result in 0 + * for any values outside of [0, 15], so if the high bit is set it + * will return 0, just like in SSSE3. 
*/ + b_.neon_i8 = vandq_s8(b_.neon_i8, vdupq_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 15))); + + /* Convert a from an int8x16_t to an int8x8x2_t */ + int8x8x2_t i; + i.val[0] = vget_low_s8(a_.neon_i8); + i.val[1] = vget_high_s8(a_.neon_i8); + + /* Table lookups */ + int8x8_t l = vtbl2_s8(i, vget_low_s8(b_.neon_i8)); + int8x8_t h = vtbl2_s8(i, vget_high_s8(b_.neon_i8)); + + r_.neon_i8 = vcombine_s8(l, h); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + /* This is a bit ugly because of the casts and the awful type + * macros (SIMDE_POWER_ALTIVEC_VECTOR), but it's really just + * vec_sel(vec_perm(a, a, b), 0, vec_cmplt(b, 0)) */ + SIMDE_POWER_ALTIVEC_VECTOR(signed char) z = { 0, }; + SIMDE_POWER_ALTIVEC_VECTOR(signed char) msb_mask = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmplt(b_.altivec_i8, z)); + SIMDE_POWER_ALTIVEC_VECTOR(signed char) c = vec_perm(a_.altivec_i8, a_.altivec_i8, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), b_.altivec_i8)); + r_.altivec_i8 = vec_sel(c, z, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), msb_mask)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_swizzle( + a_.wasm_v128, wasm_v128_and(b_.wasm_v128, wasm_i8x16_splat(0x8F))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 15] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m128i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_epi8(a, b) simde_mm_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_shuffle_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_shuffle_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + b_.neon_i8 = vand_s8(b_.neon_i8, vdup_n_s8(HEDLEY_STATIC_CAST(int8_t, (1 << 7) | 7))); + r_.neon_i8 = vtbl1_s8(a_.neon_i8, b_.neon_i8); + #else + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.i8[i] = a_.i8[b_.i8[i] & 7] & (~(b_.i8[i]) >> 7); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_shuffle_pi8(a, b) simde_mm_shuffle_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i16(vpaddq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_add_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi16(a, b) simde_mm_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadd_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadd_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return simde__m128i_from_neon_i32(vpaddq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b))); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = 
vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vaddq_s32(t.val[0], t.val[1])); + #else + return simde_mm_add_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_epi32(a, b) simde_mm_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i16 = vpadd_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vadd_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) + + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] + a_.i16[1]; + r_.i16[1] = a_.i16[2] + a_.i16[3]; + r_.i16[2] = b_.i16[0] + b_.i16[1]; + r_.i16[3] = b_.i16[2] + b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi16(a, b) simde_mm_hadd_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadd_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadd_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_i32 = vpadd_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vadd_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) + + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] + a_.i32[1]; + r_.i32[1] = b_.i32[0] + b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadd_pi32(a, b) simde_mm_hadd_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hadds_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hadds_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqaddq_s16(t.val[0], t.val[1])); + #else + return simde_mm_adds_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_epi16(a, b) simde_mm_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hadds_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hadds_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqadd_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < 
((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + int32_t ta = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + r_.i16[ i ] = HEDLEY_LIKELY(ta > INT16_MIN) ? (HEDLEY_LIKELY(ta < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ta) : INT16_MAX) : INT16_MIN; + int32_t tb = HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]) + HEDLEY_STATIC_CAST(int32_t, b_.i16[(i * 2) + 1]); + r_.i16[i + 2] = HEDLEY_LIKELY(tb > INT16_MIN) ? (HEDLEY_LIKELY(tb < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, tb) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hadds_pi16(a, b) simde_mm_hadds_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi16(a, b) simde_mm_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsub_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsub_epi32(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4x2_t t = vuzpq_s32(simde__m128i_to_neon_i32(a), simde__m128i_to_neon_i32(b)); + return simde__m128i_from_neon_i32(vsubq_s32(t.val[0], t.val[1])); + #else + return simde_mm_sub_epi32(simde_x_mm_deinterleaveeven_epi32(a, b), simde_x_mm_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_epi32(a, b) simde_mm_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vsub_s16(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 2, 4, 6) - + SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 1, 3, 5, 7); + #else + r_.i16[0] = a_.i16[0] - a_.i16[1]; + r_.i16[1] = a_.i16[2] - a_.i16[3]; + r_.i16[2] = b_.i16[0] - b_.i16[1]; + r_.i16[3] = b_.i16[2] - b_.i16[3]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi16(a, b) simde_mm_hsub_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsub_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsub_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x2x2_t t = vuzp_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vsub_s32(t.val[0], t.val[1]); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = + SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2) - + SIMDE_SHUFFLE_VECTOR_(32, 8, 
a_.i32, b_.i32, 1, 3); + #else + r_.i32[0] = a_.i32[0] - a_.i32[1]; + r_.i32[1] = b_.i32[0] - b_.i32[1]; + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsub_pi32(a, b) simde_mm_hsub_pi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_hsubs_epi16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8x2_t t = vuzpq_s16(simde__m128i_to_neon_i16(a), simde__m128i_to_neon_i16(b)); + return simde__m128i_from_neon_i16(vqsubq_s16(t.val[0], t.val[1])); + #else + return simde_mm_subs_epi16(simde_x_mm_deinterleaveeven_epi16(a, b), simde_x_mm_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_epi16(a, b) simde_mm_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_hsubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_hsubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x4x2_t t = vuzp_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vqsub_s16(t.val[0], t.val[1]); + #else + for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { + r_.i16[ i ] = simde_math_subs_i16(a_.i16[i * 2], a_.i16[(i * 2) + 1]); + r_.i16[i + 2] = simde_math_subs_i16(b_.i16[i * 2], b_.i16[(i * 2) + 1]); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_hsubs_pi16(a, b) simde_mm_hsubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddubs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Zero extend a */ + int16x8_t a_odd = vreinterpretq_s16_u16(vshrq_n_u16(a_.neon_u16, 8)); + int16x8_t a_even = vreinterpretq_s16_u16(vbicq_u16(a_.neon_u16, vdupq_n_u16(0xff00))); + + /* Sign extend by shifting left then shifting right. */ + int16x8_t b_even = vshrq_n_s16(vshlq_n_s16(b_.neon_i16, 8), 8); + int16x8_t b_odd = vshrq_n_s16(b_.neon_i16, 8); + + /* multiply */ + int16x8_t prod1 = vmulq_s16(a_even, b_even); + int16x8_t prod2 = vmulq_s16(a_odd, b_odd); + + /* saturated add */ + r_.neon_i16 = vqaddq_s16(prod1, prod2); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_epi16(a, b) simde_mm_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_maddubs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_maddubs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t ai = vreinterpretq_s16_u16(vmovl_u8(a_.neon_u8)); + int16x8_t bi = vmovl_s8(b_.neon_i8); + int16x8_t p = vmulq_s16(ai, bi); + int16x4_t l = vget_low_s16(p); + int16x4_t h = vget_high_s16(p); + r_.neon_i16 = vqadd_s16(vuzp1_s16(l, h), vuzp2_s16(l, h)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_maddubs_pi16(a, b) simde_mm_maddubs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mulhrs_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_mulhrs_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul_lo = vmull_s16(vget_low_s16(a_.neon_i16), + vget_low_s16(b_.neon_i16)); + int32x4_t mul_hi = vmull_s16(vget_high_s16(a_.neon_i16), + vget_high_s16(b_.neon_i16)); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow_lo = vrshrn_n_s32(mul_lo, 15); + int16x4_t narrow_hi = vrshrn_n_s32(mul_hi, 15); + + /* Join together */ + r_.neon_i16 = vcombine_s16(narrow_lo, narrow_hi); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t __lo = wasm_i32x4_mul(wasm_i32x4_extend_low_i16x8(a_.wasm_v128), wasm_i32x4_extend_low_i16x8(b_.wasm_v128)); + v128_t __hi = wasm_i32x4_mul(wasm_i32x4_extend_high_i16x8(a_.wasm_v128), wasm_i32x4_extend_high_i16x8(b_.wasm_v128)); + const v128_t __inc = wasm_i32x4_splat(0x4000); + __lo = wasm_i32x4_add(__lo, __inc); + __hi = wasm_i32x4_add(__hi, __inc); + __lo = wasm_i32x4_add(__lo, __lo); + __hi = wasm_i32x4_add(__hi, __hi); + r_.wasm_v128 = wasm_i16x8_shuffle(__lo, __hi, 1, 3, 5, 7, 9, 11, 13, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_epi16(a, b) simde_mm_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_mulhrs_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_mulhrs_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + 
+ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Multiply */ + int32x4_t mul = vmull_s16(a_.neon_i16, b_.neon_i16); + + /* Rounding narrowing shift right + * narrow = (int16_t)((mul + 16384) >> 15); */ + int16x4_t narrow = vrshrn_n_s32(mul, 15); + + /* Join together */ + r_.neon_i16 = narrow; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_mulhrs_pi16(a, b) simde_mm_mulhrs_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t aneg_mask = vreinterpretq_u8_s8(vshrq_n_s8(b_.neon_i8, 7)); + uint8x16_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s8(b_.neon_i8); + #else + bnz_mask = vceqq_s8(b_.neon_i8, vdupq_n_s8(0)); + #endif + bnz_mask = vmvnq_u8(bnz_mask); + + r_.neon_i8 = vbslq_s8(aneg_mask, vnegq_s8(a_.neon_i8), vandq_s8(a_.neon_i8, vreinterpretq_s8_u8(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = wasm_i8x16_shr(b_.wasm_v128, 7); + simde__m128i zeromask = simde_mm_cmpeq_epi8(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi8(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi8(a, b) simde_mm_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t aneg_mask = vreinterpretq_u16_s16(vshrq_n_s16(b_.neon_i16, 15)); + uint16x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s16(b_.neon_i16); + #else + bnz_mask = vceqq_s16(b_.neon_i16, vdupq_n_s16(0)); + #endif + bnz_mask = vmvnq_u16(bnz_mask); + + r_.neon_i16 = vbslq_s16(aneg_mask, vnegq_s16(a_.neon_i16), vandq_s16(a_.neon_i16, vreinterpretq_s16_u16(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi16(b_.wasm_v128, 15); + simde__m128i zeromask = simde_mm_cmpeq_epi16(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi16(a_.wasm_v128, mask), mask)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? (- a_.i16[i]) : ((b_.i16[i] != 0) ? 
(a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi16(a, b) simde_mm_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sign_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_sign_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t aneg_mask = vreinterpretq_u32_s32(vshrq_n_s32(b_.neon_i32, 31)); + uint32x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqzq_s32(b_.neon_i32); + #else + bnz_mask = vceqq_s32(b_.neon_i32, vdupq_n_s32(0)); + #endif + bnz_mask = vmvnq_u32(bnz_mask); + + r_.neon_i32 = vbslq_s32(aneg_mask, vnegq_s32(a_.neon_i32), vandq_s32(a_.neon_i32, vreinterpretq_s32_u32(bnz_mask))); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + simde__m128i mask = simde_mm_srai_epi32(b_.wasm_v128, 31); + simde__m128i zeromask = simde_mm_cmpeq_epi32(b_.wasm_v128, simde_mm_setzero_si128()); + r_.wasm_v128 = simde_mm_andnot_si128(zeromask, simde_mm_xor_si128(simde_mm_add_epi32(a_.wasm_v128, mask), mask)); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] != 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_epi32(a, b) simde_mm_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi8 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi8(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x8_t aneg_mask = vreinterpret_u8_s8(vshr_n_s8(b_.neon_i8, 7)); + uint8x8_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s8(b_.neon_i8); + #else + bnz_mask = vceq_s8(b_.neon_i8, vdup_n_s8(0)); + #endif + bnz_mask = vmvn_u8(bnz_mask); + + r_.neon_i8 = vbsl_s8(aneg_mask, vneg_s8(a_.neon_i8), vand_s8(a_.neon_i8, vreinterpret_s8_u8(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < 0) ? (- a_.i8[i]) : ((b_.i8[i] != 0) ? (a_.i8[i]) : INT8_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi8(a, b) simde_mm_sign_pi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi16 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi16(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x4_t aneg_mask = vreinterpret_u16_s16(vshr_n_s16(b_.neon_i16, 15)); + uint16x4_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s16(b_.neon_i16); + #else + bnz_mask = vceq_s16(b_.neon_i16, vdup_n_s16(0)); + #endif + bnz_mask = vmvn_u16(bnz_mask); + + r_.neon_i16 = vbsl_s16(aneg_mask, vneg_s16(a_.neon_i16), vand_s16(a_.neon_i16, vreinterpret_s16_u16(bnz_mask))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < 0) ? 
(- a_.i16[i]) : ((b_.i16[i] > 0) ? (a_.i16[i]) : INT16_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi16(a, b) simde_mm_sign_pi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m64 +simde_mm_sign_pi32 (simde__m64 a, simde__m64 b) { + #if defined(SIMDE_X86_SSSE3_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) + return _mm_sign_pi32(a, b); + #else + simde__m64_private + r_, + a_ = simde__m64_to_private(a), + b_ = simde__m64_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x2_t aneg_mask = vreinterpret_u32_s32(vshr_n_s32(b_.neon_i32, 31)); + uint32x2_t bnz_mask; + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + bnz_mask = vceqz_s32(b_.neon_i32); + #else + bnz_mask = vceq_s32(b_.neon_i32, vdup_n_s32(0)); + #endif + bnz_mask = vmvn_u32(bnz_mask); + + r_.neon_i32 = vbsl_s32(aneg_mask, vneg_s32(a_.neon_i32), vand_s32(a_.neon_i32, vreinterpret_s32_u32(bnz_mask))); + #else + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (b_.i32[i] < 0) ? (- a_.i32[i]) : ((b_.i32[i] > 0) ? (a_.i32[i]) : INT32_C(0)); + } + #endif + + return simde__m64_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES) +# define _mm_sign_pi32(a, b) simde_mm_sign_pi32(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE2_H) */ +/* :: End simde/x86/ssse3.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if !defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = ((imm8 >> i) & 1) ? b_.u16[i] : a_.u16[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_epi16(a, b, imm8) _mm_blend_epi16(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_epi16(a, b, imm8) \ + (__extension__ ({ \ + simde__m128i_private \ + simde_mm_blend_epi16_a_ = simde__m128i_to_private(a), \ + simde_mm_blend_epi16_b_ = simde__m128i_to_private(b), \ + simde_mm_blend_epi16_r_; \ + \ + simde_mm_blend_epi16_r_.i16 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 16, 16, \ + simde_mm_blend_epi16_a_.i16, \ + simde_mm_blend_epi16_b_.i16, \ + ((imm8) & (1 << 0)) ? 8 : 0, \ + ((imm8) & (1 << 1)) ? 9 : 1, \ + ((imm8) & (1 << 2)) ? 10 : 2, \ + ((imm8) & (1 << 3)) ? 11 : 3, \ + ((imm8) & (1 << 4)) ? 12 : 4, \ + ((imm8) & (1 << 5)) ? 13 : 5, \ + ((imm8) & (1 << 6)) ? 14 : 6, \ + ((imm8) & (1 << 7)) ? 15 : 7 \ + ); \ + \ + simde__m128i_from_private(simde_mm_blend_epi16_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi16 + #define _mm_blend_epi16(a, b, imm8) simde_mm_blend_epi16(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blend_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_blend_pd(a, b, imm8) _mm_blend_pd(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_pd(a, b, imm8) \ + (__extension__ ({ \ + simde__m128d_private \ + simde_mm_blend_pd_a_ = simde__m128d_to_private(a), \ + simde_mm_blend_pd_b_ = simde__m128d_to_private(b), \ + simde_mm_blend_pd_r_; \ + \ + simde_mm_blend_pd_r_.f64 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 64, 16, \ + simde_mm_blend_pd_a_.f64, \ + simde_mm_blend_pd_b_.f64, \ + ((imm8) & (1 << 0)) ? 2 : 0, \ + ((imm8) & (1 << 1)) ? 3 : 1 \ + ); \ + \ + simde__m128d_from_private(simde_mm_blend_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_pd + #define _mm_blend_pd(a, b, imm8) simde_mm_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blend_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_blend_ps(a, b, imm8) _mm_blend_ps(a, b, imm8) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm_blend_ps(a, b, imm8) \ + (__extension__ ({ \ + simde__m128_private \ + simde_mm_blend_ps_a_ = simde__m128_to_private(a), \ + simde_mm_blend_ps_b_ = simde__m128_to_private(b), \ + simde_mm_blend_ps_r_; \ + \ + simde_mm_blend_ps_r_.f32 = \ + SIMDE_SHUFFLE_VECTOR_( \ + 32, 16, \ + simde_mm_blend_ps_a_.f32, \ + simde_mm_blend_ps_b_.f32, \ + ((imm8) & (1 << 0)) ? 4 : 0, \ + ((imm8) & (1 << 1)) ? 5 : 1, \ + ((imm8) & (1 << 2)) ? 6 : 2, \ + ((imm8) & (1 << 3)) ? 
7 : 3 \ + ); \ + \ + simde__m128_from_private(simde_mm_blend_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_ps + #define _mm_blend_ps(a, b, imm8) simde_mm_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blendv_epi8 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_epi8(a, b, mask); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(_mm_setzero_si128(), mask); + return _mm_xor_si128(_mm_subs_epu8(_mm_xor_si128(a, b), m), b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* Use a signed shift right to create a mask with the sign bit */ + mask_.neon_i8 = vshrq_n_s8(mask_.neon_i8, 7); + r_.neon_i8 = vbslq_s8(mask_.neon_u8, b_.neon_i8, a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i8x16_shr(mask_.wasm_v128, 7); + r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, m); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_sel(a_.altivec_i8, b_.altivec_i8, vec_cmplt(mask_.altivec_i8, vec_splat_s8(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + /* https://software.intel.com/en-us/forums/intel-c-compiler/topic/850087 */ + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i8) z = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i8), mask_.i8 < z); + #else + mask_.i8 >>= (CHAR_BIT * sizeof(mask_.i8[0])) - 1; + #endif + + r_.i8 = (mask_.i8 & b_.i8) | (~mask_.i8 & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t m = mask_.i8[i] >> 7; + r_.i8[i] = (m & b_.i8[i]) | (~m & a_.i8[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_epi8 + #define _mm_blendv_epi8(a, b, mask) simde_mm_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi16 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE2_NATIVE) + mask = simde_mm_srai_epi16(mask, 15); + return simde_mm_or_si128(simde_mm_and_si128(mask, b), simde_mm_andnot_si128(mask, a)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi16(mask, simde_mm_setzero_si128())); + r_.neon_i16 = vbslq_s16(mask_.neon_u16, b_.neon_i16, a_.neon_i16); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i16 = vec_sel(a_.altivec_i16, b_.altivec_i16, vec_cmplt(mask_.altivec_i16, vec_splat_s16(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i16) z = { 0, 0, 0, 0, 0, 0, 0, 0 }; + mask_.i16 = mask_.i16 < z; + #else + mask_.i16 >>= (CHAR_BIT * sizeof(mask_.i16[0])) - 1; + #endif + + r_.i16 = (mask_.i16 & b_.i16) | (~mask_.i16 & a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int16_t m = mask_.i16[i] >> 15; + r_.i16[i] = (m & b_.i16[i]) | (~m & a_.i16[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi32 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), _mm_castsi128_ps(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_ = simde__m128i_to_private(simde_mm_cmplt_epi32(mask, simde_mm_setzero_si128())); + r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i32x4_shr(mask_.wasm_v128, 31); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, vec_cmplt(mask_.altivec_i32, vec_splat_s32(0))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i32) z = { 0, 0, 0, 0 }; + mask_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i32), mask_.i32 < z); + #else + mask_.i32 >>= (CHAR_BIT * sizeof(mask_.i32[0])) - 1; + #endif + + r_.i32 = (mask_.i32 & b_.i32) | (~mask_.i32 & a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t m = mask_.i32[i] >> 31; + r_.i32[i] = (m & b_.i32[i]) | (~m & a_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_blendv_epi64 (simde__m128i a, simde__m128i b, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_castpd_si128(_mm_blendv_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b), _mm_castsi128_pd(mask))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + mask_.neon_u64 = vcltq_s64(mask_.neon_i64, vdupq_n_s64(UINT64_C(0))); + r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_i64x2_shr(mask_.wasm_v128, 63); + r_.wasm_v128 = wasm_v128_or(wasm_v128_and(b_.wasm_v128, m), wasm_v128_andnot(a_.wasm_v128, m)); + #elif (defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_BUG_CLANG_46770)) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i64 = vec_sel(a_.altivec_i64, b_.altivec_i64, vec_cmplt(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(signed long long, 0)))); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long) selector = vec_sra(mask_.altivec_i64, vec_splats(HEDLEY_STATIC_CAST(unsigned long long, 63))); + r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), selector)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + #if defined(HEDLEY_INTEL_VERSION_CHECK) + __typeof__(mask_.i64) z = { 0, 0 }; + mask_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(mask_.i64), mask_.i64 < z); + #else + mask_.i64 >>= (CHAR_BIT * sizeof(mask_.i64[0])) - 1; + #endif + + r_.i64 = (mask_.i64 & b_.i64) | (~mask_.i64 & a_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t m = mask_.i64[i] >> 63; + r_.i64[i] = (m & b_.i64[i]) | (~m & a_.i64[i]); + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_blendv_pd (simde__m128d a, simde__m128d b, simde__m128d mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_pd(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i64x2_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 63); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_pd(simde_x_mm_blendv_epi64(simde_mm_castpd_si128(a), simde_mm_castpd_si128(b), simde_mm_castpd_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_pd + #define _mm_blendv_pd(a, b, mask) simde_mm_blendv_pd(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_blendv_ps (simde__m128 a, simde__m128 b, simde__m128 mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_blendv_ps(a, b, mask); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m_ = wasm_i32x4_shr(HEDLEY_REINTERPRET_CAST(v128_t, mask), 31); + return simde__m128d_from_wasm_v128(wasm_v128_bitselect(simde__m128d_to_wasm_v128(b), simde__m128d_to_wasm_v128(a), m_)); + #else + return simde_mm_castsi128_ps(simde_x_mm_blendv_epi32(simde_mm_castps_si128(a), simde_mm_castps_si128(b), simde_mm_castps_si128(mask))); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_blendv_ps + #define _mm_blendv_ps(a, b, mask) simde_mm_blendv_ps(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_pd (simde__m128d a, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + /* For architectures which lack a current direction SIMD instruction. */ + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) + rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; + #endif + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + case SIMDE_MM_FROUND_CUR_DIRECTION: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndiq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEAREST_INT: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_round(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndaq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_nearest(a_.wasm_v128); + #elif defined(simde_math_roundeven) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_roundeven(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_NEG_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), 
vec_floor(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndmq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_floor(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + #endif + break; + + case SIMDE_MM_FROUND_TO_POS_INF: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_ceil(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndpq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_ceil(a_.wasm_v128); + #elif defined(simde_math_ceil) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + #endif + break; + + case SIMDE_MM_FROUND_TO_ZERO: + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_trunc(a_.altivec_f64)); + #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vrndq_f64(a_.neon_f64); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_trunc(a_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + #endif + break; + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_pd(a, rounding) _mm_round_pd(a, rounding) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_pd + #define _mm_round_pd(a, rounding) simde_mm_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_ceil(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_pd + #define _mm_ceil_pd(a) simde_mm_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_ceil(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ps + #define _mm_ceil_ps(a) simde_mm_ceil_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_ceil_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128d_to_private(simde_mm_set_pd(a_.f64[1], simde_math_ceil(b_.f64[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_sd + #define _mm_ceil_sd(a, b) simde_mm_ceil_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_ceil_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_ceil_ss(a, 
b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_ceil_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_ceil_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_ceilf) + r_ = simde__m128_to_private(simde_mm_set_ps(a_.f32[3], a_.f32[2], a_.f32[1], simde_math_ceilf(b_.f32[0]))); + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_ceil_ss + #define _mm_ceil_ss(a, b) simde_mm_ceil_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cmpeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* (a == b) -> (a_lo == b_lo) && (a_hi == b_hi) */ + uint32x4_t cmp = vceqq_u32(a_.neon_u32, b_.neon_u32); + uint32x4_t swapped = vrev64q_u32(cmp); + r_.neon_u32 = vandq_u32(cmp, swapped); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_i64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), vec_cmpeq(a_.altivec_i64, b_.altivec_i64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~UINT64_C(0) : UINT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpeq_epi64 + #define _mm_cmpeq_epi64(a, b) simde_mm_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_i16 = s16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extend_low_i8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, 0, -1, 1, -1, 2, -1, 3, + -1, 4, -1, 5, -1, 6, -1, 7)); + r_.i16 >>= 8; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi16 + #define _mm_cvtepi8_epi16(a) simde_mm_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) 
+ __m128i tmp = _mm_unpacklo_epi8(a, a); + tmp = _mm_unpacklo_epi16(tmp, tmp); + return _mm_srai_epi32(tmp, 24); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx DCBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000D 000C 000B 000A */ + r_.neon_i32 = s32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, 0, -1, -1, -1, 1, + -1, -1, -1, 2, -1, -1, -1, 3)); + r_.i32 >>= 24; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi32 + #define _mm_cvtepi8_epi32(a) simde_mm_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi8_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t s8x16 = a_.neon_i8; /* xxxx xxxx xxxx xxBA */ + int16x8_t s16x8 = vmovl_s8(vget_low_s8(s8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t extra = wasm_i32x4_extend_low_i16x8(wasm_i16x8_extend_low_i8x16(a_.wasm_v128)); + v128_t sign = wasm_i32x4_gt(wasm_i64x2_const(0, 0), extra); + r_.wasm_v128 = wasm_i32x4_shuffle(extra, sign, 0, 4, 1, 5); + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + /* Disabled on x86 due to lack of 64-bit arithmetic shift until + * until AVX-512 (at which point we would be using the native + * _mm_cvtepi_epi64 anyways). 
*/ + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, a_.i8, + -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, -1, -1, 1)); + r_.i64 >>= 56; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi8_epi64 + #define _mm_cvtepi8_epi64(a) simde_mm_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi8(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + r_.neon_u16 = u16x8; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extend_low_u8x16(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 16, 1, 17, 2, 18, 3, 19, + 4, 20, 5, 21, 6, 22, 7, 23)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi16 + #define _mm_cvtepu8_epi16(a) simde_mm_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi32(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x03), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x02), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx DCBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0D0C 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000D 000C 000B 000A */ + r_.neon_u32 = u32x4; + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(wasm_u16x8_extend_low_u8x16(a_.wasm_v128)); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 
18, 19, 1, 21, 22, 23, + 2, 25, 26, 27, 3, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi32 + #define _mm_cvtepu8_epi32(a) simde_mm_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu8_epi64(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + __m128i s = _mm_set_epi8( + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x01), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), + HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x80), HEDLEY_STATIC_CAST(char, 0x00)); + return _mm_shuffle_epi8(a, s); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(_mm_unpacklo_epi8(a, z), z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t u8x16 = a_.neon_u8; /* xxxx xxxx xxxx xxBA */ + uint16x8_t u16x8 = vmovl_u8(vget_low_u8(u8x16)); /* 0x0x 0x0x 0x0x 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.i8) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, z, + 0, 17, 18, 19, 20, 21, 22, 23, + 1, 25, 26, 27, 28, 29, 30, 31)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu8_epi64 + #define _mm_cvtepu8_epi64(a) simde_mm_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_srai_epi32(_mm_unpacklo_epi16(a, a), 16); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmovl_s16(vget_low_s16(a_.neon_i16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extend_low_i16x8(a_.wasm_v128); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, 8, 0, 10, 1, 12, 2, 14, 3)); + r_.i32 >>= 16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi32 + #define _mm_cvtepi16_epi32(a) simde_mm_cvtepi16_epi32(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi32(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi16(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmovl_u16(vget_low_u16(a_.neon_u16)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extend_low_u16x8(a_.wasm_v128); + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 1, 11, 2, 13, 3, 15)); + #elif defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_CLANG_45541) && (!defined(SIMDE_ARCH_POWER) || !defined(__clang__)) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi32 + #define _mm_cvtepu16_epi32(a) simde_mm_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu16_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i z = _mm_setzero_si128(); + return _mm_unpacklo_epi32(_mm_unpacklo_epi16(a, z), z); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t u16x8 = a_.neon_u16; /* xxxx xxxx xxxx 0B0A */ + uint32x4_t u32x4 = vmovl_u16(vget_low_u16(u16x8)); /* 000x 000x 000B 000A */ + uint64x2_t u64x2 = vmovl_u32(vget_low_u32(u32x4)); /* 0000 000B 0000 000A */ + r_.neon_u64 = u64x2; + #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u16) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.u16, z, + 0, 9, 10, 11, + 1, 13, 14, 15)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu16_epi64 + #define _mm_cvtepu16_epi64(a) simde_mm_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi16_epi64(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t s16x8 = a_.neon_i16; /* xxxx xxxx xxxx 0B0A */ + int32x4_t s32x4 = vmovl_s16(vget_low_s16(s16x8)); /* 000x 000x 000B 000A */ + int64x2_t s64x2 = vmovl_s32(vget_low_s32(s32x4)); /* 0000 000B 0000 000A */ + r_.neon_i64 = s64x2; + #elif (!defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, a_.i16, + 8, 9, 10, 0, + 12, 13, 14, 1)); + r_.i64 >>= 48; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + 
#endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi16_epi64 + #define _mm_cvtepi16_epi64(a) simde_mm_cvtepi16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepi32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i tmp = _mm_shuffle_epi32(a, 0x50); + tmp = _mm_srai_epi32(tmp, 31); + tmp = _mm_shuffle_epi32(tmp, 0xed); + return _mm_unpacklo_epi32(a, tmp); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vmovl_s32(vget_low_s32(a_.neon_i32)); + #elif !defined(SIMDE_ARCH_X86) && defined(SIMDE_SHUFFLE_VECTOR_) && defined(SIMDE_VECTOR_SCALAR) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, a_.i32, -1, 0, -1, 1)); + r_.i64 >>= 32; + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepi32_epi64 + #define _mm_cvtepi32_epi64(a) simde_mm_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_cvtepu32_epi64(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_unpacklo_epi32(a, _mm_setzero_si128()); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vmovl_u32(vget_low_u32(a_.neon_u32)); + #elif defined(SIMDE_VECTOR_SCALAR) && defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) + __typeof__(r_.u32) z = { 0, }; + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 1, 6)); + #elif defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_cvtepu32_epi64 + #define _mm_cvtepu32_epi64(a) simde_mm_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_dp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); + + switch (imm8) { + case 0xff: + r_.neon_f64 = vaddq_f64(r_.neon_f64, vextq_f64(r_.neon_f64, r_.neon_f64, 1)); + break; + case 0x13: + r_.neon_f64 = vdupq_lane_f64(vget_low_f64(r_.neon_f64), 0); + break; + default: + { /* imm8 is a compile-time constant, so this all becomes just a load */ + uint64_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & (1 << 5)) ? ~UINT64_C(0) : UINT64_C(0), + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + + r_.neon_f64 = vdupq_n_f64(vaddvq_f64(r_.neon_f64)); + + { + uint64_t mask_data[] = { + (imm8 & 1) ? ~UINT64_C(0) : UINT64_C(0), + (imm8 & 2) ? 
~UINT64_C(0) : UINT64_C(0) + }; + r_.neon_f64 = vreinterpretq_f64_u64(vandq_u64(vld1q_u64(mask_data), vreinterpretq_u64_f64(r_.neon_f64))); + } + break; + } + #else + simde_float64 sum = SIMDE_FLOAT64_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f64[i] * b_.f64[i]) : 0.0; + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? sum : 0.0; + } + #endif + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_dp_pd(a, b, imm8) _mm_dp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_pd + #define _mm_dp_pd(a, b, imm8) simde_mm_dp_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_dp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); + + switch (imm8) { + case 0xff: + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + case 0x7f: + r_.neon_f32 = vsetq_lane_f32(0, r_.neon_f32, 3); + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + break; + default: + { + { + uint32_t mask_data[] = { + (imm8 & (1 << 4)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 5)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 6)) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & (1 << 7)) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + + r_.neon_f32 = vdupq_n_f32(vaddvq_f32(r_.neon_f32)); + + { + uint32_t mask_data[] = { + (imm8 & 1) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 2) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 4) ? ~UINT32_C(0) : UINT32_C(0), + (imm8 & 8) ? ~UINT32_C(0) : UINT32_C(0) + }; + r_.neon_f32 = vreinterpretq_f32_u32(vandq_u32(vld1q_u32(mask_data), vreinterpretq_u32_f32(r_.neon_f32))); + } + } + break; + } + #else + simde_float32 sum = SIMDE_FLOAT32_C(0.0); + + SIMDE_VECTORIZE_REDUCTION(+:sum) + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + sum += ((imm8 >> (i + 4)) & 1) ? (a_.f32[i] * b_.f32[i]) : SIMDE_FLOAT32_C(0.0); + } + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? 
sum : SIMDE_FLOAT32_C(0.0); + } + #endif + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_dp_ps(a, b, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_dp_ps((a), (b), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_dp_ps(a, b, imm8) _mm_dp_ps(a, b, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_dp_ps + #define _mm_dp_ps(a, b, imm8) simde_mm_dp_ps(a, b, imm8) +#endif + +#if defined(simde_mm_extract_epi8) +# undef simde_mm_extract_epi8 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_mm_extract_epi8 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i8, imm8); + #else + return a_.i8[imm8 & 15]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8) +# define simde_mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int8_t, _mm_extract_epi8(a, imm8)) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi8(a, imm8) vgetq_lane_s8(simde__m128i_to_neon_i8(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi8(a, imm8) wasm_u8x16_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi8 + #define _mm_extract_epi8(a, imm8) HEDLEY_STATIC_CAST(int, simde_mm_extract_epi8(a, imm8)) +#endif + +#if defined(simde_mm_extract_epi32) +# undef simde_mm_extract_epi32 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_epi32 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i32, imm8); + #else + return a_.i32[imm8 & 3]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_extract_epi32(a, imm8) _mm_extract_epi32(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi32(a, imm8) vgetq_lane_s32(simde__m128i_to_neon_i32(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) +# define simde_mm_extract_epi32(a, imm8) HEDLEY_STATIC_CAST(int32_t, vec_extract(simde__m128i_to_altivec_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_extract_epi32(a, imm8) wasm_i32x4_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_epi32 + #define _mm_extract_epi32(a, imm8) simde_mm_extract_epi32(a, imm8) +#endif + +#if defined(simde_mm_extract_epi64) +# undef simde_mm_extract_epi64 +#endif +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm_extract_epi64 (simde__m128i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m128i_private + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + #if defined(SIMDE_BUG_GCC_95227) + (void) a_; + (void) imm8; + #endif + return vec_extract(a_.altivec_i64, imm8); + #else + return a_.i64[imm8 & 1]; + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_extract_epi64(a, imm8) _mm_extract_epi64(a, imm8) +#elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) vgetq_lane_s64(simde__m128i_to_neon_i64(a), imm8) +#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) +# define simde_mm_extract_epi64(a, imm8) HEDLEY_STATIC_CAST(int64_t, vec_extract(simde__m128i_to_altivec_i64(a), imm8)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_extract_epi64 + #define _mm_extract_epi64(a, imm8) simde_mm_extract_epi64(a, imm8) +#endif + +#if defined(simde_mm_extract_ps) +# undef simde_mm_extract_ps +#endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm_extract_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128_private + a_ = simde__m128_to_private(a); + + return a_.i32[imm8 & 3]; +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_extract_ps(a, imm8) _mm_extract_ps(a, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_mm_extract_ps(a, imm8) vgetq_lane_s32(simde__m128_to_neon_i32(a), imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) + #define simde_mm_extract_ps(a, imm8) wasm_i32x4_extract_lane(simde__m128_to_wasm_v128((a)), (imm8) & 3) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_extract_ps + #define _mm_extract_ps(a, imm8) simde_mm_extract_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_pd (simde__m128d a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128d_from_wasm_v128(wasm_f64x2_floor(simde__m128d_to_wasm_v128(a))); + #endif + return simde_mm_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_pd + #define _mm_floor_pd(a) simde_mm_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ps (simde__m128 a) { + #if defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_f32x4_floor(simde__m128_to_wasm_v128(a))); + #endif + return simde_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ps + #define _mm_floor_ps(a) simde_mm_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_floor_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_sd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_floor) + r_.f64[0] = simde_math_floor(b_.f64[0]); + r_.f64[1] = a_.f64[1]; + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_sd + #define _mm_floor_sd(a, b) simde_mm_floor_sd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_floor_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_floor_ss(a, b); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) + return simde_mm_move_ss(a, simde_mm_floor_ps(b)); + #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) + return simde_mm_move_ss(a, simde_mm_floor_ps(simde_x_mm_broadcastlow_ps(b))); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_floorf) + r_.f32[0] = simde_math_floorf(b_.f32[0]); + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i]; + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return 
simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_floor_ss + #define _mm_floor_ss(a, b) simde_mm_floor_ss(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi8 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i8[imm8] = HEDLEY_STATIC_CAST(int8_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + /* clang-3.8 returns an incompatible type, so we need the cast. MSVC + * can't handle the cast ("error C2440: 'type cast': cannot convert + * from '__m128i' to '__m128i'"). */ + #if defined(__clang__) + #define simde_mm_insert_epi8(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi8(a, i, imm8)) + #else + #define simde_mm_insert_epi8(a, i, imm8) _mm_insert_epi8(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_neon_i8(vsetq_lane_s8(i, simde__m128i_to_neon_i8(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi8(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i8x16_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 15, HEDLEY_STATIC_CAST(int8_t, (i)))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi8 + #define _mm_insert_epi8(a, i, imm8) simde_mm_insert_epi8(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi32 (simde__m128i a, int i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i32[imm8] = HEDLEY_STATIC_CAST(int32_t, i); + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #if defined(__clang__) + #define simde_mm_insert_epi32(a, i, imm8) HEDLEY_REINTERPRET_CAST(__m128i, _mm_insert_epi32(a, i, imm8)) + #else + #define simde_mm_insert_epi32(a, i, imm8) _mm_insert_epi32(a, i, imm8) + #endif +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_neon_i32(vsetq_lane_s32(i, simde__m128i_to_neon_i32(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi32(a, i, imm8) simde__m128i_from_wasm_v128(wasm_i32x4_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 3, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_epi32 + #define _mm_insert_epi32(a, i, imm8) simde_mm_insert_epi32(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_insert_epi64 (simde__m128i a, int64_t i, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + #if defined(SIMDE_BUG_GCC_94482) + simde__m128i_private + a_ = simde__m128i_to_private(a); + + switch(imm8) { + case 0: + return simde_mm_set_epi64x(a_.i64[1], i); + break; + case 1: + return simde_mm_set_epi64x(i, a_.i64[0]); + break; + default: + HEDLEY_UNREACHABLE(); + break; + } + #else + simde__m128i_private + r_ = simde__m128i_to_private(a); + + r_.i64[imm8] = i; + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && defined(SIMDE_ARCH_AMD64) +# define simde_mm_insert_epi64(a, i, imm8) _mm_insert_epi64(a, i, imm8) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) simde__m128i_from_neon_i64(vsetq_lane_s64(i, simde__m128i_to_neon_i64(a), imm8)) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_insert_epi64(a, i, imm8) 
simde__m128i_from_wasm_v128(wasm_i64x2_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 1, (i))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm_insert_epi64 + #define _mm_insert_epi64(a, i, imm8) simde_mm_insert_epi64(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_insert_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + float tmp1_ = b_.f32[(imm8 >> 6) & 3]; + a_.f32[(imm8 >> 4) & 3] = tmp1_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1 ) ? SIMDE_FLOAT32_C(0.0) : a_.f32[i]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_insert_ps(a, b, imm8) _mm_insert_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_insert_ps + #define _mm_insert_ps(a, b, imm8) simde_mm_insert_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi8(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi8(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vmaxq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_max(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi8 + #define _mm_max_epi8(a, b) simde_mm_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_max_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + __m128i m = _mm_cmpgt_epi32(a, b); + return _mm_or_si128(_mm_and_si128(m, a), _mm_andnot_si128(m, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmaxq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_max(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? 
a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epi32 + #define _mm_max_epi32(a, b) simde_mm_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_add_epi16(b, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmaxq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_max(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] > b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu16 + #define _mm_max_epu16(a, b) simde_mm_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_max_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_max_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmaxq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_max(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_max(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] > b_.u32[i] ? a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_max_epu32 + #define _mm_max_epu32(a, b) simde_mm_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vminq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i8x16_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i8 = vec_min(a_.altivec_i8, b_.altivec_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? 
a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi8 + #define _mm_min_epi8(a, b) simde_mm_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(__PGI) + return _mm_min_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vminq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_i32 = vec_min(a_.altivec_i32, b_.altivec_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epi32 + #define _mm_min_epi32(a, b) simde_mm_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu16(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://github.com/simd-everywhere/simde/issues/855#issuecomment-881656284 */ + return _mm_sub_epi16(a, _mm_subs_epu16(a, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vminq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u16 = vec_min(a_.altivec_u16, b_.altivec_u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] < b_.u16[i] ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu16 + #define _mm_min_epu16(a, b) simde_mm_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_min_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_min_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vminq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_min(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + r_.altivec_u32 = vec_min(a_.altivec_u32, b_.altivec_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] < b_.u32[i] ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_min_epu32 + #define _mm_min_epu32(a, b) simde_mm_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_minpos_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_minpos_epu16(a); + #else + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()), + a_ = simde__m128i_to_private(a); + + r_.u16[0] = UINT16_MAX; + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (a_.u16[i] < r_.u16[0]) { + r_.u16[0] = a_.u16[i]; + r_.u16[1] = HEDLEY_STATIC_CAST(uint16_t, i); + } + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_minpos_epu16 + #define _mm_minpos_epu16(a) simde_mm_minpos_epu16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mpsadbw_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + const int a_offset = imm8 & 4; + const int b_offset = (imm8 & 3) << 2; + +#if defined(simde_math_abs) + for (int i = 0 ; i < HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0]))) ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 0] - b_.u8[b_offset + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 1] - b_.u8[b_offset + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 2] - b_.u8[b_offset + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset + i + 3] - b_.u8[b_offset + 3]))); + } +#else + HEDLEY_UNREACHABLE(); +#endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_BUG_PGI_30107) +# define simde_mm_mpsadbw_epu8(a, b, imm8) _mm_mpsadbw_epu8(a, b, imm8) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mpsadbw_epu8 + #define _mm_mpsadbw_epu8(a, b, imm8) simde_mm_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mul_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mul_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + // vmull_s32 upcasts instead of masking, so we downcast. 
+ int32x2_t a_lo = vmovn_s64(a_.neon_i64); + int32x2_t b_lo = vmovn_s64(b_.neon_i64); + r_.neon_i64 = vmull_s32(a_lo, b_lo); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_make( + wasm_i32x4_extract_lane(a_.wasm_v128, 0) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 0)), + wasm_i32x4_extract_lane(a_.wasm_v128, 2) * HEDLEY_STATIC_CAST(int64_t, wasm_i32x4_extract_lane(b_.wasm_v128, 2))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mul_epi32 + #define _mm_mul_epi32(a, b) simde_mm_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mullo_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_mullo_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmulq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + (void) a_; + (void) b_; + r_.altivec_i32 = vec_mul(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_mul(a_.wasm_v128, b_.wasm_v128); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (HEDLEY_STATIC_CAST(uint64_t, (HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]))) & 0xffffffff)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_mullo_epi32 + #define _mm_mullo_epi32(a, b) simde_mm_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_x_mm_mullo_epu32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmulq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m128i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_packus_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_packus_epi32(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + const __m128i max = _mm_set1_epi32(UINT16_MAX); + const __m128i tmpa = _mm_andnot_si128(_mm_srai_epi32(a, 31), a); + const __m128i tmpb = _mm_andnot_si128(_mm_srai_epi32(b, 31), b); + return + _mm_packs_epi32( + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpa, _mm_cmpgt_epi32(tmpa, max)), 16), 16), + _mm_srai_epi32(_mm_slli_epi32(_mm_or_si128(tmpb, _mm_cmpgt_epi32(tmpb, max)), 16), 16) + ); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_BUG_CLANG_46840) + r_.neon_u16 = vqmovun_high_s32(vreinterpret_s16_u16(vqmovun_s32(a_.neon_i32)), b_.neon_i32); + #else + r_.neon_u16 = vqmovun_high_s32(vqmovun_s32(a_.neon_i32), b_.neon_i32); + #endif + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = + 
vcombine_u16( + vqmovun_s32(a_.neon_i32), + vqmovun_s32(b_.neon_i32) + ); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r_.altivec_u16 = vec_packsu(a_.altivec_i32, b_.altivec_i32); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + int32_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); + + v &= ~(v >> 31); + v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT16_MAX); + + SIMDE_CONVERT_VECTOR_(r_.i16, v); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? a_.i32[i] : b_.i32[i & 3]; + r_.u16[i] = (v < 0) ? UINT16_C(0) : ((v > UINT16_MAX) ? UINT16_MAX : HEDLEY_STATIC_CAST(uint16_t, v)); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_packus_epi32 + #define _mm_packus_epi32(a, b) simde_mm_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_round_sd (simde__m128d a, simde__m128d b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128d_private + r_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f64[0] = simde_math_nearbyint(b_.f64[0]); + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f64[0] = simde_math_floor(b_.f64[0]); + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f64[0] = simde_math_ceil(b_.f64[0]); + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + r_.f64[0] = simde_math_trunc(b_.f64[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) +# define simde_mm_round_sd(a, b, rounding) _mm_round_sd(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_FAST_EXCEPTIONS) +# define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(b, rounding)) +#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + #define simde_mm_round_sd(a, b, rounding) simde_mm_move_sd(a, simde_mm_round_pd(simde_x_mm_broadcastlow_pd(b), rounding)) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_sd + #define _mm_round_sd(a, b, rounding) simde_mm_round_sd(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_round_ss (simde__m128 a, simde__m128 b, int rounding) + SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) { + simde__m128_private + r_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + case SIMDE_MM_FROUND_CUR_DIRECTION: + r_.f32[0] = simde_math_nearbyintf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + r_.f32[0] = simde_math_floorf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + r_.f32[0] = simde_math_ceilf(b_.f32[0]); + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + 
r_.f32[0] = simde_math_truncf(b_.f32[0]); + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_SSE4_1_NATIVE) + #define simde_mm_round_ss(a, b, rounding) _mm_round_ss(a, b, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 && defined(SIMDE_FAST_EXCEPTIONS) + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps((b), (rounding))) +#elif SIMDE_NATURAL_VECTOR_SIZE > 0 + #define simde_mm_round_ss(a, b, rounding) simde_mm_move_ss((a), simde_mm_round_ps(simde_x_mm_broadcastlow_ps(b), (rounding))) +#endif +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_round_ss + #define _mm_round_ss(a, b, rounding) simde_mm_round_ss(a, b, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_stream_load_si128 (const simde__m128i* mem_addr) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_stream_load_si128(HEDLEY_CONST_CAST(simde__m128i*, mem_addr)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vreinterpretq_s64_s32(vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr))); + #else + return *mem_addr; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_stream_load_si128 + #define _mm_stream_load_si128(mem_addr) simde_mm_stream_load_si128(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_ones (simde__m128i a) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_ones(a); + #else + simde__m128i_private a_ = simde__m128i_to_private(a); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(a_.altivec_i32, vec_splats(~0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = ((vgetq_lane_s64(a_.neon_i64, 0) & vgetq_lane_s64(a_.neon_i64, 1)) == ~HEDLEY_STATIC_CAST(int64_t, 0)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(a_.wasm_v128, 0) & wasm_i64x2_extract_lane(a_.wasm_v128, 1)) == 0xFFFFFFFFFFFFFFFFull; + #else + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_ones + #define _mm_test_all_ones(a) simde_mm_test_all_ones(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_test_all_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_all_zeros(a, mask); + #else + simde__m128i_private tmp_ = simde__m128i_to_private(simde_mm_and_si128(a, mask)); + int r; + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + r = vec_all_eq(tmp_.altivec_i32, vec_splats(0)); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r = !(vgetq_lane_s64(tmp_.neon_i64, 0) | vgetq_lane_s64(tmp_.neon_i64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r = (wasm_i64x2_extract_lane(tmp_.wasm_v128, 0) | wasm_i64x2_extract_lane(tmp_.wasm_v128, 1)) == 0; + #else + int_fast32_t r_ = HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(|:r_) + for (size_t i = 0 ; i < (sizeof(tmp_.i32f) / sizeof(tmp_.i32f[0])) ; i++) { + r_ |= tmp_.i32f[i]; + } + + r = !r_; + #endif + + return r; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_all_zeros + #define _mm_test_all_zeros(a, mask) simde_mm_test_all_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int 
+simde_mm_test_mix_ones_zeros (simde__m128i a, simde__m128i mask) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_test_mix_ones_zeros(a, mask); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + mask_ = simde__m128i_to_private(mask); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(a_.neon_i64, mask_.neon_i64); + int64x2_t s641 = vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(vreinterpretq_s32_s64(a_.neon_i64))), mask_.neon_i64); + return (((vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) & (vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)))!=0); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, mask_.wasm_v128); + long long c0 = wasm_i64x2_extract_lane(m, 0); + long long c1 = wasm_i64x2_extract_lane(m, 1); + long long ones = c0 | c1; + long long zeros = ~(c0 & c1); + return ones && zeros; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) + if (((a_.u64[i] & mask_.u64[i]) != 0) && ((~a_.u64[i] & mask_.u64[i]) != 0)) + return 1; + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_test_mix_ones_zeros + #define _mm_test_mix_ones_zeros(a, mask) simde_mm_test_mix_ones_zeros(a, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + int_fast32_t r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_si128 + #define _mm_testc_si128(a, b) simde_mm_testc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_si128 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testnzc_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s640 = vandq_s64(b_.neon_i64, a_.neon_i64); + int64x2_t s641 = vbicq_s64(b_.neon_i64, a_.neon_i64); + return !( !(vgetq_lane_s64(s641, 0) | vgetq_lane_s64(s641, 1)) \ + | !(vgetq_lane_s64(s640, 0) | vgetq_lane_s64(s640, 1)) ); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m1 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + v128_t m2 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); + return (wasm_i64x2_extract_lane(m1, 0) | wasm_i64x2_extract_lane(m1, 1)) \ + && (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1)); + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if (((a_.u64[i] & b_.u64[i]) != 0) && ((~a_.u64[i] & b_.u64[i]) != 0)) + return 1; + } + + return 0; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_si128 + #define _mm_testnzc_si128(a, b) simde_mm_testnzc_si128(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_si128 (simde__m128i a, simde__m128i b) { 
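+  /* Matches the ZF result of PTEST: the intent is to return 1 when
+   * (a & b) has no bits set at all, and 0 otherwise. */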
+ #if defined(SIMDE_X86_SSE4_1_NATIVE) + return _mm_testz_si128(a, b); + #else + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t s64 = vandq_s64(a_.neon_i64, b_.neon_i64); + return !(vgetq_lane_s64(s64, 0) | vgetq_lane_s64(s64, 1)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); + return (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) == 0; + #else + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + if ((a_.u64[i] & b_.u64[i]) == 0) + return 1; + } + #endif + + return 0; + #endif +} +#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_si128 + #define _mm_testz_si128(a, b) simde_mm_testz_si128(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_1_H) */ +/* :: End simde/x86/sse4.1.h :: */ + +#if defined(__ARM_ACLE) || (defined(__GNUC__) && defined(__ARM_FEATURE_CRC32)) + #include +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #define SIMDE_SIDD_UBYTE_OPS _SIDD_UBYTE_OPS + #define SIMDE_SIDD_UWORD_OPS _SIDD_UWORD_OPS + #define SIMDE_SIDD_SBYTE_OPS _SIDD_SBYTE_OPS + #define SIMDE_SIDD_SWORD_OPS _SIDD_SWORD_OPS + #define SIMDE_SIDD_CMP_EQUAL_ANY _SIDD_CMP_EQUAL_ANY + #define SIMDE_SIDD_CMP_RANGES _SIDD_CMP_RANGES + #define SIMDE_SIDD_CMP_EQUAL_EACH _SIDD_CMP_EQUAL_EACH + #define SIMDE_SIDD_CMP_EQUAL_ORDERED _SIDD_CMP_EQUAL_ORDERED + #define SIMDE_SIDD_POSITIVE_POLARITY _SIDD_POSITIVE_POLARITY + #define SIMDE_SIDD_NEGATIVE_POLARITY _SIDD_NEGATIVE_POLARITY + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY _SIDD_MASKED_POSITIVE_POLARITY + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY _SIDD_MASKED_NEGATIVE_POLARITY + #define SIMDE_SIDD_LEAST_SIGNIFICANT _SIDD_LEAST_SIGNIFICANT + #define SIMDE_SIDD_MOST_SIGNIFICANT _SIDD_MOST_SIGNIFICANT + #define SIMDE_SIDD_BIT_MASK _SIDD_BIT_MASK + #define SIMDE_SIDD_UNIT_MASK _SIDD_UNIT_MASK +#else + #define SIMDE_SIDD_UBYTE_OPS 0x00 + #define SIMDE_SIDD_UWORD_OPS 0x01 + #define SIMDE_SIDD_SBYTE_OPS 0x02 + #define SIMDE_SIDD_SWORD_OPS 0x03 + #define SIMDE_SIDD_CMP_EQUAL_ANY 0x00 + #define SIMDE_SIDD_CMP_RANGES 0x04 + #define SIMDE_SIDD_CMP_EQUAL_EACH 0x08 + #define SIMDE_SIDD_CMP_EQUAL_ORDERED 0x0c + #define SIMDE_SIDD_POSITIVE_POLARITY 0x00 + #define SIMDE_SIDD_NEGATIVE_POLARITY 0x10 + #define SIMDE_SIDD_MASKED_POSITIVE_POLARITY 0x20 + #define SIMDE_SIDD_MASKED_NEGATIVE_POLARITY 0x30 + #define SIMDE_SIDD_LEAST_SIGNIFICANT 0x00 + #define SIMDE_SIDD_MOST_SIGNIFICANT 0x40 + #define SIMDE_SIDD_BIT_MASK 0x00 + #define SIMDE_SIDD_UNIT_MASK 0x40 +#endif + +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) && !defined(_SIDD_UBYTE_OPS) + #define _SIDD_UBYTE_OPS SIMDE_SIDD_UBYTE_OPS + #define _SIDD_UWORD_OPS SIMDE_SIDD_UWORD_OPS + #define _SIDD_SBYTE_OPS SIMDE_SIDD_SBYTE_OPS + #define _SIDD_SWORD_OPS SIMDE_SIDD_SWORD_OPS + #define _SIDD_CMP_EQUAL_ANY SIMDE_SIDD_CMP_EQUAL_ANY + #define _SIDD_CMP_RANGES SIMDE_SIDD_CMP_RANGES + #define _SIDD_CMP_EQUAL_EACH SIMDE_SIDD_CMP_EQUAL_EACH + #define _SIDD_CMP_EQUAL_ORDERED SIMDE_SIDD_CMP_EQUAL_ORDERED + #define _SIDD_POSITIVE_POLARITY SIMDE_SIDD_POSITIVE_POLARITY + #define _SIDD_NEGATIVE_POLARITY SIMDE_SIDD_NEGATIVE_POLARITY + #define _SIDD_MASKED_POSITIVE_POLARITY SIMDE_SIDD_MASKED_POSITIVE_POLARITY + #define _SIDD_MASKED_NEGATIVE_POLARITY SIMDE_SIDD_MASKED_NEGATIVE_POLARITY + #define 
_SIDD_LEAST_SIGNIFICANT SIMDE_SIDD_LEAST_SIGNIFICANT + #define _SIDD_MOST_SIGNIFICANT SIMDE_SIDD_MOST_SIGNIFICANT + #define _SIDD_BIT_MASK SIMDE_SIDD_BIT_MASK + #define _SIDD_UNIT_MASK SIMDE_SIDD_UNIT_MASK +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrs (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return la <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrs(a, la, b, lb, imm8) \ + _mm_cmpestrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrs(a, la, b, lb, imm8) _mm_cmpestrs(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrs + #define _mm_cmpestrs(a, la, b, lb, imm8) simde_mm_cmpestrs(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int simde_mm_cmpestrz (simde__m128i a, int la, simde__m128i b, int lb, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + #if !defined(HEDLEY_PGI_VERSION) + /* https://www.pgroup.com/userforum/viewtopic.php?f=4&p=27590&sid=cf89f8bf30be801831fe4a2ff0a2fa6c */ + (void) a; + (void) b; + #endif + (void) la; + (void) lb; + return lb <= ((128 / ((imm8 & SIMDE_SIDD_UWORD_OPS) ? 16 : 8)) - 1); +} +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpestrz(a, la, b, lb, imm8) \ + _mm_cmpestrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), la, \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), lb, \ + imm8) + #else + #define simde_mm_cmpestrz(a, la, b, lb, imm8) _mm_cmpestrz(a, la, b, lb, imm8) + #endif +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpestrz + #define _mm_cmpestrz(a, la, b, lb, imm8) simde_mm_cmpestrz(a, la, b, lb, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmpgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_cmpgt_epi64(a, b); + #elif defined(SIMDE_X86_SSE2_NATIVE) + /* https://stackoverflow.com/a/65175746/501126 */ + __m128i r = _mm_and_si128(_mm_cmpeq_epi32(a, b), _mm_sub_epi64(b, a)); + r = _mm_or_si128(r, _mm_cmpgt_epi32(a, b)); + return _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) + /* https://stackoverflow.com/a/65223269/501126 */ + r_.neon_i64 = vshrq_n_s64(vqsubq_s64(b_.neon_i64, a_.neon_i64), 63); + #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) + r_.altivec_u64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), vec_cmpgt(a_.altivec_i64, b_.altivec_i64)); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i64x2_gt(a_.wasm_v128, b_.wasm_v128); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpgt_epi64 + #define _mm_cmpgt_epi64(a, b) simde_mm_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_8_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 8) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i8[i]) + a_invalid = 1; + } + return a_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrs_16_(simde__m128i a) { + simde__m128i_private a_= simde__m128i_to_private(a); + const int upper_bound = (128 / 16) - 1; + int a_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!a_.i16[i]) + a_invalid = 1; + } + return a_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrs(a, b, imm8) \ + _mm_cmpistrs( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrs(a, b, imm8) _mm_cmpistrs(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrs(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? simde_mm_cmpistrs_16_((a)) \ + : simde_mm_cmpistrs_8_((a))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrs + #define _mm_cmpistrs(a, b, imm8) simde_mm_cmpistrs(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_8_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 8) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i8[i]) + b_invalid = 1; + } + return b_invalid; +} + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_cmpistrz_16_(simde__m128i b) { + simde__m128i_private b_= simde__m128i_to_private(b); + const int upper_bound = (128 / 16) - 1; + int b_invalid = 0; + SIMDE_VECTORIZE + for (int i = 0 ; i <= upper_bound ; i++) { + if(!b_.i16[i]) + b_invalid = 1; + } + return b_invalid; +} + +#if defined(SIMDE_X86_SSE4_2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_cmpistrz(a, b, imm8) \ + _mm_cmpistrz( \ + HEDLEY_REINTERPRET_CAST(__v16qi, a), \ + HEDLEY_REINTERPRET_CAST(__v16qi, b), \ + imm8) + #else + #define simde_mm_cmpistrz(a, b, imm8) _mm_cmpistrz(a, b, imm8) + #endif +#else + #define simde_mm_cmpistrz(a, b, imm8) \ + (((imm8) & SIMDE_SIDD_UWORD_OPS) \ + ? 
simde_mm_cmpistrz_16_((b)) \ + : simde_mm_cmpistrz_8_((b))) +#endif +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #undef _mm_cmpistrz + #define _mm_cmpistrz(a, b, imm8) simde_mm_cmpistrz(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u8(uint32_t prevcrc, uint8_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u8(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc ^= v; + for(int bit = 0 ; bit < 8 ; bit++) { + if (crc & 1) + crc = (crc >> 1) ^ UINT32_C(0x82f63b78); + else + crc = (crc >> 1); + } + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u8(prevcrc, v) simde_mm_crc32_u8(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u16(uint32_t prevcrc, uint16_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u16(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32ch(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u8(crc, v & 0xff); + crc = simde_mm_crc32_u8(crc, (v >> 8) & 0xff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u16(prevcrc, v) simde_mm_crc32_u16(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_mm_crc32_u32(uint32_t prevcrc, uint32_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) + return _mm_crc32_u32(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(prevcrc, v); + #else + uint32_t crc = prevcrc; + crc = simde_mm_crc32_u16(crc, v & 0xffff); + crc = simde_mm_crc32_u16(crc, (v >> 16) & 0xffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) + #define _mm_crc32_u32(prevcrc, v) simde_mm_crc32_u32(prevcrc, v) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_mm_crc32_u64(uint64_t prevcrc, uint64_t v) { + #if defined(SIMDE_X86_SSE4_2_NATIVE) && defined(SIMDE_ARCH_AMD64) + return _mm_crc32_u64(prevcrc, v); + #else + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(HEDLEY_STATIC_CAST(uint32_t, prevcrc), v); + #else + uint64_t crc = prevcrc; + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), v & 0xffffffff); + crc = simde_mm_crc32_u32(HEDLEY_STATIC_CAST(uint32_t, crc), (v >> 32) & 0xffffffff); + return crc; + #endif + #endif +} +#if defined(SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #define _mm_crc32_u64(prevcrc, v) simde_mm_crc32_u64(prevcrc, v) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_SSE4_2_H) */ +/* :: End simde/x86/sse4.2.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128_private m128_private[2]; + SIMDE_ALIGN_TO_32 simde__m128 m128[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256 n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + #if 
defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128d_private m128d_private[2]; + SIMDE_ALIGN_TO_32 simde__m128d m128d[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256d n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256d_private; + +typedef union { + #if defined(SIMDE_VECTOR_SUBSCRIPT) + SIMDE_ALIGN_TO_32 int8_t i8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int16_t i16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int32_t i32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int64_t i64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint8_t u8 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint16_t u16 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint32_t u32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint64_t u64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_uint128 u128 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 simde_float64 f64 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 int_fast32_t i32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + #else + SIMDE_ALIGN_TO_32 int8_t i8[32]; + SIMDE_ALIGN_TO_32 int16_t i16[16]; + SIMDE_ALIGN_TO_32 int32_t i32[8]; + SIMDE_ALIGN_TO_32 int64_t i64[4]; + SIMDE_ALIGN_TO_32 uint8_t u8[32]; + SIMDE_ALIGN_TO_32 uint16_t u16[16]; + SIMDE_ALIGN_TO_32 uint32_t u32[8]; + SIMDE_ALIGN_TO_32 uint64_t u64[4]; + SIMDE_ALIGN_TO_32 int_fast32_t i32f[32 / sizeof(int_fast32_t)]; + SIMDE_ALIGN_TO_32 uint_fast32_t u32f[32 / sizeof(uint_fast32_t)]; + #if defined(SIMDE_HAVE_INT128_) + SIMDE_ALIGN_TO_32 simde_int128 i128[2]; + SIMDE_ALIGN_TO_32 simde_uint128 u128[2]; + #endif + SIMDE_ALIGN_TO_32 simde_float32 f32[8]; + SIMDE_ALIGN_TO_32 simde_float64 f64[4]; + #endif + + SIMDE_ALIGN_TO_32 simde__m128i_private m128i_private[2]; + SIMDE_ALIGN_TO_32 simde__m128i m128i[2]; + + #if defined(SIMDE_X86_AVX_NATIVE) + SIMDE_ALIGN_TO_32 __m256i n; + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16[2]; + SIMDE_ALIGN_TO_16 
SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32[2]; + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64[2]; + SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64[2]; + #endif + #endif +} simde__m256i_private; + +#if defined(SIMDE_X86_AVX_NATIVE) + typedef __m256 simde__m256; + typedef __m256i simde__m256i; + typedef __m256d simde__m256d; +#elif defined(SIMDE_VECTOR_SUBSCRIPT) + typedef simde_float32 simde__m256 SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef int_fast32_t simde__m256i SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; + typedef simde_float64 simde__m256d SIMDE_ALIGN_TO_32 SIMDE_VECTOR(32) SIMDE_MAY_ALIAS; +#else + typedef simde__m256_private simde__m256; + typedef simde__m256i_private simde__m256i; + typedef simde__m256d_private simde__m256d; +#endif + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #if !defined(HEDLEY_INTEL_VERSION) && !defined(_AVXINTRIN_H_INCLUDED) && !defined(__AVXINTRIN_H) && !defined(_CMP_EQ_OQ) + typedef simde__m256 __m256; + typedef simde__m256i __m256i; + typedef simde__m256d __m256d; + #else + #undef __m256 + #define __m256 simde__m256 + #undef __m256i + #define __m256i simde__m256i + #undef __m256d + #define __m256d simde__m256d + #endif +#endif + +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256), "simde__m256 size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256_private), "simde__m256_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i), "simde__m256i size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256i_private), "simde__m256i_private size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d), "simde__m256d size incorrect"); +HEDLEY_STATIC_ASSERT(32 == sizeof(simde__m256d_private), "simde__m256d_private size incorrect"); +#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256) == 32, "simde__m256 is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256_private) == 32, "simde__m256_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i) == 32, "simde__m256i is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256i_private) == 32, "simde__m256i_private is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d) == 32, "simde__m256d is not 32-byte aligned"); +HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m256d_private) == 32, "simde__m256d_private is not 32-byte aligned"); +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde__m256_from_private(simde__m256_private v) { + simde__m256 r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256_private +simde__m256_to_private(simde__m256 v) { + simde__m256_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde__m256i_from_private(simde__m256i_private v) { + simde__m256i r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i_private +simde__m256i_to_private(simde__m256i v) { + simde__m256i_private r; + 
simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde__m256d_from_private(simde__m256d_private v) { + simde__m256d r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d_private +simde__m256d_to_private(simde__m256d v) { + simde__m256d_private r; + simde_memcpy(&r, &v, sizeof(r)); + return r; +} + +#define SIMDE_CMP_EQ_OQ 0 +#define SIMDE_CMP_LT_OS 1 +#define SIMDE_CMP_LE_OS 2 +#define SIMDE_CMP_UNORD_Q 3 +#define SIMDE_CMP_NEQ_UQ 4 +#define SIMDE_CMP_NLT_US 5 +#define SIMDE_CMP_NLE_US 6 +#define SIMDE_CMP_ORD_Q 7 +#define SIMDE_CMP_EQ_UQ 8 +#define SIMDE_CMP_NGE_US 9 +#define SIMDE_CMP_NGT_US 10 +#define SIMDE_CMP_FALSE_OQ 11 +#define SIMDE_CMP_NEQ_OQ 12 +#define SIMDE_CMP_GE_OS 13 +#define SIMDE_CMP_GT_OS 14 +#define SIMDE_CMP_TRUE_UQ 15 +#define SIMDE_CMP_EQ_OS 16 +#define SIMDE_CMP_LT_OQ 17 +#define SIMDE_CMP_LE_OQ 18 +#define SIMDE_CMP_UNORD_S 19 +#define SIMDE_CMP_NEQ_US 20 +#define SIMDE_CMP_NLT_UQ 21 +#define SIMDE_CMP_NLE_UQ 22 +#define SIMDE_CMP_ORD_S 23 +#define SIMDE_CMP_EQ_US 24 +#define SIMDE_CMP_NGE_UQ 25 +#define SIMDE_CMP_NGT_UQ 26 +#define SIMDE_CMP_FALSE_OS 27 +#define SIMDE_CMP_NEQ_OS 28 +#define SIMDE_CMP_GE_OQ 29 +#define SIMDE_CMP_GT_OQ 30 +#define SIMDE_CMP_TRUE_US 31 + +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) && !defined(_CMP_EQ_OQ) +#define _CMP_EQ_OQ SIMDE_CMP_EQ_OQ +#define _CMP_LT_OS SIMDE_CMP_LT_OS +#define _CMP_LE_OS SIMDE_CMP_LE_OS +#define _CMP_UNORD_Q SIMDE_CMP_UNORD_Q +#define _CMP_NEQ_UQ SIMDE_CMP_NEQ_UQ +#define _CMP_NLT_US SIMDE_CMP_NLT_US +#define _CMP_NLE_US SIMDE_CMP_NLE_US +#define _CMP_ORD_Q SIMDE_CMP_ORD_Q +#define _CMP_EQ_UQ SIMDE_CMP_EQ_UQ +#define _CMP_NGE_US SIMDE_CMP_NGE_US +#define _CMP_NGT_US SIMDE_CMP_NGT_US +#define _CMP_FALSE_OQ SIMDE_CMP_FALSE_OQ +#define _CMP_NEQ_OQ SIMDE_CMP_NEQ_OQ +#define _CMP_GE_OS SIMDE_CMP_GE_OS +#define _CMP_GT_OS SIMDE_CMP_GT_OS +#define _CMP_TRUE_UQ SIMDE_CMP_TRUE_UQ +#define _CMP_EQ_OS SIMDE_CMP_EQ_OS +#define _CMP_LT_OQ SIMDE_CMP_LT_OQ +#define _CMP_LE_OQ SIMDE_CMP_LE_OQ +#define _CMP_UNORD_S SIMDE_CMP_UNORD_S +#define _CMP_NEQ_US SIMDE_CMP_NEQ_US +#define _CMP_NLT_UQ SIMDE_CMP_NLT_UQ +#define _CMP_NLE_UQ SIMDE_CMP_NLE_UQ +#define _CMP_ORD_S SIMDE_CMP_ORD_S +#define _CMP_EQ_US SIMDE_CMP_EQ_US +#define _CMP_NGE_UQ SIMDE_CMP_NGE_UQ +#define _CMP_NGT_UQ SIMDE_CMP_NGT_UQ +#define _CMP_FALSE_OS SIMDE_CMP_FALSE_OS +#define _CMP_NEQ_OS SIMDE_CMP_NEQ_OS +#define _CMP_GE_OQ SIMDE_CMP_GE_OQ +#define _CMP_GT_OQ SIMDE_CMP_GT_OQ +#define _CMP_TRUE_US SIMDE_CMP_TRUE_US +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castps_pd (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_pd + #define _mm256_castps_pd(a) simde_mm256_castps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castps_si256 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps_si256 + #define _mm256_castps_si256(a) simde_mm256_castps_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castsi256_pd (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_pd(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256d*, &a); + #endif 
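+  /* Like the other _mm256_cast* helpers here, this only reinterprets the
+   * 256 bits in place; no value conversion is performed (use
+   * _mm256_cvtepi32_pd for a numeric conversion). */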
+} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_pd + #define _mm256_castsi256_pd(a) simde_mm256_castsi256_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castsi256_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_ps + #define _mm256_castsi256_ps(a) simde_mm256_castsi256_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_ps(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_ps + #define _mm256_castpd_ps(a) simde_mm256_castpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castpd_si256 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd_si256(a); + #else + return *HEDLEY_REINTERPRET_CAST(simde__m256i*, &a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd_si256 + #define _mm256_castpd_si256(a) simde_mm256_castpd_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setzero_si256 (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_si256(); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_setzero_si128(); + r_.m128i[1] = simde_mm_setzero_si128(); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = 0; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_si256 + #define _mm256_setzero_si256() simde_mm256_setzero_si256() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setzero_ps (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_ps(); + #else + return simde_mm256_castsi256_ps(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_ps + #define _mm256_setzero_ps() simde_mm256_setzero_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setzero_pd (void) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setzero_pd(); + #else + return simde_mm256_castsi256_pd(simde_mm256_setzero_si256()); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setzero_pd + #define _mm256_setzero_pd() simde_mm256_setzero_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_not_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = ~a_.i32; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_not_ps(a_.m128[0]); + r_.m128[1] = simde_x_mm_not_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ~(a_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_select_ps(simde__m256 a, simde__m256 b, simde__m256 mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_ps, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. 
This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. */ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128[0] = simde_x_mm_select_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_x_mm_select_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_not_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = ~a_.i64; + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_not_pd(a_.m128d[0]); + r_.m128d[1] = simde_x_mm_not_pd(a_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = ~(a_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_select_pd(simde__m256d a, simde__m256d b, simde__m256d mask) { + /* This function is for when you want to blend two elements together + * according to a mask. It is similar to _mm256_blendv_pd, except that + * it is undefined whether the blend is based on the highest bit in + * each lane (like blendv) or just bitwise operations. This allows + * us to implement the function efficiently everywhere. + * + * Basically, you promise that all the lanes in mask are either 0 or + * ~0. 
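+   * As a rough illustration, the result is equivalent to
+   * (a & ~mask) | (b & mask): lanes of b are taken where the mask lane is
+   * ~0 and lanes of a where it is 0.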
*/ + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); + #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) + r_.m128d[0] = simde_x_mm_select_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_x_mm_select_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_setone_si256 (void) { + simde__m256i_private r_; + +#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + __typeof__(r_.i32f) rv = { 0, }; + r_.i32f = ~rv; +#elif defined(SIMDE_X86_AVX2_NATIVE) + __m256i t = _mm256_setzero_si256(); + r_.n = _mm256_cmpeq_epi32(t, t); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + } +#endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_setone_ps (void) { + return simde_mm256_castsi256_ps(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_setone_pd (void) { + return simde_mm256_castsi256_pd(simde_x_mm256_setone_si256()); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi8 (int8_t e31, int8_t e30, int8_t e29, int8_t e28, + int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, + int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, + int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, + int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi8( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16); + #else + r_.i8[ 0] = e0; + r_.i8[ 1] = e1; + r_.i8[ 2] = e2; + r_.i8[ 3] = e3; + r_.i8[ 4] = e4; + r_.i8[ 5] = e5; + r_.i8[ 6] = e6; + r_.i8[ 7] = e7; + r_.i8[ 8] = e8; + r_.i8[ 9] = e9; + r_.i8[10] = e10; + r_.i8[11] = e11; + r_.i8[12] = e12; + r_.i8[13] = e13; + r_.i8[14] = e14; + r_.i8[15] = e15; + r_.i8[16] = e16; + r_.i8[17] = e17; + r_.i8[18] = e18; + r_.i8[19] = e19; + r_.i8[20] = e20; + r_.i8[21] = e21; + r_.i8[22] = e22; + r_.i8[23] = e23; + r_.i8[24] = e24; + r_.i8[25] = e25; + r_.i8[26] = e26; + r_.i8[27] = e27; + r_.i8[28] = e28; + r_.i8[29] = e29; + r_.i8[30] = e30; + r_.i8[31] = e31; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi8 + #define _mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, 
e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi16 (int16_t e15, int16_t e14, int16_t e13, int16_t e12, + int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, + int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi16( e7, e6, e5, e4, e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8); + #else + r_.i16[ 0] = e0; + r_.i16[ 1] = e1; + r_.i16[ 2] = e2; + r_.i16[ 3] = e3; + r_.i16[ 4] = e4; + r_.i16[ 5] = e5; + r_.i16[ 6] = e6; + r_.i16[ 7] = e7; + r_.i16[ 8] = e8; + r_.i16[ 9] = e9; + r_.i16[10] = e10; + r_.i16[11] = e11; + r_.i16[12] = e12; + r_.i16[13] = e13; + r_.i16[14] = e14; + r_.i16[15] = e15; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi16 + #define _mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi32 (int32_t e7, int32_t e6, int32_t e5, int32_t e4, + int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(e3, e2, e1, e0); + r_.m128i[1] = simde_mm_set_epi32(e7, e6, e5, e4); + #else + r_.i32[ 0] = e0; + r_.i32[ 1] = e1; + r_.i32[ 2] = e2; + r_.i32[ 3] = e3; + r_.i32[ 4] = e4; + r_.i32[ 5] = e5; + r_.i32[ 6] = e6; + r_.i32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi32 + #define _mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi64x(e3, e2, e1, e0); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi64x(e1, e0); + r_.m128i[1] = simde_mm_set_epi64x(e3, e2); + #else + r_.i64[0] = e0; + r_.i64[1] = e1; + r_.i64[2] = e2; + r_.i64[3] = e3; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_epi64x + #define _mm256_set_epi64x(e3, e2, e1, e0) simde_mm256_set_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu8 (uint8_t e31, uint8_t e30, uint8_t e29, uint8_t e28, + uint8_t e27, uint8_t e26, uint8_t e25, uint8_t e24, + uint8_t e23, uint8_t e22, uint8_t e21, uint8_t e20, + uint8_t e19, uint8_t e18, uint8_t e17, uint8_t e16, + uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, + uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, + uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, + uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { + simde__m256i_private r_; + + r_.u8[ 0] = e0; + r_.u8[ 1] = e1; + r_.u8[ 2] = e2; + r_.u8[ 3] = e3; + r_.u8[ 4] = e4; + r_.u8[ 5] = e5; 
+ r_.u8[ 6] = e6; + r_.u8[ 7] = e7; + r_.u8[ 8] = e8; + r_.u8[ 9] = e9; + r_.u8[10] = e10; + r_.u8[11] = e11; + r_.u8[12] = e12; + r_.u8[13] = e13; + r_.u8[14] = e14; + r_.u8[15] = e15; + r_.u8[16] = e16; + r_.u8[17] = e17; + r_.u8[18] = e18; + r_.u8[19] = e19; + r_.u8[20] = e20; + r_.u8[20] = e20; + r_.u8[21] = e21; + r_.u8[22] = e22; + r_.u8[23] = e23; + r_.u8[24] = e24; + r_.u8[25] = e25; + r_.u8[26] = e26; + r_.u8[27] = e27; + r_.u8[28] = e28; + r_.u8[29] = e29; + r_.u8[30] = e30; + r_.u8[31] = e31; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu16 (uint16_t e15, uint16_t e14, uint16_t e13, uint16_t e12, + uint16_t e11, uint16_t e10, uint16_t e9, uint16_t e8, + uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, + uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { + simde__m256i_private r_; + + r_.u16[ 0] = e0; + r_.u16[ 1] = e1; + r_.u16[ 2] = e2; + r_.u16[ 3] = e3; + r_.u16[ 4] = e4; + r_.u16[ 5] = e5; + r_.u16[ 6] = e6; + r_.u16[ 7] = e7; + r_.u16[ 8] = e8; + r_.u16[ 9] = e9; + r_.u16[10] = e10; + r_.u16[11] = e11; + r_.u16[12] = e12; + r_.u16[13] = e13; + r_.u16[14] = e14; + r_.u16[15] = e15; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu32 (uint32_t e7, uint32_t e6, uint32_t e5, uint32_t e4, + uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4), + HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e3), HEDLEY_STATIC_CAST(int32_t, e2), HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); + r_.m128i[1] = simde_mm_set_epi32(HEDLEY_STATIC_CAST(int32_t, e7), HEDLEY_STATIC_CAST(int32_t, e6), HEDLEY_STATIC_CAST(int32_t, e5), HEDLEY_STATIC_CAST(int32_t, e4)); + #else + r_.u32[ 0] = e0; + r_.u32[ 1] = e1; + r_.u32[ 2] = e2; + r_.u32[ 3] = e3; + r_.u32[ 4] = e4; + r_.u32[ 5] = e5; + r_.u32[ 6] = e6; + r_.u32[ 7] = e7; + #endif + + return simde__m256i_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_set_epu64x (uint64_t e3, uint64_t e2, uint64_t e1, uint64_t e0) { + simde__m256i_private r_; + + r_.u64[0] = e0; + r_.u64[1] = e1; + r_.u64[2] = e2; + r_.u64[3] = e3; + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_ps (simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set_ps(e3, e2, e1, e0); + r_.m128[1] = simde_mm_set_ps(e7, e6, e5, e4); + #else + r_.f32[0] = e0; + r_.f32[1] = e1; + r_.f32[2] = e2; + r_.f32[3] = e3; + r_.f32[4] = e4; + r_.f32[5] = e5; + r_.f32[6] = e6; + r_.f32[7] = e7; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_ps + #define _mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_set_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
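+/* As with _mm256_set_ps above, the arguments run from the highest element
+ * down to the lowest: e0 ends up in element 0 (the least-significant lane). */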
+simde__m256d +simde_mm256_set_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set_pd(e3, e2, e1, e0); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set_pd(e1, e0); + r_.m128d[1] = simde_mm_set_pd(e3, e2); + #else + r_.f64[0] = e0; + r_.f64[1] = e1; + r_.f64[2] = e2; + r_.f64[3] = e3; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_pd + #define _mm256_set_pd(e3, e2, e1, e0) \ + simde_mm256_set_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set_m128 (simde__m128 e1, simde__m128 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_castps128_ps256(e0), e1, 1); + #else + simde__m256_private r_; + simde__m128_private + e1_ = simde__m128_to_private(e1), + e0_ = simde__m128_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128_private[0] = e0_; + r_.m128_private[1] = e1_; + #elif defined(SIMDE_HAVE_INT128_) + r_.i128[0] = e0_.i128[0]; + r_.i128[1] = e1_.i128[0]; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128 + #define _mm256_set_m128(e1, e0) simde_mm256_set_m128(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set_m128d (simde__m128d e1, simde__m128d e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_castpd128_pd256(e0), e1, 1); + #else + simde__m256d_private r_; + simde__m128d_private + e1_ = simde__m128d_to_private(e1), + e0_ = simde__m128d_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d_private[0] = e0_; + r_.m128d_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128d + #define _mm256_set_m128d(e1, e0) simde_mm256_set_m128d(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set_m128i (simde__m128i e1, simde__m128i e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_castsi128_si256(e0), e1, 1); + #else + simde__m256i_private r_; + simde__m128i_private + e1_ = simde__m128i_to_private(e1), + e0_ = simde__m128i_to_private(e0); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = e0_; + r_.m128i_private[1] = e1_; + #else + r_.i64[0] = e0_.i64[0]; + r_.i64[1] = e0_.i64[1]; + r_.i64[2] = e1_.i64[0]; + r_.i64[3] = e1_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set_m128i + #define _mm256_set_m128i(e1, e0) simde_mm256_set_m128i(e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi8 (int8_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi8(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi8(a); + r_.m128i[1] = simde_mm_set1_epi8(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_set1_epi8 + #define _mm256_set1_epi8(a) simde_mm256_set1_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi16 (int16_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi16(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi16(a); + r_.m128i[1] = simde_mm_set1_epi16(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi16 + #define _mm256_set1_epi16(a) simde_mm256_set1_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi32 (int32_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi32(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi32(a); + r_.m128i[1] = simde_mm_set1_epi32(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi32 + #define _mm256_set1_epi32(a) simde_mm256_set1_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_set1_epi64x (int64_t a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_epi64x(a); + #else + simde__m256i_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_set1_epi64x(a); + r_.m128i[1] = simde_mm_set1_epi64x(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_epi64x + #define _mm256_set1_epi64x(a) simde_mm256_set1_epi64x(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_set1_ps (simde_float32 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_ps(a); + #else + simde__m256_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_set1_ps(a); + r_.m128[1] = simde_mm_set1_ps(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_ps + #define _mm256_set1_ps(a) simde_mm256_set1_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_set1_pd (simde_float64 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_set1_pd(a); + #else + simde__m256d_private r_; + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_set1_pd(a); + r_.m128d[1] = simde_mm_set1_pd(a); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_set1_pd + #define _mm256_set1_pd(a) simde_mm256_set1_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = 
simde_x_mm_deinterleaveeven_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i]; + r_.i16[i + quarter_point] = b_.i16[2 * i]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi16 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i16[i] = a_.i16[2 * i + 1]; + r_.i16[i + quarter_point] = b_.i16[2 * i + 1]; + r_.i16[halfway_point + i] = a_.i16[halfway_point + 2 * i + 1]; + r_.i16[halfway_point + i + quarter_point] = b_.i16[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveeven_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveeven_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i]; + r_.i32[i + quarter_point] = b_.i32[2 * i]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_deinterleaveodd_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_deinterleaveodd_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 2; + const size_t quarter_point = (sizeof(r_.i32) / sizeof(r_.i32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i32[i] = a_.i32[2 * i + 1]; + r_.i32[i + quarter_point] = 
b_.i32[2 * i + 1]; + r_.i32[halfway_point + i] = a_.i32[halfway_point + 2 * i + 1]; + r_.i32[halfway_point + i + quarter_point] = b_.i32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveeven_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveeven_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveeven_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 2, 8, 10, 4, 6, 12, 14); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i]; + r_.f32[i + quarter_point] = b_.f32[2 * i]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_deinterleaveodd_ps (simde__m256 a, simde__m256 b) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_x_mm_deinterleaveodd_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_x_mm_deinterleaveodd_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 1, 3, 9, 11, 5, 7, 13, 15); + #else + const size_t halfway_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 2; + const size_t quarter_point = (sizeof(r_.f32) / sizeof(r_.f32[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f32[i] = a_.f32[2 * i + 1]; + r_.f32[i + quarter_point] = b_.f32[2 * i + 1]; + r_.f32[halfway_point + i] = a_.f32[halfway_point + 2 * i + 1]; + r_.f32[halfway_point + i + quarter_point] = b_.f32[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveeven_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveeven_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_x_mm_deinterleaveeven_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i]; + r_.f64[i + quarter_point] = b_.f64[2 * i]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_deinterleaveodd_pd (simde__m256d a, simde__m256d b) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_x_mm_deinterleaveodd_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = 
simde_x_mm_deinterleaveodd_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + const size_t halfway_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 2; + const size_t quarter_point = (sizeof(r_.f64) / sizeof(r_.f64[0])) / 4; + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.f64[i] = a_.f64[2 * i + 1]; + r_.f64[i + quarter_point] = b_.f64[2 * i + 1]; + r_.f64[halfway_point + i] = a_.f64[halfway_point + 2 * i + 1]; + r_.f64[halfway_point + i + quarter_point] = b_.f64[halfway_point + 2 * i + 1]; + } + #endif + + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_abs_ps(simde__m256 a) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_fabsf(a_.f32[i]); + } + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_abs_pd(simde__m256d a) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_fabs(a_.f64[i]); + } + return simde__m256d_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_add_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_add_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_add_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 + b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] + b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_ps + #define _mm256_add_ps(a, b) simde_mm256_add_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hadd_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hadd_ps(a, b); + #else + return simde_mm256_add_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_ps + #define _mm256_hadd_ps(a, b) simde_mm256_hadd_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_add_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_add_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_add_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_add_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 + b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] + b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_pd + #define _mm256_add_pd(a, b) simde_mm256_add_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hadd_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return 
_mm256_hadd_pd(a, b); + #else + return simde_mm256_add_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_pd + #define _mm256_hadd_pd(a, b) simde_mm256_hadd_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_addsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_addsub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_addsub_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[ i ] = a_.f32[ i ] - b_.f32[ i ]; + r_.f32[i + 1] = a_.f32[i + 1] + b_.f32[i + 1]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_ps + #define _mm256_addsub_ps(a, b) simde_mm256_addsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_addsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_addsub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_addsub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_addsub_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[ i ] = a_.f64[ i ] - b_.f64[ i ]; + r_.f64[i + 1] = a_.f64[i + 1] + b_.f64[i + 1]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_addsub_pd + #define _mm256_addsub_pd(a, b) simde_mm256_addsub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_and_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_and_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_and_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_ps + #define _mm256_and_ps(a, b) simde_mm256_and_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_and_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_and_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_and_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_and_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_pd + #define _mm256_and_pd(a, b) simde_mm256_and_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_andnot_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_andnot_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_andnot_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_ps + #define _mm256_andnot_ps(a, b) simde_mm256_andnot_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_andnot_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_andnot_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_andnot_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_andnot_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = ~a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~a_.i32f[i] & b_.i32f[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_pd + #define _mm256_andnot_pd(a, b) simde_mm256_andnot_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blend_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = ((imm8 >> i) & 1) ? b_.f32[i] : a_.f32[i]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_ps(a, b, imm8) _mm256_blend_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8) >> 4), \ + simde_mm_blend_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_ps + #define _mm256_blend_ps(a, b, imm8) simde_mm256_blend_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blend_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = ((imm8 >> i) & 1) ? 
b_.f64[i] : a_.f64[i]; + } + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_blend_pd(a, b, imm8) _mm256_blend_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8) >> 2), \ + simde_mm_blend_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8) & 3)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_pd + #define _mm256_blend_pd(a, b, imm8) simde_mm256_blend_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_blendv_ps (simde__m256 a, simde__m256 b, simde__m256 mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_ps(a, b, mask); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b), + mask_ = simde__m256_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_blendv_ps(a_.m128[0], b_.m128[0], mask_.m128[0]); + r_.m128[1] = simde_mm_blendv_ps(a_.m128[1], b_.m128[1], mask_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.f32[i] = (mask_.u32[i] & (UINT32_C(1) << 31)) ? b_.f32[i] : a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_ps + #define _mm256_blendv_ps(a, b, imm8) simde_mm256_blendv_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_blendv_pd (simde__m256d a, simde__m256d b, simde__m256d mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_blendv_pd(a, b, mask); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b), + mask_ = simde__m256d_to_private(mask); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_blendv_pd(a_.m128d[0], b_.m128d[0], mask_.m128d[0]); + r_.m128d[1] = simde_mm_blendv_pd(a_.m128d[1], b_.m128d[1], mask_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.f64[i] = (mask_.u64[i] & (UINT64_C(1) << 63)) ? 
b_.f64[i] : a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_pd + #define _mm256_blendv_pd(a, b, imm8) simde_mm256_blendv_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_pd (simde__m128d const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_pd(mem_addr); + #else + simde__m256d_private r_; + + simde__m128d tmp = simde_mm_loadu_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, mem_addr)); + r_.m128d[0] = tmp; + r_.m128d[1] = tmp; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_pd + #define _mm256_broadcast_pd(mem_addr) simde_mm256_broadcast_pd(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ps (simde__m128 const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ps(mem_addr); + #else + simde__m256_private r_; + + simde__m128 tmp = simde_mm_loadu_ps(HEDLEY_REINTERPRET_CAST(simde_float32 const*, mem_addr)); + r_.m128[0] = tmp; + r_.m128[1] = tmp; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ps + #define _mm256_broadcast_ps(mem_addr) simde_mm256_broadcast_ps(HEDLEY_REINTERPRET_CAST(simde__m128 const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcast_sd (simde_float64 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_sd(a); + #else + return simde_mm256_set1_pd(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_sd + #define _mm256_broadcast_sd(mem_addr) simde_mm256_broadcast_sd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_broadcast_ss(a); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde__m128_from_wasm_v128(wasm_v128_load32_splat(a)); + #else + return simde_mm_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcast_ss + #define _mm_broadcast_ss(mem_addr) simde_mm_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcast_ss (simde_float32 const * a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_broadcast_ss(a); + #else + return simde_mm256_set1_ps(*a); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcast_ss + #define _mm256_broadcast_ss(mem_addr) simde_mm256_broadcast_ss(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_castpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd128_pd256(a); + #else + simde__m256d_private r_; + simde__m128d_private a_ = simde__m128d_to_private(a); + + r_.m128d_private[0] = a_; + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd128_pd256 + #define _mm256_castpd128_pd256(a) simde_mm256_castpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_castpd256_pd128 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castpd256_pd128(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castpd256_pd128 + #define 
_mm256_castpd256_pd128(a) simde_mm256_castpd256_pd128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_castps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps128_ps256(a); + #else + simde__m256_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + r_.m128_private[0] = a_; + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps128_ps256 + #define _mm256_castps128_ps256(a) simde_mm256_castps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_castps256_ps128 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castps256_ps128(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castps256_ps128 + #define _mm256_castps256_ps128(a) simde_mm256_castps256_ps128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_castsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + r_.m128i_private[0] = a_; + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi128_si256 + #define _mm256_castsi128_si256(a) simde_mm256_castsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_castsi256_si128 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_castsi256_si128(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_castsi256_si128 + #define _mm256_castsi256_si128(a) simde_mm256_castsi256_si128(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_round_ps (simde__m256 a, const int rounding) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyintf) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_roundf) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_roundf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_floorf) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_floorf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_ceilf) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_ceilf(a_.f32[i]); + } + break; + #endif + + #if defined(simde_math_truncf) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_truncf(a_.f32[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps()); + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_ps(a, rounding) _mm256_round_ps(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_ps(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + 
simde_mm256_round_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_round_ps_a_ = simde__m256_to_private(a); \ + \ + for (size_t simde_mm256_round_ps_i = 0 ; simde_mm256_round_ps_i < (sizeof(simde_mm256_round_ps_r_.m128) / sizeof(simde_mm256_round_ps_r_.m128[0])) ; simde_mm256_round_ps_i++) { \ + simde_mm256_round_ps_r_.m128[simde_mm256_round_ps_i] = simde_mm_round_ps(simde_mm256_round_ps_a_.m128[simde_mm256_round_ps_i], rounding); \ + } \ + \ + simde__m256_from_private(simde_mm256_round_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_ps + #define _mm256_round_ps(a, rounding) simde_mm256_round_ps(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_round_pd (simde__m256d a, const int rounding) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { + #if defined(simde_math_nearbyint) + case SIMDE_MM_FROUND_CUR_DIRECTION: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_nearbyint(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_round) + case SIMDE_MM_FROUND_TO_NEAREST_INT: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_round(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_floor) + case SIMDE_MM_FROUND_TO_NEG_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_floor(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_ceil) + case SIMDE_MM_FROUND_TO_POS_INF: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_ceil(a_.f64[i]); + } + break; + #endif + + #if defined(simde_math_trunc) + case SIMDE_MM_FROUND_TO_ZERO: + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_trunc(a_.f64[i]); + } + break; + #endif + + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd()); + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_round_pd(a, rounding) _mm256_round_pd(a, rounding) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) && defined(SIMDE_STATEMENT_EXPR_) + #define simde_mm256_round_pd(a, rounding) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256d_private \ + simde_mm256_round_pd_r_ = simde__m256d_to_private(simde_mm256_setzero_pd()), \ + simde_mm256_round_pd_a_ = simde__m256d_to_private(a); \ + \ + for (size_t simde_mm256_round_pd_i = 0 ; simde_mm256_round_pd_i < (sizeof(simde_mm256_round_pd_r_.m128d) / sizeof(simde_mm256_round_pd_r_.m128d[0])) ; simde_mm256_round_pd_i++) { \ + simde_mm256_round_pd_r_.m128d[simde_mm256_round_pd_i] = simde_mm_round_pd(simde_mm256_round_pd_a_.m128d[simde_mm256_round_pd_i], rounding); \ + } \ + \ + simde__m256d_from_private(simde_mm256_round_pd_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_round_pd + #define _mm256_round_pd(a, rounding) simde_mm256_round_pd(a, rounding) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_ceil_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_ceil_pd + #define _mm256_ceil_pd(a) simde_mm256_ceil_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_ceil_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_POS_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_ceil_ps + #define _mm256_ceil_ps(a) simde_mm256_ceil_ps(a) +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL + +/* This implementation does not support signaling NaNs (yet?) */ +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_pd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_pd(simde_mm_cmpunord_pd(a, b), simde_mm_cmpeq_pd(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_pd(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_pd(simde_mm_cmpge_pd(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_pd(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_pd(simde_mm_cmpgt_pd(a, b)); + break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_pd(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_pd(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_pd(simde_mm_cmpord_pd(a, b), simde_mm_cmpneq_pd(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_pd(simde_mm_cmplt_pd(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_pd(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_pd(simde_mm_cmple_pd(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_pd(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_pd(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_pd(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_pd(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_pd(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_pd()); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m128d simde_mm_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_pd_r = simde_mm_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_pd_r = simde_x_mm_setone_pd(); \ + break; \ + default: \ + simde_mm_cmp_pd_r = simde_mm_cmp_pd(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_pd(a, b, imm8) _mm_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_pd + #define _mm_cmp_pd(a, b, imm8) simde_mm_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ps (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + switch (imm8) { + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + return simde_mm_or_ps(simde_mm_cmpunord_ps(a, b), simde_mm_cmpeq_ps(a, b)); + break; + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + return simde_mm_cmpeq_ps(a, b); + break; + case SIMDE_CMP_NGE_US: + case SIMDE_CMP_NGE_UQ: + return simde_x_mm_not_ps(simde_mm_cmpge_ps(a, b)); + break; + case SIMDE_CMP_LT_OS: + case SIMDE_CMP_LT_OQ: + return simde_mm_cmplt_ps(a, b); + break; + case SIMDE_CMP_NGT_US: + case SIMDE_CMP_NGT_UQ: + return simde_x_mm_not_ps(simde_mm_cmpgt_ps(a, b)); + 
break; + case SIMDE_CMP_LE_OS: + case SIMDE_CMP_LE_OQ: + return simde_mm_cmple_ps(a, b); + break; + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + return simde_mm_cmpneq_ps(a, b); + break; + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + return simde_mm_and_ps(simde_mm_cmpord_ps(a, b), simde_mm_cmpneq_ps(a, b)); + break; + case SIMDE_CMP_NLT_US: + case SIMDE_CMP_NLT_UQ: + return simde_x_mm_not_ps(simde_mm_cmplt_ps(a, b)); + break; + case SIMDE_CMP_GE_OS: + case SIMDE_CMP_GE_OQ: + return simde_mm_cmpge_ps(a, b); + break; + case SIMDE_CMP_NLE_US: + case SIMDE_CMP_NLE_UQ: + return simde_x_mm_not_ps(simde_mm_cmple_ps(a, b)); + break; + case SIMDE_CMP_GT_OS: + case SIMDE_CMP_GT_OQ: + return simde_mm_cmpgt_ps(a, b); + break; + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + return simde_mm_setzero_ps(); + break; + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + return simde_x_mm_setone_ps(); + break; + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + return simde_mm_cmpunord_ps(a, b); + break; + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + return simde_mm_cmpord_ps(a, b); + break; + } + + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_ps()); +} +/* Prior to 9.0 clang has problems with _mm{,256}_cmp_{ps,pd} for all four of the true/false + * comparisons, but only when AVX-512 is enabled. */ +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m128 simde_mm_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm_cmp_ps_r = simde_mm_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm_cmp_ps_r = simde_x_mm_setone_ps(); \ + break; \ + default: \ + simde_mm_cmp_ps_r = simde_mm_cmp_ps(a, b, imm8); \ + break; \ + } \ + simde_mm_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ps(a, b, imm8) _mm_cmp_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ps + #define _mm_cmp_ps(a, b, imm8) simde_mm_cmp_ps(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_cmp_sd (simde__m128d a, simde__m128d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i64[0] = (a_.f64[0] == b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i64[0] = (a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i64[0] = (a_.f64[0] <= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) || (b_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0]) & (a_.f64[0] != b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i64[0] = !(a_.f64[0] < b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i64[0] = !(a_.f64[0] <= b_.f64[0]) ? 
~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i64[0] = ((a_.f64[0] == a_.f64[0]) & (b_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i64[0] = ((a_.f64[0] != a_.f64[0]) | (b_.f64[0] != b_.f64[0]) | (a_.f64[0] == b_.f64[0])) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i64[0] = !(a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i64[0] = !(a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i64[0] = INT64_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i64[0] = (a_.f64[0] >= b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i64[0] = (a_.f64[0] > b_.f64[0]) ? ~INT64_C(0) : INT64_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i64[0] = ~INT64_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_cmp_sd(a, b, imm8) _mm_cmp_sd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_sd + #define _mm_cmp_sd(a, b, imm8) simde_mm_cmp_sd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_cmp_ss (simde__m128 a, simde__m128 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + a_.i32[0] = (a_.f32[0] == b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + a_.i32[0] = (a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + a_.i32[0] = (a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) || (b_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0]) & (a_.f32[0] != b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + a_.i32[0] = !(a_.f32[0] < b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + a_.i32[0] = !(a_.f32[0] <= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + a_.i32[0] = ((a_.f32[0] == a_.f32[0]) & (b_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + a_.i32[0] = ((a_.f32[0] != a_.f32[0]) | (b_.f32[0] != b_.f32[0]) | (a_.f32[0] == b_.f32[0])) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + a_.i32[0] = !(a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + a_.i32[0] = !(a_.f32[0] > b_.f32[0]) ? 
~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + a_.i32[0] = INT32_C(0); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + a_.i32[0] = (a_.f32[0] >= b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + a_.i32[0] = (a_.f32[0] > b_.f32[0]) ? ~INT32_C(0) : INT32_C(0); + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + a_.i32[0] = ~INT32_C(0); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m128_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm_cmp_ss(a, b, imm8) _mm_cmp_ss(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_cmp_ss + #define _mm_cmp_ss(a, b, imm8) simde_mm_cmp_ss(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256d +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_pd_internal_ +#else +simde_mm256_cmp_pd +#endif +(simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] == b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) || (b_.f64[i] != b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] != b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == a_.f64) & (b_.f64 == b_.f64) & (a_.f64 != b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i]) & (a_.f64[i] != b_.f64[i])) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 < b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] < b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 <= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] <= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ((a_.f64 == a_.f64) & (b_.f64 == b_.f64))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] == a_.f64[i]) & (b_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != a_.f64) | (b_.f64 != b_.f64) | (a_.f64 == b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = ((a_.f64[i] != a_.f64[i]) | (b_.f64[i] != b_.f64[i]) | (a_.f64[i] == b_.f64[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), ~(a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = !(a_.f64[i] > b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] >= b_.f64[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.i64[i] = (a_.f64[i] > b_.f64[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256d_to_private(simde_x_mm256_setone_pd()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256d_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_pd(a, b, imm8) (__extension__ ({ \ + simde__m256d simde_mm256_cmp_pd_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_pd_r = simde_mm256_setzero_pd(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_pd_r = simde_x_mm256_setone_pd(); \ + break; \ + default: \ + simde_mm256_cmp_pd_r = simde_mm256_cmp_pd_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_pd_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_pd(a, b, imm8) _mm256_cmp_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_pd + #define _mm256_cmp_pd(a, b, imm8) simde_mm256_cmp_pd(a, b, imm8) +#endif + +SIMDE_HUGE_FUNCTION_ATTRIBUTES +simde__m256 +#if defined(__clang__) && defined(__AVX512DQ__) +simde_mm256_cmp_ps_internal_ +#else +simde_mm256_cmp_ps +#endif +(simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 31) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + switch (imm8) { + case SIMDE_CMP_EQ_OQ: + case SIMDE_CMP_EQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] == b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LT_OQ: + case SIMDE_CMP_LT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_LE_OQ: + case SIMDE_CMP_LE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_UNORD_Q: + case SIMDE_CMP_UNORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) || (b_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_UQ: + case SIMDE_CMP_NEQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] != b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NEQ_OQ: + case SIMDE_CMP_NEQ_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 == a_.f32) & (b_.f32 == b_.f32) & (a_.f32 != b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i]) & (a_.f32[i] != b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLT_UQ: + case SIMDE_CMP_NLT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 < b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] < b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NLE_UQ: + case SIMDE_CMP_NLE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 <= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] <= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_ORD_Q: + case SIMDE_CMP_ORD_S: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ((a_.f32 == a_.f32) & (b_.f32 == b_.f32))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] == a_.f32[i]) & (b_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_EQ_UQ: + case SIMDE_CMP_EQ_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != a_.f32) | (b_.f32 != b_.f32) | (a_.f32 == b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = ((a_.f32[i] != a_.f32[i]) | (b_.f32[i] != b_.f32[i]) | (a_.f32[i] == b_.f32[i])) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGE_UQ: + case SIMDE_CMP_NGE_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] >= b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_NGT_UQ: + case SIMDE_CMP_NGT_US: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), ~(a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = !(a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_FALSE_OQ: + case SIMDE_CMP_FALSE_OS: + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); + break; + + case SIMDE_CMP_GE_OQ: + case SIMDE_CMP_GE_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_GT_OQ: + case SIMDE_CMP_GT_OS: + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.i32[i] = (a_.f32[i] > b_.f32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + break; + + case SIMDE_CMP_TRUE_UQ: + case SIMDE_CMP_TRUE_US: + r_ = simde__m256_to_private(simde_x_mm256_setone_ps()); + break; + + default: + HEDLEY_UNREACHABLE(); + } + + return simde__m256_from_private(r_); +} +#if defined(__clang__) && defined(__AVX512DQ__) + #define simde_mm256_cmp_ps(a, b, imm8) (__extension__ ({ \ + simde__m256 simde_mm256_cmp_ps_r; \ + switch (imm8) { \ + case SIMDE_CMP_FALSE_OQ: \ + case SIMDE_CMP_FALSE_OS: \ + simde_mm256_cmp_ps_r = simde_mm256_setzero_ps(); \ + break; \ + case SIMDE_CMP_TRUE_UQ: \ + case SIMDE_CMP_TRUE_US: \ + simde_mm256_cmp_ps_r = simde_x_mm256_setone_ps(); \ + break; \ + default: \ + simde_mm256_cmp_ps_r = simde_mm256_cmp_ps_internal_(a, b, imm8); \ + break; \ + } \ + simde_mm256_cmp_ps_r; \ + })) +#elif defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8) +#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \ + simde__m256_private \ + simde_mm256_cmp_ps_r_ = simde__m256_to_private(simde_mm256_setzero_ps()), \ + simde_mm256_cmp_ps_a_ = simde__m256_to_private((a)), \ + simde_mm256_cmp_ps_b_ = simde__m256_to_private((b)); \ + \ + for (size_t i = 0 ; i < (sizeof(simde_mm256_cmp_ps_r_.m128) / sizeof(simde_mm256_cmp_ps_r_.m128[0])) ; i++) { \ + simde_mm256_cmp_ps_r_.m128[i] = simde_mm_cmp_ps(simde_mm256_cmp_ps_a_.m128[i], simde_mm256_cmp_ps_b_.m128[i], (imm8)); \ + } \ + \ + simde__m256_from_private(simde_mm256_cmp_ps_r_); \ + })) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmp_ps + #define _mm256_cmp_ps(a, b, imm8) simde_mm256_cmp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_copysign_ps(simde__m256 dest, simde__m256 src) { + simde__m256_private + r_, + dest_ = simde__m256_to_private(dest), + src_ = simde__m256_to_private(src); + + #if defined(simde_math_copysignf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); + } + #else + simde__m256 sgnbit = simde_mm256_xor_ps(simde_mm256_set1_ps(SIMDE_FLOAT32_C(0.0)), simde_mm256_set1_ps(-SIMDE_FLOAT32_C(0.0))); + return simde_mm256_xor_ps(simde_mm256_and_ps(sgnbit, src), simde_mm256_andnot_ps(sgnbit, dest)); + #endif + + return simde__m256_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_copysign_pd(simde__m256d dest, simde__m256d src) { + simde__m256d_private + r_, + dest_ = simde__m256d_to_private(dest), + src_ = simde__m256d_to_private(src); + + #if defined(simde_math_copysign) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); + } + #else + simde__m256d sgnbit = simde_mm256_xor_pd(simde_mm256_set1_pd(SIMDE_FLOAT64_C(0.0)), simde_mm256_set1_pd(-SIMDE_FLOAT64_C(0.0))); + return simde_mm256_xor_pd(simde_mm256_and_pd(sgnbit, src), simde_mm256_andnot_pd(sgnbit, dest)); + #endif + + return simde__m256d_from_private(r_); +} + +HEDLEY_DIAGNOSTIC_POP /* -Wfloat-equal */ + +SIMDE_FUNCTION_ATTRIBUTES 
+simde__m256d +simde_mm256_cvtepi32_pd (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_pd(a); + #else + simde__m256d_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(simde_float64, a_.i32[i]); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_pd + #define _mm256_cvtepi32_pd(a) simde_mm256_cvtepi32_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 + simde_mm256_cvtepi32_ps (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtepi32_ps(a); + #else + simde__m256_private r_; + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.i32[i]); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_ps + #define _mm256_cvtepi32_ps(a) simde_mm256_cvtepi32_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvtpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_nearbyint) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyint(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_epi32 + #define _mm256_cvtpd_epi32(a) simde_mm256_cvtpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_cvtpd_ps (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtpd_ps(a); + #else + simde__m128_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[i]); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtpd_ps + #define _mm256_cvtpd_ps(a) simde_mm256_cvtpd_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_nearbyintf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_nearbyintf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_epi32 + #define _mm256_cvtps_epi32(a) simde_mm256_cvtps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_cvtps_pd (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvtps_pd(a); + #else + simde__m256d_private r_; + simde__m128_private a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.f64[i] = HEDLEY_STATIC_CAST(double, a_.f32[i]); + } + + return 
simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtps_pd + #define _mm256_cvtps_pd(a) simde_mm256_cvtps_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64 +simde_mm256_cvtsd_f64 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsd_f64(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.f64[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsd_f64 + #define _mm256_cvtsd_f64(a) simde_mm256_cvtsd_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_cvtsi256_si32 (simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtsi256_si32(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtsi256_si32 + #define _mm256_cvtsi256_si32(a) simde_mm256_cvtsi256_si32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32 +simde_mm256_cvtss_f32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && ( \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) || \ + HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + HEDLEY_MSVC_VERSION_CHECK(19,14,0)) + return _mm256_cvtss_f32(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + return a_.f32[0]; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtss_f32 + #define _mm256_cvtss_f32(a) simde_mm256_cvtss_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_cvttpd_epi32 (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttpd_epi32(a); + #else + simde__m128i_private r_; + simde__m256d_private a_ = simde__m256d_to_private(a); + + #if defined(simde_math_trunc) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_trunc(a_.f64[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttpd_epi32 + #define _mm256_cvttpd_epi32(a) simde_mm256_cvttpd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvttps_epi32 (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_cvttps_epi32(a); + #else + simde__m256i_private r_; + simde__m256_private a_ = simde__m256_to_private(a); + + #if defined(simde_math_truncf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, simde_math_truncf(a_.f32[i])); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvttps_epi32 + #define _mm256_cvttps_epi32(a) simde_mm256_cvttps_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_div_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_div_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_div_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 / b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] / b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_ps + #define _mm256_div_ps(a, b) simde_mm256_div_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_div_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_div_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_div_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_div_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 / b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] / b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_div_pd + #define _mm256_div_pd(a, b) simde_mm256_div_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm256_extractf128_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + return a_.m128d[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_pd(a, imm8) _mm256_extractf128_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_pd + #define _mm256_extractf128_pd(a, imm8) simde_mm256_extractf128_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_extractf128_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + return a_.m128[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_ps(a, imm8) _mm256_extractf128_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_ps + #define _mm256_extractf128_ps(a, imm8) simde_mm256_extractf128_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extractf128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_extractf128_si256(a, imm8) _mm256_extractf128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extractf128_si256 + #define _mm256_extractf128_si256(a, imm8) simde_mm256_extractf128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_floor_pd (simde__m256d a) { + return simde_mm256_round_pd(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_pd + #define _mm256_floor_pd(a) simde_mm256_floor_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_floor_ps (simde__m256 a) { + return simde_mm256_round_ps(a, SIMDE_MM_FROUND_TO_NEG_INF); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_floor_ps + #define _mm256_floor_ps(a) simde_mm256_floor_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi8 
(simde__m256i a, int8_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i8[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi8(a, i, index) _mm256_insert_epi8(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi8 + #define _mm256_insert_epi8(a, i, index) simde_mm256_insert_epi8(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi16 (simde__m256i a, int16_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i16[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi16(a, i, index) _mm256_insert_epi16(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi16 + #define _mm256_insert_epi16(a, i, imm8) simde_mm256_insert_epi16(a, i, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi32 (simde__m256i a, int32_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i32[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_insert_epi32(a, i, index) _mm256_insert_epi32(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insert_epi32 + #define _mm256_insert_epi32(a, i, index) simde_mm256_insert_epi32(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_insert_epi64 (simde__m256i a, int64_t i, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + + a_.i64[index] = i; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_insert_epi64(a, i, index) _mm256_insert_epi64(a, i, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_insert_epi64 + #define _mm256_insert_epi64(a, i, index) simde_mm256_insert_epi64(a, i, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d simde_mm256_insertf128_pd(simde__m256d a, simde__m128d b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256d_private a_ = simde__m256d_to_private(a); + simde__m128d_private b_ = simde__m128d_to_private(b); + + a_.m128d_private[imm8] = b_; + + return simde__m256d_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_pd(a, b, imm8) _mm256_insertf128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_pd + #define _mm256_insertf128_pd(a, b, imm8) simde_mm256_insertf128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 simde_mm256_insertf128_ps(simde__m256 a, simde__m128 b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256_private a_ = simde__m256_to_private(a); + simde__m128_private b_ = simde__m128_to_private(b); + + 
a_.m128_private[imm8] = b_; + + return simde__m256_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_ps(a, b, imm8) _mm256_insertf128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_ps + #define _mm256_insertf128_ps(a, b, imm8) simde_mm256_insertf128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i simde_mm256_insertf128_si256(simde__m256i a, simde__m128i b, int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[imm8] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_insertf128_si256(a, b, imm8) _mm256_insertf128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_insertf128_si256 + #define _mm256_insertf128_si256(a, b, imm8) simde_mm256_insertf128_si256(a, b, imm8) +#endif + +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_dp_ps(a, b, imm8) _mm256_dp_ps(a, b, imm8) +#else +# define simde_mm256_dp_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), imm8), \ + simde_mm_dp_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_dp_ps + #define _mm256_dp_ps(a, b, imm8) simde_mm256_dp_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_extract_epi32 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 7) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i32[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi32(a, index) _mm256_extract_epi32(a, index) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi32 + #define _mm256_extract_epi32(a, index) simde_mm256_extract_epi32(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_mm256_extract_epi64 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 3) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i64[index]; +} +#if defined(SIMDE_X86_AVX_NATIVE) && defined(SIMDE_ARCH_AMD64) + #if !defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0) + #define simde_mm256_extract_epi64(a, index) _mm256_extract_epi64(a, index) + #endif +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) + #undef _mm256_extract_epi64 + #define _mm256_extract_epi64(a, index) simde_mm256_extract_epi64(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_lddqu_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_lddqu_si256 + #define _mm256_lddqu_si256(a) simde_mm256_lddqu_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_load_pd (const double mem_addr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_pd(mem_addr); + #else + simde__m256d r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), sizeof(r)); + 
return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_pd + #define _mm256_load_pd(a) simde_mm256_load_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_load_ps (const float mem_addr[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_ps(mem_addr); + #else + simde__m256 r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_ps + #define _mm256_load_ps(a) simde_mm256_load_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_load_si256 (simde__m256i const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_load_si256(mem_addr); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_load_si256 + #define _mm256_load_si256(a) simde_mm256_load_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu_pd (const double a[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_pd(a); + #else + simde__m256d r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_pd + #define _mm256_loadu_pd(a) simde_mm256_loadu_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu_ps (const float a[HEDLEY_ARRAY_PARAM(8)]) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_ps(a); + #else + simde__m256 r; + simde_memcpy(&r, a, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_ps + #define _mm256_loadu_ps(a) simde_mm256_loadu_ps(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi8(mem_addr) _mm256_loadu_epi8(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi8(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi8(mem_addr) simde_mm256_loadu_epi8(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi8 + #define _mm256_loadu_epi8(a) simde_mm256_loadu_epi8(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ + && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi16(mem_addr) _mm256_loadu_epi16(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi16(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi16(mem_addr) simde_mm256_loadu_epi16(mem_addr) +#if 
defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi16 + #define _mm256_loadu_epi16(a) simde_mm256_loadu_epi16(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi32(mem_addr) _mm256_loadu_epi32(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi32(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi32(mem_addr) simde_mm256_loadu_epi32(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi32 + #define _mm256_loadu_epi32(a) simde_mm256_loadu_epi32(a) +#endif + +#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ + && !defined(SIMDE_BUG_CLANG_REV_344862) \ + && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define simde_mm256_loadu_epi64(mem_addr) _mm256_loadu_epi64(mem_addr) +#else +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_epi64(void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(__m256i const *, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#endif +#define simde_x_mm256_loadu_epi64(mem_addr) simde_mm256_loadu_epi64(mem_addr) +#if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) + #undef _mm256_loadu_epi64 + #define _mm256_loadu_epi64(a) simde_mm256_loadu_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu_si256 (void const * mem_addr) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_loadu_si256(SIMDE_ALIGN_CAST(const __m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, mem_addr, sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu_si256 + #define _mm256_loadu_si256(mem_addr) simde_mm256_loadu_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_loadu2_m128 (const float hiaddr[HEDLEY_ARRAY_PARAM(4)], const float loaddr[HEDLEY_ARRAY_PARAM(4)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_ps(simde_mm256_castps128_ps256(simde_mm_loadu_ps(loaddr)), + simde_mm_loadu_ps(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128 + #define _mm256_loadu2_m128(hiaddr, loaddr) simde_mm256_loadu2_m128(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_loadu2_m128d (const double hiaddr[HEDLEY_ARRAY_PARAM(2)], const double loaddr[HEDLEY_ARRAY_PARAM(2)]) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) 
+ return _mm256_loadu2_m128d(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_pd(simde_mm256_castpd128_pd256(simde_mm_loadu_pd(loaddr)), + simde_mm_loadu_pd(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128d + #define _mm256_loadu2_m128d(hiaddr, loaddr) simde_mm256_loadu2_m128d(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_loadu2_m128i (const simde__m128i* hiaddr, const simde__m128i* loaddr) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + return _mm256_loadu2_m128i(hiaddr, loaddr); + #else + return + simde_mm256_insertf128_si256(simde_mm256_castsi128_si256(simde_mm_loadu_si128(loaddr)), + simde_mm_loadu_si128(hiaddr), 1); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_loadu2_m128i + #define _mm256_loadu2_m128i(hiaddr, loaddr) simde_mm256_loadu2_m128i(hiaddr, loaddr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask)); + #else + return _mm_maskload_pd(mem_addr, mask); + #endif + #else + simde__m128d_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_pd(simde_mm_load_pd(mem_addr), + simde__m128d_from_wasm_v128(wasm_i64x2_shr(mask_.wasm_v128, 63))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = mask_shr_.i64[i] ? mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_pd + #define _mm_maskload_pd(mem_addr, mask) simde_mm_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_maskload_pd (const simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask)); + #else + return _mm256_maskload_pd(mem_addr, mask); + #endif + #else + simde__m256d_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : SIMDE_FLOAT64_C(0.0); + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_pd + #define _mm256_maskload_pd(mem_addr, mask) simde_mm256_maskload_pd(HEDLEY_REINTERPRET_CAST(double const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask)); + #else + return _mm_maskload_ps(mem_addr, mask); + #endif + #else + simde__m128_private r_; + simde__m128i_private + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + return simde_mm_and_ps(simde_mm_load_ps(mem_addr), + simde__m128_from_wasm_v128(wasm_i32x4_shr(mask_.wasm_v128, 31))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i32) / sizeof(mask_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = mask_shr_.i32[i] ? mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_ps + #define _mm_maskload_ps(mem_addr, mask) simde_mm_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_maskload_ps (const simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_maskload_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask)); + #else + return _mm256_maskload_ps(mem_addr, mask); + #endif + #else + simde__m256_private r_; + simde__m256i_private mask_ = simde__m256i_to_private(mask); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (mask_.i32[i] >> 31) ? 
mem_addr[i] : SIMDE_FLOAT32_C(0.0); + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_ps + #define _mm256_maskload_ps(mem_addr, mask) simde_mm256_maskload_ps(HEDLEY_REINTERPRET_CAST(float const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128d, mask), a); + #else + _mm_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128d_private a_ = simde__m128d_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 0)) & 0x8000000000000000ull) != 0) + mem_addr[0] = wasm_f64x2_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i64x2_extract_lane(mask_.wasm_v128, 1)) & 0x8000000000000000ull) != 0) + mem_addr[1] = wasm_f64x2_extract_lane(a_.wasm_v128, 1); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.f64[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_pd + #define _mm_maskstore_pd(mem_addr, mask, a) simde_mm_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_pd(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256d, mask), a); + #else + _mm256_maskstore_pd(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256d_private a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.f64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_pd + #define _mm256_maskstore_pd(mem_addr, mask, a) simde_mm256_maskstore_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m128, mask), a); + #else + _mm_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128_private a_ = simde__m128_to_private(a); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 0)) & 0x80000000ull) != 0) + mem_addr[0] = wasm_f32x4_extract_lane(a_.wasm_v128, 0); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 1)) & 0x80000000ull) != 0) + mem_addr[1] = wasm_f32x4_extract_lane(a_.wasm_v128, 1); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 2)) & 0x80000000ull) != 0) + mem_addr[2] = 
wasm_f32x4_extract_lane(a_.wasm_v128, 2); + if ((HEDLEY_STATIC_CAST(unsigned long long, wasm_i32x4_extract_lane(mask_.wasm_v128, 3)) & 0x80000000ull) != 0) + mem_addr[3] = wasm_f32x4_extract_lane(a_.wasm_v128, 3); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_ps + #define _mm_maskstore_ps(mem_addr, mask, a) simde_mm_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_ps (simde_float32 mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + _mm256_maskstore_ps(mem_addr, HEDLEY_REINTERPRET_CAST(simde__m256, mask), a); + #else + _mm256_maskstore_ps(mem_addr, mask, a); + #endif + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256_private a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.f32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_ps + #define _mm256_maskstore_ps(mem_addr, mask, a) simde_mm256_maskstore_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_min_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_min_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_min_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_ps + #define _mm256_min_ps(a, b) simde_mm256_min_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_min_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_min_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_min_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_min_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_pd + #define _mm256_min_pd(a, b) simde_mm256_min_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_max_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_max_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_max_ps(a_.m128[1], b_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_ps + #define _mm256_max_ps(a, b) simde_mm256_max_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_max_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_max_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_max_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_max_pd(a_.m128d[1], b_.m128d[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_pd + #define _mm256_max_pd(a, b) simde_mm256_max_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_movedup_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movedup_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, a_.f64, 0, 0, 2, 2); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i += 2) { + r_.f64[i] = r_.f64[i + 1] = a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movedup_pd + #define _mm256_movedup_pd(a) simde_mm256_movedup_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_movehdup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movehdup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 1, 1, 3, 3, 5, 5, 7, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i - 1] = r_.f32[i] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movehdup_ps + #define _mm256_movehdup_ps(a) simde_mm256_movehdup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_moveldup_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_moveldup_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, a_.f32, 0, 0, 2, 2, 4, 4, 6, 6); + #else + SIMDE_VECTORIZE 
+ for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i += 2) { + r_.f32[i] = r_.f32[i + 1] = a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_moveldup_ps + #define _mm256_moveldup_ps(a) simde_mm256_moveldup_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_ps(a); + #else + simde__m256_private a_ = simde__m256_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { + r |= (a_.u32[i] >> 31) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_ps + #define _mm256_movemask_ps(a) simde_mm256_movemask_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_movemask_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_movemask_pd(a); + #else + simde__m256d_private a_ = simde__m256d_to_private(a); + int r = 0; + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.f64) / sizeof(a_.f64[0])) ; i++) { + r |= (a_.u64[i] >> 63) << i; + } + + return r; + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_pd + #define _mm256_movemask_pd(a) simde_mm256_movemask_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mul_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_mul_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_mul_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 * b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] * b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_ps + #define _mm256_mul_ps(a, b) simde_mm256_mul_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mul_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_mul_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_mul_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_mul_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 * b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] * b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_mul_pd + #define _mm256_mul_pd(a, b) simde_mm256_mul_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_or_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_or_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_or_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; 
+ #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] | b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_ps + #define _mm256_or_ps(a, b) simde_mm256_or_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_or_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_or_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_or_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_or_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] | b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_pd + #define _mm256_or_pd(a, b) simde_mm256_or_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute_ps (simde__m256 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.m128_private[i >> 2].f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_ps(a, imm8) _mm256_permute_ps(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_ps + #define _mm256_permute_ps(a, imm8) simde_mm256_permute_ps(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute_pd(a, imm8) _mm256_permute_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute_pd + #define _mm256_permute_pd(a, imm8) simde_mm256_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute_ps (simde__m128 a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(imm8 >> ((i << 1) & 7)) & 3]; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_ps(a, imm8) _mm_permute_ps(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_ps(a, imm8) simde__m128_from_wasm_v128(wasm_i32x4_shuffle(simde__m128_to_wasm_v128(a), simde__m128_to_wasm_v128(a), ((imm8) & 3), (((imm8) >> 2) & 3 ), (((imm8) >> 4) & 3), (((imm8) >> 6) & 3))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_ps + #define _mm_permute_ps(a, imm8) simde_mm_permute_ps(a, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute_pd (simde__m128d a, const int imm8) + 
SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((imm8 >> i) & 1) + (i & 2)]; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm_permute_pd(a, imm8) _mm_permute_pd(a, imm8) +#elif defined(SIMDE_WASM_SIMD128_NATIVE) +# define simde_mm_permute_pd(a, imm8) simde__m128d_from_wasm_v128(wasm_i64x2_shuffle(simde__m128d_to_wasm_v128(a), simde__m128d_to_wasm_v128(a), ((imm8) & 1), (((imm8) >> 1) & 1 ))) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permute_pd + #define _mm_permute_pd(a, imm8) simde_mm_permute_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permutevar_ps (simde__m128 a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_ps(a, b); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f32x4_make( + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 0) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 1) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 2) & 3]), + (a_.f32[wasm_i32x4_extract_lane(b_.wasm_v128, 3) & 3])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[b_.i32[i] & 3]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_ps + #define _mm_permutevar_ps(a, b) simde_mm_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permutevar_pd (simde__m128d a, simde__m128i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_permutevar_pd(a, b); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_f64x2_make( + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 0) >> 1) & 1]), + (a_.f64[(wasm_i64x2_extract_lane(b_.wasm_v128, 1) >> 1) & 1])); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[(b_.i64[i] & 2) >> 1]; + } + #endif + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_permutevar_pd + #define _mm_permutevar_pd(a, b) simde_mm_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar_ps (simde__m256 a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[(b_.i32[i] & 3) + (i & 4)]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_ps + #define _mm256_permutevar_ps(a, b) simde_mm256_permutevar_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permutevar_pd (simde__m256d a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_permutevar_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + simde__m256i_private b_ = 
simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[((b_.i64[i] & 2) >> 1) + (i & 2)]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar_pd + #define _mm256_permutevar_pd(a, b) simde_mm256_permutevar_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2f128_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.m128_private[0] = (imm8 & 0x08) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x02) ? b_.m128_private[(imm8 ) & 1] : a_.m128_private[(imm8 ) & 1]); + r_.m128_private[1] = (imm8 & 0x80) ? simde__m128_to_private(simde_mm_setzero_ps()) : ((imm8 & 0x20) ? b_.m128_private[(imm8 >> 4) & 1] : a_.m128_private[(imm8 >> 4) & 1]); + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_ps(a, b, imm8) _mm256_permute2f128_ps(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_ps + #define _mm256_permute2f128_ps(a, b, imm8) simde_mm256_permute2f128_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2f128_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.m128d_private[0] = (imm8 & 0x08) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x02) ? b_.m128d_private[(imm8 ) & 1] : a_.m128d_private[(imm8 ) & 1]); + r_.m128d_private[1] = (imm8 & 0x80) ? simde__m128d_to_private(simde_mm_setzero_pd()) : ((imm8 & 0x20) ? b_.m128d_private[(imm8 >> 4) & 1] : a_.m128d_private[(imm8 >> 4) & 1]); + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_pd(a, b, imm8) _mm256_permute2f128_pd(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_pd + #define _mm256_permute2f128_pd(a, b, imm8) simde_mm256_permute2f128_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2f128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? 
b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) +# define simde_mm256_permute2f128_si128(a, b, imm8) _mm256_permute2f128_si128(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2f128_si256 + #define _mm256_permute2f128_si256(a, b, imm8) simde_mm256_permute2f128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rcp_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rcp_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_rcp_ps(a_.m128[0]); + r_.m128[1] = simde_mm_rcp_ps(a_.m128[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = SIMDE_FLOAT32_C(1.0) / a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rcp_ps + #define _mm256_rcp_ps(a) simde_mm256_rcp_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_rsqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_rsqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_rsqrt_ps + #define _mm256_rsqrt_ps(a) simde_mm256_rsqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi8 ( + int8_t e31, int8_t e30, int8_t e29, int8_t e28, int8_t e27, int8_t e26, int8_t e25, int8_t e24, + int8_t e23, int8_t e22, int8_t e21, int8_t e20, int8_t e19, int8_t e18, int8_t e17, int8_t e16, + int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, + int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi8( + e31, e30, e29, e28, e27, e26, e25, e24, + e23, e22, e21, e20, e19, e18, e17, e16, + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi8( + e0, e1, e2, e3, e4, e5, e6, e7, + e8, e9, e10, e11, e12, e13, e14, e15, + e16, e17, e18, e19, e20, e21, e22, e23, + e24, e25, e26, e27, e28, e29, e30, e31); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi8 + #define _mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi8(e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi16 ( + int16_t e15, int16_t e14, int16_t e13, int16_t e12, int16_t e11, int16_t e10, int16_t e9, int16_t e8, + int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi16( + e15, e14, e13, e12, e11, e10, e9, e8, + e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi16( + e0, e1, e2, e3, e4, e5, e6, e7, + 
e8, e9, e10, e11, e12, e13, e14, e15); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi16 + #define _mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi16(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi32 ( + int32_t e7, int32_t e6, int32_t e5, int32_t e4, int32_t e3, int32_t e2, int32_t e1, int32_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_epi32(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi32 + #define _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_epi64x (int64_t e3, int64_t e2, int64_t e1, int64_t e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_epi64x(e3, e2, e1, e0); + #else + return simde_mm256_set_epi64x(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_epi64x + #define _mm256_setr_epi64x(e3, e2, e1, e0) \ + simde_mm256_setr_epi64x(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_ps ( + simde_float32 e7, simde_float32 e6, simde_float32 e5, simde_float32 e4, + simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0); + #else + return simde_mm256_set_ps(e0, e1, e2, e3, e4, e5, e6, e7); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_ps + #define _mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) \ + simde_mm256_setr_ps(e7, e6, e5, e4, e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_pd (simde_float64 e3, simde_float64 e2, simde_float64 e1, simde_float64 e0) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_setr_pd(e3, e2, e1, e0); + #else + return simde_mm256_set_pd(e0, e1, e2, e3); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_pd + #define _mm256_setr_pd(e3, e2, e1, e0) \ + simde_mm256_setr_pd(e3, e2, e1, e0) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_setr_m128 (simde__m128 lo, simde__m128 hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128(lo, hi); + #else + return simde_mm256_set_m128(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128 + #define _mm256_setr_m128(lo, hi) \ + simde_mm256_setr_m128(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_setr_m128d (simde__m128d lo, simde__m128d hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return _mm256_setr_m128d(lo, hi); + #else + return simde_mm256_set_m128d(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128d + #define _mm256_setr_m128d(lo, hi) \ + simde_mm256_setr_m128d(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_setr_m128i (simde__m128i lo, simde__m128i hi) { + #if defined(SIMDE_X86_AVX_NATIVE) && \ + !defined(SIMDE_BUG_GCC_REV_247851) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,6,0) + return 
_mm256_setr_m128i(lo, hi); + #else + return simde_mm256_set_m128i(hi, lo); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_setr_m128i + #define _mm256_setr_m128i(lo, hi) \ + simde_mm256_setr_m128i(lo, hi) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_shuffle_ps (simde__m256 a, simde__m256 b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + r_.f32[0] = a_.m128_private[0].f32[(imm8 >> 0) & 3]; + r_.f32[1] = a_.m128_private[0].f32[(imm8 >> 2) & 3]; + r_.f32[2] = b_.m128_private[0].f32[(imm8 >> 4) & 3]; + r_.f32[3] = b_.m128_private[0].f32[(imm8 >> 6) & 3]; + r_.f32[4] = a_.m128_private[1].f32[(imm8 >> 0) & 3]; + r_.f32[5] = a_.m128_private[1].f32[(imm8 >> 2) & 3]; + r_.f32[6] = b_.m128_private[1].f32[(imm8 >> 4) & 3]; + r_.f32[7] = b_.m128_private[1].f32[(imm8 >> 6) & 3]; + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_ps(a, b, imm8) _mm256_shuffle_ps(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + simde_mm256_set_m128( \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 1), simde_mm256_extractf128_ps(b, 1), (imm8)), \ + simde_mm_shuffle_ps(simde_mm256_extractf128_ps(a, 0), simde_mm256_extractf128_ps(b, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_ps(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(32, 32, a, b, \ + (((imm8) >> 0) & 3) + 0, \ + (((imm8) >> 2) & 3) + 0, \ + (((imm8) >> 4) & 3) + 8, \ + (((imm8) >> 6) & 3) + 8, \ + (((imm8) >> 0) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 12, \ + (((imm8) >> 6) & 3) + 12) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_ps + #define _mm256_shuffle_ps(a, b, imm8) simde_mm256_shuffle_ps(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_shuffle_pd (simde__m256d a, simde__m256d b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + r_.f64[0] = a_.f64[((imm8 ) & 1) ]; + r_.f64[1] = b_.f64[((imm8 >> 1) & 1) ]; + r_.f64[2] = a_.f64[((imm8 >> 2) & 1) | 2]; + r_.f64[3] = b_.f64[((imm8 >> 3) & 1) | 2]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_NATIVE) + #define simde_mm256_shuffle_pd(a, b, imm8) _mm256_shuffle_pd(a, b, imm8) +#elif SIMDE_NATURAL_VECTOR_SIZE_LE(128) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + simde_mm256_set_m128d( \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 1), simde_mm256_extractf128_pd(b, 1), (imm8 >> 0) & 3), \ + simde_mm_shuffle_pd(simde_mm256_extractf128_pd(a, 0), simde_mm256_extractf128_pd(b, 0), (imm8 >> 2) & 3)) +#elif defined(SIMDE_SHUFFLE_VECTOR_) + #define simde_mm256_shuffle_pd(a, b, imm8) \ + SIMDE_SHUFFLE_VECTOR_(64, 32, a, b, \ + (((imm8) >> 0) & 1) + 0, \ + (((imm8) >> 1) & 1) + 4, \ + (((imm8) >> 2) & 1) + 2, \ + (((imm8) >> 3) & 1) + 6) +#endif +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_pd + #define _mm256_shuffle_pd(a, b, imm8) simde_mm256_shuffle_pd(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sqrt_ps (simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = 
simde_mm_sqrt_ps(a_.m128[0]); + r_.m128[1] = simde_mm_sqrt_ps(a_.m128[1]); + #elif defined(simde_math_sqrtf) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = simde_math_sqrtf(a_.f32[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_ps + #define _mm256_sqrt_ps(a) simde_mm256_sqrt_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sqrt_pd (simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sqrt_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sqrt_pd(a_.m128d[0]); + r_.m128d[1] = simde_mm_sqrt_pd(a_.m128d[1]); + #elif defined(simde_math_sqrt) + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = simde_math_sqrt(a_.f64[i]); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sqrt_pd + #define _mm256_sqrt_pd(a) simde_mm256_sqrt_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_ps + #define _mm256_store_ps(mem_addr, a) simde_mm256_store_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_pd + #define _mm256_store_pd(mem_addr, a) simde_mm256_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_store_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_store_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_store_si256 + #define _mm256_store_si256(mem_addr, a) simde_mm256_store_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_ps(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_ps + #define _mm256_storeu_ps(mem_addr, a) simde_mm256_storeu_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_pd(mem_addr, a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_pd + #define _mm256_storeu_pd(mem_addr, a) simde_mm256_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu_si256 (void* mem_addr, simde__m256i a) { + #if 
defined(SIMDE_X86_AVX_NATIVE) + _mm256_storeu_si256(SIMDE_ALIGN_CAST(__m256i*, mem_addr), a); + #else + simde_memcpy(mem_addr, &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu_si256 + #define _mm256_storeu_si256(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128 (simde_float32 hi_addr[4], simde_float32 lo_addr[4], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128(hi_addr, lo_addr, a); + #else + simde_mm_storeu_ps(lo_addr, simde_mm256_castps256_ps128(a)); + simde_mm_storeu_ps(hi_addr, simde_mm256_extractf128_ps(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128 + #define _mm256_storeu2_m128(hi_addr, lo_addr, a) simde_mm256_storeu2_m128(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128d (simde_float64 hi_addr[2], simde_float64 lo_addr[2], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128d(hi_addr, lo_addr, a); + #else + simde_mm_storeu_pd(lo_addr, simde_mm256_castpd256_pd128(a)); + simde_mm_storeu_pd(hi_addr, simde_mm256_extractf128_pd(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128d + #define _mm256_storeu2_m128d(hi_addr, lo_addr, a) simde_mm256_storeu2_m128d(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_storeu2_m128i (simde__m128i* hi_addr, simde__m128i* lo_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_91341) && !defined(SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS) + _mm256_storeu2_m128i(hi_addr, lo_addr, a); + #else + simde_mm_storeu_si128(lo_addr, simde_mm256_castsi256_si128(a)); + simde_mm_storeu_si128(hi_addr, simde_mm256_extractf128_si256(a, 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(hi_addr, lo_addr, a) simde_mm256_storeu2_m128i(hi_addr, lo_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_ps (simde_float32 mem_addr[8], simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_ps(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_ps + #define _mm256_stream_ps(mem_addr, a) simde_mm256_stream_ps(HEDLEY_REINTERPRET_CAST(float*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_pd (simde_float64 mem_addr[4], simde__m256d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_pd(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256d), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_pd + #define _mm256_stream_pd(mem_addr, a) simde_mm256_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_stream_si256 (simde__m256i* mem_addr, simde__m256i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + _mm256_stream_si256(mem_addr, a); + #else + simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), &a, sizeof(a)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_stream_si256 + 
#define _mm256_stream_si256(mem_addr, a) simde_mm256_stream_si256(mem_addr, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_sub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_sub_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_sub_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f32 = a_.f32 - b_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[i] - b_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_ps + #define _mm256_sub_ps(a, b) simde_mm256_sub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_hsub_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_ps(a, b); + #else + return simde_mm256_sub_ps(simde_x_mm256_deinterleaveeven_ps(a, b), simde_x_mm256_deinterleaveodd_ps(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_ps + #define _mm256_hsub_ps(a, b) simde_mm256_hsub_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_sub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_sub_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_sub_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_sub_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.f64 = a_.f64 - b_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[i] - b_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_pd + #define _mm256_sub_pd(a, b) simde_mm256_sub_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_hsub_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_hsub_pd(a, b); + #else + return simde_mm256_sub_pd(simde_x_mm256_deinterleaveeven_pd(a, b), simde_x_mm256_deinterleaveodd_pd(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_pd + #define _mm256_hsub_pd(a, b) simde_mm256_hsub_pd(a, b) +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_PUSH + SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_undefined_ps (void) { + simde__m256_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_ps(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256_to_private(simde_mm256_setzero_ps()); +#endif + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_ps + #define _mm256_undefined_ps() simde_mm256_undefined_ps() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_undefined_pd (void) { + simde__m256d_private r_; + +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + 
(!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_pd(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); +#endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_pd + #define _mm256_undefined_pd() simde_mm256_undefined_pd() +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_undefined_si256 (void) { + simde__m256i_private r_; +#if \ + defined(SIMDE_X86_AVX_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(5,0,0)) && \ + (!defined(__has_builtin) || HEDLEY_HAS_BUILTIN(__builtin_ia32_undef256)) + r_.n = _mm256_undefined_si256(); +#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_undefined_si256 + #define _mm256_undefined_si256() simde_mm256_undefined_si256() +#endif + +#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) + HEDLEY_DIAGNOSTIC_POP +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_xor_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128[0] = simde_mm_xor_ps(a_.m128[0], b_.m128[0]); + r_.m128[1] = simde_mm_xor_ps(a_.m128[1], b_.m128[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] ^ b_.u32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_ps + #define _mm256_xor_ps(a, b) simde_mm256_xor_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_xor_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_xor_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r_.m128d[0] = simde_mm_xor_pd(a_.m128d[0], b_.m128d[0]); + r_.m128d[1] = simde_mm_xor_pd(a_.m128d[1], b_.m128d[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] ^ b_.u64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_pd + #define _mm256_xor_pd(a, b) simde_mm256_xor_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_xorsign_ps(simde__m256 dest, simde__m256 src) { + return simde_mm256_xor_ps(simde_mm256_and_ps(simde_mm256_set1_ps(-0.0f), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_xorsign_pd(simde__m256d dest, simde__m256d src) { + return simde_mm256_xor_pd(simde_mm256_and_pd(simde_mm256_set1_pd(-0.0), src), dest); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_x_mm256_negate_ps(simde__m256 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return simde_mm256_xor_ps(a,_mm256_set1_ps(SIMDE_FLOAT32_C(-0.0))); + #else + simde__m256_private + r_, + a_ = 
simde__m256_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f32 = -a_.f32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = -a_.f32[i]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_x_mm256_negate_pd(simde__m256d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return simde_mm256_xor_pd(a, _mm256_set1_pd(SIMDE_FLOAT64_C(-0.0))); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if defined(SIMDE_VECTOR_NEGATE) + r_.f64 = -a_.f64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = -a_.f64[i]; + } + #endif + + return simde__m256d_from_private(r_); + #endif +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpackhi_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 2, 10, 3, 11, 6, 14, 7, 15); + #else + r_.f32[0] = a_.f32[2]; + r_.f32[1] = b_.f32[2]; + r_.f32[2] = a_.f32[3]; + r_.f32[3] = b_.f32[3]; + r_.f32[4] = a_.f32[6]; + r_.f32[5] = b_.f32[6]; + r_.f32[6] = a_.f32[7]; + r_.f32[7] = b_.f32[7]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_ps + #define _mm256_unpackhi_ps(a, b) simde_mm256_unpackhi_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpackhi_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpackhi_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 1, 5, 3, 7); + #else + r_.f64[0] = a_.f64[1]; + r_.f64[1] = b_.f64[1]; + r_.f64[2] = a_.f64[3]; + r_.f64[3] = b_.f64[3]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_pd + #define _mm256_unpackhi_pd(a, b) simde_mm256_unpackhi_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_unpacklo_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_ps(a, b); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.f32, b_.f32, 0, 8, 1, 9, 4, 12, 5, 13); + #else + r_.f32[0] = a_.f32[0]; + r_.f32[1] = b_.f32[0]; + r_.f32[2] = a_.f32[1]; + r_.f32[3] = b_.f32[1]; + r_.f32[4] = a_.f32[4]; + r_.f32[5] = b_.f32[4]; + r_.f32[6] = a_.f32[5]; + r_.f32[7] = b_.f32[5]; + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_ps + #define _mm256_unpacklo_ps(a, b) simde_mm256_unpacklo_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_unpacklo_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_unpacklo_pd(a, b); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.f64, b_.f64, 0, 4, 2, 6); + #else + r_.f64[0] = 
a_.f64[0]; + r_.f64[1] = b_.f64[0]; + r_.f64[2] = a_.f64[2]; + r_.f64[3] = b_.f64[2]; + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_pd + #define _mm256_unpacklo_pd(a, b) simde_mm256_unpacklo_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_zextps128_ps256 (simde__m128 a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_ps(_mm256_setzero_ps(), a, 0); + #else + simde__m256_private r_; + + r_.m128_private[0] = simde__m128_to_private(a); + r_.m128_private[1] = simde__m128_to_private(simde_mm_setzero_ps()); + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextps128_ps256 + #define _mm256_zextps128_ps256(a) simde_mm256_zextps128_ps256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_zextpd128_pd256 (simde__m128d a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_pd(_mm256_setzero_pd(), a, 0); + #else + simde__m256d_private r_; + + r_.m128d_private[0] = simde__m128d_to_private(a); + r_.m128d_private[1] = simde__m128d_to_private(simde_mm_setzero_pd()); + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextpd128_pd256 + #define _mm256_zextpd128_pd256(a) simde_mm256_zextpd128_pd256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_zextsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_insertf128_si256(_mm256_setzero_si256(), a, 0); + #else + simde__m256i_private r_; + + r_.m128i_private[0] = simde__m128i_to_private(a); + r_.m128i_private[1] = simde__m128i_to_private(simde_mm_setzero_si128()); + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_zextsi128_si256 + #define _mm256_zextsi128_si256(a) simde_mm256_zextsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_ps + #define _mm_testc_ps(a, b) simde_mm_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_or(wasm_v128_not(b_.wasm_v128), a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; 
+ } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testc_pd + #define _mm_testc_pd(a, b) simde_mm_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= ~a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_ps + #define _mm256_testc_ps(a, b) simde_mm256_testc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= ~a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_pd + #define _mm256_testc_pd(a, b) simde_mm256_testc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testc_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= ~a_.i32f[i] & b_.i32f[i]; + } + + return HEDLEY_STATIC_CAST(int, !r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testc_si256 + #define _mm256_testc_si256(a, b) simde_mm256_testc_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 31); + m = wasm_v128_and(m, simde_mm_movehl_ps(m, m)); + m = wasm_v128_and(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0); + #else + uint_fast32_t r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_ps + #define _mm_testz_ps(a, b) simde_mm_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testz_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testz_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_not(wasm_v128_and(a_.wasm_v128, b_.wasm_v128)), 63); + return HEDLEY_STATIC_CAST(int, wasm_i64x2_extract_lane(m, 0) & wasm_i64x2_extract_lane(m, 1)); + #else + uint_fast64_t r = 
0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testz_pd + #define _mm_testz_pd(a, b) simde_mm_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_ps(a, b); + #else + uint_fast32_t r = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + r |= a_.u32[i] & b_.u32[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 31) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_ps + #define _mm256_testz_ps(a, b) simde_mm256_testz_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_pd(a, b); + #else + uint_fast64_t r = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + r |= a_.u64[i] & b_.u64[i]; + } + + return HEDLEY_STATIC_CAST(int, ((~r >> 63) & 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_pd + #define _mm256_testz_pd(a, b) simde_mm256_testz_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testz_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testz_si256(a, b); + #else + int_fast32_t r = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + r = simde_mm_testz_si128(a_.m128i[0], b_.m128i[0]) && simde_mm_testz_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r |= a_.i32f[i] & b_.i32f[i]; + } + + r = !r; + #endif + + return HEDLEY_STATIC_CAST(int, r); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testz_si256 + #define _mm256_testz_si256(a, b) simde_mm256_testz_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_ps (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_ps(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u32x4_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 31); + v128_t m2 = wasm_u32x4_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 31); + m = wasm_v128_or(m, simde_mm_movehl_ps(m, m)); + m2 = wasm_v128_or(m2, simde_mm_movehl_ps(m2, m2)); + m = wasm_v128_or(m, simde_mm_shuffle_epi32(m, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + m2 = wasm_v128_or(m2, simde_mm_shuffle_epi32(m2, SIMDE_MM_SHUFFLE(3, 2, 0, 1))); + return wasm_i32x4_extract_lane(m, 0) & wasm_i32x4_extract_lane(m2, 0); + #else + uint32_t rz = 0, rc = 0; + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef 
_mm_testnzc_ps + #define _mm_testnzc_ps(a, b) simde_mm_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm_testnzc_pd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm_testnzc_pd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t m = wasm_u64x2_shr(wasm_v128_and(a_.wasm_v128, b_.wasm_v128), 63); + v128_t m2 = wasm_u64x2_shr(wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128), 63); + return HEDLEY_STATIC_CAST(int, (wasm_i64x2_extract_lane(m, 0) | wasm_i64x2_extract_lane(m, 1)) + & (wasm_i64x2_extract_lane(m2, 0) | wasm_i64x2_extract_lane(m2, 1))); + #else + uint64_t rc = 0, rz = 0; + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm_testnzc_pd + #define _mm_testnzc_pd(a, b) simde_mm_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_ps (simde__m256 a, simde__m256 b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_ps(a, b); + #else + uint32_t rc = 0, rz = 0; + simde__m256_private + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u32) / sizeof(a_.u32[0])) ; i++) { + rc |= ~a_.u32[i] & b_.u32[i]; + rz |= a_.u32[i] & b_.u32[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_ps + #define _mm256_testnzc_ps(a, b) simde_mm256_testnzc_ps(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_pd (simde__m256d a, simde__m256d b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_pd(a, b); + #else + uint64_t rc = 0, rz = 0; + simde__m256d_private + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { + rc |= ~a_.u64[i] & b_.u64[i]; + rz |= a_.u64[i] & b_.u64[i]; + } + + return + (rc >> ((sizeof(rc) * CHAR_BIT) - 1)) & + (rz >> ((sizeof(rz) * CHAR_BIT) - 1)); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_pd + #define _mm256_testnzc_pd(a, b) simde_mm256_testnzc_pd(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_testnzc_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX_NATIVE) + return _mm256_testnzc_si256(a, b); + #else + int32_t rc = 0, rz = 0; + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + rc |= ~a_.i32f[i] & b_.i32f[i]; + rz |= a_.i32f[i] & b_.i32f[i]; + } + + return !!(rc & rz); + #endif +} +#if defined(SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES) + #undef _mm256_testnzc_si256 + #define _mm256_testnzc_si256(a, b) simde_mm256_testnzc_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX_H) */ +/* :: End simde/x86/avx.h :: */ + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi8(a); + #else + simde__m256i_private + r_, + a_ = 
simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi8(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi8(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi8 + #define _mm256_abs_epi8(a) simde_mm256_abs_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi16 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi16(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi16(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi16(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < INT32_C(0)) ? -a_.i16[i] : a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi16 + #define _mm256_abs_epi16(a) simde_mm256_abs_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_abs_epi32(simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_abs_epi32(a); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_abs_epi32(a_.m128i[0]); + r_.m128i[1] = simde_mm_abs_epi32(a_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (a_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_abs_epi32 + #define _mm256_abs_epi32(a) simde_mm256_abs_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 + b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] + b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi8 + #define _mm256_add_epi8(a, b) simde_mm256_add_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 + b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] + b_.i16[i]; + } + #endif + + return 
simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi16 + #define _mm256_add_epi16(a, b) simde_mm256_add_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi16(a, b); + #else + return simde_mm256_add_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi16 + #define _mm256_hadd_epi16(a, b) simde_mm256_hadd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 + b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] + b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi32 + #define _mm256_add_epi32(a, b) simde_mm256_add_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadd_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadd_epi32(a, b); + #else + return simde_mm256_add_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadd_epi32 + #define _mm256_hadd_epi32(a, b) simde_mm256_hadd_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_add_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_add_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_add_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_add_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_CLANG_BAD_VI64_OPS) + r_.i64 = a_.i64 + b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] + b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_add_epi64 + #define _mm256_add_epi64(a, b) simde_mm256_add_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_alignr_epi8 (simde__m256i a, simde__m256i b, int count) + SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + if (HEDLEY_UNLIKELY(count > 31)) + return simde_mm256_setzero_si256(); + + for (size_t h = 0 ; h < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int srcpos = count + HEDLEY_STATIC_CAST(int, i); + if (srcpos > 31) { + r_.m128i_private[h].i8[i] = 0; + } else if (srcpos 
> 15) { + r_.m128i_private[h].i8[i] = a_.m128i_private[h].i8[(srcpos) & 15]; + } else { + r_.m128i_private[h].i8[i] = b_.m128i_private[h].i8[srcpos]; + } + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_BUG_PGI_30106) +# define simde_mm256_alignr_epi8(a, b, count) _mm256_alignr_epi8(a, b, count) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_alignr_epi8(a, b, count) \ + simde_mm256_set_m128i( \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (count)), \ + simde_mm_alignr_epi8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (count))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_alignr_epi8 + #define _mm256_alignr_epi8(a, b, count) simde_mm256_alignr_epi8(a, b, (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_and_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_and_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_and_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_and_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f & b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] & b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_and_si256 + #define _mm256_and_si256(a, b) simde_mm256_and_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_andnot_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_andnot_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_andnot_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_andnot_si128(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_andnot_si256 + #define _mm256_andnot_si256(a, b) simde_mm256_andnot_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi8 + #define _mm256_adds_epi8(a, b) simde_mm256_adds_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epi16(a, b); + #else 
+ simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epi16 + #define _mm256_adds_epi16(a, b) simde_mm256_adds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hadds_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hadds_epi16(a, b); + #else + return simde_mm256_adds_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hadds_epi16 + #define _mm256_hadds_epi16(a, b) simde_mm256_hadds_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu8 + #define _mm256_adds_epu8(a, b) simde_mm256_adds_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_adds_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_adds_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_adds_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_adds_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_adds_epu16 + #define _mm256_adds_epu16(a, b) simde_mm256_adds_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu8 + #define _mm256_avg_epu8(a, b) simde_mm256_avg_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_avg_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_avg_epu16(a, b); + #else + simde__m256i_private 
+ r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_avg_epu16 + #define _mm256_avg_epu16(a, b) simde_mm256_avg_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_blend_epi32(simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 15) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? b_.i32[i] : a_.i32[i]; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm_blend_epi32(a, b, imm8) _mm_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) +# define simde_mm_blend_epi32(a, b, imm8) \ + simde_mm_castps_si128(simde_mm_blend_ps(simde_mm_castsi128_ps(a), simde_mm_castsi128_ps(b), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_blend_epi32 + #define _mm_blend_epi32(a, b, imm8) simde_mm_blend_epi32(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi16(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = ((imm8 >> i%8) & 1) ? b_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && defined(SIMDE_BUG_CLANG_REV_234560) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_castpd_si256(_mm256_blend_epi16(a, b, imm8)) +#elif defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi16(a, b, imm8) _mm256_blend_epi16(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi16(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8)), \ + simde_mm_blend_epi16(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi16 + #define _mm256_blend_epi16(a, b, imm8) simde_mm256_blend_epi16(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blend_epi32(simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = ((imm8 >> i) & 1) ? 
b_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blend_epi32(a, b, imm8) _mm256_blend_epi32(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_blend_epi32(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8) >> 4), \ + simde_mm_blend_epi32(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8) & 0x0F)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blend_epi32 + #define _mm256_blend_epi32(a, b, imm8) simde_mm256_blend_epi32(a, b, imm8) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_blendv_epi8(simde__m256i a, simde__m256i b, simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_blendv_epi8(a, b, mask); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + mask_ = simde__m256i_to_private(mask); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_blendv_epi8(a_.m128i[0], b_.m128i[0], mask_.m128i[0]); + r_.m128i[1] = simde_mm_blendv_epi8(a_.m128i[1], b_.m128i[1], mask_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + __typeof__(mask_.i8) tmp = mask_.i8 >> 7; + r_.i8 = (tmp & b_.i8) | (~tmp & a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + int8_t tmp = mask_.i8[i] >> 7; + r_.i8[i] = (tmp & b_.i8[i]) | (~tmp & a_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_blendv_epi8(a, b, imm8) _mm256_blendv_epi8(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_blendv_epi8 + #define _mm256_blendv_epi8(a, b, mask) simde_mm256_blendv_epi8(a, b, mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastb_epi8(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastb_epi8 + #define _mm_broadcastb_epi8(a) simde_mm_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastb_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastb_epi8(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastb_epi8 + #define _mm256_broadcastb_epi8(a) simde_mm256_broadcastb_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastw_epi16(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + 
#undef _mm_broadcastw_epi16 + #define _mm_broadcastw_epi16(a) simde_mm_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastw_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastw_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastw_epi16 + #define _mm256_broadcastw_epi16(a) simde_mm256_broadcastw_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastd_epi32(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastd_epi32 + #define _mm_broadcastd_epi32(a) simde_mm_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastd_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastd_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastd_epi32 + #define _mm256_broadcastd_epi32(a) simde_mm256_broadcastd_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastq_epi64(a); + #else + simde__m128i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastq_epi64 + #define _mm_broadcastq_epi64(a) simde_mm_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastq_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastq_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_= simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[0]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastq_epi64 + #define _mm256_broadcastq_epi64(a) simde_mm256_broadcastq_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_broadcastss_ps(a); + #elif defined(SIMDE_X86_SSE_NATIVE) + return simde_mm_shuffle_ps(a, a, 0); + #else + simde__m128_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_SHUFFLE_VECTOR_) + r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { 
+ r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastss_ps + #define _mm_broadcastss_ps(a) simde_mm_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_broadcastss_ps (simde__m128 a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastss_ps(a); + #else + simde__m256_private r_; + simde__m128_private a_= simde__m128_to_private(a); + + #if defined(SIMDE_X86_AVX_NATIVE) + __m128 tmp = _mm_permute_ps(a_.n, 0); + r_.n = _mm256_insertf128_ps(_mm256_castps128_ps256(tmp), tmp, 1); + #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + r_.f32 = __builtin_shufflevector(a_.f32, a_.f32, 0, 0, 0, 0, 0, 0, 0, 0); + #elif SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(128) + r_.m128[0] = r_.m128[1] = simde_mm_broadcastss_ps(simde__m128_from_private(a_)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[0]; + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastss_ps + #define _mm256_broadcastss_ps(a) simde_mm256_broadcastss_ps(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_broadcastsd_pd (simde__m128d a) { + return simde_mm_movedup_pd(a); +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_broadcastsd_pd + #define _mm_broadcastsd_pd(a) simde_mm_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_broadcastsd_pd (simde__m128d a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_broadcastsd_pd(a); + #else + simde__m256d_private r_; + simde__m128d_private a_= simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + r_.f64[i] = a_.f64[0]; + } + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsd_pd + #define _mm256_broadcastsd_pd(a) simde_mm256_broadcastsd_pd(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_broadcastsi128_si256 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) + return _mm256_broadcastsi128_si256(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i_private[0] = a_; + r_.m128i_private[1] = a_; + #else + r_.i64[0] = a_.i64[0]; + r_.i64[1] = a_.i64[1]; + r_.i64[2] = a_.i64[0]; + r_.i64[3] = a_.i64[1]; + #endif + + return simde__m256i_from_private(r_); + #endif +} +#define simde_mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_broadcastsi128_si256 + #define _mm256_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) + #undef _mm_broadcastsi128_si256 + #define _mm_broadcastsi128_si256(a) simde_mm256_broadcastsi128_si256(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bslli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i - imm8; + if(i >= (ssize/2)) { + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e 
>= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bslli_epi128(a, imm8) _mm256_bslli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bslli_epi128 + #define _mm256_bslli_epi128(a, imm8) simde_mm256_bslli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_bsrli_epi128 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + const int ssize = HEDLEY_STATIC_CAST(int, (sizeof(r_.i8) / sizeof(r_.i8[0]))); + + SIMDE_VECTORIZE + for (int i = 0 ; i < ssize ; i++) { + const int e = i + imm8; + if(i < (ssize/2)) { + if(e >= 0 && e < (ssize/2)) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + else{ + if(e >= (ssize/2) && e < ssize) + r_.i8[i] = a_.i8[e]; + else + r_.i8[i] = 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,8,0)) && \ + SIMDE_DETECT_CLANG_VERSION_CHECK(3,7,0) + #define simde_mm256_bsrli_epi128(a, imm8) _mm256_bsrli_epi128(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_bsrli_epi128 + #define _mm256_bsrli_epi128(a, imm8) simde_mm256_bsrli_epi128(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi8 + #define _mm256_cmpeq_epi8(a, b) simde_mm256_cmpeq_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi16 + #define _mm256_cmpeq_epi16(a, b) simde_mm256_cmpeq_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi32 + #define _mm256_cmpeq_epi32(a, b) simde_mm256_cmpeq_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpeq_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpeq_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpeq_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpeq_epi64(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpeq_epi64 + #define _mm256_cmpeq_epi64(a, b) simde_mm256_cmpeq_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi8 + #define _mm256_cmpgt_epi8(a, b) simde_mm256_cmpgt_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 > b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi16 + #define _mm256_cmpgt_epi16(a, b) simde_mm256_cmpgt_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi32 + #define _mm256_cmpgt_epi32(a, b) simde_mm256_cmpgt_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmpgt_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cmpgt_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_cmpgt_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_cmpgt_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cmpgt_epi64 + #define _mm256_cmpgt_epi64(a, b) simde_mm256_cmpgt_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi16 + #define _mm256_cvtepi8_epi16(a) simde_mm256_cvtepi8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi32 + #define _mm256_cvtepi8_epi32(a) simde_mm256_cvtepi8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi8_epi64 + #define _mm256_cvtepi8_epi64(a) simde_mm256_cvtepi8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi32 + #define _mm256_cvtepi16_epi32(a) simde_mm256_cvtepi16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi16_epi64 + #define _mm256_cvtepi16_epi64(a) simde_mm256_cvtepi16_epi64(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepi32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepi32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepi32_epi64 + #define _mm256_cvtepi32_epi64(a) simde_mm256_cvtepi32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi16(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i16, a_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi16 + #define _mm256_cvtepu8_epi16(a) simde_mm256_cvtepu8_epi16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi32 + #define _mm256_cvtepu8_epi32(a) simde_mm256_cvtepu8_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu8_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu8_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu8_epi64 + #define _mm256_cvtepu8_epi64(a) simde_mm256_cvtepu8_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi32(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i32, a_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi32 + #define _mm256_cvtepu16_epi32(a) simde_mm256_cvtepu16_epi32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu16_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu16_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if 
defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.m64_private[0].u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu16_epi64 + #define _mm256_cvtepu16_epi64(a) simde_mm256_cvtepu16_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cvtepu32_epi64 (simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_cvtepu32_epi64(a); + #else + simde__m256i_private r_; + simde__m128i_private a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_CONVERT_VECTOR_) + SIMDE_CONVERT_VECTOR_(r_.i64, a_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_cvtepu32_epi64 + #define _mm256_cvtepu32_epi64(a) simde_mm256_cvtepu32_epi64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi8 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 31){ + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i8[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi8(a, index) _mm256_extract_epi8(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi8 + #define _mm256_extract_epi8(a, index) simde_mm256_extract_epi8(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_mm256_extract_epi16 (simde__m256i a, const int index) + SIMDE_REQUIRE_RANGE(index, 0, 15) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.i16[index]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) && \ + (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,10,0)) + #define simde_mm256_extract_epi16(a, index) _mm256_extract_epi16(a, index) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extract_epi16 + #define _mm256_extract_epi16(a, index) simde_mm256_extract_epi16(a, index) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_extracti128_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + return a_.m128i[imm8]; +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_extracti128_si256(a, imm8) _mm256_extracti128_si256(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_extracti128_si256 + #define _mm256_extracti128_si256(a, imm8) simde_mm256_extracti128_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return 
simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_epi32(base_addr, vindex, scale) _mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi32 + #define _mm_i32gather_epi32(base_addr, vindex, scale) simde_mm_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi32 + #define _mm_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_epi32(base_addr, vindex, scale) _mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi32 + #define _mm256_i32gather_epi32(base_addr, vindex, scale) simde_mm256_i32gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi32(simde__m256i src, const int32_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 
&& !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi32 + #define _mm256_mask_i32gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi32(const int32_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_epi32(base_addr, vindex, scale) _mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi32 + #define _mm_i64gather_epi32(base_addr, vindex, scale) simde_mm_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { 
+ r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi32 + #define _mm_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_i64gather_epi32(const int32_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i32[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_epi32(base_addr, vindex, scale) _mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi32 + #define _mm256_i64gather_epi32(base_addr, vindex, scale) simde_mm256_i64gather_epi32(SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm256_mask_i64gather_epi32(simde__m128i src, const int32_t* base_addr, simde__m256i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128i_private + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int32_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i32[i] = dst; + } + else { + r_.i32[i] = src_.i32[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int const*, int32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi32 + #define _mm256_mask_i64gather_epi32(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi32(src, SIMDE_CHECKED_REINTERPRET_CAST(int32_t const*, int const*, base_addr), 
vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i32gather_epi64(base_addr, vindex, scale) _mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_epi64 + #define _mm_i32gather_epi64(base_addr, vindex, scale) simde_mm_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i32gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_epi64 + #define _mm_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i32gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + 
simde__m256i_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i32gather_epi64(base_addr, vindex, scale) _mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_epi64 + #define _mm256_i32gather_epi64(base_addr, vindex, scale) simde_mm256_i32gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i32gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m128i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_epi64 + #define _mm256_mask_i32gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_i64gather_epi64(const int64_t* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , 
vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm_i64gather_epi64(base_addr, vindex, scale) _mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_epi64 + #define _mm_i64gather_epi64(base_addr, vindex, scale) simde_mm_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_mask_i64gather_epi64(simde__m128i src, const int64_t* base_addr, simde__m128i vindex, simde__m128i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex), + src_ = simde__m128i_to_private(src), + mask_ = simde__m128i_to_private(mask), + r_ = simde__m128i_to_private(simde_mm_setzero_si128()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_epi64 + #define _mm_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_i64gather_epi64(const int64_t* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.i64[i] = dst; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define 
simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) + #else + #define simde_mm256_i64gather_epi64(base_addr, vindex, scale) _mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_epi64 + #define _mm256_i64gather_epi64(base_addr, vindex, scale) simde_mm256_i64gather_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mask_i64gather_epi64(simde__m256i src, const int64_t* base_addr, simde__m256i vindex, simde__m256i mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex), + src_ = simde__m256i_to_private(src), + mask_ = simde__m256i_to_private(mask), + r_ = simde__m256i_to_private(simde_mm256_setzero_si256()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + int64_t dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.i64[i] = dst; + } + else { + r_.i64[i] = src_.i64[i]; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) + #else + #define simde_mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(long long const*, base_addr), vindex, mask, scale) + #endif +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_epi64 + #define _mm256_mask_i64gather_epi64(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_epi64(src, HEDLEY_REINTERPRET_CAST(int64_t const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i32gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_ps(base_addr, vindex, scale) _mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_ps + #define _mm_i32gather_ps(base_addr, vindex, scale) 
simde_mm_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i32gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_ps + #define _mm_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_i32gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_ps(base_addr, vindex, scale) _mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_ps + #define _mm256_i32gather_ps(base_addr, vindex, scale) simde_mm256_i32gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_mask_i32gather_ps(simde__m256 src, const simde_float32* base_addr, simde__m256i vindex, simde__m256 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256_private + src_ = simde__m256_to_private(src), + mask_ = simde__m256_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const 
uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m256_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_ps + #define _mm256_mask_i32gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_i64gather_ps(const simde_float32* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_ps(base_addr, vindex, scale) _mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_ps + #define _mm_i64gather_ps(base_addr, vindex, scale) simde_mm_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m128i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_ps(src, 
SIMDE_CHECKED_REINTERPRET_CAST(float const*, float32_t const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_ps + #define _mm_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_i64gather_ps(const simde_float32* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f32[i] = dst; + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_ps(base_addr, vindex, scale) _mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_ps + #define _mm256_i64gather_ps(base_addr, vindex, scale) simde_mm256_i64gather_ps(SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm256_mask_i64gather_ps(simde__m128 src, const simde_float32* base_addr, simde__m256i vindex, simde__m128 mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m128_private + src_ = simde__m128_to_private(src), + mask_ = simde__m128_to_private(mask), + r_ = simde__m128_to_private(simde_mm_setzero_ps()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i32[i] >> 31) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float32 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f32[i] = dst; + } + else { + r_.f32[i] = src_.f32[i]; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(float const*, simde_float32 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_ps + #define _mm256_mask_i64gather_ps(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_ps(src, SIMDE_CHECKED_REINTERPRET_CAST(simde_float32 const*, float const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + 
HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i32gather_pd(base_addr, vindex, scale) _mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i32gather_pd + #define _mm_i32gather_pd(base_addr, vindex, scale) simde_mm_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i32gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i32gather_pd + #define _mm_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i32gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m256d_private + r_; + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i32gather_pd(base_addr, vindex, scale) _mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, 
base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i32gather_pd + #define _mm256_i32gather_pd(base_addr, vindex, scale) simde_mm256_i32gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i32gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m128i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_; + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i32) / sizeof(vindex_.i32[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i32[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i32gather_pd + #define _mm256_mask_i32gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i32gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_i64gather_pd(const simde_float64* base_addr, simde__m128i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_i64gather_pd(base_addr, vindex, scale) _mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_i64gather_pd + #define _mm_i64gather_pd(base_addr, vindex, scale) simde_mm_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_mask_i64gather_pd(simde__m128d src, const simde_float64* base_addr, simde__m128i vindex, simde__m128d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m128i_private + vindex_ = simde__m128i_to_private(vindex); + simde__m128d_private + src_ = 
simde__m128d_to_private(src), + mask_ = simde__m128d_to_private(mask), + r_ = simde__m128d_to_private(simde_mm_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m128d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_mask_i64gather_pd + #define _mm_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_i64gather_pd(const simde_float64* base_addr, simde__m256i vindex, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + const uint8_t* src = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src, sizeof(dst)); + r_.f64[i] = dst; + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_i64gather_pd(base_addr, vindex, scale) _mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_i64gather_pd + #define _mm256_i64gather_pd(base_addr, vindex, scale) simde_mm256_i64gather_pd(HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_mask_i64gather_pd(simde__m256d src, const simde_float64* base_addr, simde__m256i vindex, simde__m256d mask, const int32_t scale) + SIMDE_REQUIRE_CONSTANT(scale) + HEDLEY_REQUIRE_MSG((scale && scale <= 8 && !(scale & (scale - 1))), "`scale' must be a power of two less than or equal to 8") { + simde__m256i_private + vindex_ = simde__m256i_to_private(vindex); + simde__m256d_private + src_ = simde__m256d_to_private(src), + mask_ = simde__m256d_to_private(mask), + r_ = simde__m256d_to_private(simde_mm256_setzero_pd()); + const uint8_t* addr = HEDLEY_REINTERPRET_CAST(const uint8_t*, base_addr); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(vindex_.i64) / sizeof(vindex_.i64[0])) ; i++) { + if ((mask_.i64[i] >> 63) & 1) { + const uint8_t* src1 = addr + (HEDLEY_STATIC_CAST(size_t , vindex_.i64[i]) * HEDLEY_STATIC_CAST(size_t , scale)); + simde_float64 dst; + simde_memcpy(&dst, src1, sizeof(dst)); + r_.f64[i] = dst; + } + else { + r_.f64[i] = src_.f64[i]; + } + } + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + 
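/* Editorial note -- not part of the upstream SIMDE patch. The gather
   emulations above all share one pattern: each element's byte offset is
   vindex[i] * scale from base_addr, and the value is read with
   simde_memcpy() so that unaligned accesses and strict aliasing are handled
   portably; the masked variants keep src[i] wherever the mask element's
   sign bit is clear. A minimal usage sketch (hypothetical data, not taken
   from the patch):

     double tbl[8] = { 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 };
     simde__m256i idx = simde_mm256_set_epi64x(6, 4, 2, 0);
     simde__m256d v = simde_mm256_i64gather_pd(tbl, idx, 8);
     // v now holds { tbl[0], tbl[2], tbl[4], tbl[6] }; scale 8 == sizeof(double)
*/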
#define simde_mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) _mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mask_i64gather_pd + #define _mm256_mask_i64gather_pd(src, base_addr, vindex, mask, scale) simde_mm256_mask_i64gather_pd(src, HEDLEY_REINTERPRET_CAST(simde_float64 const*, base_addr), vindex, mask, scale) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_inserti128_si256(simde__m256i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 1) { + simde__m256i_private a_ = simde__m256i_to_private(a); + simde__m128i_private b_ = simde__m128i_to_private(b); + + a_.m128i_private[ imm8 & 1 ] = b_; + + return simde__m256i_from_private(a_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_inserti128_si256(a, b, imm8) _mm256_inserti128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_inserti128_si256 + #define _mm256_inserti128_si256(a, b, imm8) simde_mm256_inserti128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_madd_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_madd_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_madd_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_madd_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) + SIMDE_ALIGN_TO_32 int32_t product SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t a32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t b32x16 SIMDE_VECTOR(64); + SIMDE_ALIGN_TO_32 int32_t even SIMDE_VECTOR(32); + SIMDE_ALIGN_TO_32 int32_t odd SIMDE_VECTOR(32); + + SIMDE_CONVERT_VECTOR_(a32x16, a_.i16); + SIMDE_CONVERT_VECTOR_(b32x16, b_.i16); + product = a32x16 * b32x16; + + even = __builtin_shufflevector(product, product, 0, 2, 4, 6, 8, 10, 12, 14); + odd = __builtin_shufflevector(product, product, 1, 3, 5, 7, 9, 11, 13, 15); + + r_.i32 = even + odd; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { + r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_madd_epi16 + #define _mm256_madd_epi16(a, b) simde_mm256_madd_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maddubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maddubs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_maddubs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_maddubs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int idx = HEDLEY_STATIC_CAST(int, i) << 1; + int32_t ts = + (HEDLEY_STATIC_CAST(int16_t, a_.u8[ idx ]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[ idx ])) + + (HEDLEY_STATIC_CAST(int16_t, a_.u8[idx + 1]) * HEDLEY_STATIC_CAST(int16_t, b_.i8[idx + 1])); + r_.i16[i] = (ts > INT16_MIN) ? ((ts < INT16_MAX) ? 
HEDLEY_STATIC_CAST(int16_t, ts) : INT16_MAX) : INT16_MIN; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maddubs_epi16 + #define _mm256_maddubs_epi16(a, b) simde_mm256_maddubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi32(mem_addr, mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i32 = vshrq_n_s32(mask_.neon_i32, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + mask_shr_.i32[i] = mask_.i32[i] >> 31; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = mask_shr_.i32[i] ? mem_addr[i] : INT32_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi32 + #define _mm_maskload_epi32(mem_addr, mask) simde_mm_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi32 (const int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi32(mem_addr, mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (mask_.i32[i] >> 31) ? mem_addr[i] : INT32_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi32 + #define _mm256_maskload_epi32(mem_addr, mask) simde_mm256_maskload_epi32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m128i_private + r_, + mask_ = simde__m128i_to_private(mask), + mask_shr_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + mask_shr_.neon_i64 = vshrq_n_s64(mask_.neon_i64, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(mask_.i64) / sizeof(mask_.i64[0])) ; i++) { + mask_shr_.i64[i] = mask_.i64[i] >> 63; + } + #endif + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = mask_shr_.i64[i] ? mem_addr[i] : INT64_C(0); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskload_epi64 + #define _mm_maskload_epi64(mem_addr, mask) simde_mm_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_maskload_epi64 (const int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(const long long *, mem_addr), mask); + #else + simde__m256i_private + mask_ = simde__m256i_to_private(mask), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (mask_.i64[i] >> 63) ? 
mem_addr[i] : INT64_C(0); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskload_epi64 + #define _mm256_maskload_epi64(mem_addr, mask) simde_mm256_maskload_epi64(HEDLEY_REINTERPRET_CAST(int64_t const*, mem_addr), mask) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi32(mem_addr, mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi32 + #define _mm_maskstore_epi32(mem_addr, mask, a) simde_mm_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi32 (int32_t mem_addr[HEDLEY_ARRAY_PARAM(8)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi32(mem_addr, mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i32) / sizeof(a_.i32[0])) ; i++) { + if (mask_.u32[i] & (UINT32_C(1) << 31)) + mem_addr[i] = a_.i32[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi32 + #define _mm256_maskstore_epi32(mem_addr, mask, a) simde_mm256_maskstore_epi32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128i mask, simde__m128i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m128i_private mask_ = simde__m128i_to_private(mask); + simde__m128i_private a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] >> 63) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_maskstore_epi64 + #define _mm_maskstore_epi64(mem_addr, mask, a) simde_mm_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +void +simde_mm256_maskstore_epi64 (int64_t mem_addr[HEDLEY_ARRAY_PARAM(4)], simde__m256i mask, simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + _mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(long long *, mem_addr), mask, a); + #else + simde__m256i_private mask_ = simde__m256i_to_private(mask); + simde__m256i_private a_ = simde__m256i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(a_.i64) / sizeof(a_.i64[0])) ; i++) { + if (mask_.u64[i] & (UINT64_C(1) << 63)) + mem_addr[i] = a_.i64[i]; + } + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskstore_epi64 + #define _mm256_maskstore_epi64(mem_addr, mask, a) simde_mm256_maskstore_epi64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), mask, a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_max_epi8(a, b); + #else + 
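/* Editorial note -- not part of the upstream SIMDE patch. The integer
   min/max fallbacks below either delegate to the 128-bit SSE helpers on the
   two halves of the 256-bit register, or run a plain per-element loop. A
   minimal sketch of the scalar idea, using a hypothetical helper rather
   than the simde private types:

     static void max_i8(const int8_t *a, const int8_t *b, int8_t *r, size_t n) {
       for (size_t i = 0 ; i < n ; i++) {
         r[i] = (a[i] > b[i]) ? a[i] : b[i];   // same comparison as r_.i8[i] below
       }
     }

   e.g. with a = {-3, 7} and b = {5, -9} the result is {5, 7}.
*/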
simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] > b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi8 + #define _mm256_max_epi8(a, b) simde_mm256_max_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu8 + #define _mm256_max_epu8(a, b) simde_mm256_max_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu16 + #define _mm256_max_epu16(a, b) simde_mm256_max_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epu32 + #define _mm256_max_epu32(a, b) simde_mm256_max_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi16 + #define _mm256_max_epi16(a, b) simde_mm256_max_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_max_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_max_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_max_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_max_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] > b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_max_epi32 + #define _mm256_max_epi32(a, b) simde_mm256_max_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(__PGI) + return _mm256_min_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] < b_.i8[i] ? a_.i8[i] : b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi8 + #define _mm256_min_epi8(a, b) simde_mm256_min_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi16 + #define _mm256_min_epi16(a, b) simde_mm256_min_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] < b_.i32[i] ? a_.i32[i] : b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epi32 + #define _mm256_min_epi32(a, b) simde_mm256_min_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu8 + #define _mm256_min_epu8(a, b) simde_mm256_min_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? a_.u16[i] : b_.u16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu16 + #define _mm256_min_epu16(a, b) simde_mm256_min_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_min_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_min_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_min_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_min_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? 
a_.u32[i] : b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_min_epu32 + #define _mm256_min_epu32(a, b) simde_mm256_min_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_mm256_movemask_epi8 (simde__m256i a) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_movemask_epi8(a); + #else + simde__m256i_private a_ = simde__m256i_to_private(a); + uint32_t r = 0; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(a_.m128i) / sizeof(a_.m128i[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t,simde_mm_movemask_epi8(a_.m128i[i])) << (16 * i); + } + #else + r = 0; + SIMDE_VECTORIZE_REDUCTION(|:r) + for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { + r |= HEDLEY_STATIC_CAST(uint32_t, (a_.u8[31 - i] >> 7)) << (31 - i); + } + #endif + + return HEDLEY_STATIC_CAST(int32_t, r); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_movemask_epi8 + #define _mm256_movemask_epi8(a) simde_mm256_movemask_epi8(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mpsadbw_epu8 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + const int a_offset1 = imm8 & 4; + const int b_offset1 = (imm8 & 3) << 2; + const int a_offset2 = (imm8 >> 3) & 4; + const int b_offset2 = ((imm8 >> 3) & 3) << 2; + + #if defined(simde_math_abs) + const int halfway_point = HEDLEY_STATIC_CAST(int, (sizeof(r_.u16) / sizeof(r_.u16[0])) ) / 2; + for (int i = 0 ; i < halfway_point ; i++) { + r_.u16[i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 0] - b_.u8[b_offset1 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 1] - b_.u8[b_offset1 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 2] - b_.u8[b_offset1 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[a_offset1 + i + 3] - b_.u8[b_offset1 + 3]))); + r_.u16[halfway_point + i] = + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 0] - b_.u8[2 * halfway_point + b_offset2 + 0]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 1] - b_.u8[2 * halfway_point + b_offset2 + 1]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 2] - b_.u8[2 * halfway_point + b_offset2 + 2]))) + + HEDLEY_STATIC_CAST(uint16_t, simde_math_abs(HEDLEY_STATIC_CAST(int, a_.u8[2 * halfway_point + a_offset2 + i + 3] - b_.u8[2 * halfway_point + b_offset2 + 3]))); + } + #else + HEDLEY_UNREACHABLE(); + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) && SIMDE_DETECT_CLANG_VERSION_CHECK(3,9,0) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) _mm256_mpsadbw_epu8(a, b, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + #define simde_mm256_mpsadbw_epu8(a, b, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 1), simde_mm256_extracti128_si256(b, 1), (imm8 >> 3)), \ + simde_mm_mpsadbw_epu8(simde_mm256_extracti128_si256(a, 0), simde_mm256_extracti128_si256(b, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef 
_mm256_mpsadbw_epu8 + #define _mm256_mpsadbw_epu8(a, b, imm8) simde_mm256_mpsadbw_epu8(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epi32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i32[i * 2]) * + HEDLEY_STATIC_CAST(int64_t, b_.i32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epi32(a, b) simde_mm256_mul_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mul_epu32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mul_epu32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_mul_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_mul_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mul_epu32(a, b) simde_mm256_mul_epu32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epi16(a, b) simde_mm256_mulhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhi_epu16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhi_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhi_epu16(a, b) simde_mm256_mulhi_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mulhrs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mulhrs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / 
sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (((HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) + 0x4000) >> 15)); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_mulhrs_epi16(a, b) simde_mm256_mulhrs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi16(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] * b_.i16[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi16 + #define _mm256_mullo_epi16(a, b) simde_mm256_mullo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_mullo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_mullo_epi32(a, b); + #else + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] * b_.i32[i]); + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_mullo_epi32 + #define _mm256_mullo_epi32(a, b) simde_mm256_mullo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_mullo_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 * b_.u32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] * b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_or_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_or_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_or_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_or_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f | b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_or_si256 + #define _mm256_or_si256(a, b) simde_mm256_or_si256(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packs_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/2; + const size_t 
quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0]))/4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.i8[i] = (a_.i16[i] > INT8_MAX) ? INT8_MAX : ((a_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[i])); + r_.i8[i + quarter_point] = (b_.i16[i] > INT8_MAX) ? INT8_MAX : ((b_.i16[i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[i])); + r_.i8[halfway_point + i] = (a_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((a_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, a_.i16[quarter_point + i])); + r_.i8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > INT8_MAX) ? INT8_MAX : ((b_.i16[quarter_point + i] < INT8_MIN) ? INT8_MIN : HEDLEY_STATIC_CAST(int8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi16 + #define _mm256_packs_epi16(a, b) simde_mm256_packs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packs_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packs_epi32(a, b); + #else + simde__m256i_private + r_, + v_[] = { + simde__m256i_to_private(a), + simde__m256i_to_private(b) + }; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packs_epi32(v_[0].m128i[0], v_[1].m128i[0]); + r_.m128i[1] = simde_mm_packs_epi32(v_[0].m128i[1], v_[1].m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + const int32_t v = v_[(i >> 2) & 1].i32[(i & 11) - ((i & 8) >> 1)]; + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, (v > INT16_MAX) ? INT16_MAX : ((v < INT16_MIN) ? INT16_MIN : v)); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packs_epi32 + #define _mm256_packs_epi32(a, b) simde_mm256_packs_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi16(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 2; + const size_t quarter_point = (sizeof(r_.i8) / sizeof(r_.i8[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u8[i] = (a_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[i])); + r_.u8[i + quarter_point] = (b_.i16[i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[i])); + r_.u8[halfway_point + i] = (a_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((a_.i16[quarter_point + i] < 0) ? UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, a_.i16[quarter_point + i])); + r_.u8[halfway_point + i + quarter_point] = (b_.i16[quarter_point + i] > UINT8_MAX) ? UINT8_MAX : ((b_.i16[quarter_point + i] < 0) ? 
UINT8_C(0) : HEDLEY_STATIC_CAST(uint8_t, b_.i16[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi16 + #define _mm256_packus_epi16(a, b) simde_mm256_packus_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_packus_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_packus_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_packus_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_packus_epi32(a_.m128i[1], b_.m128i[1]); + #else + const size_t halfway_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 2; + const size_t quarter_point = (sizeof(r_.i16) / sizeof(r_.i16[0])) / 4; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < quarter_point ; i++) { + r_.u16[i] = (a_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[i])); + r_.u16[i + quarter_point] = (b_.i32[i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[i])); + r_.u16[halfway_point + i] = (a_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((a_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, a_.i32[quarter_point + i])); + r_.u16[halfway_point + i + quarter_point] = (b_.i32[quarter_point + i] > UINT16_MAX) ? UINT16_MAX : ((b_.i32[quarter_point + i] < 0) ? UINT16_C(0) : HEDLEY_STATIC_CAST(uint16_t, b_.i32[quarter_point + i])); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_packus_epi32 + #define _mm256_packus_epi32(a, b) simde_mm256_packus_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute2x128_si256 (simde__m256i a, simde__m256i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + r_.m128i_private[0] = (imm8 & 0x08) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x02) ? b_.m128i_private[(imm8 ) & 1] : a_.m128i_private[(imm8 ) & 1]); + r_.m128i_private[1] = (imm8 & 0x80) ? simde__m128i_to_private(simde_mm_setzero_si128()) : ((imm8 & 0x20) ? b_.m128i_private[(imm8 >> 4) & 1] : a_.m128i_private[(imm8 >> 4) & 1]); + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute2x128_si256(a, b, imm8) _mm256_permute2x128_si256(a, b, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute2x128_si256 + #define _mm256_permute2x128_si256(a, b, imm8) simde_mm256_permute2x128_si256(a, b, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permute4x64_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + r_.i64[0] = (imm8 & 0x02) ? a_.i64[((imm8 ) & 1)+2] : a_.i64[(imm8 ) & 1]; + r_.i64[1] = (imm8 & 0x08) ? a_.i64[((imm8 >> 2 ) & 1)+2] : a_.i64[(imm8 >> 2 ) & 1]; + r_.i64[2] = (imm8 & 0x20) ? a_.i64[((imm8 >> 4 ) & 1)+2] : a_.i64[(imm8 >> 4 ) & 1]; + r_.i64[3] = (imm8 & 0x80) ? 
a_.i64[((imm8 >> 6 ) & 1)+2] : a_.i64[(imm8 >> 6 ) & 1]; + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_epi64(a, imm8) _mm256_permute4x64_epi64(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_epi64 + #define _mm256_permute4x64_epi64(a, imm8) simde_mm256_permute4x64_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute4x64_pd (simde__m256d a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + r_.f64[0] = (imm8 & 0x02) ? a_.f64[((imm8 ) & 1)+2] : a_.f64[(imm8 ) & 1]; + r_.f64[1] = (imm8 & 0x08) ? a_.f64[((imm8 >> 2 ) & 1)+2] : a_.f64[(imm8 >> 2 ) & 1]; + r_.f64[2] = (imm8 & 0x20) ? a_.f64[((imm8 >> 4 ) & 1)+2] : a_.f64[(imm8 >> 4 ) & 1]; + r_.f64[3] = (imm8 & 0x80) ? a_.f64[((imm8 >> 6 ) & 1)+2] : a_.f64[(imm8 >> 6 ) & 1]; + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_permute4x64_pd(a, imm8) _mm256_permute4x64_pd(a, imm8) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permute4x64_pd + #define _mm256_permute4x64_pd(a, imm8) simde_mm256_permute4x64_pd(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_permutevar8x32_epi32 (simde__m256i a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_permutevar8x32_epi32(a, idx); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[idx_.i32[i] & 7]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_epi32 + #define _mm256_permutevar8x32_epi32(a, idx) simde_mm256_permutevar8x32_epi32(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permutevar8x32_ps (simde__m256 a, simde__m256i idx) { + #if defined(SIMDE_X86_AVX2_NATIVE) + #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) + return _mm256_permutevar8x32_ps(a, HEDLEY_REINTERPRET_CAST(simde__m256, idx)); + #else + return _mm256_permutevar8x32_ps(a, idx); + #endif + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + simde__m256i_private + idx_ = simde__m256i_to_private(idx); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + r_.f32[i] = a_.f32[idx_.i32[i] & 7]; + } + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_permutevar8x32_ps + #define _mm256_permutevar8x32_ps(a, idx) simde_mm256_permutevar8x32_ps(a, idx) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sad_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sad_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sad_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sad_epu8(a_.m128i[1], b_.m128i[1]); + #else + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + uint16_t tmp = 0; + SIMDE_VECTORIZE_REDUCTION(+:tmp) + for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 4) ; j++) { + const size_t e = j + (i * 8); + tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); + } + r_.i64[i] = tmp; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sad_epu8 + #define _mm256_sad_epu8(a, b) simde_mm256_sad_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_shuffle_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_shuffle_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_shuffle_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; i++) { + r_.u8[ i ] = (b_.u8[ i ] & 0x80) ? 0 : a_.u8[(b_.u8[ i ] & 0x0f) ]; + r_.u8[i + 16] = (b_.u8[i + 16] & 0x80) ? 0 : a_.u8[(b_.u8[i + 16] & 0x0f) + 16]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi8 + #define _mm256_shuffle_epi8(a, b) simde_mm256_shuffle_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_shuffle_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; + } + for (size_t i = 0 ; i < ((sizeof(r_.i32) / sizeof(r_.i32[0])) / 2) ; i++) { + r_.i32[i + 4] = a_.i32[((imm8 >> (i * 2)) & 3) + 4]; + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shuffle_epi32(a, imm8) _mm256_shuffle_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_shuffle_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shuffle_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shuffle_epi32(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i32 = \ + SIMDE_SHUFFLE_VECTOR_(32, 32, \ + (simde_tmp_a_).i32, \ + (simde_tmp_a_).i32, \ + ((imm8) ) & 3, \ + ((imm8) >> 2) & 3, \ + ((imm8) >> 4) & 3, \ + ((imm8) >> 6) & 3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4) }); })) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shuffle_epi32 + #define _mm256_shuffle_epi32(a, imm8) simde_mm256_shuffle_epi32(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflehi_epi16(a, imm8) _mm256_shufflehi_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflehi_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + 0, 1, 2, 
3, \ + (((imm8) ) & 3) + 4, \ + (((imm8) >> 2) & 3) + 4, \ + (((imm8) >> 4) & 3) + 4, \ + (((imm8) >> 6) & 3) + 4, \ + 8, 9, 10, 11, \ + ((((imm8) ) & 3) + 8 + 4), \ + ((((imm8) >> 2) & 3) + 8 + 4), \ + ((((imm8) >> 4) & 3) + 8 + 4), \ + ((((imm8) >> 6) & 3) + 8 + 4) \ + ) }); })) +#else +# define simde_mm256_shufflehi_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflehi_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflehi_epi16 + #define _mm256_shufflehi_epi16(a, imm8) simde_mm256_shufflehi_epi16(a, imm8) +#endif + +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_shufflelo_epi16(a, imm8) _mm256_shufflelo_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_SHUFFLE_VECTOR_) +# define simde_mm256_shufflelo_epi16(a, imm8) (__extension__ ({ \ + const simde__m256i_private simde_tmp_a_ = simde__m256i_to_private(a); \ + simde__m256i_from_private((simde__m256i_private) { .i16 = \ + SIMDE_SHUFFLE_VECTOR_(16, 32, \ + (simde_tmp_a_).i16, \ + (simde_tmp_a_).i16, \ + (((imm8) ) & 3), \ + (((imm8) >> 2) & 3), \ + (((imm8) >> 4) & 3), \ + (((imm8) >> 6) & 3), \ + 4, 5, 6, 7, \ + ((((imm8) ) & 3) + 8), \ + ((((imm8) >> 2) & 3) + 8), \ + ((((imm8) >> 4) & 3) + 8), \ + ((((imm8) >> 6) & 3) + 8), \ + 12, 13, 14, 15) }); })) +#else +# define simde_mm256_shufflelo_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 1), imm8), \ + simde_mm_shufflelo_epi16(simde_mm256_extracti128_si256(a, 0), imm8)) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_shufflelo_epi16 + #define _mm256_shufflelo_epi16(a, imm8) simde_mm256_shufflelo_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (b_.i8[i] < INT32_C(0)) ? -a_.i8[i] : a_.i8[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi8 + #define _mm256_sign_epi8(a, b) simde_mm256_sign_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (b_.i16[i] < INT32_C(0)) ? 
-a_.i16[i] : a_.i16[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi16 + #define _mm256_sign_epi16(a, b) simde_mm256_sign_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sign_epi32(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sign_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { + r_.i32[i] = (b_.i32[i] < INT32_C(0)) ? -a_.i32[i] : a_.i32[i]; + } + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sign_epi32 + #define _mm256_sign_epi32(a, b) simde_mm256_sign_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi16 + #define _mm256_sll_epi16(a, count) simde_mm256_sll_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 31) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi32 + #define _mm256_sll_epi32(a, count) simde_mm256_sll_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sll_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sll_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sll_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sll_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = 
simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + if (shift > 63) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << (shift)); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sll_epi64 + #define _mm256_sll_epi64(a, count) simde_mm256_sll_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + /* Note: There is no consistency in how compilers handle values outside of + the expected range, hence the discrepancy between what we allow and what + Intel specifies. Some compilers will return 0, others seem to just mask + off everything outside of the range. */ + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i16) / sizeof(a_.altivec_i16[0])) ; i++) { + r_.altivec_i16[i] = vec_sl(a_.altivec_i16[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << (imm8 & 0xff)); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi16(a, imm8) _mm256_slli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi16 + #define _mm256_slli_epi16(a, imm8) simde_mm256_slli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_i32) / sizeof(a_.altivec_i32[0])) ; i++) { + r_.altivec_i32[i] = vec_sl(a_.altivec_i32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 << HEDLEY_STATIC_CAST(int32_t, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] << (imm8 & 0xff); + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi32(a, imm8) _mm256_slli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi32 + #define _mm256_slli_epi32(a, imm8) 
simde_mm256_slli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i64 = a_.i64 << HEDLEY_STATIC_CAST(int64_t, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] << (imm8 & 0xff); + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_epi64(a, imm8) _mm256_slli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_slli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_epi64 + #define _mm256_slli_epi64(a, imm8) simde_mm256_slli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_slli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = HEDLEY_STATIC_CAST(int, i) - imm8; + r_.m128i_private[h].i8[i] = (e >= 0) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_slli_si256(a, imm8) _mm256_slli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_slli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_slli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bslli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_slli_si256 + #define _mm256_slli_si256(a, imm8) simde_mm256_slli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, vreinterpretq_s32_u32(b_.neon_u32)); + r_.neon_u32 = vandq_u32(r_.neon_u32, vcltq_u32(b_.neon_u32, vdupq_n_u32(32))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < UINT32_C(32))) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi32(a, b) _mm_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi32 + #define _mm_sllv_epi32(a, b) simde_mm_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 << b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] << b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi32(a, b) _mm256_sllv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi32 + #define _mm256_sllv_epi32(a, b) simde_mm256_sllv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sllv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, vreinterpretq_s64_u64(b_.neon_u64)); + r_.neon_u64 = vandq_u64(r_.neon_u64, vcltq_u64(b_.neon_u64, vdupq_n_u64(64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_sllv_epi64(a, b) _mm_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_sllv_epi64 + #define _mm_sllv_epi64(a, b) simde_mm_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sllv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sllv_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sllv_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 << b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? 
(a_.u64[i] << b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_sllv_epi64(a, b) _mm256_sllv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sllv_epi64 + #define _mm256_sllv_epi64(a, b) simde_mm256_sllv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi16 + #define _mm256_sra_epi16(a, count) simde_mm256_sra_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sra_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sra_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sra_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_sra_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t, count_.i64[0]); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sra_epi32 + #define _mm256_sra_epi32(a, count) simde_mm256_sra_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 15) shift = 15; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi16(a, imm8) _mm256_srai_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi16 + #define _mm256_srai_epi16(a, imm8) simde_mm256_srai_epi16(a, imm8) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srai_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + unsigned int shift = HEDLEY_STATIC_CAST(unsigned int, imm8); + + if (shift > 31) shift = 31; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = a_.i32 >> HEDLEY_STATIC_CAST(int16_t, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srai_epi32(a, imm8) _mm256_srai_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srai_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srai_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srai_epi32 + #define _mm256_srai_epi32(a, imm8) simde_mm256_srai_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srav_epi32 (simde__m128i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm_srav_epi32(a, count); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + count_ = simde__m128i_to_private(count); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t cnt = vreinterpretq_s32_u32(vminq_u32(count_.neon_u32, vdupq_n_u32(31))); + r_.neon_i32 = vshlq_s32(a_.neon_i32, vnegq_s32(cnt)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + r_.i32[i] = a_.i32[i] >> HEDLEY_STATIC_CAST(int, shift > 31 ? 31 : shift); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srav_epi32 + #define _mm_srav_epi32(a, count) simde_mm_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srav_epi32 (simde__m256i a, simde__m256i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srav_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + count_ = simde__m256i_to_private(count); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srav_epi32(a_.m128i[0], count_.m128i[0]); + r_.m128i[1] = simde_mm_srav_epi32(a_.m128i[1], count_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + uint32_t shift = HEDLEY_STATIC_CAST(uint32_t, count_.i32[i]); + if (shift > 31) shift = 31; + r_.i32[i] = a_.i32[i] >> shift; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srav_epi32 + #define _mm256_srav_epi32(a, count) simde_mm256_srav_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi16 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi16(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi16(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi16(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 16 ? 
16 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi16 + #define _mm256_srl_epi16(a, count) simde_mm256_srl_epi16(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi32 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi32(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi32(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi32(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 32 ? 32 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi32 + #define _mm256_srl_epi32(a, count) simde_mm256_srl_epi32(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srl_epi64 (simde__m256i a, simde__m128i count) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_srl_epi64(a, count); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_srl_epi64(a_.m128i[0], count); + r_.m128i[1] = simde_mm_srl_epi64(a_.m128i[1], count); + #else + simde__m128i_private + count_ = simde__m128i_to_private(count); + + uint64_t shift = HEDLEY_STATIC_CAST(uint64_t , (count_.i64[0] > 64 ? 
64 : count_.i64[0])); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(64, shift); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> (shift); + } + #endif + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srl_epi64 + #define _mm256_srl_epi64(a, count) simde_mm256_srl_epi64(a, count) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi16 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + if (imm8 > 15) + return simde_mm256_setzero_si256(); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned short, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u16) / sizeof(a_.altivec_u16[0])) ; i++) { + r_.altivec_u16[i] = vec_sr(a_.altivec_u16[i], sv); + } + #else + if (HEDLEY_STATIC_CAST(unsigned int, imm8) > 15) { + simde_memset(&r_, 0, sizeof(r_)); + } else { + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = a_.u16[i] >> imm8; + } + #endif + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi16(a, imm8) _mm256_srli_epi16(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi16(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi16(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi16 + #define _mm256_srli_epi16(a, imm8) simde_mm256_srli_epi16(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi32 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sv = vec_splats(HEDLEY_STATIC_CAST(unsigned int, imm8)); + for (size_t i = 0 ; i < (sizeof(a_.altivec_u32) / sizeof(a_.altivec_u32[0])) ; i++) { + r_.altivec_u32[i] = vec_sr(a_.altivec_u32[i], sv); + } + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(16, imm8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] >> imm8; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi32(a, imm8) _mm256_srli_epi32(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi32(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi32(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi32 + #define _mm256_srli_epi32(a, imm8) simde_mm256_srli_epi32(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_epi64 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + +#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + 
r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(32, imm8); +#else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = a_.u64[i] >> imm8; + } +#endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_epi64(a, imm8) _mm256_srli_epi64(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) +# define simde_mm256_srli_epi64(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_epi64(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_epi64 + #define _mm256_srli_epi64(a, imm8) simde_mm256_srli_epi64(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srli_si256 (simde__m256i a, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a); + + for (size_t h = 0 ; h < (sizeof(r_.m128i_private) / sizeof(r_.m128i_private[0])) ; h++) { + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.m128i_private[h].i8) / sizeof(r_.m128i_private[h].i8[0])) ; i++) { + const int e = imm8 + HEDLEY_STATIC_CAST(int, i); + r_.m128i_private[h].i8[i] = (e < 16) ? a_.m128i_private[h].i8[e] : 0; + } + } + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) +# define simde_mm256_srli_si256(a, imm8) _mm256_srli_si256(a, imm8) +#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) && !defined(__PGI) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_srli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) +# define simde_mm256_srli_si256(a, imm8) \ + simde_mm256_set_m128i( \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 1), (imm8)), \ + simde_mm_bsrli_si128(simde_mm256_extracti128_si256(a, 0), (imm8))) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srli_si256 + #define _mm256_srli_si256(a, imm8) simde_mm256_srli_si256(a, imm8) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi32 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? (a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi32(a, b) _mm_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi32 + #define _mm_srlv_epi32(a, b) simde_mm_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), (b_.u32 < 32)) & (a_.u32 >> b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.u32[i] < 32) ? 
(a_.u32[i] >> b_.u32[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi32(a, b) _mm256_srlv_epi32(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi32 + #define _mm256_srlv_epi32(a, b) simde_mm256_srlv_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_srlv_epi64 (simde__m128i a, simde__m128i b) { + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm_srlv_epi64(a, b) _mm_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm_srlv_epi64 + #define _mm_srlv_epi64(a, b) simde_mm_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_srlv_epi64 (simde__m256i a, simde__m256i b) { + simde__m256i_private + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + r_; + + #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), (b_.u64 < 64)) & (a_.u64 >> b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.u64[i] < 64) ? (a_.u64[i] >> b_.u64[i]) : 0; + } + #endif + + return simde__m256i_from_private(r_); +} +#if defined(SIMDE_X86_AVX2_NATIVE) + #define simde_mm256_srlv_epi64(a, b) _mm256_srlv_epi64(a, b) +#endif +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_srlv_epi64 + #define _mm256_srlv_epi64(a, b) simde_mm256_srlv_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_stream_load_si256 (const simde__m256i* mem_addr) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_stream_load_si256(HEDLEY_CONST_CAST(simde__m256i*, mem_addr)); + #else + simde__m256i r; + simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m256i), sizeof(r)); + return r; + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) +# define _mm256_stream_load_si256(mem_addr) simde_mm256_stream_load_si256(mem_addr) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = a_.i8 - b_.i8; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = a_.i8[i] - b_.i8[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi8 + #define _mm256_sub_epi8(a, b) simde_mm256_sub_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ 
= simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = a_.i16 - b_.i16; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = a_.i16[i] - b_.i16[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi16 + #define _mm256_sub_epi16(a, b) simde_mm256_sub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi16(a, b); + #else + return simde_mm256_sub_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi16 + #define _mm256_hsub_epi16(a, b) simde_mm256_hsub_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = a_.i32 - b_.i32; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = a_.i32[i] - b_.i32[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi32 + #define _mm256_sub_epi32(a, b) simde_mm256_sub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsub_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsub_epi32(a, b); + #else + return simde_mm256_sub_epi32(simde_x_mm256_deinterleaveeven_epi32(a, b), simde_x_mm256_deinterleaveodd_epi32(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsub_epi32 + #define _mm256_hsub_epi32(a, b) simde_mm256_hsub_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_sub_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_sub_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_sub_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_sub_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = a_.i64 - b_.i64; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] - b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); +#endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_sub_epi64 + #define _mm256_sub_epi64(a, b) simde_mm256_sub_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_x_mm256_sub_epu32 (simde__m256i a, simde__m256i b) { + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = a_.u32 - b_.u32; + 
#elif SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_x_mm_sub_epu32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_x_mm_sub_epu32(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = a_.u32[i] - b_.u32[i]; + } + #endif + + return simde__m256i_from_private(r_); +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi8 + #define _mm256_subs_epi8(a, b) simde_mm256_subs_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epi16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epi16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epi16 + #define _mm256_subs_epi16(a, b) simde_mm256_subs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_hsubs_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_hsubs_epi16(a, b); + #else + return simde_mm256_subs_epi16(simde_x_mm256_deinterleaveeven_epi16(a, b), simde_x_mm256_deinterleaveodd_epi16(a, b)); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_hsubs_epi16 + #define _mm256_hsubs_epi16(a, b) simde_mm256_hsubs_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu8(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu8 + #define _mm256_subs_epu8(a, b) simde_mm256_subs_epu8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_subs_epu16(simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_subs_epu16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + 
+ #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_subs_epu16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_subs_epu16(a_.m128i[1], b_.m128i[1]); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_subs_epu16 + #define _mm256_subs_epu16(a, b) simde_mm256_subs_epu16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int +simde_x_mm256_test_all_ones (simde__m256i a) { + simde__m256i_private a_ = simde__m256i_to_private(a); + int r; + int_fast32_t r_ = ~HEDLEY_STATIC_CAST(int_fast32_t, 0); + + SIMDE_VECTORIZE_REDUCTION(&:r_) + for (size_t i = 0 ; i < (sizeof(a_.i32f) / sizeof(a_.i32f[0])) ; i++) { + r_ &= a_.i32f[i]; + } + + r = (r_ == ~HEDLEY_STATIC_CAST(int_fast32_t, 0)); + + return r; +} + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 0, 32, 1, 33, 2, 34, 3, 35, + 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, + 20, 52, 21, 53, 22, 54, 23, 55); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi8 + #define _mm256_unpacklo_epi8(a, b) simde_mm256_unpacklo_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 =SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi16 + #define _mm256_unpacklo_epi16(a, b) simde_mm256_unpacklo_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi32(a_.m128i[1], b_.m128i[1]); + 
#elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 0, 8, 1, 9, 4, 12, 5, 13); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi32 + #define _mm256_unpacklo_epi32(a, b) simde_mm256_unpacklo_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpacklo_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpacklo_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpacklo_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpacklo_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 0, 4, 2, 6); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i]; + r_.i64[2 * i + 1] = b_.i64[2 * i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpacklo_epi64 + #define _mm256_unpacklo_epi64(a, b) simde_mm256_unpacklo_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi8 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi8(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi8(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi8(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 32, a_.i8, b_.i8, + 8, 40, 9, 41, 10, 42, 11, 43, + 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, + 28, 60, 29, 61, 30, 62, 31, 63); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0]) / 2) ; i++) { + r_.i8[2 * i] = a_.i8[i + 8 + ~(~i | 7)]; + r_.i8[2 * i + 1] = b_.i8[i + 8 + ~(~i | 7)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi8 + #define _mm256_unpackhi_epi8(a, b) simde_mm256_unpackhi_epi8(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi16 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi16(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi16(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi16(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, + 4, 20, 5, 21, 6, 22, 7, 23, + 12, 28, 13, 29, 14, 30, 15, 31); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0]) / 2) ; i++) { + r_.i16[2 * i] = a_.i16[i + 4 + ~(~i | 3)]; + r_.i16[2 * i + 1] = b_.i16[i + 4 + ~(~i | 3)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if 
defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi16 + #define _mm256_unpackhi_epi16(a, b) simde_mm256_unpackhi_epi16(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi32 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi32(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi32(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi32(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, + 2, 10, 3, 11, 6, 14, 7, 15); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0]) / 2) ; i++) { + r_.i32[2 * i] = a_.i32[i + 2 + ~(~i | 1)]; + r_.i32[2 * i + 1] = b_.i32[i + 2 + ~(~i | 1)]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi32 + #define _mm256_unpackhi_epi32(a, b) simde_mm256_unpackhi_epi32(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_unpackhi_epi64 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_unpackhi_epi64(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_unpackhi_epi64(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_unpackhi_epi64(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_SHUFFLE_VECTOR_) + r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 32, a_.i64, b_.i64, 1, 5, 3, 7); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0]) / 2) ; i++) { + r_.i64[2 * i] = a_.i64[2 * i + 1]; + r_.i64[2 * i + 1] = b_.i64[2 * i + 1]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_unpackhi_epi64 + #define _mm256_unpackhi_epi64(a, b) simde_mm256_unpackhi_epi64(a, b) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_xor_si256 (simde__m256i a, simde__m256i b) { + #if defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_xor_si256(a, b); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b); + + #if SIMDE_NATURAL_INT_VECTOR_SIZE_LE(128) + r_.m128i[0] = simde_mm_xor_si128(a_.m128i[0], b_.m128i[0]); + r_.m128i[1] = simde_mm_xor_si128(a_.m128i[1], b_.m128i[1]); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = a_.i32f ^ b_.i32f; + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = a_.i64[i] ^ b_.i64[i]; + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES) + #undef _mm256_xor_si256 + #define _mm256_xor_si256(a, b) simde_mm256_xor_si256(a, b) +#endif + +SIMDE_END_DECLS_ + +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_X86_AVX2_H) */ +/* :: End simde/x86/avx2.h :: */ + +#if !defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) +# define SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES +#endif + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_cmov_si128 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_cmov_si128(a, b, c); + 
#elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm_ternarylogic_epi32(a, b, c, 0xe4); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_or_si128(_mm_and_si128(c, a), _mm_andnot_si128(c, b)); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vbslq_s8(c_.neon_u8, a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, c_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) + r_.altivec_i32 = vec_sel(b_.altivec_i32, a_.altivec_i32, c_.altivec_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32f = (c_.i32f & a_.i32f) | (~c_.i32f & b_.i32f); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = (c_.i32f[i] & a_.i32f[i]) | (~c_.i32f[i] & b_.i32f[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_cmov_si128(a, b, c) simde_mm_cmov_si128((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256i +simde_mm256_cmov_si256 (simde__m256i a, simde__m256i b, simde__m256i c) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_BUG_GCC_98521) && !defined(SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256) + return _mm256_cmov_si256(a, b, c); + #elif defined(SIMDE_X86_AVX512VL_NATIVE) + return _mm256_ternarylogic_epi32(a, b, c, 0xe4); + #elif defined(SIMDE_X86_AVX2_NATIVE) + return _mm256_or_si256(_mm256_and_si256(c, a), _mm256_andnot_si256(c, b)); + #else + simde__m256i_private + r_, + a_ = simde__m256i_to_private(a), + b_ = simde__m256i_to_private(b), + c_ = simde__m256i_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128i) / sizeof(r_.m128i[0])) ; i++) { + r_.m128i[i] = simde_mm_cmov_si128(a_.m128i[i], b_.m128i[i], c_.m128i[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { + r_.i32f[i] = (c_.i32f[i] & a_.i32f[i]) | (~c_.i32f[i] & b_.i32f[i]); + } + #endif + + return simde__m256i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm256_cmov_si256(a, b, c) simde_mm256_cmov_si256((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epi8(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 == b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epi8(a, b) simde_mm_comeq_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epi16(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 == b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epi16(a, b) simde_mm_comeq_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epi32(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epi32(a, b) simde_mm_comeq_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epi64(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 == b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] == b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epi64(a, b) simde_mm_comeq_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epu8(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vceqq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 == b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] == b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epu8(a, b) simde_mm_comeq_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epu16(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vceqq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 == b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] == b_.u16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epu16(a, b) simde_mm_comeq_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epu32(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vceqq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 == b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] == b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epu32(a, b) simde_mm_comeq_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comeq_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_EQ) + return _mm_com_epu64(a, b, _MM_PCOMCTRL_EQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comeq_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vceqq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 == b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] == b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comeq_epu64(a, b) simde_mm_comeq_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epi8(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 >= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] >= b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epi8(a, b) simde_mm_comge_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epi16(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 >= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] >= b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epi16(a, b) simde_mm_comge_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epi32(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 >= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] >= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epi32(a, b) simde_mm_comge_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epi64(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 >= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] >= b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epi64(a, b) simde_mm_comge_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epu8(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgeq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 >= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] >= b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epu8(a, b) simde_mm_comge_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epu16(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgeq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 >= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] >= b_.u16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epu16(a, b) simde_mm_comge_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epu32(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgeq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 >= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] >= b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epu32(a, b) simde_mm_comge_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comge_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GE) + return _mm_com_epu64(a, b, _MM_PCOMCTRL_GE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comge_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgeq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 >= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] >= b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comge_epu64(a, b) simde_mm_comge_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epi8(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 > b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epi8(a, b) simde_mm_comgt_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epi16(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 > b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epi16(a, b) simde_mm_comgt_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epi32(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 > b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epi32(a, b) simde_mm_comgt_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epi64(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 > b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] > b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epi64(a, b) simde_mm_comgt_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epu8(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcgtq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 > b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epu8(a, b) simde_mm_comgt_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epu16(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcgtq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 > b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] > b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epu16(a, b) simde_mm_comgt_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epu32(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcgtq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 > b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] > b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epu32(a, b) simde_mm_comgt_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comgt_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_GT) + return _mm_com_epu64(a, b, _MM_PCOMCTRL_GT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comgt_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcgtq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 > b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] > b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comgt_epu64(a, b) simde_mm_comgt_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epi8(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 <= b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] <= b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epi8(a, b) simde_mm_comle_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epi16(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 <= b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] <= b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epi16(a, b) simde_mm_comle_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epi32(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 <= b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] <= b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epi32(a, b) simde_mm_comle_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epi64(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 <= b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] <= b_.i64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epi64(a, b) simde_mm_comle_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epu8(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcleq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 <= b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] <= b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epu8(a, b) simde_mm_comle_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epu16(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcleq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 <= b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] <= b_.u16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epu16(a, b) simde_mm_comle_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epu32(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcleq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 <= b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] <= b_.u32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epu32(a, b) simde_mm_comle_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comle_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LE) + return _mm_com_epu64(a, b, _MM_PCOMCTRL_LE); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comle_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcleq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 <= b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] <= b_.u64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comle_epu64(a, b) simde_mm_comle_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epi8(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 < b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epi8(a, b) simde_mm_comlt_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epi16(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 < b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epi16(a, b) simde_mm_comlt_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epi32(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 < b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epi32(a, b) simde_mm_comlt_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epi64(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_s64(a_.neon_i64, b_.neon_i64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 < b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] < b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epi64(a, b) simde_mm_comlt_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epu8(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vcltq_u8(a_.neon_u8, b_.neon_u8); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 < b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epu8(a, b) simde_mm_comlt_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epu16(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vcltq_u16(a_.neon_u16, b_.neon_u16); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 < b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] < b_.u16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epu16(a, b) simde_mm_comlt_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epu32(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vcltq_u32(a_.neon_u32, b_.neon_u32); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 < b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] < b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epu32(a, b) simde_mm_comlt_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comlt_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + return _mm_com_epu64(a, b, _MM_PCOMCTRL_LT); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comlt_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u64 = vcltq_u64(a_.neon_u64, b_.neon_u64); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 < b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] < b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comlt_epu64(a, b) simde_mm_comlt_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epi8(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmvnq_u8(vceqq_s8(a_.neon_i8, b_.neon_i8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), a_.i8 != b_.i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + r_.i8[i] = (a_.i8[i] != b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epi8(a, b) simde_mm_comneq_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epi16(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmvnq_u16(vceqq_s16(a_.neon_i16, b_.neon_i16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), a_.i16 != b_.i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] != b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epi16(a, b) simde_mm_comneq_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epi32(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_s32(a_.neon_i32, b_.neon_i32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.i32 != b_.i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] != b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epi32(a, b) simde_mm_comneq_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epi64(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_s64(a_.neon_i64, b_.neon_i64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), a_.i64 != b_.i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (a_.i64[i] != b_.i64[i]) ? ~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epi64(a, b) simde_mm_comneq_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epu8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epu8(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epu8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vmvnq_u8(vceqq_u8(a_.neon_u8, b_.neon_u8)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u8), a_.u8 != b_.u8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (a_.u8[i] != b_.u8[i]) ? ~INT8_C(0) : INT8_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epu8(a, b) simde_mm_comneq_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epu16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epu16(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epu16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vmvnq_u16(vceqq_u16(a_.neon_u16, b_.neon_u16)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u16), a_.u16 != b_.u16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (a_.u16[i] != b_.u16[i]) ? 
~INT16_C(0) : INT16_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epu16(a, b) simde_mm_comneq_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epu32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epu32(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epu32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vmvnq_u32(vceqq_u32(a_.neon_u32, b_.neon_u32)); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u32), a_.u32 != b_.u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (a_.u32[i] != b_.u32[i]) ? ~INT32_C(0) : INT32_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epu32(a, b) simde_mm_comneq_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comneq_epu64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_NEQ) + return _mm_com_epu64(a, b, _MM_PCOMCTRL_NEQ); + #elif defined(SIMDE_X86_XOP_NATIVE) + return _mm_comneq_epu64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_u64(a_.neon_u64, b_.neon_u64))); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) + r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u64 != b_.u64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (a_.u64[i] != b_.u64[i]) ? 
~INT64_C(0) : INT64_C(0); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comneq_epu64(a, b) simde_mm_comneq_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epi8 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epi8(a, b) simde_mm_comfalse_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epi16 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epi16(a, b) simde_mm_comfalse_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epi32 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epi32(a, b) simde_mm_comfalse_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epi64 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epi64(a, b) simde_mm_comfalse_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epu8 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epu8(a, b) simde_mm_comfalse_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epu16 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epu16(a, b) simde_mm_comfalse_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epu32 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epu32(a, b) simde_mm_comfalse_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comfalse_epu64 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_mm_setzero_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comfalse_epu64(a, b) simde_mm_comfalse_epu64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epi8 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epi8(a, b) simde_mm_comtrue_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epi16 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epi16(a, b) simde_mm_comtrue_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epi32 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epi32(a, b) simde_mm_comtrue_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epi64 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return 
simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epi64(a, b) simde_mm_comtrue_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epu8 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epu8(a, b) simde_mm_comtrue_epu8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epu16 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epu16(a, b) simde_mm_comtrue_epu16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epu32 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epu32(a, b) simde_mm_comtrue_epu32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_comtrue_epu64 (simde__m128i a, simde__m128i b) { + (void) a; + (void) b; + return simde_x_mm_setone_si128(); +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_comtrue_epu64(a, b) simde_mm_comtrue_epu64((a), (b)) +#endif + +#if defined(SIMDE_X86_XOP_NATIVE) && defined(_MM_PCOMCTRL_LT) + #define SIMDE_X86_XOP_HAVE_COM_ 1 + #define SIMDE_MM_PCOMCTRL_LT _MM_PCOMCTRL_LT + #define SIMDE_MM_PCOMCTRL_LE _MM_PCOMCTRL_LE + #define SIMDE_MM_PCOMCTRL_GT _MM_PCOMCTRL_GT + #define SIMDE_MM_PCOMCTRL_GE _MM_PCOMCTRL_GE + #define SIMDE_MM_PCOMCTRL_EQ _MM_PCOMCTRL_EQ + #define SIMDE_MM_PCOMCTRL_NEQ _MM_PCOMCTRL_NEQ + #define SIMDE_MM_PCOMCTRL_FALSE _MM_PCOMCTRL_FALSE + #define SIMDE_MM_PCOMCTRL_TRUE _MM_PCOMCTRL_TRUE +#else + #define SIMDE_MM_PCOMCTRL_LT 0 + #define SIMDE_MM_PCOMCTRL_LE 1 + #define SIMDE_MM_PCOMCTRL_GT 2 + #define SIMDE_MM_PCOMCTRL_GE 3 + #define SIMDE_MM_PCOMCTRL_EQ 4 + #define SIMDE_MM_PCOMCTRL_NEQ 5 + #define SIMDE_MM_PCOMCTRL_FALSE 6 + #define SIMDE_MM_PCOMCTRL_TRUE 7 + + #if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _MM_PCOMCTRL_LT SIMDE_MM_PCOMCTRL_LT + #define _MM_PCOMCTRL_LE SIMDE_MM_PCOMCTRL_LE + #define _MM_PCOMCTRL_GT SIMDE_MM_PCOMCTRL_GT + #define _MM_PCOMCTRL_GE SIMDE_MM_PCOMCTRL_GE + #define _MM_PCOMCTRL_EQ SIMDE_MM_PCOMCTRL_EQ + #define _MM_PCOMCTRL_NEQ SIMDE_MM_PCOMCTRL_NEQ + #define _MM_PCOMCTRL_FALSE SIMDE_MM_PCOMCTRL_FALSE + #define _MM_PCOMCTRL_TRUE SIMDE_MM_PCOMCTRL_TRUE + #endif +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epi8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epi8(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epi8(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epi8(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epi8(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epi8(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epi8(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epi8(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epi8(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epi8(a, b, imm8) _mm_com_epi8((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epi8(a, b, imm8) 
simde_mm_com_epi8((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epi16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epi16(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epi16(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epi16(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epi16(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epi16(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epi16(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epi16(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epi16(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epi16(a, b, imm8) _mm_com_epi16((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epi16(a, b, imm8) simde_mm_com_epi16((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epi32 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epi32(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epi32(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epi32(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epi32(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epi32(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epi32(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epi32(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epi32(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epi32(a, b, imm8) _mm_com_epi32((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epi32(a, b, imm8) simde_mm_com_epi32((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epi64 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epi64(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epi64(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epi64(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epi64(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epi64(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epi64(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epi64(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epi64(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epi64(a, b, imm8) _mm_com_epi64((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epi64(a, b, imm8) simde_mm_com_epi64((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epu8 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epu8(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return 
simde_mm_comle_epu8(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epu8(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epu8(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epu8(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epu8(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epu8(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epu8(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epu8(a, b, imm8) _mm_com_epu8((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epu8(a, b, imm8) simde_mm_com_epu8((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epu16 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epu16(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epu16(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epu16(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epu16(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epu16(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epu16(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epu16(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epu16(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epu16(a, b, imm8) _mm_com_epu16((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epu16(a, b, imm8) simde_mm_com_epu16((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epu32 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epu32(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epu32(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epu32(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epu32(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epu32(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epu32(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return simde_mm_comfalse_epu32(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epu32(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epu32(a, b, imm8) _mm_com_epu32((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epu32(a, b, imm8) simde_mm_com_epu32((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_com_epu64 (simde__m128i a, simde__m128i b, const int imm8) + SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { + switch (imm8) { + case SIMDE_MM_PCOMCTRL_LT: + return simde_mm_comlt_epu64(a, b); + case SIMDE_MM_PCOMCTRL_LE: + return simde_mm_comle_epu64(a, b); + case SIMDE_MM_PCOMCTRL_GT: + return simde_mm_comgt_epu64(a, b); + case SIMDE_MM_PCOMCTRL_GE: + return simde_mm_comge_epu64(a, b); + case SIMDE_MM_PCOMCTRL_EQ: + return simde_mm_comeq_epu64(a, b); + case SIMDE_MM_PCOMCTRL_NEQ: + return simde_mm_comneq_epu64(a, b); + case SIMDE_MM_PCOMCTRL_FALSE: + return 
simde_mm_comfalse_epu64(a, b); + case SIMDE_MM_PCOMCTRL_TRUE: + return simde_mm_comtrue_epu64(a, b); + default: + HEDLEY_UNREACHABLE_RETURN(simde_mm_setzero_si128()); + } +} +#if defined(SIMDE_X86_XOP_NATIVE) && defined(SIMDE_X86_XOP_HAVE_COM_) + #define simde_mm_com_epu64(a, b, imm8) _mm_com_epu64((a), (b), (imm8)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_com_epu64(a, b, imm8) simde_mm_com_epu64((a), (b), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_frcz_ps (simde__m128 a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_frcz_ps(a); + #else + simde__m128_private + r_, + a_ = simde__m128_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if defined(simde_math_modff) + simde_float32 integral; + r_.f32[i] = simde_math_modff(a_.f32[i], &integral); + #else + r_.f32[i] = (a_.f32[i] / 1.0f); + #endif + } + + return simde__m128_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_frcz_ps(a) simde_mm_frcz_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_frcz_pd (simde__m128d a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_frcz_pd(a); + #else + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + #if defined(simde_math_modf) + simde_float64 integral; + r_.f64[i] = simde_math_modf(a_.f64[i], &integral); + #else + r_.f64[i] = (a_.f64[i] / 1.0f); + #endif + } + + return simde__m128d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_frcz_ps(a) simde_mm_frcz_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_frcz_ss (simde__m128 a, simde__m128 b) { + #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673) + return _mm_frcz_ss(a, b); + #else + simde__m128_private + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + + #if defined(simde_math_modff) + simde_float32 integral; + a_.f32[0] = simde_math_modff(b_.f32[0], &integral); + #else + a_.f32[0] = (b_.f32[0] / 1.0f); + #endif + + return simde__m128_from_private(a_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_frcz_ss(a, b) simde_mm_frcz_ss((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_frcz_sd (simde__m128d a, simde__m128d b) { + #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_BUG_CLANG_48673) + return _mm_frcz_sd(a, b); + #else + simde__m128d_private + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + + #if defined(simde_math_modf) + simde_float64 integral; + a_.f64[0] = simde_math_modf(b_.f64[0], &integral); + #else + a_.f64[0] = (b_.f64[0] / 1.0f); + #endif + + return simde__m128d_from_private(a_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_frcz_sd(a, b) simde_mm_frcz_sd((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_frcz_ps (simde__m256 a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm256_frcz_ps(a); + #else + simde__m256_private + r_, + a_ = simde__m256_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_frcz_ps(a_.m128[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + #if defined(simde_math_modff) + simde_float32 integral; + r_.f32[i] = simde_math_modff(a_.f32[i], &integral); + 
#else + r_.f32[i] = (a_.f32[i] / 1.0f); + #endif + } + #endif + + return simde__m256_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_frcz_pd (simde__m256d a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm256_frcz_pd(a); + #else + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_frcz_pd(a_.m128d[i]); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + #if defined(simde_math_modf) + simde_float64 integral; + r_.f64[i] = simde_math_modf(a_.f64[i], &integral); + #else + r_.f64[i] = (a_.f64[i] / 1.0f); + #endif + } + #endif + + return simde__m256d_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm256_frcz_ps(a) simde_mm256_frcz_ps((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddw_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddw_epi8(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(_mm_set1_epi8(INT8_C(1)), a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vpaddlq_s8(a_.neon_i8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i16x8_extadd_pairwise_i8x16(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) one = vec_splat_s8(1); + r_.altivec_i16 = + vec_add( + vec_mule(a_.altivec_i8, one), + vec_mulo(a_.altivec_i8, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i16 = + ((a_.i16 << 8) >> 8) + + ((a_.i16 >> 8) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2)]) + HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddw_epi8(a) simde_mm_haddw_epi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddw_epu8 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddw_epu8(a); + #elif defined(SIMDE_X86_SSSE3_NATIVE) + return _mm_maddubs_epi16(a, _mm_set1_epi8(INT8_C(1))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vpaddlq_u8(a_.neon_u8); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u16x8_extadd_pairwise_u8x16(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) one = vec_splat_u8(1); + r_.altivec_u16 = + vec_add( + vec_mule(a_.altivec_u8, one), + vec_mulo(a_.altivec_u8, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u16 = + ((a_.u16 << 8) >> 8) + + ((a_.u16 >> 8) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2)]) + HEDLEY_STATIC_CAST(uint16_t, a_.u8[(i * 2) + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddw_epu8(a) simde_mm_haddw_epu8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddd_epi8 (simde__m128i a) { + 
#if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddd_epi8(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vpaddlq_s16(vpaddlq_s8(a_.neon_i8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) ]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 1]) + + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int32_t, a_.i8[(i * 4) + 3]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddd_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddd_epi16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return _mm_madd_epi16(a, _mm_set1_epi16(INT8_C(1))); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vpaddlq_s16(a_.neon_i16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_i32x4_extadd_pairwise_i16x8(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) one = vec_splat_s16(1); + r_.altivec_i32 = + vec_add( + vec_mule(a_.altivec_i16, one), + vec_mulo(a_.altivec_i16, one) + ); + #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.i32 = + ((a_.i32 << 16) >> 16) + + ((a_.i32 >> 16) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2)]) + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddd_epi8(a) simde_mm_haddd_epi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddd_epu8 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddd_epu8(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vpaddlq_u16(vpaddlq_u8(a_.neon_u8)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) ]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 1]) + + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint32_t, a_.u8[(i * 4) + 3]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddd_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddd_epu16(a); + #elif defined(SIMDE_X86_SSE2_NATIVE) + return + _mm_add_epi32( + _mm_srli_epi32(a, 16), + _mm_and_si128(a, _mm_set1_epi32(INT32_C(0x0000ffff))) + ); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vpaddlq_u16(a_.neon_u16); + #elif defined(SIMDE_WASM_SIMD128_NATIVE) + r_.wasm_v128 = wasm_u32x4_extadd_pairwise_u16x8(a_.wasm_v128); + #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) one = vec_splat_u16(1); + r_.altivec_u32 = + vec_add( + vec_mule(a_.altivec_u16, one), + vec_mulo(a_.altivec_u16, one) + ); + #elif 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) + r_.u32 = + ((a_.u32 << 16) >> 16) + + ((a_.u32 >> 16) ); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2)]) + HEDLEY_STATIC_CAST(uint32_t, a_.u16[(i * 2) + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddd_epu8(a) simde_mm_haddd_epu8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddq_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddq_epi8(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(vpaddlq_s8(a_.neon_i8))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 1]) + + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 3]) + + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 5]) + + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(int64_t, a_.i8[(i * 8) + 7]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddq_epi8(a) simde_mm_haddq_epi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddq_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddq_epi16(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vpaddlq_s32(vpaddlq_s16(a_.neon_i16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 1]) + + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(int64_t, a_.i16[(i * 4) + 3]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddq_epi16(a) simde_mm_haddq_epi16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddq_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddq_epi32(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vpaddlq_s32(a_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) ]) + HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddq_epi32(a) simde_mm_haddq_epi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddq_epu8 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddq_epu8(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(a_.neon_u8))); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) ]) + HEDLEY_STATIC_CAST(uint64_t, 
a_.u8[(i * 8) + 1]) + + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 3]) + + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 4]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 5]) + + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 6]) + HEDLEY_STATIC_CAST(uint64_t, a_.u8[(i * 8) + 7]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddq_epu8(a) simde_mm_haddq_epu8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddq_epu16 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddq_epu16(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vpaddlq_u32(vpaddlq_u16(a_.neon_u16)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 1]) + + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 2]) + HEDLEY_STATIC_CAST(uint64_t, a_.u16[(i * 4) + 3]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddq_epu16(a) simde_mm_haddq_epu16((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_haddq_epu32 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_haddq_epu32(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vpaddlq_u32(a_.neon_u32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) ]) + HEDLEY_STATIC_CAST(uint64_t, a_.u32[(i * 2) + 1]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_haddq_epu32(a) simde_mm_haddq_epu32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubw_epi8 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_hsubw_epi8(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i8[i * 2]) - HEDLEY_STATIC_CAST(int16_t, a_.i8[(i * 2) + 1]); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_hsubw_epi8(a) simde_mm_hsubw_epi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubd_epi16 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_hsubd_epi16(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = + HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) ]) - HEDLEY_STATIC_CAST(int32_t, a_.i16[(i * 2) + 1]); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_hsubd_epi8(a) simde_mm_hsubd_epi8((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_hsubq_epi32 (simde__m128i a) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_hsubq_epi32(a); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = 
HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) ]) - HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_hsubq_epi32(a) simde_mm_hsubq_epi32((a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_macc_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_macc_epi16(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vmlaq_s16(c_.neon_i16, a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + r_.i16[i] = (a_.i16[i] * b_.i16[i]) + c_.i16[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_macc_epi16(a, b, c) simde_mm_macc_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_macc_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_macc_epi32(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_.neon_i32, b_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (a_.i32[i] * b_.i32[i]) + c_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_macc_epi32(a, b, c) simde_mm_macc_epi32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maccd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maccd_epi16(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + int32x4_t a_even = vmovl_s16(vget_low_s16(even)); + int32x4_t b_even = vmovl_high_s16(even); + r_.neon_i32 = vmlaq_s32(c_.neon_i32, a_even, b_even); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = (HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2])) + c_.i32[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maccd_epi16(a, b, c) simde_mm_maccd_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_macclo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_macclo_epi32(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t even = vuzp1q_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 
2) + 0])) + c_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_macclo_epi16(a, b, c) simde_mm_macclo_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_macchi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_macchi_epi32(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t even = vuzp2q_s32(a_.neon_i32, b_.neon_i32); + r_.neon_i64 = vaddq_s64(vmull_s32(vget_low_s32(even), vget_high_s32(even)), c_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + r_.i64[i] = (HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1])) + c_.i64[i]; + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_macchi_epi16(a, b, c) simde_mm_macchi_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maccs_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maccs_epi16(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int32x4_t c_lo = vmovl_s16(vget_low_s16(c_.neon_i16)); + int32x4_t c_hi = vmovl_high_s16(c_.neon_i16); + int32x4_t lo = vmlal_s16(c_lo, vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); + int32x4_t hi = vmlal_high_s16(c_hi, a_.neon_i16, b_.neon_i16); + r_.neon_i16 = vcombine_s16(vqmovn_s32(lo), vqmovn_s32(hi)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + int32_t tmp = HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i]); + tmp += c_.i16[i]; + if (tmp > INT16_MAX) + r_.i16[i] = INT16_MAX; + else if (tmp < INT16_MIN) + r_.i16[i] = INT16_MIN; + else + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, tmp); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maccs_epi16(a, b, c) simde_mm_maccs_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maccs_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maccs_epi32(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int64x2_t c_lo = vmovl_s32(vget_low_s32(c_.neon_i32)); + int64x2_t c_hi = vmovl_high_s32(c_.neon_i32); + int64x2_t lo = vmlal_s32(c_lo, vget_low_s32(a_.neon_i32), vget_low_s32(b_.neon_i32)); + int64x2_t hi = vmlal_high_s32(c_hi, a_.neon_i32, b_.neon_i32); + r_.neon_i32 = vcombine_s32(vqmovn_s64(lo), vqmovn_s64(hi)); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[i]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[i]); + tmp += HEDLEY_STATIC_CAST(int64_t, c_.i32[i]); + if (tmp > INT32_MAX) + r_.i32[i] = INT32_MAX; + else if (tmp < INT32_MIN) + r_.i32[i] = INT32_MIN; + else + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, tmp); + } + #endif + + return 
simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maccs_epi32(a, b, c) simde_mm_maccs_epi32((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maccsd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maccsd_epi16(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + int16x8_t even = vuzp1q_s16(a_.neon_i16, b_.neon_i16); + r_.neon_i32 = vqaddq_s32(vmull_s16(vget_low_s16(even), vget_high_s16(even)), c_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + int32_t prod = HEDLEY_STATIC_CAST(int32_t, a_.i16[i * 2]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i * 2]); + r_.i32[i] = simde_math_adds_i32(prod, c_.i32[i]); + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maccsd_epi16(a, b, c) simde_mm_maccsd_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maccslo_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maccslo_epi32(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 0]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 0]); + r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maccslo_epi16(a, b, c) simde_mm_maccslo_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maccshi_epi32 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maccshi_epi32(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + int64_t tmp = HEDLEY_STATIC_CAST(int64_t, a_.i32[(i * 2) + 1]) * HEDLEY_STATIC_CAST(int64_t, b_.i32[(i * 2) + 1]); + r_.i64[i] = simde_math_adds_i64(tmp, c_.i64[i]); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maccshi_epi16(a, b, c) simde_mm_maccshi_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddd_epi16 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maddd_epi16(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + r_.i32[i] = + (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) + + (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]); + r_.i32[i] += c_.i32[i]; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maddd_epi16(a, b, c) simde_mm_maddd_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_maddsd_epi16 (simde__m128i a, 
simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_maddsd_epi16(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + /* The AMD64 Architecture Programmer's Manual says that "the" + * addition is saturated; I'm not sure whether that means + * the pairwise addition or the accumulate, or both. */ + r_.i32[i] = + (a_.i16[(i * 2) + 0] * b_.i16[(i * 2) + 0]) + + (a_.i16[(i * 2) + 1] * b_.i16[(i * 2) + 1]); + r_.i32[i] = simde_math_adds_i32(r_.i32[i], c_.i32[i]); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_maddsd_epi16(a, b, c) simde_mm_maddsd_epi16((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sha_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_sha_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i8 = vshlq_s8(a_.neon_i8, b_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + if (b_.i8[i] < 0) { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] >> -b_.i8[i]); + } else { + r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i8[i] << b_.i8[i]); + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_sha_epi8(a, b) simde_mm_sha_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sha_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_sha_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i16 = vshlq_s16(a_.neon_i16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { + if (b_.i16[i] < 0) { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] >> -b_.i16[i]); + } else { + r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << b_.i16[i]); + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_sha_epi16(a, b) simde_mm_sha_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sha_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_sha_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i32 = vshlq_s32(a_.neon_i32, b_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { + if (b_.i32[i] < 0) { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] >> -b_.i32[i]); + } else { + r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, a_.i32[i] << b_.i32[i]); + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_sha_epi32(a, b) simde_mm_sha_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_sha_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_sha_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = 
simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_i64 = vshlq_s64(a_.neon_i64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { + if (b_.i64[i] < 0) { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] >> -b_.i64[i]); + } else { + r_.i64[i] = HEDLEY_STATIC_CAST(int64_t, a_.i64[i] << b_.i64[i]); + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_sha_epi64(a, b) simde_mm_sha_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shl_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_shl_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u8 = vshlq_u8(a_.neon_u8, b_.neon_i8); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + if (HEDLEY_UNLIKELY(b_.i8[i] < -7 || b_.i8[i] > 7)) { + r_.u8[i] = 0; + } else { + if (b_.i8[i] < 0) { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] >> -b_.i8[i]); + } else { + r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.u8[i] << b_.i8[i]); + } + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_shl_epi8(a, b) simde_mm_shl_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shl_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_shl_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u16 = vshlq_u16(a_.neon_u16, b_.neon_i16); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + if (HEDLEY_UNLIKELY(b_.i16[i] < -15 || b_.i16[i] > 15)) { + r_.u16[i] = 0; + } else { + if (b_.i16[i] < 0) { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] >> -b_.i16[i]); + } else { + r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << b_.i16[i]); + } + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_shl_epi16(a, b) simde_mm_shl_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shl_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_shl_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u32 = vshlq_u32(a_.neon_u32, b_.neon_i32); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + if (HEDLEY_UNLIKELY(b_.i32[i] < -31 || b_.i32[i] > 31)) { + r_.u32[i] = 0; + } else { + if (b_.i32[i] < 0) { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] >> -b_.i32[i]); + } else { + r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, a_.u32[i] << b_.i32[i]); + } + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_shl_epi32(a, b) simde_mm_shl_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_shl_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_shl_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = 
simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + r_.neon_u64 = vshlq_u64(a_.neon_u64, b_.neon_i64); + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + if (HEDLEY_UNLIKELY(b_.i64[i] < -63 || b_.i64[i] > 63)) { + r_.u64[i] = 0; + } else { + if (b_.i64[i] < 0) { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] >> -b_.i64[i]); + } else { + r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u64[i] << b_.i64[i]); + } + } + } + #endif + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_shl_epi64(a, b) simde_mm_shl_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rot_epi8 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_rot_epi8(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (b_.i8[i] < 0) ? + HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -b_.i8[i]) | (a_.u8[i] << ( b_.i8[i] & 7)))) : + HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] << b_.i8[i]) | (a_.u8[i] >> (-b_.i8[i] & 7)))); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_rot_epi8(a, b) simde_mm_rot_epi8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rot_epi16 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_rot_epi16(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (b_.i16[i] < 0) ? + HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -b_.i16[i]) | (a_.u16[i] << ( b_.i16[i] & 15)))) : + HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] << b_.i16[i]) | (a_.u16[i] >> (-b_.i16[i] & 15)))); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_rot_epi16(a, b) simde_mm_rot_epi16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rot_epi32 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_rot_epi32(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (b_.i32[i] < 0) ? + HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -b_.i32[i]) | (a_.u32[i] << ( b_.i32[i] & 31)))) : + HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] << b_.i32[i]) | (a_.u32[i] >> (-b_.i32[i] & 31)))); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_rot_epi32(a, b) simde_mm_rot_epi32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_rot_epi64 (simde__m128i a, simde__m128i b) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_rot_epi64(a, b); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (b_.i64[i] < 0) ? 
+ HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -b_.i64[i]) | (a_.u64[i] << ( b_.i64[i] & 63)))) : + HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] << b_.i64[i]) | (a_.u64[i] >> (-b_.i64[i] & 63)))); + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_rot_epi64(a, b) simde_mm_rot_epi64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_roti_epi8 (simde__m128i a, const int count) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { + r_.u8[i] = (count < 0) ? + HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] >> -count) | (a_.u8[i] << ( count & 7)))) : + HEDLEY_STATIC_CAST(uint8_t, ((a_.u8[i] << count) | (a_.u8[i] >> (-count & 7)))); + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_XOP_NATIVE) + #define simde_mm_roti_epi8(a, count) _mm_roti_epi8((a), (count)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_roti_epi8(a, b) simde_mm_roti_epi8((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_roti_epi16 (simde__m128i a, const int count) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { + r_.u16[i] = (count < 0) ? + HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] >> -count) | (a_.u16[i] << ( count & 15)))) : + HEDLEY_STATIC_CAST(uint16_t, ((a_.u16[i] << count) | (a_.u16[i] >> (-count & 15)))); + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_XOP_NATIVE) + #define simde_mm_roti_epi16(a, count) _mm_roti_epi16((a), (count)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_roti_epi16(a, count) simde_mm_roti_epi16((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_roti_epi32 (simde__m128i a, const int count) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { + r_.u32[i] = (count < 0) ? + HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] >> -count) | (a_.u32[i] << ( count & 31)))) : + HEDLEY_STATIC_CAST(uint32_t, ((a_.u32[i] << count) | (a_.u32[i] >> (-count & 31)))); + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_XOP_NATIVE) + #define simde_mm_roti_epi32(a, count) _mm_roti_epi32((a), (count)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_roti_epi32(a, count) simde_mm_roti_epi32((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_roti_epi64 (simde__m128i a, const int count) { + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { + r_.u64[i] = (count < 0) ? 
+ HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] >> -count) | (a_.u64[i] << ( count & 63)))) : + HEDLEY_STATIC_CAST(uint64_t, ((a_.u64[i] << count) | (a_.u64[i] >> (-count & 63)))); + } + + return simde__m128i_from_private(r_); +} +#if defined(SIMDE_X86_XOP_NATIVE) + #define simde_mm_roti_epi64(a, count) _mm_roti_epi64((a), (count)) +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_roti_epi64(a, count) simde_mm_roti_epi64((a), (count)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128i +simde_mm_perm_epi8 (simde__m128i a, simde__m128i b, simde__m128i c) { + #if defined(SIMDE_X86_XOP_NATIVE) + return _mm_perm_epi8(a, b, c); + #else + simde__m128i_private + r_, + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b), + c_ = simde__m128i_to_private(c); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { + int8_t src = (c_.u8[i] & 0x10) ? b_.i8[c_.u8[i] & 0xf] : a_.i8[c_.u8[i] & 0xf]; + + switch (c_.u8[i] & 0xc0) { + case 0x40: + #if HEDLEY_HAS_BUILTIN(__builtin_bitreverse8) && !defined(HEDLEY_IBM_VERSION) + src = HEDLEY_STATIC_CAST(int8_t, __builtin_bitreverse8(HEDLEY_STATIC_CAST(uint8_t, src))); + #else + src = HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t, src) * UINT64_C(0x80200802)) & UINT64_C(0x0884422110)) * UINT64_C(0x0101010101) >> 32); + #endif + break; + case 0x80: + src = 0; + break; + case 0xc0: + src >>= 7; + break; + } + + r_.i8[i] = (c_.u8[i] & 0x20) ? ~src : src; + } + + return simde__m128i_from_private(r_); + #endif +} +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_perm_epi8(a, b, c) simde_mm_perm_epi8((a), (b), (c)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128 +simde_mm_permute2_ps (simde__m128 a, simde__m128 b, simde__m128i c, const int imm8) { + simde__m128_private + r_, + a_ = simde__m128_to_private(a), + b_ = simde__m128_to_private(b); + simde__m128i_private c_ = simde__m128i_to_private(c); + + const int m2z = imm8 & 0x03; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + const int sel = c_.i32[i] & 0x07; + const int m = c_.i32[i] & 0x08; + + switch (m | m2z) { + case 0xa: + case 0x3: + r_.i32[i] = 0; + break; + default: + r_.i32[i] = (sel > 3) ? b_.i32[sel - 4] : a_.i32[sel]; + break; + } + } + + return simde__m128_from_private(r_); +} +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_permute2_ps(a, b, c, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_permute2_ps((a), (b), (c), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_permute2_ps(a, b, c, imm8) _mm_permute2_ps((a), (b), (c), (imm8)) + #endif +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_permute2_ps(a, b, c, imm8) simde_mm_permute2_ps((a), (b), (c), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m128d +simde_mm_permute2_pd (simde__m128d a, simde__m128d b, simde__m128i c, const int imm8) { + simde__m128d_private + r_, + a_ = simde__m128d_to_private(a), + b_ = simde__m128d_to_private(b); + simde__m128i_private c_ = simde__m128i_to_private(c); + + const int m2z = imm8 & 0x03; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const int sel = (c_.i64[i] & 0x06) >> 1; + const int m = c_.i64[i] & 0x08; + + switch (m | m2z) { + case 0x0a: + case 0x03: + r_.i64[i] = 0; + break; + default: + r_.i64[i] = (sel > 1) ? 
b_.i64[sel - 2] : a_.i64[sel]; + break; + } + } + + return simde__m128d_from_private(r_); +} + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm_permute2_pd(a, b, c, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm_permute2_pd((a), (b), (c), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm_permute2_pd(a, b, c, imm8) _mm_permute2_pd((a), (b), (c), (imm8)) + #endif +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm_permute2_pd(a, b, c, imm8) simde_mm_permute2_pd((a), (b), (c), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256 +simde_mm256_permute2_ps (simde__m256 a, simde__m256 b, simde__m256i c, const int imm8) { + simde__m256_private + r_, + a_ = simde__m256_to_private(a), + b_ = simde__m256_to_private(b); + simde__m256i_private c_ = simde__m256i_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) { + r_.m128[i] = simde_mm_permute2_ps(a_.m128[i], b_.m128[i], c_.m128i[i], imm8); + } + #else + const int m2z = imm8 & 0x03; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { + const int sel = c_.i32[i] & 0x07; + const int m = c_.i32[i] & 0x08; + + switch (m | m2z) { + case 0xa: + case 0x3: + r_.i32[i] = 0; + break; + default: + r_.i32[i] = (sel > 3) ? b_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4) - 4] : a_.i32[sel + (HEDLEY_STATIC_CAST(int, i) & 4)]; + break; + } + } + #endif + + return simde__m256_from_private(r_); +} + +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm256_permute2_ps(a, b, c, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm256_permute2_ps((a), (b), (c), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm256_permute2_ps(a, b, c, imm8) _mm256_permute2_ps((a), (b), (c), (imm8)) + #endif +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm256_permute2_ps(a, b, c, imm8) simde_mm256_permute2_ps((a), (b), (c), (imm8)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde__m256d +simde_mm256_permute2_pd (simde__m256d a, simde__m256d b, simde__m256i c, const int imm8) { + simde__m256d_private + r_, + a_ = simde__m256d_to_private(a), + b_ = simde__m256d_to_private(b); + simde__m256i_private c_ = simde__m256i_to_private(c); + + #if SIMDE_NATURAL_VECTOR_SIZE_LE(128) + for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) { + r_.m128d[i] = simde_mm_permute2_pd(a_.m128d[i], b_.m128d[i], c_.m128i[i], imm8); + } + #else + const int m2z = imm8 & 0x03; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { + const int sel = (c_.i64[i] & 0x06) >> 1; + const int m = c_.i64[i] & 0x08; + + switch (m | m2z) { + case 0x0a: + case 0x03: + r_.i64[i] = 0; + break; + default: + r_.i64[i] = (sel > 1) ? 
b_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2) - 2] : a_.i64[sel + (HEDLEY_STATIC_CAST(int, i) & 2)]; + break; + } + } + #endif + + return simde__m256d_from_private(r_); +} +#if defined(SIMDE_X86_XOP_NATIVE) + #if defined(HEDLEY_MCST_LCC_VERSION) + #define simde_mm256_permute2_pd(a, b, c, imm8) (__extension__ ({ \ + SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS \ + _mm256_permute2_pd((a), (b), (c), (imm8)); \ + SIMDE_LCC_REVERT_DEPRECATED_WARNINGS \ + })) + #else + #define simde_mm256_permute2_pd(a, b, c, imm8) simde_undeprecated_mm256_permute2_pd((a), (b), (c), (imm8)) + #endif +#endif +#if defined(SIMDE_X86_XOP_ENABLE_NATIVE_ALIASES) + #define _mm256_permute2_pd(a, b, c, imm8) simde_mm256_permute2_pd((a), (b), (c), (imm8)) +#endif + +HEDLEY_DIAGNOSTIC_POP +SIMDE_END_DECLS_ + +#endif /* !defined(SIMDE_X86_XOP_H) */ +/* :: End simde/x86/xop.h :: */ From eb84cee8b886cf278e999973511f9382f408c0d3 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Mon, 23 Oct 2023 14:52:06 +0200 Subject: [PATCH 003/113] Switch to simde --- include/epu.hpp | 26 ++++++++++------ include/epu_impl.hpp | 66 ++++++++++++++++++++++++----------------- include/perm16_impl.hpp | 40 ++++++++++++++----------- 3 files changed, 78 insertions(+), 54 deletions(-) diff --git a/include/epu.hpp b/include/epu.hpp index a3e13a88..87f807fe 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -22,7 +22,6 @@ #include // less<>, equal_to<> #include #include -#include #ifdef HPCOMBI_HAVE_CONFIG #include "HPCombi-config.h" @@ -34,6 +33,11 @@ #include "vect_generic.hpp" + +#include "simde/x86/sse4.1.h" +#include "simde/x86/sse4.2.h" + + #ifdef HPCOMBI_CONSTEXPR_FUN_ARGS #define HPCOMBI_CONSTEXPR constexpr #define HPCOMBI_CONSTEXPR_CONSTRUCTOR constexpr @@ -202,32 +206,32 @@ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) { } /** Test whether all the entries of a #HPCombi::epu8 are zero */ -inline bool is_all_zero(epu8 a) { return _mm_testz_si128(a, a); } +inline bool is_all_zero(epu8 a) { return simde_mm_testz_si128(a, a); } /** Test whether all the entries of a #HPCombi::epu8 are one */ -inline bool is_all_one(epu8 a) { return _mm_testc_si128(a, Epu8(0xFF)); } +inline bool is_all_one(epu8 a) { return simde_mm_testc_si128(a, Epu8(0xFF)); } /** Equality of #HPCombi::epu8 */ -inline bool equal(epu8 a, epu8 b) { return is_all_zero(_mm_xor_si128(a, b)); } +inline bool equal(epu8 a, epu8 b) { return is_all_zero(simde_mm_xor_si128(a, b)); } /** Non equality of #HPCombi::epu8 */ inline bool not_equal(epu8 a, epu8 b) { return not equal(a, b); } /** Permuting a #HPCombi::epu8 */ -inline epu8 permuted(epu8 a, epu8 b) { return _mm_shuffle_epi8(a, b); } +inline epu8 permuted(epu8 a, epu8 b) { return simde_mm_shuffle_epi8(a, b); } /** Left shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left ! */ -inline epu8 shifted_right(epu8 a) { return _mm_bslli_si128(a, 1); } +inline epu8 shifted_right(epu8 a) { return simde_mm_bslli_si128(a, 1); } /** Right shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left ! 
*/ -inline epu8 shifted_left(epu8 a) { return _mm_bsrli_si128(a, 1); } +inline epu8 shifted_left(epu8 a) { return simde_mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ inline epu8 reverted(epu8 a) { return permuted(a, epu8rev); } /** Vector min between two #HPCombi::epu8 0 */ -inline epu8 min(epu8 a, epu8 b) { return _mm_min_epu8(a, b); } +inline epu8 min(epu8 a, epu8 b) { return simde_mm_min_epu8(a, b); } /** Vector max between two #HPCombi::epu8 0 */ -inline epu8 max(epu8 a, epu8 b) { return _mm_max_epu8(a, b); } +inline epu8 max(epu8 a, epu8 b) { return simde_mm_max_epu8(a, b); } /** Testing if a #HPCombi::epu8 is sorted */ inline bool is_sorted(epu8 a); @@ -546,11 +550,13 @@ inline epu8 eval16(epu8 v) { return eval16_cycle(v); }; * Reference @f$O(n)@f$ algorithm using loop and indexed access */ inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16); +#ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_first_diff * @par Algorithm: * Using \c cmpestri instruction */ inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +#endif /** @copydoc common_first_diff * @par Algorithm: * Using vector comparison and mask @@ -584,11 +590,13 @@ inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { * Reference @f$O(n)@f$ algorithm using loop and indexed access */ inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16); +#ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_last_diff * @par Algorithm: * Using \c cmpestri instruction */ inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +#endif /** @copydoc common_last_diff * @par Algorithm: * Using vector comparison and mask diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index c085abff..43784b3d 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -24,18 +24,18 @@ // Comparison mode for _mm_cmpestri #define FIRST_DIFF \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | SIMDE_SIDD_NEGATIVE_POLARITY) #define LAST_DIFF \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY | \ - _SIDD_MOST_SIGNIFICANT) -#define FIRST_ZERO (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | SIMDE_SIDD_NEGATIVE_POLARITY | \ + SIMDE_SIDD_MOST_SIGNIFICANT) +#define FIRST_ZERO (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY) #define LAST_ZERO \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_MOST_SIGNIFICANT) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_MOST_SIGNIFICANT) #define FIRST_NON_ZERO \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_MASKED_NEGATIVE_POLARITY) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_MASKED_NEGATIVE_POLARITY) #define LAST_NON_ZERO \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_MASKED_NEGATIVE_POLARITY | \ - _SIDD_MOST_SIGNIFICANT) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_MASKED_NEGATIVE_POLARITY | \ + SIMDE_SIDD_MOST_SIGNIFICANT) namespace HPCombi { @@ -45,11 +45,11 @@ namespace HPCombi { // Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { - uint64_t res = _mm_movemask_epi8(msk & (epu8id < Epu8(bound))); + uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); return res == 0 ? 
16 : _bit_scan_forward(res); } inline uint64_t last_mask(epu8 msk, size_t bound) { - auto res = _mm_movemask_epi8(msk & (epu8id < Epu8(bound))); + auto res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); return res == 0 ? 16 : _bit_scan_reverse(res); } @@ -59,9 +59,11 @@ inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) { return i; return 16; } +#ifdef SIMDE_X86_SSE4_2_NATIVE inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) { return unsigned(_mm_cmpestri(a, bound, b, bound, FIRST_DIFF)); } +#endif inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) { return first_mask(a != b, bound); } @@ -74,9 +76,11 @@ inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) { } return 16; } +#ifdef SIMDE_X86_SSE4_2_NATIVE inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) { return unsigned(_mm_cmpestri(a, bound, b, bound, LAST_DIFF)); } +#endif inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) { return last_mask(a != b, bound); } @@ -114,7 +118,7 @@ inline epu8 network_sort(epu8 res, std::array rounds) { epu8 mask = Increassing ? round < epu8id : epu8id < round; epu8 b = permuted(res, round); // res = mask ? min(res,b) : max(res,b); is not accepted by clang - res = _mm_blendv_epi8(min(res, b), max(res, b), mask); + res = simde_mm_blendv_epi8(min(res, b), max(res, b), mask); } return res; } @@ -127,9 +131,9 @@ inline epu8 network_sort_perm(epu8 &v, std::array rounds) { // This conditional should be optimized out by the compiler epu8 mask = Increassing ? round < epu8id : epu8id < round; epu8 b = permuted(v, round); - epu8 cmp = _mm_blendv_epi8(b < v, v < b, mask); - v = _mm_blendv_epi8(v, b, cmp); - res = _mm_blendv_epi8(res, permuted(res, round), cmp); + epu8 cmp = simde_mm_blendv_epi8(b < v, v < b, mask); + v = simde_mm_blendv_epi8(v, b, cmp); + res = simde_mm_blendv_epi8(res, permuted(res, round), cmp); } return res; } @@ -178,7 +182,7 @@ constexpr std::array sorting_rounds8 // clang-format on inline bool is_sorted(epu8 a) { - return _mm_movemask_epi8(shifted_right(a) > a) == 0; + return simde_mm_movemask_epi8(shifted_right(a) > a) == 0; } inline epu8 sorted(epu8 a) { return network_sort(a, sorting_rounds); @@ -215,7 +219,7 @@ inline epu8 random_epu8(uint16_t bnd) { inline epu8 remove_dups(epu8 v, uint8_t repl) { // Vector ternary operator is not supported by clang. // return (v != shifted_right(v) ? v : Epu8(repl); - return _mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v)); + return simde_mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v)); } // Gather at the front numbers with (3-i)-th bit not set. 
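
A minimal standalone sketch of the blendv-as-ternary idiom used in the hunks above: the comments there note that clang rejects the per-lane vector ternary "mask ? x : y" on epu8, so the code routes it through simde_mm_blendv_epi8 instead. This sketch is illustrative only and not part of the patch; the select() helper is hypothetical, and it assumes SIMDe is on the include path and that lax vector conversions are enabled (as in the project's compile options), so epu8 values convert to and from simde__m128i implicitly:

    #include <cstdint>
    #include "simde/x86/sse4.1.h"

    using epu8 = uint8_t __attribute__((__vector_size__(16), __may_alias__));

    // Per-byte "mask ? on_true : on_false": blendv copies a byte from
    // on_true wherever the corresponding mask byte has its high bit set
    // (0xFF for a true epu8 comparison), and from on_false otherwise.
    inline epu8 select(epu8 mask, epu8 on_true, epu8 on_false) {
        return simde_mm_blendv_epi8(on_false, on_true, mask);
    }

    int main() {
        epu8 a = {};                     // sixteen 0x00 bytes
        epu8 b = a + 1;                  // sixteen 0x01 bytes (scalar broadcast)
        epu8 r = select(a == b, a, b);   // mask is all-false, so r == b
        return r[0] == 1 ? 0 : 1;
    }
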
@@ -229,12 +233,13 @@ constexpr std::array inverting_rounds {{ }}; #define FIND_IN_VECT \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_UNIT_MASK | \ - _SIDD_NEGATIVE_POLARITY) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK | \ + SIMDE_SIDD_NEGATIVE_POLARITY) #define FIND_IN_VECT_COMPL \ - (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_UNIT_MASK) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) inline epu8 permutation_of(epu8 a, epu8 b) { +#ifdef SIMDE_X86_SSE4_2_NATIVE epu8 res = -static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); for (epu8 round : inverting_rounds) { a = permuted(a, round); @@ -242,6 +247,8 @@ inline epu8 permutation_of(epu8 a, epu8 b) { res -= static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); } return res; +#else +#endif } @@ -404,7 +411,7 @@ inline epu8 eval16_cycle(epu8 v) { inline epu8 eval16_popcount(epu8 v) { epu8 res{}; for (size_t i = 0; i < 16; i++) { - res[i] = __builtin_popcountl(_mm_movemask_epi8(v == Epu8(uint8_t(i)))); + res[i] = __builtin_popcountl(simde_mm_movemask_epi8(v == Epu8(uint8_t(i)))); } return res; } @@ -419,13 +426,13 @@ inline bool is_partial_transformation(epu8 v, const size_t k) { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) - return (_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) + return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } inline bool is_transformation(epu8 v, const size_t k) { uint64_t diff = last_diff(v, epu8id, 16); - return (_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) + return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } @@ -434,8 +441,8 @@ inline bool is_partial_permutation(epu8 v, const size_t k) { // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 // (v = Perm16::one() or last diff index < 16) - return (_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) - && (_mm_movemask_epi8(eval16(v) <= Epu8(1)) == 0xffff) + return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) + && (simde_mm_movemask_epi8(eval16(v) <= Epu8(1)) == 0xffff) && (diff == 16 || diff < k); } @@ -444,9 +451,12 @@ inline bool is_permutation(epu8 v, const size_t k) { // (forall x in v, x in Perm16::one()) and // (forall x in Perm16::one(), x in v) and // (v = Perm16::one() or last diff index < 16) +#ifdef SIMDE_X86_SSE4_2_NATIVE return _mm_cmpestri(epu8id, 16, v, 16, FIRST_NON_ZERO) == 16 && _mm_cmpestri(v, 16, epu8id, 16, FIRST_NON_ZERO) == 16 && (diff == 16 || diff < k); +#else +#endif } } // namespace HPCombi @@ -475,13 +485,13 @@ template <> struct not_equal_to { template <> struct hash { inline size_t operator()(HPCombi::epu8 a) const { - unsigned __int128 v0 = _mm_extract_epi64(a, 0); - unsigned __int128 v1 = _mm_extract_epi64(a, 1); + unsigned __int128 v0 = simde_mm_extract_epi64(a, 0); + unsigned __int128 v1 = simde_mm_extract_epi64(a, 1); return ((v1 * HPCombi::prime + v0) * HPCombi::prime) >> 64; /* The following is extremely slow on Renner benchmark - uint64_t v0 = _mm_extract_epi64(ar.v, 0); - uint64_t v1 = _mm_extract_epi64(ar.v, 1); + uint64_t v0 = simde_mm_extract_epi64(ar.v, 0); + uint64_t v1 = simde_mm_extract_epi64(ar.v, 1); size_t seed = v0 + 0x9e3779b9; seed ^= v1 + 0x9e3779b9 + (seed<<6) + (seed>>2); return seed; diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 5ea3af2a..19c25563 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ 
-50,18 +50,21 @@ inline epu8 PTransf16::domain_mask(bool complement) const { return complement ? v == Epu8(0xFF) : v != Epu8(0xFF); } inline uint32_t PTransf16::domain_bitset(bool complement) const { - return _mm_movemask_epi8(domain_mask(complement)); + return simde_mm_movemask_epi8(domain_mask(complement)); } inline PTransf16 PTransf16::right_one() const { return domain_mask(true) | epu8id; } inline epu8 PTransf16::image_mask(bool complement) const { +#ifdef SIMDE_X86_SSE4_2_NATIVE return complement ? _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT) : _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT_COMPL); +#else +#endif } inline uint32_t PTransf16::image_bitset(bool complement) const { - return _mm_movemask_epi8(image_mask(complement)); + return simde_mm_movemask_epi8(image_mask(complement)); } inline PTransf16 PTransf16::left_one() const { return image_mask(true) | epu8id; @@ -83,7 +86,7 @@ inline epu8 PTransf16::fix_points_mask(bool complement) const { return complement ? v != one().v : v == one().v; } inline uint32_t PTransf16::fix_points_bitset(bool complement) const { - return _mm_movemask_epi8(fix_points_mask(complement)); + return simde_mm_movemask_epi8(fix_points_mask(complement)); } inline uint8_t PTransf16::smallest_fix_point() const { @@ -120,14 +123,14 @@ inline static HPCOMBI_CONSTEXPR uint8_t hilo_mask_fun(uint8_t i) { static HPCOMBI_CONSTEXPR epu8 hilo_mask = Epu8(hilo_mask_fun); inline Transf16::Transf16(uint64_t compressed) { - epu8 res = _mm_set_epi64x(compressed, compressed); - v = _mm_blendv_epi8(res & Epu8(0x0F), res >> 4, hilo_mask); + epu8 res = simde_mm_set_epi64x(compressed, compressed); + v = simde_mm_blendv_epi8(res & Epu8(0x0F), res >> 4, hilo_mask); } inline Transf16::operator uint64_t() const { - epu8 res = static_cast(_mm_slli_epi32(v, 4)); + epu8 res = static_cast(simde_mm_slli_epi32(v, 4)); res = HPCombi::permuted(res, hilo_exchng) + v; - return _mm_extract_epi64(res, 0); + return simde_mm_extract_epi64(res, 0); } inline PPerm16 PPerm16::inverse_ref() const { @@ -139,8 +142,11 @@ inline PPerm16 PPerm16::inverse_ref() const { } inline PPerm16 PPerm16::inverse_find() const { +#ifdef SIMDE_X86_SSE4_2_NATIVE epu8 mask = _mm_cmpestrm(v, 16, one(), 16, FIND_IN_VECT); return permutation_of(v, one()) | mask; +#else +#endif } inline Perm16 Perm16::random(uint64_t n) { @@ -207,7 +213,7 @@ inline Perm16 Perm16::inverse_sort() const { // G++-7 compile this shift by 3 additions. 
// epu8 res = (v << 4) + one().v; // I call directly the shift intrinsic - epu8 res = static_cast(_mm_slli_epi32(v, 4)) + one().v; + epu8 res = static_cast(simde_mm_slli_epi32(v, 4)) + one().v; res = sorted(res) & Epu8(0x0F); return res; } @@ -230,7 +236,7 @@ inline Perm16 Perm16::inverse_cycl() const { for (int i = 9; i <= 16; i++) { Perm16 oldpow = newpow; newpow = oldpow * *this; - res.v = _mm_blendv_epi8(res, oldpow, newpow.v == one().v); + res.v = simde_mm_blendv_epi8(res, oldpow, newpow.v == one().v); } return res; } @@ -307,7 +313,7 @@ inline uint8_t Perm16::nb_descents_ref() const { return res; } inline uint8_t Perm16::nb_descents() const { - return __builtin_popcountl(_mm_movemask_epi8(v < shifted_right(v))); + return __builtin_popcountl(simde_mm_movemask_epi8(v < shifted_right(v))); } inline uint8_t Perm16::nb_cycles_ref() const { @@ -326,19 +332,19 @@ inline uint8_t Perm16::nb_cycles_ref() const { inline epu8 Perm16::cycles_partition() const { epu8 x0, x1 = one(); Perm16 p = *this; - x0 = _mm_min_epi8(x1, HPCombi::permuted(x1, p)); + x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p)); p = p * p; - x1 = _mm_min_epi8(x0, HPCombi::permuted(x0, p)); + x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p)); p = p * p; - x0 = _mm_min_epi8(x1, HPCombi::permuted(x1, p)); + x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p)); p = p * p; - x1 = _mm_min_epi8(x0, HPCombi::permuted(x0, p)); + x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p)); return x1; } inline uint8_t Perm16::nb_cycles_unroll() const { epu8 res = (epu8id == cycles_partition()); - return __builtin_popcountl(_mm_movemask_epi8(res)); + return __builtin_popcountl(simde_mm_movemask_epi8(res)); } inline bool Perm16::left_weak_leq_ref(Perm16 other) const { @@ -356,8 +362,8 @@ inline bool Perm16::left_weak_leq(Perm16 other) const { for (size_t i = 0; i < 15; i++) { srot = shifted_right(srot); orot = shifted_right(orot); - uint64_t sinv = _mm_movemask_epi8(v < srot); - uint64_t oinv = _mm_movemask_epi8(other.v < orot); + uint64_t sinv = simde_mm_movemask_epi8(v < srot); + uint64_t oinv = simde_mm_movemask_epi8(other.v < orot); if ((sinv & oinv) != sinv) return false; } From c95624bf3c95f16924f46e50130866f38ffbd433 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Tue, 24 Oct 2023 14:27:35 +0100 Subject: [PATCH 004/113] Finish switching to SIMDe --- CMakeLists.txt | 56 +++++++++--------- include/bmat8_impl.hpp | 123 +++++++++++++++++++++------------------- include/epu.hpp | 4 -- include/epu_impl.hpp | 13 +++-- include/perm16.hpp | 4 +- include/perm16_impl.hpp | 13 +++-- 6 files changed, 109 insertions(+), 104 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6c93568..e775affe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,34 +84,34 @@ check_cxx_compiler_flag('-mavx' HPCOMBI_HAVE_FLAG_AVX) #check_cxx_compiler_flag('-mavx512bw' HPCOMBI_HAVE_FLAG_AVX512BW) ## Check for Intel intrisics -check_include_file_cxx("x86intrin.h" HPCOMBI_HAVE_X86INTRIN) -if (NOT ${HPCOMBI_HAVE_FLAG_AVX} OR NOT ${HPCOMBI_HAVE_X86INTRIN}) - message(FATAL_ERROR "No SSE/AVX compiler intrinsics") -endif() -file(READ ${CMAKE_SOURCE_DIR}/list_intrin.txt hpcombi_intrinsics) -string(REPLACE ";" "|" hpcombi_intrinsics "${hpcombi_intrinsics}") -string(REPLACE "\n" ";" hpcombi_intrinsics "${hpcombi_intrinsics}") -foreach (intrin ${hpcombi_intrinsics}) - if ("${intrin}" MATCHES "^#" ) # Comment - continue() - endif() - string(REPLACE "|" ";" intrin "${intrin}") - list(GET intrin 0 intrin_name) - list(GET intrin 1 intrin_params) - set(CMAKE_REQUIRED_FLAGS "-mavx") - check_cxx_source_compiles(" - #include - int main() { - ${intrin_name}(${intrin_params}); - return 0; - } - " - "HPCOMBI_HAVE_${intrin_name}" - ) - if (NOT "${HPCOMBI_HAVE_${intrin_name}}") - message(FATAL_ERROR "Intrinsic ${intrin_name} not supported by compiler") - endif() -endforeach() +# check_include_file_cxx("x86intrin.h" HPCOMBI_HAVE_X86INTRIN) +# if (NOT ${HPCOMBI_HAVE_FLAG_AVX} OR NOT ${HPCOMBI_HAVE_X86INTRIN}) +# message(FATAL_ERROR "No SSE/AVX compiler intrinsics") +# endif() +# file(READ ${CMAKE_SOURCE_DIR}/list_intrin.txt hpcombi_intrinsics) +# string(REPLACE ";" "|" hpcombi_intrinsics "${hpcombi_intrinsics}") +# string(REPLACE "\n" ";" hpcombi_intrinsics "${hpcombi_intrinsics}") +# foreach (intrin ${hpcombi_intrinsics}) +# if ("${intrin}" MATCHES "^#" ) # Comment +# continue() +# endif() +# string(REPLACE "|" ";" intrin "${intrin}") +# list(GET intrin 0 intrin_name) +# list(GET intrin 1 intrin_params) +# set(CMAKE_REQUIRED_FLAGS "-mavx") +# check_cxx_source_compiles(" +# #include +# int main() { +# ${intrin_name}(${intrin_params}); +# return 0; +# } +# " +# "HPCOMBI_HAVE_${intrin_name}" +# ) +# if (NOT "${HPCOMBI_HAVE_${intrin_name}}") +# message(FATAL_ERROR "Intrinsic ${intrin_name} not supported by compiler") +# endif() +# endforeach() add_compile_options(-mavx -mtune=native -funroll-loops -flax-vector-conversions) diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index 85fed966..913e94f2 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -156,40 +156,42 @@ inline BMat8 BMat8::transpose() const { return BMat8(x); } - inline BMat8 BMat8::transpose_mask() const { - epu8 x = _mm_set_epi64x(_data, _data << 1); - uint64_t res = _mm_movemask_epi8(x); + epu8 x = simde_mm_set_epi64x(_data, _data << 1); + uint64_t res = simde_mm_movemask_epi8(x); x = x << Epu8(2); - res = res << 16 | _mm_movemask_epi8(x); + res = res << 16 | simde_mm_movemask_epi8(x); x = x << Epu8(2); - res = res << 16 | _mm_movemask_epi8(x); + res = res << 16 | simde_mm_movemask_epi8(x); x = x << Epu8(2); - res = res << 16 | _mm_movemask_epi8(x); + res = res << 16 | simde_mm_movemask_epi8(x); return BMat8(res); } inline BMat8 BMat8::transpose_maskd() const { - uint64_t res = 
_mm_movemask_epi8(_mm_set_epi64x(_data, _data << 1)); - res = res << 16 | _mm_movemask_epi8(_mm_set_epi64x(_data << 2, _data << 3)); - res = res << 16 | _mm_movemask_epi8(_mm_set_epi64x(_data << 4, _data << 5)); - res = res << 16 | _mm_movemask_epi8(_mm_set_epi64x(_data << 6, _data << 7)); + uint64_t res = + simde_mm_movemask_epi8(simde_mm_set_epi64x(_data, _data << 1)); + res = res << 16 | + simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 2, _data << 3)); + res = res << 16 | + simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 4, _data << 5)); + res = res << 16 | + simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 6, _data << 7)); return BMat8(res); } - -using epu64 = uint64_t __attribute__ ((__vector_size__ (16), __may_alias__)); +using epu64 = uint64_t __attribute__((__vector_size__(16), __may_alias__)); inline void BMat8::transpose2(BMat8 &a, BMat8 &b) { - epu64 x = _mm_set_epi64x(a._data, b._data); - epu64 y = (x ^ (x >> 7)) & (epu64 {0xAA00AA00AA00AA, 0xAA00AA00AA00AA}); + epu64 x = simde_mm_set_epi64x(a._data, b._data); + epu64 y = (x ^ (x >> 7)) & (epu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA}); x = x ^ y ^ (y << 7); - y = (x ^ (x >> 14)) & (epu64 {0xCCCC0000CCCC, 0xCCCC0000CCCC}); + y = (x ^ (x >> 14)) & (epu64{0xCCCC0000CCCC, 0xCCCC0000CCCC}); x = x ^ y ^ (y << 14); - y = (x ^ (x >> 28)) & (epu64 {0xF0F0F0F0, 0xF0F0F0F0}); + y = (x ^ (x >> 28)) & (epu64{0xF0F0F0F0, 0xF0F0F0F0}); x = x ^ y ^ (y << 28); - a._data = _mm_extract_epi64(x, 1); - b._data = _mm_extract_epi64(x, 0); + a._data = simde_mm_extract_epi64(x, 1); + b._data = simde_mm_extract_epi64(x, 0); } static constexpr epu8 rotlow { 7, 0, 1, 2, 3, 4, 5, 6}; @@ -201,24 +203,26 @@ static constexpr epu8 rot2 { 6, 7, 0, 1, 2, 3, 4, 5,14,15, 8, 9,10,11,12,13}; inline BMat8 BMat8::mult_transpose(BMat8 const &that) const { - epu8 x = _mm_set_epi64x(_data, _data); - epu8 y = _mm_shuffle_epi8(_mm_set_epi64x(that._data, that._data), rothigh); - epu8 data {}; - epu8 diag {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, - 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40}; + epu8 x = simde_mm_set_epi64x(_data, _data); + epu8 y = simde_mm_shuffle_epi8(simde_mm_set_epi64x(that._data, that._data), + rothigh); + epu8 data{}; + epu8 diag{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, + 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40}; for (int i = 0; i < 4; ++i) { data |= ((x & y) != epu8{}) & diag; - y = _mm_shuffle_epi8(y, rot2); - diag = _mm_shuffle_epi8(diag, rot2); + y = simde_mm_shuffle_epi8(y, rot2); + diag = simde_mm_shuffle_epi8(diag, rot2); } - return BMat8(_mm_extract_epi64(data, 0) | _mm_extract_epi64(data, 1)); + return BMat8(simde_mm_extract_epi64(data, 0) | + simde_mm_extract_epi64(data, 1)); } inline epu8 BMat8::row_space_basis_internal() const { - epu8 res = remove_dups(revsorted8(_mm_set_epi64x(0, _data))); + epu8 res = remove_dups(revsorted8(simde_mm_set_epi64x(0, _data))); epu8 rescy = res; // We now compute the union of all the included different rows - epu8 orincl {}; + epu8 orincl{}; for (int i = 0; i < 7; i++) { rescy = permuted(rescy, rotlow); orincl |= ((rescy | res) == res) & rescy; @@ -228,7 +232,8 @@ inline epu8 BMat8::row_space_basis_internal() const { } inline BMat8 BMat8::row_space_basis() const { - return BMat8(_mm_extract_epi64(sorted8(row_space_basis_internal()), 0)); + return BMat8( + simde_mm_extract_epi64(sorted8(row_space_basis_internal()), 0)); } #if defined(FF) @@ -245,45 +250,45 @@ constexpr std::array masks {{ }}; #undef FF -static const epu8 bound08 = _mm_slli_epi32(epu8id, 3); // shift for *8 
-static const epu8 bound18 = bound08 + Epu8(0x80); static const epu8 shiftres {1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80}; inline void update_bitset(epu8 block, epu8 &set0, epu8 &set1) { + static const epu8 bound08 = simde_mm_slli_epi32(epu8id, 3); // shift for *8 +static const epu8 bound18 = bound08 + Epu8(0x80); for (size_t slice8 = 0; slice8 < 16; slice8++) { epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */ - epu8 shft = _mm_shuffle_epi8(shiftres, block - bm5); + epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5); set0 |= (bm5 == bound08) & shft; set1 |= (bm5 == bound18) & shft; - block = _mm_shuffle_epi8(block, right_cycle); + block = simde_mm_shuffle_epi8(block, right_cycle); } } inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { - epu8 in = _mm_set_epi64x(0, _data); + epu8 in = simde_mm_set_epi64x(0, _data); epu8 block0 {}, block1 {}; for (epu8 m : masks) { - block0 |= static_cast<epu8>(_mm_shuffle_epi8(in, m)); - block1 |= static_cast<epu8>(_mm_shuffle_epi8(in, m | Epu8(4))); + block0 |= static_cast<epu8>(simde_mm_shuffle_epi8(in, m)); + block1 |= static_cast<epu8>(simde_mm_shuffle_epi8(in, m | Epu8(4))); } res0 = epu8 {}; res1 = epu8 {}; for (size_t r = 0; r < 16; r++) { update_bitset(block0 | block1, res0, res1); - block1 = _mm_shuffle_epi8(block1, right_cycle); + block1 = simde_mm_shuffle_epi8(block1, right_cycle); } } inline uint64_t BMat8::row_space_size_bitset() const { epu8 res0 {}, res1 {}; row_space_bitset(res0, res1); - return (__builtin_popcountll(_mm_extract_epi64(res0, 0)) + - __builtin_popcountll(_mm_extract_epi64(res1, 0)) + - __builtin_popcountll(_mm_extract_epi64(res0, 1)) + - __builtin_popcountll(_mm_extract_epi64(res1, 1))); + return (__builtin_popcountll(simde_mm_extract_epi64(res0, 0)) + + __builtin_popcountll(simde_mm_extract_epi64(res1, 0)) + + __builtin_popcountll(simde_mm_extract_epi64(res0, 1)) + + __builtin_popcountll(simde_mm_extract_epi64(res1, 1))); } inline uint64_t BMat8::row_space_size_incl1() const { - epu8 in = _mm_set_epi64x(_data, _data); + epu8 in = simde_mm_set_epi64x(_data, _data); epu8 block = epu8id; uint64_t res = 0; for (size_t r = 0; r < 16; r++) { @@ -292,14 +297,14 @@ inline uint64_t BMat8::row_space_size_incl1() const { orincl |= ((in | block) == block) & in; in = permuted(in, rotboth); } - res += __builtin_popcountll(_mm_movemask_epi8(block == orincl)); + res += __builtin_popcountll(simde_mm_movemask_epi8(block == orincl)); block += Epu8(16); } return res; } inline uint64_t BMat8::row_space_size_incl() const { - epu8 in = _mm_set_epi64x(_data, _data); + epu8 in = simde_mm_set_epi64x(_data, _data); epu8 block = epu8id; uint64_t res = 0; for (size_t r = 0; r < 16; r++) { @@ -308,7 +313,7 @@ inline uint64_t BMat8::row_space_size_incl() const { in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } - res += __builtin_popcountll(_mm_movemask_epi8(block == orincl)); + res += __builtin_popcountll(simde_mm_movemask_epi8(block == orincl)); block += Epu8(16); } return res; @@ -322,8 +327,8 @@ inline bool BMat8::row_space_included_bitset(BMat8 other) const { } inline bool BMat8::row_space_included(BMat8 other) const { - epu8 in = _mm_set_epi64x(0, other._data); - epu8 block = _mm_set_epi64x(0, _data); + epu8 in = simde_mm_set_epi64x(0, other._data); + epu8 block = simde_mm_set_epi64x(0, _data); epu8 orincl = ((in | block) == block) & in; for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotlow); @@ -333,7 +338,7 @@ inline bool BMat8::row_space_included(BMat8 other) const { } inline epu8 BMat8::row_space_mask(epu8 block) const
{ - epu8 in = _mm_set_epi64x(_data, _data); + epu8 in = simde_mm_set_epi64x(_data, _data); epu8 orincl = ((in | block) == block) & in; for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotboth); @@ -344,16 +349,16 @@ inline epu8 BMat8::row_space_mask(epu8 block) const { inline std::pair BMat8::row_space_included2(BMat8 a0, BMat8 b0, BMat8 a1, BMat8 b1) { - epu8 in = _mm_set_epi64x(b1._data, b0._data); - epu8 block = _mm_set_epi64x(a1._data, a0._data); + epu8 in = simde_mm_set_epi64x(b1._data, b0._data); + epu8 block = simde_mm_set_epi64x(a1._data, a0._data); epu8 orincl = ((in | block) == block) & in; for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } epu8 res = (block == orincl); - return std::make_pair(_mm_extract_epi64(res, 0) == -1, - _mm_extract_epi64(res, 1) == -1); + return std::make_pair(simde_mm_extract_epi64(res, 0) == -1, + simde_mm_extract_epi64(res, 1) == -1); } inline std::bitset<256> BMat8::row_space_bitset_ref() const { @@ -398,18 +403,18 @@ inline std::vector BMat8::rows() const { } inline size_t BMat8::nr_rows() const { - epu8 x = _mm_set_epi64x(_data, 0); - return __builtin_popcountll(_mm_movemask_epi8(x != epu8 {})); + epu8 x = simde_mm_set_epi64x(_data, 0); + return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8 {})); } static HPCOMBI_CONSTEXPR epu8 rev8 {7,6,5,4,3,2,1,0,8,9,10,11,12,13,14,15}; inline BMat8 BMat8::row_permuted(Perm16 p) const { - epu8 x = _mm_set_epi64x(0, _data); + epu8 x = simde_mm_set_epi64x(0, _data); x = permuted(x, rev8); x = permuted(x, p); x = permuted(x, rev8); - return BMat8(_mm_extract_epi64(x, 0)); + return BMat8(simde_mm_extract_epi64(x, 0)); } inline BMat8 BMat8::col_permuted(Perm16 p) const { return transpose().row_permuted(p).transpose(); @@ -445,11 +450,11 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { } inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const { - epu8 x = permuted(_mm_set_epi64x(_data, 0), epu8rev); - epu8 y = permuted(_mm_set_epi64x((*this * other)._data, 0), epu8rev); + epu8 x = permuted(simde_mm_set_epi64x(_data, 0), epu8rev); + epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), epu8rev); // Vector ternary operator is not supported by clang. // return (x != (epu8 {})) ? permutation_of(y, x) : epu8id; - return _mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8 {}); + return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8 {}); } diff --git a/include/epu.hpp b/include/epu.hpp index 87f807fe..d4fe20d6 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -65,10 +65,6 @@ static_assert(alignof(epu8) == 16, /// SIMD vector of 32 unsigned bytes using xpu8 = uint8_t __attribute__((vector_size(32))); -static_assert(alignof(xpu8) == 32, - "xpu8 type is not properly aligned by the compiler !"); - - namespace { // Implementation detail code /// A handmade C++11 constexpr lambda diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 43784b3d..ad1128fd 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -46,11 +46,11 @@ namespace HPCombi { // Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); - return res == 0 ? 16 : _bit_scan_forward(res); + return res == 0 ? 
16 : (__builtin_ffsll(res) - 1); } inline uint64_t last_mask(epu8 msk, size_t bound) { auto res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); - return res == 0 ? 16 : _bit_scan_reverse(res); + return res == 0 ? 16 : (63 - __builtin_clzll(res)); } inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) { @@ -452,10 +452,11 @@ inline bool is_permutation(epu8 v, const size_t k) { // (forall x in Perm16::one(), x in v) and // (v = Perm16::one() or last diff index < 16) #ifdef SIMDE_X86_SSE4_2_NATIVE - return _mm_cmpestri(epu8id, 16, v, 16, FIRST_NON_ZERO) == 16 - && _mm_cmpestri(v, 16, epu8id, 16, FIRST_NON_ZERO) == 16 - && (diff == 16 || diff < k); + return _mm_cmpestri(epu8id, 16, v, 16, FIRST_NON_ZERO) == 16 && + _mm_cmpestri(v, 16, epu8id, 16, FIRST_NON_ZERO) == 16 && + (diff == 16 || diff < k); #else + return equal(sorted(v), epu8id) && (diff == 16 || diff < k); #endif } @@ -505,7 +506,7 @@ template <> struct less<HPCombi::epu8> { // 10% faster than calling the lexicographic comparison operator ! inline size_t operator()(const HPCombi::epu8 &v1, const HPCombi::epu8 &v2) const { - __m128 v1v = __m128(v1), v2v = __m128(v2); + simde__m128 v1v = simde__m128(v1), v2v = simde__m128(v2); return v1v[0] == v2v[0] ? v1v[1] < v2v[1] : v1v[0] < v2v[0]; } }; diff --git a/include/perm16.hpp b/include/perm16.hpp index 3befac9a..6eb8216a 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -22,11 +22,13 @@ #include <functional> // less<> #include #include -#include <x86intrin.h> #include "epu.hpp" #include "vect16.hpp" +#include "simde/x86/sse4.1.h" +#include "simde/x86/sse4.2.h" + namespace HPCombi { // Forward declaration diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 19c25563..535de8ef 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -91,22 +91,23 @@ inline uint32_t PTransf16::fix_points_bitset(bool complement) const { inline uint8_t PTransf16::smallest_fix_point() const { uint32_t res = fix_points_bitset(false); - return res == 0 ? 0xFF : _bit_scan_forward(res); + return res == 0 ? 0xFF : __builtin_ffsl(res) - 1; } /** Returns the smallest non fix point of \c *this */ inline uint8_t PTransf16::smallest_moved_point() const { uint32_t res = fix_points_bitset(true); - return res == 0 ? 0xFF : _bit_scan_forward(res); + return res == 0 ? 0xFF : __builtin_ffsl(res) - 1; } /** Returns the largest fix point of \c *this */ inline uint8_t PTransf16::largest_fix_point() const { - uint32_t res = fix_points_bitset(false);; - return res == 0 ? 0xFF : _bit_scan_reverse(res); + uint32_t res = fix_points_bitset(false); + + return res == 0 ? 0xFF : 31 - __builtin_clz(res); } /** Returns the largest non fix point of \c *this */ inline uint8_t PTransf16::largest_moved_point() const { - uint32_t res = fix_points_bitset(true);; - return res == 0 ? 0xFF : _bit_scan_reverse(res); + uint32_t res = fix_points_bitset(true); + return res == 0 ? 0xFF : 31 - __builtin_clz(res); } /** Returns the number of fix points of \c *this */ inline uint8_t PTransf16::nb_fix_points() const { From 4835ca9ebe6b4f3743675d759b1949dd76e62454 Mon Sep 17 00:00:00 2001 From: "James D.
Mitchell" Date: Tue, 24 Oct 2023 14:29:17 +0100 Subject: [PATCH 005/113] Switch to C++14 --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e775affe..bcfcc986 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,16 +35,16 @@ message(STATUS "**** Build type = ${CMAKE_BUILD_TYPE}") ################################ # General compiler configuration set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_STANDARD 11) -set(CMAKE_CXX_EXTENSIONS OFF) # -std=c++11 instead of -std=gnu++11 +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_EXTENSIONS OFF) # -std=c++14 instead of -std=gnu++14 add_definitions(-DHPCOMBI_HAVE_CONFIG) message(STATUS "*** Compiler id is ${CMAKE_CXX_COMPILER_ID}") if ( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) # Workaround of CMAKE bug https://stackoverflow.com/questions/47213356/ - set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} -std=c++11) - add_compile_options(-std=c++11 -Wall -g -pg) + set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} -std=c++14) + add_compile_options(-std=c++14 -Wall -g -pg) endif() From 06736d9e4b8f627ffddc33684295f730e1de88d2 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Tue, 24 Oct 2023 13:52:12 +0100 Subject: [PATCH 006/113] Stop using deprecated std::random_shuffle --- include/perm16_impl.hpp | 6 +++++- include/perm_generic_impl.hpp | 8 +++++--- include/vect16.hpp | 3 ++- include/vect_generic.hpp | 6 +++++- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 535de8ef..0408fc4d 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -151,9 +151,13 @@ inline PPerm16 PPerm16::inverse_find() const { } inline Perm16 Perm16::random(uint64_t n) { + static std::random_device rd; + static std::mt19937 g(rd()); + Perm16 res = one(); auto ar = res.as_array(); - std::random_shuffle(ar.begin(), ar.begin() + n); + + std::shuffle(ar.begin(), ar.begin() + n, g); return res; } diff --git a/include/perm_generic_impl.hpp b/include/perm_generic_impl.hpp index 525f5de7..8eb37e54 100644 --- a/include/perm_generic_impl.hpp +++ b/include/perm_generic_impl.hpp @@ -42,10 +42,12 @@ PermGeneric<_Size, Expo>::inverse() const { } template -PermGeneric<_Size, Expo> -PermGeneric<_Size, Expo>::random() { +PermGeneric<_Size, Expo> PermGeneric<_Size, Expo>::random() { + static std::random_device rd; + static std::mt19937 g(rd()); + PermGeneric res{{}}; - std::random_shuffle(res.v.begin(), res.v.end()); + std::shuffle(res.v.begin(), res.v.end(), g); return res; } diff --git a/include/vect16.hpp b/include/vect16.hpp index 880e44a7..c660392d 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -16,9 +16,10 @@ #ifndef HPCOMBI_VECT16_HPP_INCLUDED #define HPCOMBI_VECT16_HPP_INCLUDED +#include + #include "epu.hpp" #include "perm16.hpp" // for is_permutation -#include namespace HPCombi { diff --git a/include/vect_generic.hpp b/include/vect_generic.hpp index cee524ec..e7496a80 100644 --- a/include/vect_generic.hpp +++ b/include/vect_generic.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace HPCombi { @@ -114,8 +115,11 @@ template struct VectGeneric { } static VectGeneric random() { + static std::random_device rd; + static std::mt19937 g(rd()); + VectGeneric<_Size, Expo> res = VectGeneric<_Size, Expo>(0, 0); - std::random_shuffle(res.begin(), res.end()); + std::shuffle(res.begin(), res.end(), g); return res; } From 8abef2bdec9756d0e7db753a5f97e431f3013aa4 Mon 
Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Tue, 24 Oct 2023 13:52:45 +0100 Subject: [PATCH 007/113] Move testtools.hpp into benchmark/ --- {include => benchmark}/testtools.hpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {include => benchmark}/testtools.hpp (100%) diff --git a/include/testtools.hpp b/benchmark/testtools.hpp similarity index 100% rename from include/testtools.hpp rename to benchmark/testtools.hpp From d5beea919617730d0c85ee1bc64e4d248f4b979d Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Tue, 24 Oct 2023 13:55:32 +0100 Subject: [PATCH 008/113] tests: remove pointless test files --- tests/test_mincl.cpp | 27 --------------------------- tests/test_mincl0.cpp | 19 ------------------- tests/test_mincl1.cpp | 19 ------------------- tests/test_mincl_common.cpp | 35 ----------------------------------- 4 files changed, 100 deletions(-) delete mode 100644 tests/test_mincl.cpp delete mode 100644 tests/test_mincl0.cpp delete mode 100644 tests/test_mincl1.cpp delete mode 100644 tests/test_mincl_common.cpp diff --git a/tests/test_mincl.cpp b/tests/test_mincl.cpp deleted file mode 100644 index 2eb9fc87..00000000 --- a/tests/test_mincl.cpp +++ /dev/null @@ -1,27 +0,0 @@ -//****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // -// // -// Distributed under the terms of the GNU General Public License (GPL) // -// // -// This code is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // -// General Public License for more details. // -// // -// The full text of the GPL is available at: // -// // -// http://www.gnu.org/licenses/ // -//****************************************************************************// - -#define BOOST_TEST_MODULE MultIncl_Test - -#include - -int foo0(); // in test_mincl0.cpp -int foo1(); // in test_mincl1.cpp - -BOOST_AUTO_TEST_SUITE(MultIncl) -BOOST_AUTO_TEST_CASE(MultInclFoo0) { BOOST_CHECK_EQUAL(foo0(), 0); } -BOOST_AUTO_TEST_CASE(MultInclFoo1) { BOOST_CHECK_EQUAL(foo1(), 1); } -BOOST_AUTO_TEST_SUITE_END() - diff --git a/tests/test_mincl0.cpp b/tests/test_mincl0.cpp deleted file mode 100644 index c4753ece..00000000 --- a/tests/test_mincl0.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // -// // -// Distributed under the terms of the GNU General Public License (GPL) // -// // -// This code is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // -// General Public License for more details. 
// -// // -// The full text of the GPL is available at: // -// // -// http://www.gnu.org/licenses/ // -//****************************************************************************// - -// We check that multiple inclusion of HPCombi works - -#define CONST_TO_BE_CHANGED 0 -#include "test_mincl_common.cpp" diff --git a/tests/test_mincl1.cpp b/tests/test_mincl1.cpp deleted file mode 100644 index 2ae4c041..00000000 --- a/tests/test_mincl1.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // -// // -// Distributed under the terms of the GNU General Public License (GPL) // -// // -// This code is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // -// General Public License for more details. // -// // -// The full text of the GPL is available at: // -// // -// http://www.gnu.org/licenses/ // -//****************************************************************************// - -// We check that multiple inclusion of HPCombi works - -#define CONST_TO_BE_CHANGED 1 -#include "test_mincl_common.cpp" diff --git a/tests/test_mincl_common.cpp b/tests/test_mincl_common.cpp deleted file mode 100644 index 56f74d5d..00000000 --- a/tests/test_mincl_common.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // -// // -// Distributed under the terms of the GNU General Public License (GPL) // -// // -// This code is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // -// General Public License for more details. // -// // -// The full text of the GPL is available at: // -// // -// http://www.gnu.org/licenses/ // -//****************************************************************************// - -// We check that multiple inclusion of HPCombi works - -#include "hpcombi.hpp" - -#define PPCAT_NX(A, B) A ## B -#define PPCAT(A, B) PPCAT_NX(A, B) - -int PPCAT(foo, CONST_TO_BE_CHANGED)() { - HPCombi::Perm16 res = HPCombi::Perm16::one(); - res = res * res; - res = res * res; - res = res * res; - HPCombi::epu8 rnd = HPCombi::random_epu8(255); - rnd = rnd + rnd; - HPCombi::BMat8 resb = HPCombi::BMat8::one(); - resb = resb * resb; - HPCombi::BMat8 rndb = HPCombi::BMat8::random(); - rndb = rndb * rndb; - return CONST_TO_BE_CHANGED; -} From 3a1843f558466ead4a1b8546dc9bf54d2bd5588e Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Tue, 24 Oct 2023 11:09:30 +0200 Subject: [PATCH 009/113] tests: stop using boost tests --- etc/test-renumber.sh | 52 ++ tests/CMakeLists.txt | 32 +- tests/test_bmat8.cpp | 725 +++++++++++------------ tests/test_epu.cpp | 1245 +++++++++++++++++++-------------------- tests/test_main.cpp | 38 ++ tests/test_main.hpp | 42 ++ tests/test_perm16.cpp | 857 +++++++++++++-------------- tests/test_perm_all.cpp | 533 +++++++++-------- 8 files changed, 1788 insertions(+), 1736 deletions(-) create mode 100755 etc/test-renumber.sh create mode 100644 tests/test_main.cpp create mode 100644 tests/test_main.hpp diff --git a/etc/test-renumber.sh b/etc/test-renumber.sh new file mode 100755 index 00000000..dfea653e --- /dev/null +++ b/etc/test-renumber.sh @@ -0,0 +1,52 @@ +#!/bin/bash +set -e + +bold() { + printf "\033[1m%s\033[0m\n" "$*" +} + +re='^[0-9]+$' + +if [[ $# -ne 1 && $# -ne 2 ]]; then + bold "error expected 1 or 2 arguments, got $#!" + exit 1 +elif [[ ! -f $1 ]]; then + bold "Error, expected a file as first argument, $1 is not a file!" + exit 1 +elif [[ $# -eq 2 && ! $2 =~ $re ]]; then + bold "Error, expected a positive integer as second argument, $2 is not a positive integer!" + exit 1 +fi + +if [[ $# -eq 2 ]]; then + export START="$2" +else + export START=0 +fi + +FNAME="$1" python3 - < -#include -#include - -#include "epu.hpp" -#include "bmat8.hpp" #include +#include +#include -using namespace HPCombi; - -#define EPU8_EQUAL(p1, p2) BOOST_CHECK_PREDICATE(equal, (p1)(p2)) -#define EPU8_NOT_EQUAL(p1, p2) BOOST_CHECK_PREDICATE(boost::not2(equal), (p1)(p2)) - -#define TEST_AGREES(type, ref, fun, vct) \ - BOOST_FIXTURE_TEST_CASE(type##_agrees_##fun, Fix) { \ - for (type p : vct) BOOST_TEST(p.fun() == p.ref()); \ - } -#define TEST_EPU8_AGREES(type, ref, fun, vct) \ - BOOST_FIXTURE_TEST_CASE(type##_agrees_##fun, Fix) { \ - for (type p : vct) EPU8_EQUAL(p.fun(), p.ref()); \ - } - -#define TEST_AGREES2(type, ref, fun, vct) \ - BOOST_FIXTURE_TEST_CASE(type##_agrees_##fun, Fix) { \ - for (type p1 : vct) for (type p2 : vct) \ - BOOST_TEST(p1.fun(p2) == p1.ref(p2)); \ - } +#include "test_main.hpp" +#include -struct Fix { - Fix() : zero(0), one1(1), one2(0x201), - ones(0xffffffffffffffff), - bm({{0, 0, 0, 1, 0, 0, 1, 1}, - {1, 1, 1, 1, 1, 1, 0, 1}, - {0, 1, 1, 1, 0, 1, 0, 1}, - {1, 1, 0, 1, 1, 1, 1, 1}, - {0, 0, 1, 0, 0, 1, 1, 1}, - {1, 1, 0, 0, 0, 0, 0, 1}, - {0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 1, 1, 1, 0, 1, 0}}), - bm1({{0, 0, 0, 1, 0, 0, 1, 1}, - {0, 0, 1, 0, 0, 1, 0, 1}, - {1, 1, 0, 0, 1, 1, 0, 1}, - {1, 1, 0, 0, 0, 0, 0, 1}, - {0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 0, 1, 1, 1, 1, 1}, - {0, 1, 0, 1, 0, 1, 0, 1}, - {0, 1, 0, 0, 0, 0, 1, 0}}), - bmm1({{1, 1, 0, 1, 0, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1}, - {1, 1, 0, 1, 1, 1, 1, 1}, - {0, 1, 1, 1, 0, 1, 1, 1}, - {0, 1, 1, 1, 0, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 1}}), - bm2({{1, 1}, {0, 1}}), bm2t({{1, 0}, {1, 1}}), - bm3({{0, 0, 0, 1, 0, 0, 1, 1}, - {1, 1, 1, 1, 1, 1, 0, 1}, - {0, 1, 1, 1, 1, 1, 0, 1}, - {1, 1, 0, 1, 1, 1, 1, 1}, - {0, 0, 1, 0, 0, 1, 1, 1}, - {1, 1, 0, 0, 0, 0, 0, 1}, - {0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 1, 1, 1, 0, 1, 0}}), - bm3t({{0, 1, 0, 1, 0, 1, 0, 0}, - {0, 1, 1, 1, 0, 1, 1, 1}, - {0, 1, 1, 0, 1, 0, 0, 1}, - {1, 1, 1, 1, 0, 0, 0, 1}, - {0, 1, 1, 1, 0, 0, 0, 1}, - {0, 1, 1, 1, 1, 0, 0, 0}, - {1, 0, 0, 1, 1, 0, 1, 1}, - {1, 1, 1, 1, 1, 1, 1, 0}}), - BMlist({zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t}) - { - BOOST_TEST_MESSAGE("setup fixture"); - } - ~Fix() 
{ BOOST_TEST_MESSAGE("teardown fixture"); } +#include "bmat8.hpp" +#include "epu.hpp" +namespace HPCombi { +namespace { +struct BMat8Fixture { const BMat8 zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t; const std::vector BMlist; + BMat8Fixture() + : zero(0), one1(1), one2(0x201), ones(0xffffffffffffffff), + bm({{0, 0, 0, 1, 0, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1}, + {0, 1, 1, 1, 0, 1, 0, 1}, + {1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 1, 0, 0, 1, 1, 1}, + {1, 1, 0, 0, 0, 0, 0, 1}, + {0, 1, 0, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}}), + bm1({{0, 0, 0, 1, 0, 0, 1, 1}, + {0, 0, 1, 0, 0, 1, 0, 1}, + {1, 1, 0, 0, 1, 1, 0, 1}, + {1, 1, 0, 0, 0, 0, 0, 1}, + {0, 1, 0, 0, 0, 0, 1, 1}, + {0, 1, 0, 1, 1, 1, 1, 1}, + {0, 1, 0, 1, 0, 1, 0, 1}, + {0, 1, 0, 0, 0, 0, 1, 0}}), + bmm1({{1, 1, 0, 1, 0, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 0, 1, 1, 1, 1, 1}, + {0, 1, 1, 1, 0, 1, 1, 1}, + {0, 1, 1, 1, 0, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1}}), + bm2({{1, 1}, {0, 1}}), bm2t({{1, 0}, {1, 1}}), + bm3({{0, 0, 0, 1, 0, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1}, + {0, 1, 1, 1, 1, 1, 0, 1}, + {1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 1, 0, 0, 1, 1, 1}, + {1, 1, 0, 0, 0, 0, 0, 1}, + {0, 1, 0, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}}), + bm3t({{0, 1, 0, 1, 0, 1, 0, 0}, + {0, 1, 1, 1, 0, 1, 1, 1}, + {0, 1, 1, 0, 1, 0, 0, 1}, + {1, 1, 1, 1, 0, 0, 0, 1}, + {0, 1, 1, 1, 0, 0, 0, 1}, + {0, 1, 1, 1, 1, 0, 0, 0}, + {1, 0, 0, 1, 1, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 0}}), + BMlist( + {zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t}) {} }; - +} // namespace //****************************************************************************// -BOOST_AUTO_TEST_SUITE(BMat8_test) //****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_one, Fix) { - BOOST_TEST(BMat8::one(0) == zero); - BOOST_TEST(BMat8::one(2) == BMat8( - {{1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}})); - BOOST_TEST(BMat8::one(5) == BMat8( - {{1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}})); - BOOST_TEST(BMat8::one(8) == BMat8::one()); + +TEST_CASE_METHOD(BMat8Fixture, "BMat8::one", "[BMat8][000]") { + REQUIRE(BMat8::one(0) == zero); + REQUIRE(BMat8::one(2) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}})); + REQUIRE(BMat8::one(5) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}})); + REQUIRE(BMat8::one(8) == BMat8::one()); } -BOOST_FIXTURE_TEST_CASE(BMat8_transpose, Fix) { - BOOST_TEST(zero.transpose() == zero); - BOOST_TEST(bm2.transpose() == bm2t); - BOOST_TEST(bm3.transpose() == bm3t); +TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose", "[BMat8][001]") { + + REQUIRE(zero.transpose() == zero); + REQUIRE(bm2.transpose() == bm2t); + REQUIRE(bm3.transpose() == bm3t); - for (auto m : BMlist) - BOOST_TEST(m.transpose().transpose() == 
m); + for (auto m : BMlist) { + REQUIRE(m.transpose().transpose() == m); + } } -TEST_AGREES(BMat8, transpose, transpose_mask, BMlist); -TEST_AGREES(BMat8, transpose, transpose_maskd, BMlist); +TEST_AGREES(BMat8Fixture, BMat8, transpose, transpose_mask, BMlist, + "[BMat8][002]"); + +TEST_AGREES(BMat8Fixture, BMat8, transpose, transpose_maskd, BMlist, + "[BMat8][003]"); -BOOST_FIXTURE_TEST_CASE(BMat8_transpose2, Fix) { +TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose2", "[BMat8][004]") { for (auto a : BMlist) { for (auto b : BMlist) { BMat8 at = a, bt = b; BMat8::transpose2(at, bt); - BOOST_TEST(at == a.transpose()); - BOOST_TEST(bt == b.transpose()); + REQUIRE(at == a.transpose()); + REQUIRE(bt == b.transpose()); } } } -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_mult, Fix) { +TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator*", "[BMat8][005]") { BMat8 tmp = bm * bm1; - BOOST_TEST(tmp == bmm1); - BOOST_TEST(tmp == bm * bm1); + REQUIRE(tmp == bmm1); + REQUIRE(tmp == bm * bm1); for (auto b : BMlist) { - BOOST_TEST(zero * b == zero); - BOOST_TEST(b * zero == zero); - BOOST_TEST(b * b.one() == b); - BOOST_TEST(b.one() * b == b); - BOOST_TEST((b * b) * (b * b) == b * b * b * b); + REQUIRE(zero * b == zero); + REQUIRE(b * zero == zero); + REQUIRE(b * b.one() == b); + REQUIRE(b.one() * b == b); + REQUIRE((b * b) * (b * b) == b * b * b * b); } - for (auto b1 : BMlist) - for (auto b2 : BMlist) - for (auto b3 : BMlist) - BOOST_TEST((b1 * b2) * b3 == b1 * (b2 * b3)); - + for (auto b1 : BMlist) { + for (auto b2 : BMlist) { + for (auto b3 : BMlist) { + REQUIRE((b1 * b2) * b3 == b1 * (b2 * b3)); + } + } + } } - -//****************************************************************************// -BOOST_AUTO_TEST_CASE(BMat8_random) { +TEST_CASE("BMat8::random", "[BMat8][006]") { for (size_t d = 1; d < 8; ++d) { BMat8 bm = BMat8::random(d); for (size_t i = d + 1; i < 8; ++i) { for (size_t j = 0; j < 8; ++j) { - BOOST_TEST(bm(i, j) == 0); - BOOST_TEST(bm(j, i) == 0); + REQUIRE(bm(i, j) == 0); + REQUIRE(bm(j, i) == 0); } } } } -//****************************************************************************// -BOOST_AUTO_TEST_CASE(BMat8_call_operator) { - std::vector> mat = {{0, 0, 0, 1, 0, 0, 1}, - {0, 1, 1, 1, 0, 1, 0}, - {1, 1, 0, 1, 1, 1, 1}, - {0, 0, 1, 0, 0, 1, 1}, - {1, 1, 0, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 1}, - {0, 1, 1, 1, 1, 0, 1}}; - BMat8 bm(mat); - +TEST_CASE("BMat8::operator()", "[BMat8][007]") { + std::vector> mat = { + {0, 0, 0, 1, 0, 0, 1}, {0, 1, 1, 1, 0, 1, 0}, {1, 1, 0, 1, 1, 1, 1}, + {0, 0, 1, 0, 0, 1, 1}, {1, 1, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 1}, + {0, 1, 1, 1, 1, 0, 1}}; + BMat8 bm(mat); for (size_t i = 0; i < 7; ++i) { for (size_t j = 0; j < 7; ++j) { - BOOST_TEST(static_cast(bm(i, j)) == mat[i][j]); + REQUIRE(static_cast(bm(i, j)) == mat[i][j]); } } } -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_operator_insert, Fix) { +TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator<<", "[BMat8][008]") { std::ostringstream oss; oss << bm3; - BOOST_TEST(oss.str() == - "00010011\n" - "11111101\n" - "01111101\n" - "11011111\n" - "00100111\n" - "11000001\n" - "01000011\n" - "01111010\n"); + REQUIRE(oss.str() == "00010011\n" + "11111101\n" + "01111101\n" + "11011111\n" + "00100111\n" + "11000001\n" + "01000011\n" + "01111010\n"); std::stringbuf buff; - std::ostream os(&buff); + std::ostream os(&buff); os << BMat8::random(); // Also does not do anything visible } 
-//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_set, Fix) { +TEST_CASE_METHOD(BMat8Fixture, "BMat8::set", "[BMat8][009]") { BMat8 bs; - bs = bm; bs.set(0, 0, 1); - BOOST_TEST(bs != bm); - bs = bm; bs.set(0, 0, 0); - BOOST_TEST(bs == bm); - bs = bm; bs.set(2, 4, 1); - BOOST_TEST(bs != bm); - BOOST_TEST(bs == bm3); + bs = bm; + bs.set(0, 0, 1); + REQUIRE(bs != bm); + bs = bm; + bs.set(0, 0, 0); + REQUIRE(bs == bm); + bs = bm; + bs.set(2, 4, 1); + REQUIRE(bs != bm); + REQUIRE(bs == bm3); for (size_t i = 0; i < 8; ++i) - for (size_t j = 0; j < 8; ++j) - bs.set(i, j, true); - BOOST_TEST(bs == ones); + for (size_t j = 0; j < 8; ++j) + bs.set(i, j, true); + REQUIRE(bs == ones); for (size_t i = 0; i < 8; ++i) - for (size_t j = 0; j < 8; ++j) - bs.set(i, j, false); - BOOST_TEST(bs == zero); + for (size_t j = 0; j < 8; ++j) + bs.set(i, j, false); + REQUIRE(bs == zero); } -//****************************************************************************// -BOOST_AUTO_TEST_CASE(BMat8_row_space_basis) { +TEST_CASE("BMat8::row_space_basis", "[BMat8][010]") { BMat8 bm({{0, 1, 1, 1, 0, 1, 0, 1}, {0, 0, 0, 0, 0, 0, 0, 1}, {1, 1, 1, 1, 1, 1, 0, 1}, @@ -261,7 +235,7 @@ BOOST_AUTO_TEST_CASE(BMat8_row_space_basis) { {0, 0, 1, 0, 0, 1, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 1}}); - BOOST_TEST(bm.row_space_basis() == bm2.row_space_basis()); + REQUIRE(bm.row_space_basis() == bm2.row_space_basis()); BMat8 bm3({{1, 1, 1, 1, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 0, 1}, @@ -281,28 +255,25 @@ BOOST_AUTO_TEST_CASE(BMat8_row_space_basis) { {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}}); - BOOST_TEST(bm3.row_space_basis() == bm4); - BOOST_TEST(bm4.row_space_basis() == bm4); + REQUIRE(bm3.row_space_basis() == bm4); + REQUIRE(bm4.row_space_basis() == bm4); BMat8 bm5(0xff00000000000000); uint64_t data = 0xffffffffffffffff; for (size_t i = 0; i < 7; ++i) { - BOOST_TEST(BMat8(data).row_space_basis() == bm5); + REQUIRE(BMat8(data).row_space_basis() == bm5); data = data >> 8; } for (size_t i = 0; i < 1000; ++i) { bm = BMat8::random(); - BOOST_TEST(bm.row_space_basis().row_space_basis() == bm.row_space_basis()); + REQUIRE(bm.row_space_basis().row_space_basis() == bm.row_space_basis()); } } - -//****************************************************************************// -//****************************************************************************// -BOOST_AUTO_TEST_CASE(BMat8_col_space_basis) { +TEST_CASE("BMat8::col_space_basis", "[BMat8][011]") { BMat8 bm({{0, 1, 1, 1, 0, 1, 0, 1}, {0, 0, 0, 0, 0, 0, 0, 1}, {1, 1, 1, 1, 1, 1, 0, 1}, @@ -321,7 +292,7 @@ BOOST_AUTO_TEST_CASE(BMat8_col_space_basis) { {1, 0, 1, 0, 0, 0, 0, 1}, {0, 0, 1, 1, 1, 0, 1, 1}}); - BOOST_TEST(bm.col_space_basis() == bm2); + REQUIRE(bm.col_space_basis() == bm2); BMat8 bm3({{1, 1, 1, 1, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 0, 1}, @@ -341,252 +312,242 @@ BOOST_AUTO_TEST_CASE(BMat8_col_space_basis) { {0, 0, 0, 0, 1, 0, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}}); - BOOST_TEST(bm3.col_space_basis() == bm4); + REQUIRE(bm3.col_space_basis() == bm4); uint64_t col = 0x8080808080808080; - BMat8 bm5(col); + BMat8 bm5(col); uint64_t data = 0xffffffffffffffff; for (size_t i = 0; i < 7; ++i) { - BOOST_TEST(BMat8(data).col_space_basis() == bm5); + REQUIRE(BMat8(data).col_space_basis() == bm5); data &= ~(col >> i); } for (size_t i = 0; i < 1000; ++i) { bm = BMat8::random(); - BOOST_TEST(bm.col_space_basis().col_space_basis() == bm.col_space_basis()); + REQUIRE(bm.col_space_basis().col_space_basis() == bm.col_space_basis()); } } 
-//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_row_space_size, Fix) { - BOOST_TEST(zero.row_space_size() == 1); - BOOST_TEST(one1.row_space_size() == 2); - BOOST_TEST(one2.row_space_size() == 4); - BOOST_TEST(BMat8::one().row_space_size() == 256); - BOOST_TEST(bm.row_space_size() == 22); - BOOST_TEST(bm1.row_space_size() == 31); - BOOST_TEST(bm2.row_space_size() == 3); - BOOST_TEST(bm2t.row_space_size() == 3); - BOOST_TEST(bm3.row_space_size() == 21); - BOOST_TEST(bm3t.row_space_size() == 21); - BOOST_TEST(bmm1.row_space_size() == 6); +TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][012]") { + REQUIRE(zero.row_space_size() == 1); + REQUIRE(one1.row_space_size() == 2); + REQUIRE(one2.row_space_size() == 4); + REQUIRE(BMat8::one().row_space_size() == 256); + REQUIRE(bm.row_space_size() == 22); + REQUIRE(bm1.row_space_size() == 31); + REQUIRE(bm2.row_space_size() == 3); + REQUIRE(bm2t.row_space_size() == 3); + REQUIRE(bm3.row_space_size() == 21); + REQUIRE(bm3t.row_space_size() == 21); + REQUIRE(bmm1.row_space_size() == 6); } -TEST_AGREES(BMat8, row_space_size_ref, row_space_size, BMlist); -TEST_AGREES(BMat8, row_space_size_ref, row_space_size_incl, BMlist); -TEST_AGREES(BMat8, row_space_size_ref, row_space_size_incl1, BMlist); -TEST_AGREES(BMat8, row_space_size_ref, row_space_size_bitset, BMlist); -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_row_space_included, Fix) { - BOOST_TEST(zero.row_space_included(one1)); - BOOST_TEST(not one1.row_space_included(zero)); +TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size, BMlist, + "[BMat8][013]"); +TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_incl, + BMlist, "[BMat8][014]"); +TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_incl1, + BMlist, "[BMat8][015]"); +TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_bitset, + BMlist, "[BMat8][016]"); + +TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") { + REQUIRE(zero.row_space_included(one1)); + REQUIRE_FALSE(one1.row_space_included(zero)); BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); - BOOST_TEST(m1.row_space_included(m2)); - BOOST_TEST(m2.row_space_included(m1)); + REQUIRE(m1.row_space_included(m2)); + REQUIRE(m2.row_space_included(m1)); BMat8 m3({{0, 0, 1}, {1, 0, 1}, {1, 1, 0}}); - BOOST_TEST(m1.row_space_included(m3)); - BOOST_TEST(m2.row_space_included(m3)); - BOOST_TEST(not m3.row_space_included(m1)); - BOOST_TEST(not m3.row_space_included(m1)); - - BOOST_TEST(m1.row_space_included(BMat8::one())); - BOOST_TEST(m2.row_space_included(BMat8::one())); - BOOST_TEST(m3.row_space_included(BMat8::one())); - + REQUIRE(m1.row_space_included(m3)); + REQUIRE(m2.row_space_included(m3)); + REQUIRE_FALSE(m3.row_space_included(m1)); + REQUIRE_FALSE(m3.row_space_included(m1)); + + REQUIRE(m1.row_space_included(BMat8::one())); + REQUIRE(m2.row_space_included(BMat8::one())); + REQUIRE(m3.row_space_included(BMat8::one())); } -TEST_AGREES2(BMat8, row_space_included, row_space_included_ref, BMlist); -TEST_AGREES2(BMat8, row_space_included, row_space_included_bitset, BMlist); -BOOST_FIXTURE_TEST_CASE(BMat8_row_space_included2, Fix) { - BMat8 a0 = BMat8::one(); BMat8 b0 = BMat8(0); - BMat8 a1 = BMat8(0); BMat8 b1 = BMat8::one(); +TEST_AGREES2(BMat8Fixture, BMat8, row_space_included, row_space_included_ref, + BMlist, 
"[BMat8][018]"); +TEST_AGREES2(BMat8Fixture, BMat8, row_space_included, row_space_included_bitset, + BMlist, "[BMat8][019]"); + +TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") { + BMat8 a0 = BMat8::one(); + BMat8 b0 = BMat8(0); + BMat8 a1 = BMat8(0); + BMat8 b1 = BMat8::one(); auto res = BMat8::row_space_included2(a0, b0, a1, b1); - BOOST_TEST(res.first == a0.row_space_included(b0)); - BOOST_TEST(res.second == a1.row_space_included(b1)); + REQUIRE(res.first == a0.row_space_included(b0)); + REQUIRE(res.second == a1.row_space_included(b1)); for (auto a0 : BMlist) { for (auto b0 : BMlist) { for (auto a1 : BMlist) { for (auto b1 : BMlist) { auto res = BMat8::row_space_included2(a0, b0, a1, b1); - BOOST_TEST(res.first == a0.row_space_included(b0)); - BOOST_TEST(res.second == a1.row_space_included(b1)); + REQUIRE(res.first == a0.row_space_included(b0)); + REQUIRE(res.second == a1.row_space_included(b1)); } } } } } - -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_row_permuted, Fix) { - BOOST_TEST(bm2.row_permuted(Perm16({1,0})) == - BMat8({{0,1},{1,1}})); - BOOST_TEST(bm2.row_permuted(Perm16({2,1,0})) == - BMat8({{0,0,0},{0,1,0},{1,1,0}})); - BOOST_TEST(bm.row_permuted(Perm16({5,3,1,4,2,0})) == - BMat8({{1, 1, 0, 0, 0, 0, 0, 1}, - {1, 1, 0, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 0, 1}, - {0, 0, 1, 0, 0, 1, 1, 1}, - {0, 1, 1, 1, 0, 1, 0, 1}, - {0, 0, 0, 1, 0, 0, 1, 1}, - {0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 1, 1, 1, 0, 1, 0}})); - BOOST_TEST(BMat8::one().row_permuted(Perm16({5,3,1,4,2,0})) == - BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); +TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") { + REQUIRE(bm2.row_permuted(Perm16({1, 0})) == BMat8({{0, 1}, {1, 1}})); + REQUIRE(bm2.row_permuted(Perm16({2, 1, 0})) == + BMat8({{0, 0, 0}, {0, 1, 0}, {1, 1, 0}})); + REQUIRE(bm.row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == + BMat8({{1, 1, 0, 0, 0, 0, 0, 1}, + {1, 1, 0, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1}, + {0, 0, 1, 0, 0, 1, 1, 1}, + {0, 1, 1, 1, 0, 1, 0, 1}, + {0, 0, 0, 1, 0, 0, 1, 1}, + {0, 1, 0, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}})); + REQUIRE(BMat8::one().row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == + BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } -BOOST_FIXTURE_TEST_CASE(BMat8_col_permuted, Fix) { - BOOST_TEST(bm2.col_permuted(Perm16({1,0})) == - BMat8({{1,1},{1,0}})); - BOOST_TEST(bm2.col_permuted(Perm16({2,1,0})) == - BMat8({{0,1,1},{0,1,0},{0,0,0}})); - BOOST_TEST(bm.col_permuted( - Perm16({5, 3, 1, 4, 2, 0})) == - BMat8( {{0, 1, 0, 0, 0, 0, 1, 1}, - {1, 1, 1, 1, 1, 1, 0, 1}, - {1, 1, 1, 0, 1, 0, 0, 1}, - {1, 1, 1, 1, 0, 1, 1, 1}, - {1, 0, 0, 0, 1, 0, 1, 1}, - {0, 0, 1, 0, 0, 1, 0, 1}, - {0, 0, 1, 0, 0, 0, 1, 1}, - {0, 1, 1, 1, 1, 0, 1, 0}})); - BOOST_TEST(BMat8::one().col_permuted( - Perm16({4, 1, 3, 0, 2, 6, 5})) == - BMat8({ {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); +TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", 
"[BMat8][022]") { + REQUIRE(bm2.col_permuted(Perm16({1, 0})) == BMat8({{1, 1}, {1, 0}})); + REQUIRE(bm2.col_permuted(Perm16({2, 1, 0})) == + BMat8({{0, 1, 1}, {0, 1, 0}, {0, 0, 0}})); + REQUIRE(bm.col_permuted(Perm16({5, 3, 1, 4, 2, 0})) == + BMat8({{0, 1, 0, 0, 0, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1}, + {1, 1, 1, 0, 1, 0, 0, 1}, + {1, 1, 1, 1, 0, 1, 1, 1}, + {1, 0, 0, 0, 1, 0, 1, 1}, + {0, 0, 1, 0, 0, 1, 0, 1}, + {0, 0, 1, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}})); + REQUIRE(BMat8::one().col_permuted(Perm16({4, 1, 3, 0, 2, 6, 5})) == + BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } -BOOST_FIXTURE_TEST_CASE(BMat8_row_permutation_matrix, Fix) { - BOOST_TEST(BMat8::row_permutation_matrix( - Perm16({1, 0})) == - BMat8({ {0, 1, 0, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); - BOOST_TEST(BMat8::row_permutation_matrix( - Perm16({1, 3, 4, 0, 2})) == - BMat8({ {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); - BOOST_TEST(BMat8::row_permutation_matrix( - Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({ {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); +TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") { + REQUIRE(BMat8::row_permutation_matrix(Perm16({1, 0})) == + BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); + REQUIRE(BMat8::row_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == + BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); + REQUIRE(BMat8::row_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == + BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } -BOOST_FIXTURE_TEST_CASE(BMat8_col_permutation_matrix, Fix) { - BOOST_TEST(BMat8::col_permutation_matrix( - Perm16({1, 0})) == - BMat8({ {0, 1, 0, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); - BOOST_TEST(BMat8::col_permutation_matrix( - Perm16({1, 3, 4, 0, 2})) == - BMat8({ {0, 0, 0, 1, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); - BOOST_TEST(BMat8::col_permutation_matrix( - Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({ {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 
1, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); +TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") { + REQUIRE(BMat8::col_permutation_matrix(Perm16({1, 0})) == + BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); + REQUIRE(BMat8::col_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == + BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); + REQUIRE(BMat8::col_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == + BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_nr_rows, Fix) { - BOOST_TEST(zero.nr_rows() == 0); - BOOST_TEST(one1.nr_rows() == 1); - BOOST_TEST(one2.nr_rows() == 2); - BOOST_TEST(bm.nr_rows() == 8); - BOOST_TEST(BMat8({{1, 0, 1}, - {1, 1, 0}, - {0, 0, 0}}).nr_rows() == 2); +TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") { + REQUIRE(zero.nr_rows() == 0); + REQUIRE(one1.nr_rows() == 1); + REQUIRE(one2.nr_rows() == 2); + REQUIRE(bm.nr_rows() == 8); + REQUIRE(BMat8({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2); } -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(BMat8_right_perm_action_on_basis_ref, Fix) { - BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); - BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); - BOOST_TEST(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); - BOOST_TEST(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); - - m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); - m2 = BMat8({{1, 0, 0, 0}, {0, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); - BOOST_TEST(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); - BOOST_TEST(m1.right_perm_action_on_basis(m2) == Perm16::one()); - - m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); - m2 = BMat8({{0, 0, 0, 0}, {1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); - BOOST_TEST(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); - BOOST_TEST(m1.right_perm_action_on_basis(m2) == Perm16::one()); - - m1 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,1}, {0,0,0,0}}); - m2 = BMat8({{1,0,0,1}, {0,0,1,0}, {0,1,0,0}, {0,0,0,1}}); - BOOST_TEST(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); - BOOST_TEST(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); - - m1 = BMat8({{0,0,0,1}, {1,0,0,0}, {0,0,1,0}, {0,1,0,0}}); - m2 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,0}, {0,0,0,1}}); - BOOST_TEST(m1.right_perm_action_on_basis_ref(m2) == Perm16({0,2,3,1})); - BOOST_TEST(m1.right_perm_action_on_basis(m2) == Perm16({0,2,3,1})); - - - m1 = BMat8({{0,0,0,1}, {0,0,1,0}, {0,1,0,0}, {1,0,0,0}}); - m2 = BMat8({{0,1,0,0}, {0,0,0,1}, {1,0,0,0}, {0,0,1,0}}); - BOOST_TEST(m1.right_perm_action_on_basis_ref(m2) == Perm16({2,0,3,1})); - BOOST_TEST(m1.right_perm_action_on_basis(m2) == Perm16({2,0,3,1})); -} -//****************************************************************************// 
-BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// +// TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") { +// BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); +// BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); +// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); +// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); +// +// m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); +// m2 = BMat8({{1, 0, 0, 0}, {0, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); +// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); +// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); +// +// m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); +// m2 = BMat8({{0, 0, 0, 0}, {1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); +// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); +// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); +// +// m1 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,1}, {0,0,0,0}}); +// m2 = BMat8({{1,0,0,1}, {0,0,1,0}, {0,1,0,0}, {0,0,0,1}}); +// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); +// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); +// +// m1 = BMat8({{0,0,0,1}, {1,0,0,0}, {0,0,1,0}, {0,1,0,0}}); +// m2 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,0}, {0,0,0,1}}); +// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({0,2,3,1})); +// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({0,2,3,1})); +// +// +// m1 = BMat8({{0,0,0,1}, {0,0,1,0}, {0,1,0,0}, {1,0,0,0}}); +// m2 = BMat8({{0,1,0,0}, {0,0,0,1}, {1,0,0,0}, {0,0,1,0}}); +// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({2,0,3,1})); +// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({2,0,3,1})); +// } + +} // namespace HPCombi diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index adef614a..5eb5e5e0 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -13,769 +13,706 @@ /* http://www.gnu.org/licenses/ */ /******************************************************************************/ -#define BOOST_TEST_MODULE EPUTests - -#include -#include +#include #include #include "epu.hpp" -#include -using namespace HPCombi; +#include "test_main.hpp" +#include -#define EPU8_EQUAL(p1, p2) BOOST_CHECK_PREDICATE(equal, (p1)(p2)) -#define EPU8_NOT_EQUAL(p1, p2) BOOST_CHECK_PREDICATE(boost::not2(equal), (p1)(p2)) - -#define TEST_AGREES(ref, fun) \ - BOOST_FIXTURE_TEST_CASE(EPU8_agrees_##fun, Fix) { \ - for (auto x : v) BOOST_TEST(fun(x) == ref(x)); \ - } -#define TEST_EPU8_AGREES(ref, fun) \ - BOOST_FIXTURE_TEST_CASE(EPU8_agrees_##fun, Fix) { \ - for (auto x : v) EPU8_EQUAL(fun(x), ref(x)); \ - } +// #define TEST_AGREES(ref, fun) \ +// BOOST_FIXTURE_TEST_CASE(Epu8::agrees_##fun, Fix) { \ +// for (auto x : v) \ +// REQUIRE(fun(x) == ref(x)); \ +// } +namespace HPCombi { struct Fix { - Fix() : zero(Epu8({}, 0)), P01(Epu8({0, 1}, 0)), - P10(Epu8({1, 0}, 0)), P11(Epu8({1, 1}, 0)), - P1(Epu8({}, 1)), - P112(Epu8({1, 1}, 2)), - Pa(epu8{1, 2, 3, 4, 0, 5, 6, 7, 8, 9,10,11,12,13,14,15}), - Pb(epu8{1, 2, 3, 6, 0, 5, 4, 7, 8, 9,10,11,12,15,14,13}), - RP(epu8{ 3, 1, 0,14,15,13, 5,10, 2,11, 6,12, 7, 4, 8, 9}), - Pa1(Epu8({4, 2, 5, 1, 2, 7, 7, 3, 4, 2}, 1)), - Pa2(Epu8({4, 2, 5, 1, 2, 9, 7, 3, 4, 2}, 1)), - P51(Epu8({5,1}, 6)), - Pv(epu8{ 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15}), - Pw(epu8{ 5, 5, 2, 9, 1, 6,12, 4, 0, 4, 4, 4,12,13,14,15}), - P5(Epu8({}, 5)), - Pc(Epu8({23, 5, 21, 5, 43, 36}, 7)), - // Elements should be sorted in 
alphabetic order here - v({zero, P01, epu8id, P10, P11, P1, P112, Pa, Pb, RP, - Pa1, Pa2, P51, Pv, Pw, P5, epu8rev, Pc}), - av({{ 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15}}) - { - BOOST_TEST_MESSAGE("setup fixture"); - } - ~Fix() { BOOST_TEST_MESSAGE("teardown fixture"); } - - const epu8 zero, P01, P10, P11, P1, P112, Pa, Pb, RP, - Pa1, Pa2, P51, Pv, Pw, P5, Pc; + Fix() + : zero(Epu8({}, 0)), P01(Epu8({0, 1}, 0)), P10(Epu8({1, 0}, 0)), + P11(Epu8({1, 1}, 0)), P1(Epu8({}, 1)), P112(Epu8({1, 1}, 2)), + Pa(epu8{1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + Pb(epu8{1, 2, 3, 6, 0, 5, 4, 7, 8, 9, 10, 11, 12, 15, 14, 13}), + RP(epu8{3, 1, 0, 14, 15, 13, 5, 10, 2, 11, 6, 12, 7, 4, 8, 9}), + Pa1(Epu8({4, 2, 5, 1, 2, 7, 7, 3, 4, 2}, 1)), + Pa2(Epu8({4, 2, 5, 1, 2, 9, 7, 3, 4, 2}, 1)), P51(Epu8({5, 1}, 6)), + Pv(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15}), + Pw(epu8{5, 5, 2, 9, 1, 6, 12, 4, 0, 4, 4, 4, 12, 13, 14, 15}), + P5(Epu8({}, 5)), Pc(Epu8({23, 5, 21, 5, 43, 36}, 7)), + // Elements should be sorted in alphabetic order here + v({zero, P01, epu8id, P10, P11, P1, P112, Pa, Pb, RP, Pa1, Pa2, P51, + Pv, Pw, P5, epu8rev, Pc}), + av({{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15}}) {} + ~Fix() = default; + + const epu8 zero, P01, P10, P11, P1, P112, Pa, Pb, RP, Pa1, Pa2, P51, Pv, Pw, + P5, Pc; const std::vector v; const std::array av; }; - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_compare) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_first_diff_ref, Fix) { - BOOST_TEST(first_diff_ref(Pc, Pc) == 16); - BOOST_TEST(first_diff_ref(zero, P01) == 1); - BOOST_TEST(first_diff_ref(zero, P10) == 0); - BOOST_TEST(first_diff_ref(zero, P01, 1) == 16); - BOOST_TEST(first_diff_ref(zero, P01, 2) == 1); - BOOST_TEST(first_diff_ref(Pa1, Pa2, 2) == 16); - BOOST_TEST(first_diff_ref(Pa1, Pa2, 4) == 16); - BOOST_TEST(first_diff_ref(Pa1, Pa2, 5) == 16); - BOOST_TEST(first_diff_ref(Pa1, Pa2, 6) == 5); - BOOST_TEST(first_diff_ref(Pa1, Pa2, 7) == 5); - BOOST_TEST(first_diff_ref(Pa1, Pa2) == 5); - BOOST_TEST(first_diff(Pv, Pw) == 3); - for (int i=0; i<16; i++) - BOOST_TEST(first_diff(Pv, Pw, i) == (i <= 3 ? 16 : 3)); -} -BOOST_FIXTURE_TEST_CASE(EPU8_first_diff_cmpstr, Fix) { +TEST_CASE_METHOD(Fix, "Epu8::first_diff_ref", "[Epu8][000]") { + REQUIRE(first_diff_ref(Pc, Pc) == 16); + REQUIRE(first_diff_ref(zero, P01) == 1); + REQUIRE(first_diff_ref(zero, P10) == 0); + REQUIRE(first_diff_ref(zero, P01, 1) == 16); + REQUIRE(first_diff_ref(zero, P01, 2) == 1); + REQUIRE(first_diff_ref(Pa1, Pa2, 2) == 16); + REQUIRE(first_diff_ref(Pa1, Pa2, 4) == 16); + REQUIRE(first_diff_ref(Pa1, Pa2, 5) == 16); + REQUIRE(first_diff_ref(Pa1, Pa2, 6) == 5); + REQUIRE(first_diff_ref(Pa1, Pa2, 7) == 5); + REQUIRE(first_diff_ref(Pa1, Pa2) == 5); + REQUIRE(first_diff(Pv, Pw) == 3); + for (int i = 0; i < 16; i++) + REQUIRE(first_diff(Pv, Pw, i) == (i <= 3 ? 
16 : 3)); +} + +#ifdef SIMDE_X86_SSE4_2_NATIVE +TEST_CASE_METHOD(Fix, "Epu8::first_diff_cmpstr", "[Epu8][001]") { for (auto x : v) { for (auto y : v) { - BOOST_TEST(first_diff_cmpstr(x, y) == first_diff_ref(x, y)); - for (int i=0; i<17; i++) - BOOST_TEST(first_diff_cmpstr(x, y, i) == first_diff_ref(x, y, i)); + REQUIRE(first_diff_cmpstr(x, y) == first_diff_ref(x, y)); + for (int i = 0; i < 17; i++) + REQUIRE(first_diff_cmpstr(x, y, i) == first_diff_ref(x, y, i)); } } } -BOOST_FIXTURE_TEST_CASE(EPU8_first_diff_mask, Fix) { +#endif +TEST_CASE_METHOD(Fix, "Epu8::first_diff_mask", "[Epu8][002]") { for (auto x : v) { for (auto y : v) { - BOOST_TEST(first_diff_mask(x, y) == first_diff_ref(x, y)); - for (int i=0; i<17; i++) - BOOST_TEST(first_diff_mask(x, y, i) == first_diff_ref(x, y, i)); + REQUIRE(first_diff_mask(x, y) == first_diff_ref(x, y)); + for (int i = 0; i < 17; i++) + REQUIRE(first_diff_mask(x, y, i) == first_diff_ref(x, y, i)); } } } -BOOST_FIXTURE_TEST_CASE(EPU8_last_diff_ref, Fix) { - BOOST_TEST(last_diff_ref(Pc, Pc) == 16); - BOOST_TEST(last_diff_ref(zero, P01) == 1); - BOOST_TEST(last_diff_ref(zero, P10) == 0); - BOOST_TEST(last_diff_ref(zero, P01, 1) == 16); - BOOST_TEST(last_diff_ref(zero, P01, 2) == 1); - BOOST_TEST(last_diff_ref(P1, Pa1) == 9); - BOOST_TEST(last_diff_ref(P1, Pa1, 12) == 9); - BOOST_TEST(last_diff_ref(P1, Pa1, 9) == 8); - BOOST_TEST(last_diff_ref(Pa1, Pa2, 2) == 16); - BOOST_TEST(last_diff_ref(Pa1, Pa2, 4) == 16); - BOOST_TEST(last_diff_ref(Pa1, Pa2, 5) == 16); - BOOST_TEST(last_diff_ref(Pa1, Pa2, 6) == 5); - BOOST_TEST(last_diff_ref(Pa1, Pa2, 7) == 5); - BOOST_TEST(last_diff_ref(Pa1, Pa2) == 5); - const std::array res {{ - 16,16,16,16, 3, 3, 3, 3, 3, 3,9,10,11,11,11,11,11 - }}; - for (int i=0; i<=16; i++) - BOOST_TEST(last_diff_ref(Pv, Pw, i) == res[i]); -} -BOOST_FIXTURE_TEST_CASE(EPU8_last_diff_cmpstr, Fix) { +TEST_CASE_METHOD(Fix, "Epu8::last_diff_ref", "[Epu8][003]") { + REQUIRE(last_diff_ref(Pc, Pc) == 16); + REQUIRE(last_diff_ref(zero, P01) == 1); + REQUIRE(last_diff_ref(zero, P10) == 0); + REQUIRE(last_diff_ref(zero, P01, 1) == 16); + REQUIRE(last_diff_ref(zero, P01, 2) == 1); + REQUIRE(last_diff_ref(P1, Pa1) == 9); + REQUIRE(last_diff_ref(P1, Pa1, 12) == 9); + REQUIRE(last_diff_ref(P1, Pa1, 9) == 8); + REQUIRE(last_diff_ref(Pa1, Pa2, 2) == 16); + REQUIRE(last_diff_ref(Pa1, Pa2, 4) == 16); + REQUIRE(last_diff_ref(Pa1, Pa2, 5) == 16); + REQUIRE(last_diff_ref(Pa1, Pa2, 6) == 5); + REQUIRE(last_diff_ref(Pa1, Pa2, 7) == 5); + REQUIRE(last_diff_ref(Pa1, Pa2) == 5); + const std::array res{ + {16, 16, 16, 16, 3, 3, 3, 3, 3, 3, 9, 10, 11, 11, 11, 11, 11}}; + for (int i = 0; i <= 16; i++) { + REQUIRE(last_diff_ref(Pv, Pw, i) == res[i]); + } +} +#ifdef SIMDE_X86_SSE4_2_NATIVE +TEST_CASE_METHOD(Fix, "Epu8::last_diff_cmpstr", "[Epu8][004]") { for (auto x : v) { for (auto y : v) { - BOOST_TEST(last_diff_cmpstr(x, y) == last_diff_ref(x, y)); - for (int i=0; i<17; i++) - BOOST_TEST(last_diff_cmpstr(x, y, i) == last_diff_ref(x, y, i)); + REQUIRE(last_diff_cmpstr(x, y) == last_diff_ref(x, y)); + for (int i = 0; i < 17; i++) + REQUIRE(last_diff_cmpstr(x, y, i) == last_diff_ref(x, y, i)); } } } -BOOST_FIXTURE_TEST_CASE(EPU8_last_diff_mask, Fix) { +#endif + +TEST_CASE_METHOD(Fix, "Epu8::last_diff_mask", "[Epu8][005]") { for (auto x : v) { for (auto y : v) { - BOOST_TEST(last_diff_mask(x, y) == last_diff_ref(x, y)); - for (int i=0; i<17; i++) - BOOST_TEST(last_diff_mask(x, y, i) == last_diff_ref(x, y, i)); + REQUIRE(last_diff_mask(x, y) == last_diff_ref(x, y)); + for (int i 
= 0; i < 17; i++) + REQUIRE(last_diff_mask(x, y, i) == last_diff_ref(x, y, i)); } } } - -BOOST_FIXTURE_TEST_CASE(EPU8_is_all_zero, Fix) { - BOOST_TEST(is_all_zero(zero)); +TEST_CASE_METHOD(Fix, "Epu8::is_all_zero", "[Epu8][006]") { + REQUIRE(is_all_zero(zero)); for (size_t i = 1; i < v.size(); i++) { - BOOST_TEST(not is_all_zero(v[i])); + REQUIRE(!is_all_zero(v[i])); } } -BOOST_FIXTURE_TEST_CASE(EPU8_is_all_one, Fix) { +TEST_CASE_METHOD(Fix, "Epu8::is_all_one", "[Epu8][007]") { for (size_t i = 0; i < v.size(); i++) { - BOOST_TEST(not is_all_one(v[i])); + REQUIRE(!is_all_one(v[i])); } - BOOST_TEST(is_all_one(Epu8(0xFF))); + REQUIRE(is_all_one(Epu8(0xFF))); } -BOOST_FIXTURE_TEST_CASE(EPU8_equal, Fix) { +TEST_CASE_METHOD(Fix, "Epu8::equal", "[Epu8][008]") { for (size_t i = 0; i < v.size(); i++) { epu8 a = v[i]; for (size_t j = 0; j < v.size(); j++) { epu8 b = v[j]; if (i == j) { - BOOST_CHECK_PREDICATE(equal, (a)(b)); - BOOST_CHECK_PREDICATE(boost::not2(not_equal), (a)(b)); - BOOST_CHECK_PREDICATE(std::equal_to(), (a)(b)); -// For some reason, the following line doesn't compile -// BOOST_CHECK_PREDICATE(boost::not2(std::not_equal_to()), -// (a)(b)); - BOOST_CHECK_PREDICATE( - [](epu8 a, epu8 b) { - return not std::not_equal_to()(a, b); - }, (a)(b)); + REQUIRE(equal(a, b)); + REQUIRE(!not_equal(a, b)); + REQUIRE(std::equal_to()(a, b)); + REQUIRE(!std::not_equal_to()(a, b)); } else { - BOOST_CHECK_PREDICATE(boost::not2(equal), (a)(b)); - BOOST_CHECK_PREDICATE(not_equal, (a)(b)); - BOOST_CHECK_PREDICATE(std::not_equal_to(), (a)(b)); -// For some reason, the following line doesn't compile -// BOOST_CHECK_PREDICATE(boost::not2(std::equal_to()), (a)(b)); - BOOST_CHECK_PREDICATE( - [](epu8 a, epu8 b) { - return not std::equal_to()(a, b); - }, (a)(b)); + REQUIRE(!equal(a, b)); + REQUIRE(not_equal(a, b)); + REQUIRE(std::not_equal_to()(a, b)); + REQUIRE(!std::equal_to()(a, b)); } } } } -BOOST_FIXTURE_TEST_CASE(EPU8_not_equal, Fix) { - for (size_t i = 0; i < v.size(); i++) - for (size_t j = 0; j < v.size(); j++) - if (i == j) - BOOST_CHECK_PREDICATE(boost::not2(not_equal), - (v[i])(v[j])); - else - BOOST_CHECK_PREDICATE(not_equal, (v[i])(v[j])); +TEST_CASE_METHOD(Fix, "Epu8::not_equal", "[Epu8][009]") { + for (size_t i = 0; i < v.size(); i++) { + for (size_t j = 0; j < v.size(); j++) { + if (i == j) { + REQUIRE(!not_equal(v[i], v[j])); + } else { + REQUIRE(not_equal(v[i], v[j])); + } + } + } } -BOOST_FIXTURE_TEST_CASE(EPU8_less, Fix) { - for (size_t i = 0; i < v.size(); i++) - for (size_t j = 0; j < v.size(); j++) - if (i < j) - BOOST_CHECK_PREDICATE(less, (v[i])(v[j])); - else - BOOST_CHECK_PREDICATE(boost::not2(less), (v[i])(v[j])); +TEST_CASE_METHOD(Fix, "Epu8::less", "[Epu8][010]") { + for (size_t i = 0; i < v.size(); i++) { + for (size_t j = 0; j < v.size(); j++) { + if (i < j) { + REQUIRE(less(v[i], v[j])); + } else { + REQUIRE(!less(v[i], v[j])); + } + } + } } -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_permute) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_permuted, Fix) { - EPU8_EQUAL(permuted(epu8{ 0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}, - epu8{ 3, 2, 5, 1, 4, 0, 6, 7, 8, 9,10,11,12,13,14,15}), - (epu8 { 2, 3, 5, 1, 4, 0, 6, 7, 8, 9,10,11,12,13,14,15})); - 
EPU8_EQUAL(permuted(epu8{ 3, 2, 5, 1, 4, 0, 6, 7, 8, 9,10,11,12,13,14,15}, - epu8{ 0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), - (epu8 { 3, 2, 1, 5, 4, 0, 6, 7, 8, 9,10,11,12,13,14,15})); - EPU8_EQUAL(permuted(epu8{ 3, 2, 5, 1, 4, 0, 6, 7, 8, 9,10,11,12,13,14,15}, - epu8{ 2, 2, 1, 2, 3, 6,12, 4, 5,16,17,11,12,13,14,15}), - (epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15})); +TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { + REQUIRE(equal( + permuted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + epu8{2, 3, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE(equal( + permuted(epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + epu8{3, 2, 1, 5, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE(equal( + permuted(epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}), + epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); } -BOOST_FIXTURE_TEST_CASE(EPU8_shifted_left, Fix) { - EPU8_EQUAL(shifted_left(P01), P10); - EPU8_EQUAL(shifted_left(P112), (epu8{1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0})); - EPU8_EQUAL(shifted_left(Pv), - (epu8{ 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15, 0})); +TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { + REQUIRE(equal(shifted_left(P01), P10)); + REQUIRE(equal(shifted_left(P112), + epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); + REQUIRE(equal(shifted_left(Pv), + epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15, 0})); } -BOOST_FIXTURE_TEST_CASE(EPU8_shifted_right, Fix) { - EPU8_EQUAL(shifted_right(P10), P01); - EPU8_EQUAL(shifted_right(P112), Epu8({0,1,1}, 2)); - EPU8_EQUAL(shifted_right(Pv), - (epu8{ 0, 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14})); +TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][013]") { + REQUIRE(equal(shifted_right(P10), P01)); + REQUIRE(equal(shifted_right(P112), Epu8({0, 1, 1}, 2))); + REQUIRE(equal(shifted_right(Pv), + epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14})); } -BOOST_FIXTURE_TEST_CASE(EPU8_reverted, Fix) { - EPU8_EQUAL(reverted(epu8id), epu8rev); - for (auto x : v) EPU8_EQUAL(x, reverted(reverted(x))); +TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][014]") { + REQUIRE(equal(reverted(epu8id), epu8rev)); + for (auto x : v) { + REQUIRE(equal(x, reverted(reverted(x)))); + } } -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_array) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_as_array, Fix) { +TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][015]") { epu8 x = Epu8({4, 2, 5, 1, 2, 7, 7, 3, 4, 2}, 1); - auto & refx = as_array(x); + auto &refx = as_array(x); refx[2] = 42; - EPU8_EQUAL(x, Epu8({4, 2, 42, 1, 2, 7, 7, 3, 4, 2}, 1)); - std::fill(refx.begin()+4, refx.end(), 3); - EPU8_EQUAL(x, Epu8({4, 2, 42, 1}, 3)); - BOOST_TEST(av == as_array(Pv)); + REQUIRE(equal(x, Epu8({4, 2, 42, 1, 2, 7, 7, 3, 4, 2}, 1))); + std::fill(refx.begin() + 4, refx.end(), 3); + REQUIRE(equal(x, Epu8({4, 2, 42, 1}, 3))); + REQUIRE(av == as_array(Pv)); } -BOOST_FIXTURE_TEST_CASE(EPU8_from_array, Fix) { +TEST_CASE_METHOD(Fix, "Epu8::from_array", 
"[Epu8][016]") { for (auto x : v) { - EPU8_EQUAL(x, from_array(as_array(x))); + REQUIRE(equal(x, from_array(as_array(x)))); } - EPU8_EQUAL(Pv, from_array(av)); -} -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_sorting) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_is_sorted, Fix) { - BOOST_TEST(is_sorted(epu8id)); - BOOST_TEST(is_sorted(epu8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15})); - BOOST_TEST(is_sorted(Epu8({ 0, 1}, 2))); - BOOST_TEST(is_sorted(Epu8({0}, 1))); - BOOST_TEST(is_sorted(Epu8({}, 5))); - BOOST_TEST(not is_sorted(epu8{ 0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15})); - BOOST_TEST(not is_sorted(Epu8({ 0, 2}, 1))); - BOOST_TEST(not is_sorted(Epu8({ 0, 0, 2}, 1))); - BOOST_TEST(not is_sorted(Epu8({6}, 5))); + REQUIRE(equal(Pv, from_array(av))); +} + +TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { + REQUIRE(is_sorted(epu8id)); + REQUIRE( + is_sorted(epu8{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE(is_sorted(Epu8({0, 1}, 2))); + REQUIRE(is_sorted(Epu8({0}, 1))); + REQUIRE(is_sorted(Epu8({}, 5))); + REQUIRE( + !is_sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE(!is_sorted(Epu8({0, 2}, 1))); + REQUIRE(!is_sorted(Epu8({0, 0, 2}, 1))); + REQUIRE(!is_sorted(Epu8({6}, 5))); epu8 x = epu8id; - BOOST_TEST(is_sorted(x)); - auto & refx = as_array(x); - while (std::next_permutation(refx.begin(), refx.begin()+9)) { - BOOST_TEST(not is_sorted(x)); + REQUIRE(is_sorted(x)); + auto &refx = as_array(x); + while (std::next_permutation(refx.begin(), refx.begin() + 9)) { + REQUIRE(!is_sorted(x)); } x = epu8id; - while (std::next_permutation(refx.begin()+8, refx.begin()+16)) { - BOOST_TEST(not is_sorted(x)); + while (std::next_permutation(refx.begin() + 8, refx.begin() + 16)) { + REQUIRE(!is_sorted(x)); } x = sorted(Pa1); - BOOST_TEST(is_sorted(x)); - while (std::next_permutation(refx.begin(), refx.begin()+14)) { - BOOST_TEST(not is_sorted(x)); + REQUIRE(is_sorted(x)); + while (std::next_permutation(refx.begin(), refx.begin() + 14)) { + REQUIRE(!is_sorted(x)); } } -BOOST_FIXTURE_TEST_CASE(EPU8_sorted, Fix) { - EPU8_EQUAL(sorted(epu8{ 0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), - epu8id); - for (auto &x : v) - BOOST_TEST(is_sorted(sorted(x))); +TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { + REQUIRE(equal( + sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + epu8id)); + for (auto &x : v) { + REQUIRE(is_sorted(sorted(x))); + } epu8 x = epu8id; - BOOST_TEST(is_sorted(x)); - auto & refx = as_array(x); + REQUIRE(is_sorted(x)); + auto &refx = as_array(x); do { - BOOST_TEST(is_sorted(sorted(x))); - } while (std::next_permutation(refx.begin(), refx.begin()+9)); + REQUIRE(is_sorted(sorted(x))); + } while (std::next_permutation(refx.begin(), refx.begin() + 9)); } -BOOST_FIXTURE_TEST_CASE(EPU8_revsorted, Fix) { - EPU8_EQUAL(revsorted(epu8{ 0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}), - epu8rev); - for (auto &x : v) - BOOST_TEST(is_sorted(reverted(revsorted(x)))); +TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { + REQUIRE(equal( + revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + epu8rev)); + for (auto &x : v) { + 
REQUIRE(is_sorted(reverted(revsorted(x)))); + } epu8 x = epu8id; - BOOST_TEST(is_sorted(x)); - auto & refx = as_array(x); + REQUIRE(is_sorted(x)); + auto &refx = as_array(x); do { - BOOST_TEST(is_sorted(reverted(revsorted(x)))); - } while (std::next_permutation(refx.begin(), refx.begin()+9)); + REQUIRE(is_sorted(reverted(revsorted(x)))); + } while (std::next_permutation(refx.begin(), refx.begin() + 9)); } -BOOST_FIXTURE_TEST_CASE(EPU8_sort_perm, Fix) { - epu8 ve { 2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - EPU8_EQUAL(sort_perm(ve), - (epu8{ 9,15, 1, 5, 6,10,12, 3, 0, 8,11, 2,13, 7, 4,14})); - EPU8_EQUAL(ve, - (epu8{ 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); +TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { + epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; + REQUIRE(equal(sort_perm(ve), + epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, 2, 13, 7, 4, 14})); + REQUIRE(equal(ve, epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); - BOOST_TEST(is_sorted(xsort)); - BOOST_TEST(is_permutation(psort)); - EPU8_EQUAL(permuted(x, psort), xsort); + REQUIRE(is_sorted(xsort)); + REQUIRE(is_permutation(psort)); + REQUIRE(equal(permuted(x, psort), xsort)); } - } -BOOST_FIXTURE_TEST_CASE(EPU8_sort8_perm, Fix) { - epu8 ve { 2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - EPU8_EQUAL(sort8_perm(ve), - (epu8{ 1, 6, 5, 0, 3, 2, 4, 7, 9,15,10,12, 8,11,13,14})); - EPU8_EQUAL(ve, - (epu8{ 1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); +TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { + epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; + REQUIRE(equal(sort8_perm(ve), + epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, 12, 8, 11, 13, 14})); + REQUIRE(equal(ve, epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); - BOOST_TEST(is_sorted(xsort | Epu8({0,0,0,0,0,0,0,0}, 0xFF))); - BOOST_TEST(is_sorted(xsort & Epu8({0,0,0,0,0,0,0,0}, 0xFF))); - BOOST_TEST(is_permutation(psort)); - EPU8_EQUAL(permuted(x, psort), xsort); + REQUIRE(is_sorted(xsort | Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); + REQUIRE(is_sorted(xsort & Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); + REQUIRE(is_permutation(psort)); + REQUIRE(equal(permuted(x, psort), xsort)); } - } - -BOOST_FIXTURE_TEST_CASE(EPU8_permutation_of, Fix) { - EPU8_EQUAL(permutation_of(epu8id, epu8id), epu8id); - EPU8_EQUAL(permutation_of(Pa, Pa), epu8id); - EPU8_EQUAL(permutation_of(epu8rev, epu8id), epu8rev); - EPU8_EQUAL(permutation_of(epu8id, epu8rev), epu8rev); - EPU8_EQUAL(permutation_of(epu8rev, epu8rev), epu8id); - EPU8_EQUAL(permutation_of(epu8id, RP), RP); - const uint8_t FF = 0xff; - EPU8_EQUAL((permutation_of(Pv, Pv) | -// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15 -// epu8{ 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15} - (epu8{FF,FF,FF,FF, 0, 0,FF, 0, 0, 0,FF, 0,FF, 0, 0, 0})), - (epu8 {FF,FF,FF,FF, 4, 5,FF, 7, 8, 9,FF,11,FF,13,14,15})); -} -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_remove_dups_sum) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_remove_dups, Fix) { - EPU8_EQUAL(remove_dups(P1), P10); - EPU8_EQUAL(remove_dups(P11), P10); - 
EPU8_EQUAL(remove_dups(sorted(P10)), - (epu8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); - EPU8_EQUAL(remove_dups(sorted(Pv)), - (epu8{ 0, 1, 2, 0, 3, 4, 5, 0, 0, 6,11,12, 0,13,14,15})); - EPU8_EQUAL(remove_dups(P1, 1), P1); - EPU8_EQUAL(remove_dups(P11, 1), Epu8({1,1,0},1)); - EPU8_EQUAL(remove_dups(P11, 42), Epu8({1,42,0},42)); - EPU8_EQUAL(remove_dups(sorted(P10), 1), P1); - EPU8_EQUAL(remove_dups(sorted(Pv), 7), - (epu8{ 7, 1, 2, 7, 3, 4, 5, 7, 7, 6,11,12, 7,13,14,15})); +// TODO uncomment +// TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { +// REQUIRE(equal(permutation_of(epu8id, epu8id), epu8id)); +// REQUIRE(equal(permutation_of(Pa, Pa), epu8id)); +// REQUIRE(equal(permutation_of(epu8rev, epu8id), epu8rev)); +// REQUIRE(equal(permutation_of(epu8id, epu8rev), epu8rev)); +// REQUIRE(equal(permutation_of(epu8rev, epu8rev), epu8id)); +// REQUIRE(equal(permutation_of(epu8id, RP), RP)); +// const uint8_t FF = 0xff; +// REQUIRE(equal( +// (permutation_of(Pv, Pv) | +// epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0})), +// epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15}); +// } +// +TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { + REQUIRE(equal(remove_dups(P1), P10)); + REQUIRE(equal(remove_dups(P11), P10)); + REQUIRE(equal(remove_dups(sorted(P10)), + epu8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); + REQUIRE(equal(remove_dups(sorted(Pv)), + epu8{0, 1, 2, 0, 3, 4, 5, 0, 0, 6, 11, 12, 0, 13, 14, 15})); + REQUIRE(equal(remove_dups(P1, 1), P1)); + REQUIRE(equal(remove_dups(P11, 1), Epu8({1, 1, 0}, 1))); + REQUIRE(equal(remove_dups(P11, 42), Epu8({1, 42, 0}, 42))); + REQUIRE(equal(remove_dups(sorted(P10), 1), P1)); + REQUIRE(equal(remove_dups(sorted(Pv), 7), + epu8{7, 1, 2, 7, 3, 4, 5, 7, 7, 6, 11, 12, 7, 13, 14, 15})); for (auto x : v) { x = sorted(remove_dups(sorted(x))); - EPU8_EQUAL(x, sorted(remove_dups(x))); + REQUIRE(equal(x, sorted(remove_dups(x)))); } for (auto x : v) { x = sorted(remove_dups(sorted(x), 42)); - EPU8_EQUAL(x, sorted(remove_dups(x, 42))); + REQUIRE(equal(x, sorted(remove_dups(x, 42)))); } } -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_horiz_sum) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_horiz_sum_ref, Fix) { - BOOST_TEST(horiz_sum_ref(zero) == 0); - BOOST_TEST(horiz_sum_ref(P01) == 1); - BOOST_TEST(horiz_sum_ref(epu8id) == 120); - BOOST_TEST(horiz_sum_ref(P10) == 1); - BOOST_TEST(horiz_sum_ref(P11) == 2); - BOOST_TEST(horiz_sum_ref(P1) == 16); - BOOST_TEST(horiz_sum_ref(P112) == 30); - BOOST_TEST(horiz_sum_ref(Pa1) == 43); - BOOST_TEST(horiz_sum_ref(Pa2) == 45); - BOOST_TEST(horiz_sum_ref(P51) == 90); - BOOST_TEST(horiz_sum_ref(Pv) == 110); - BOOST_TEST(horiz_sum_ref(P5) == 80); - BOOST_TEST(horiz_sum_ref(epu8rev) == 120); - BOOST_TEST(horiz_sum_ref(Pc) == 203); -} -TEST_AGREES(horiz_sum_ref, horiz_sum_gen) -TEST_AGREES(horiz_sum_ref, horiz_sum4) -TEST_AGREES(horiz_sum_ref, horiz_sum3) -TEST_AGREES(horiz_sum_ref, horiz_sum) -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - 
-//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_partial_sums) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_partial_sums_ref, Fix) { - EPU8_EQUAL(partial_sums_ref(zero), zero); - EPU8_EQUAL(partial_sums_ref(P01), Epu8({0}, 1)); - EPU8_EQUAL(partial_sums_ref(epu8id), - (epu8{ 0, 1, 3, 6,10,15,21,28,36,45,55,66,78,91,105,120})); - EPU8_EQUAL(partial_sums_ref(P10), P1); - EPU8_EQUAL(partial_sums_ref(P11), Epu8({1}, 2)); - EPU8_EQUAL(partial_sums_ref(P1), epu8id + Epu8({}, 1)); - EPU8_EQUAL(partial_sums_ref(P112), - (epu8{ 1, 2, 4, 6, 8,10,12,14,16,18,20,22,24,26,28,30})); - EPU8_EQUAL(partial_sums_ref(Pa1), - (epu8{ 4, 6,11,12,14,21,28,31,35,37,38,39,40,41,42,43})); - EPU8_EQUAL(partial_sums_ref(Pa2), - (epu8{ 4, 6,11,12,14,23,30,33,37,39,40,41,42,43,44,45})); - EPU8_EQUAL(partial_sums_ref(P51), - (epu8{ 5, 6,12,18,24,30,36,42,48,54,60,66,72,78,84,90})); - EPU8_EQUAL(partial_sums_ref(Pv), - (epu8{ 5,10,12,17,18,24,36,40,40,43,45,56,68,81,95,110})); - EPU8_EQUAL(partial_sums_ref(P5), - (epu8{ 5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80})); - EPU8_EQUAL(partial_sums_ref(epu8rev), - (epu8{15,29,42,54,65,75,84,92,99,105,110,114,117,119,120,120})); - EPU8_EQUAL(partial_sums_ref(Pc), - (epu8{23,28,49,54,97,133,140,147,154,161,168,175,182,189,196,203})); -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_sum_gen, Fix) { - for (auto x : v) EPU8_EQUAL(partial_sums_gen(x), partial_sums_ref(x)); -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_sum_round, Fix) { - for (auto x : v) EPU8_EQUAL(partial_sums_round(x), partial_sums_ref(x)); -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_sum, Fix) { - for (auto x : v) EPU8_EQUAL(partial_sums(x), partial_sums_ref(x)); -} -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_horiz_max) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_horiz_max_ref, Fix) { - BOOST_TEST(horiz_max_ref(zero) == 0); - BOOST_TEST(horiz_max_ref(P01) == 1); - BOOST_TEST(horiz_max_ref(epu8id) == 15); - BOOST_TEST(horiz_max_ref(P10) == 1); - BOOST_TEST(horiz_max_ref(P11) == 1); - BOOST_TEST(horiz_max_ref(P1) == 1); - BOOST_TEST(horiz_max_ref(P112) == 2); - BOOST_TEST(horiz_max_ref(Pa1) == 7); - BOOST_TEST(horiz_max_ref(Pa2) == 9); - BOOST_TEST(horiz_max_ref(P51) == 6); - BOOST_TEST(horiz_max_ref(Pv) == 15); - BOOST_TEST(horiz_max_ref(P5) == 5); - BOOST_TEST(horiz_max_ref(epu8rev) == 15); - BOOST_TEST(horiz_max_ref(Pc) == 43); -} -TEST_AGREES(horiz_max_ref, horiz_max_gen) -TEST_AGREES(horiz_max_ref, horiz_max4) -TEST_AGREES(horiz_max_ref, horiz_max3) -TEST_AGREES(horiz_max_ref, horiz_max) -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_partial_max) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_partial_max_ref, Fix) { - EPU8_EQUAL(partial_max_ref(zero), zero); - EPU8_EQUAL(partial_max_ref(P01), Epu8({0}, 1)); - EPU8_EQUAL(partial_max_ref(epu8id), epu8id); - 
EPU8_EQUAL(partial_max_ref(P10), P1); - EPU8_EQUAL(partial_max_ref(P11), P1); - EPU8_EQUAL(partial_max_ref(P1), P1); - EPU8_EQUAL(partial_max_ref(P112), P112); - EPU8_EQUAL(partial_max_ref(Pa1), (Epu8({4, 4, 5, 5, 5}, 7))); - EPU8_EQUAL(partial_max_ref(Pa2), (Epu8({4, 4, 5, 5, 5}, 9))); - EPU8_EQUAL(partial_max_ref(P51), (Epu8({5, 5}, 6))); - EPU8_EQUAL(partial_max_ref(Pv), - (epu8{ 5, 5, 5, 5, 5, 6,12,12,12,12,12,12,12,13,14,15})); - EPU8_EQUAL(partial_max_ref(P5), P5); - EPU8_EQUAL(partial_max_ref(epu8rev), Epu8({}, 15)); - EPU8_EQUAL(partial_max_ref(Pc), (Epu8({23,23,23,23}, 43))); - -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_sum_gen, Fix) { - for (auto x : v) EPU8_EQUAL(partial_max_gen(x), partial_max_ref(x)); -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_max_round, Fix) { - for (auto x : v) EPU8_EQUAL(partial_max_round(x), partial_max_ref(x)); -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_max, Fix) { - for (auto x : v) EPU8_EQUAL(partial_max(x), partial_max_ref(x)); -} -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_horiz_min) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_horiz_min_ref, Fix) { - BOOST_TEST(horiz_min_ref(zero) == 0); - BOOST_TEST(horiz_min_ref(P01) == 0); - BOOST_TEST(horiz_min_ref(epu8id) == 0); - BOOST_TEST(horiz_min_ref(P10) == 0); - BOOST_TEST(horiz_min_ref(P11) == 0); - BOOST_TEST(horiz_min_ref(P1) == 1); - BOOST_TEST(horiz_min_ref(P112) == 1); - BOOST_TEST(horiz_min_ref(Pa1) == 1); - BOOST_TEST(horiz_min_ref(Pa2) == 1); - BOOST_TEST(horiz_min_ref(P51) == 1); - BOOST_TEST(horiz_min_ref(Pv) == 0); - BOOST_TEST(horiz_min_ref(P5) == 5); - BOOST_TEST(horiz_min_ref(epu8rev) == 0); - BOOST_TEST(horiz_min_ref(Pc) == 5); -} -TEST_AGREES(horiz_min_ref, horiz_min_gen) -TEST_AGREES(horiz_min_ref, horiz_min4) -TEST_AGREES(horiz_min_ref, horiz_min3) -TEST_AGREES(horiz_min_ref, horiz_min) -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_partial_min) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_partial_min_ref, Fix) { - EPU8_EQUAL(partial_min_ref(zero), zero); - EPU8_EQUAL(partial_min_ref(P01), zero); - EPU8_EQUAL(partial_min_ref(epu8id), zero); - EPU8_EQUAL(partial_min_ref(P10), P10); - EPU8_EQUAL(partial_min_ref(P11), P11); - EPU8_EQUAL(partial_min_ref(P1), P1); - EPU8_EQUAL(partial_min_ref(P112), P1); - EPU8_EQUAL(partial_min_ref(Pa1), (Epu8({4, 2, 2}, 1))); - EPU8_EQUAL(partial_min_ref(Pa2), (Epu8({4, 2, 2}, 1))); - EPU8_EQUAL(partial_min_ref(P51), (Epu8({5}, 1))); - EPU8_EQUAL(partial_min_ref(Pv), - (Epu8({ 5, 5, 2, 2, 1, 1, 1, 1,}, 0))); - EPU8_EQUAL(partial_min_ref(P5), P5); - EPU8_EQUAL(partial_min_ref(epu8rev), epu8rev); - EPU8_EQUAL(partial_min_ref(Pc), (Epu8({23}, 5))); - -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_sum_gen, Fix) { - for (auto x : v) EPU8_EQUAL(partial_min_gen(x), partial_min_ref(x)); -} -BOOST_FIXTURE_TEST_CASE(EPU8_partial_min_round, Fix) { - for (auto x : v) EPU8_EQUAL(partial_min_round(x), partial_min_ref(x)); -} 
-BOOST_FIXTURE_TEST_CASE(EPU8_partial_min, Fix) { - for (auto x : v) EPU8_EQUAL(partial_min(x), partial_min_ref(x)); -} -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_eval16) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_eval16_ref, Fix) { - EPU8_EQUAL(eval16_ref(zero), Epu8({16}, 0)); - EPU8_EQUAL(eval16_ref(P01), Epu8({15, 1}, 0)); - EPU8_EQUAL(eval16_ref(epu8id), Epu8({}, 1)); - EPU8_EQUAL(eval16_ref(P10), Epu8({15, 1}, 0)); - EPU8_EQUAL(eval16_ref(P11), Epu8({14, 2}, 0)); - EPU8_EQUAL(eval16_ref(P1), Epu8({0, 16}, 0)); - EPU8_EQUAL(eval16_ref(P112), Epu8({0, 2, 14}, 0)); - EPU8_EQUAL(eval16_ref(Pa1), Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0)); - EPU8_EQUAL(eval16_ref(Pa2), Epu8({ 0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0)); - EPU8_EQUAL(eval16_ref(P51), Epu8({ 0, 1, 0, 0, 0, 1,14}, 0)); - EPU8_EQUAL(eval16_ref(Pv), - (epu8{ 1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); - EPU8_EQUAL(eval16_ref(P5), Epu8({ 0, 0, 0, 0, 0, 16}, 0)); - EPU8_EQUAL(eval16_ref(epu8rev), Epu8({}, 1)); - EPU8_EQUAL(eval16_ref(Pc), Epu8({ 0, 0, 0, 0, 0, 2, 0,10}, 0)); -} -TEST_EPU8_AGREES(eval16_ref, eval16_cycle) -TEST_EPU8_AGREES(eval16_ref, eval16_popcount) -TEST_EPU8_AGREES(eval16_ref, eval16_arr) -TEST_EPU8_AGREES(eval16_ref, eval16_gen) -TEST_EPU8_AGREES(eval16_ref, eval16) -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_vector_popcount) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(EPU8_popcount4, Fix) { - EPU8_EQUAL(popcount4, - (epu8 { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); -} - -BOOST_FIXTURE_TEST_CASE(EPU8_popcount, Fix) { - EPU8_EQUAL(popcount16(Pv), - (epu8 {2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); - EPU8_EQUAL(popcount16(RP), - (epu8 {2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - EPU8_EQUAL(popcount16(RP << 1), - (epu8 {2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - EPU8_EQUAL(popcount16(RP << 2), - (epu8 {2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - EPU8_EQUAL(popcount16(Epu8({0,1,5,0xff, 0xf0, 0x35}, 0x0f)), - (Epu8({0,1,2,8}, 4))); -} -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_random) -//****************************************************************************// - -BOOST_AUTO_TEST_CASE(Random) { - for (int i = 0; i<10 ; i++) { + +TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { + REQUIRE(horiz_sum_ref(zero) == 0); + REQUIRE(horiz_sum_ref(P01) == 1); + REQUIRE(horiz_sum_ref(epu8id) == 120); + REQUIRE(horiz_sum_ref(P10) == 1); + REQUIRE(horiz_sum_ref(P11) == 2); + REQUIRE(horiz_sum_ref(P1) == 16); + REQUIRE(horiz_sum_ref(P112) == 30); + REQUIRE(horiz_sum_ref(Pa1) == 43); + REQUIRE(horiz_sum_ref(Pa2) == 45); + REQUIRE(horiz_sum_ref(P51) == 90); + 
REQUIRE(horiz_sum_ref(Pv) == 110); + REQUIRE(horiz_sum_ref(P5) == 80); + REQUIRE(horiz_sum_ref(epu8rev) == 120); + REQUIRE(horiz_sum_ref(Pc) == 203); +} + +// TODO uncomment, compiler complains that Epu8 is an unknown type!? +// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") +// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum4, v, "[Epu8][026]") +// +// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") +// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") + +TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { + REQUIRE(equal(partial_sums_ref(zero), zero)); + REQUIRE(equal(partial_sums_ref(P01), Epu8({0}, 1))); + REQUIRE( + equal(partial_sums_ref(epu8id), epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, + 55, 66, 78, 91, 105, 120})); + REQUIRE(equal(partial_sums_ref(P10), P1)); + REQUIRE(equal(partial_sums_ref(P11), Epu8({1}, 2))); + REQUIRE(equal(partial_sums_ref(P1), epu8id + Epu8({}, 1))); + REQUIRE(equal(partial_sums_ref(P112), epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, + 18, 20, 22, 24, 26, 28, 30})); + REQUIRE(equal(partial_sums_ref(Pa1), epu8{4, 6, 11, 12, 14, 21, 28, 31, 35, + 37, 38, 39, 40, 41, 42, 43})); + + REQUIRE(equal(partial_sums_ref(Pa2), epu8{4, 6, 11, 12, 14, 23, 30, 33, 37, + 39, 40, 41, 42, 43, 44, 45})); + REQUIRE(equal(partial_sums_ref(P51), epu8{5, 6, 12, 18, 24, 30, 36, 42, 48, + 54, 60, 66, 72, 78, 84, 90})); + REQUIRE(equal(partial_sums_ref(Pv), epu8{5, 10, 12, 17, 18, 24, 36, 40, 40, + 43, 45, 56, 68, 81, 95, 110})); + REQUIRE(equal(partial_sums_ref(P5), epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, + 50, 55, 60, 65, 70, 75, 80})); + REQUIRE(equal(partial_sums_ref(epu8rev), + epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, 117, + 119, 120, 120})); + REQUIRE( + equal(partial_sums_ref(Pc), epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, + 161, 168, 175, 182, 189, 196, 203})); +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_sum_gen", "[Epu8][030]") { + for (auto x : v) { + REQUIRE(equal(partial_sums_gen(x), partial_sums_ref(x))); + } +} +TEST_CASE_METHOD(Fix, "Epu8::partial_sum_round", "[Epu8][031]") { + for (auto x : v) { + REQUIRE(equal(partial_sums_round(x), partial_sums_ref(x))); + } +} +TEST_CASE_METHOD(Fix, "Epu8::partial_sum", "[Epu8][032]") { + for (auto x : v) { + REQUIRE(equal(partial_sums(x), partial_sums_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { + REQUIRE(horiz_max_ref(zero) == 0); + REQUIRE(horiz_max_ref(P01) == 1); + REQUIRE(horiz_max_ref(epu8id) == 15); + REQUIRE(horiz_max_ref(P10) == 1); + REQUIRE(horiz_max_ref(P11) == 1); + REQUIRE(horiz_max_ref(P1) == 1); + REQUIRE(horiz_max_ref(P112) == 2); + REQUIRE(horiz_max_ref(Pa1) == 7); + REQUIRE(horiz_max_ref(Pa2) == 9); + REQUIRE(horiz_max_ref(P51) == 6); + REQUIRE(horiz_max_ref(Pv) == 15); + REQUIRE(horiz_max_ref(P5) == 5); + REQUIRE(horiz_max_ref(epu8rev) == 15); + REQUIRE(horiz_max_ref(Pc) == 43); +} + +// TODO uncomment, compiler complains that Epu8 is an unknown type!? 
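The commented-out TEST_AGREES invocations above fail because the macro added in tests/test_main.hpp further down in this patch loops `for (type p : vct)` and calls `p.fun()`, so its `type` argument must be an actual class with member functions. horiz_sum_ref, horiz_sum_gen, horiz_sum4, horiz_sum3 and horiz_sum are free functions taking an epu8, and `Epu8` (used in these tests as a constant builder, e.g. Epu8({0, 1}, 0)) does not name a type, which is presumably what the compiler complaint refers to. A free-function variant of the macro would sidestep this; the sketch below is hypothetical, and the name TEST_AGREES_FUN is invented here, not part of the patch.

// Hypothetical sketch: agreement test for free functions taking an epu8.
#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags)                          \
    TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) {                        \
        for (auto x : vct) {                                                   \
            REQUIRE(fun(x) == ref(x));                                         \
        }                                                                      \
    }

// An epu8-valued analogue (for the eval16 family) would compare with the
// library's equal() helper instead, mirroring TEST_AGREES_EPU8:
// REQUIRE(equal(fun(x), ref(x))).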
+// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") +// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max4, v, "[Epu8][035]") +// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max3, v, "[Epu8][036]") +// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max, v, "[Epu8][037]") + +TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { + REQUIRE(equal(partial_max_ref(zero), zero)); + REQUIRE(equal(partial_max_ref(P01), Epu8({0}, 1))); + REQUIRE(equal(partial_max_ref(epu8id), epu8id)); + REQUIRE(equal(partial_max_ref(P10), P1)); + REQUIRE(equal(partial_max_ref(P11), P1)); + REQUIRE(equal(partial_max_ref(P1), P1)); + REQUIRE(equal(partial_max_ref(P112), P112)); + REQUIRE(equal(partial_max_ref(Pa1), Epu8({4, 4, 5, 5, 5}, 7))); + REQUIRE(equal(partial_max_ref(Pa2), Epu8({4, 4, 5, 5, 5}, 9))); + REQUIRE(equal(partial_max_ref(P51), Epu8({5, 5}, 6))); + REQUIRE(equal(partial_max_ref(Pv), epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, + 12, 12, 12, 13, 14, 15})); + REQUIRE(equal(partial_max_ref(P5), P5)); + REQUIRE(equal(partial_max_ref(epu8rev), Epu8({}, 15))); + REQUIRE(equal(partial_max_ref(Pc), Epu8({23, 23, 23, 23}, 43))); +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_max_gen", "[Epu8][039]") { + for (auto x : v) { + REQUIRE(equal(partial_max_gen(x), partial_max_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_max_round", "[Epu8][040]") { + for (auto x : v) { + REQUIRE(equal(partial_max_round(x), partial_max_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_max", "[Epu8][041]") { + for (auto x : v) { + REQUIRE(equal(partial_max(x), partial_max_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { + REQUIRE(horiz_min_ref(zero) == 0); + REQUIRE(horiz_min_ref(P01) == 0); + REQUIRE(horiz_min_ref(epu8id) == 0); + REQUIRE(horiz_min_ref(P10) == 0); + REQUIRE(horiz_min_ref(P11) == 0); + REQUIRE(horiz_min_ref(P1) == 1); + REQUIRE(horiz_min_ref(P112) == 1); + REQUIRE(horiz_min_ref(Pa1) == 1); + REQUIRE(horiz_min_ref(Pa2) == 1); + REQUIRE(horiz_min_ref(P51) == 1); + REQUIRE(horiz_min_ref(Pv) == 0); + REQUIRE(horiz_min_ref(P5) == 5); + REQUIRE(horiz_min_ref(epu8rev) == 0); + REQUIRE(horiz_min_ref(Pc) == 5); +} +// TODO uncomment +// TEST_AGREES(horiz_min_ref, horiz_min_gen) +// TEST_AGREES(horiz_min_ref, horiz_min4) +// TEST_AGREES(horiz_min_ref, horiz_min3) +// TEST_AGREES(horiz_min_ref, horiz_min) + +TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { + REQUIRE(equal(partial_min_ref(zero), zero)); + REQUIRE(equal(partial_min_ref(P01), zero)); + REQUIRE(equal(partial_min_ref(epu8id), zero)); + REQUIRE(equal(partial_min_ref(P10), P10)); + REQUIRE(equal(partial_min_ref(P11), P11)); + REQUIRE(equal(partial_min_ref(P1), P1)); + REQUIRE(equal(partial_min_ref(P112), P1)); + REQUIRE(equal(partial_min_ref(Pa1), Epu8({4, 2, 2}, 1))); + REQUIRE(equal(partial_min_ref(Pa2), Epu8({4, 2, 2}, 1))); + REQUIRE(equal(partial_min_ref(P51), Epu8({5}, 1))); + REQUIRE(equal(partial_min_ref(Pv), Epu8( + { + 5, + 5, + 2, + 2, + 1, + 1, + 1, + 1, + }, + 0))); + REQUIRE(equal(partial_min_ref(P5), P5)); + REQUIRE(equal(partial_min_ref(epu8rev), epu8rev)); + REQUIRE(equal(partial_min_ref(Pc), Epu8({23}, 5))); +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_min_gen", "[Epu8][044]") { + for (auto x : v) { + REQUIRE(equal(partial_min_gen(x), partial_min_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_min_round", "[Epu8][045]") { + for (auto x : v) { + REQUIRE(equal(partial_min_round(x), partial_min_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::partial_min", 
"[Epu8][046]") { + for (auto x : v) { + REQUIRE(equal(partial_min(x), partial_min_ref(x))); + } +} + +TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { + REQUIRE(equal(eval16_ref(zero), Epu8({16}, 0))); + REQUIRE(equal(eval16_ref(P01), Epu8({15, 1}, 0))); + REQUIRE(equal(eval16_ref(epu8id), Epu8({}, 1))); + REQUIRE(equal(eval16_ref(P10), Epu8({15, 1}, 0))); + REQUIRE(equal(eval16_ref(P11), Epu8({14, 2}, 0))); + REQUIRE(equal(eval16_ref(P1), Epu8({0, 16}, 0))); + REQUIRE(equal(eval16_ref(P112), Epu8({0, 2, 14}, 0))); + REQUIRE(equal(eval16_ref(Pa1), Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0))); + REQUIRE(equal(eval16_ref(Pa2), Epu8({0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0))); + REQUIRE(equal(eval16_ref(P51), Epu8({0, 1, 0, 0, 0, 1, 14}, 0))); + REQUIRE(equal(eval16_ref(Pv), + epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); + REQUIRE(equal(eval16_ref(P5), Epu8({0, 0, 0, 0, 0, 16}, 0))); + REQUIRE(equal(eval16_ref(epu8rev), Epu8({}, 1))); + REQUIRE(equal(eval16_ref(Pc), Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); +} +// TODO uncomment +// TEST_Epu8::AGREES(eval16_ref, eval16_cycle, "[Epu8][000]") +// TEST_Epu8::AGREES(eval16_ref, eval16_popcount, "[Epu8][000]") +// TEST_Epu8::AGREES(eval16_ref, eval16_arr, "[Epu8][000]") +// TEST_Epu8::AGREES(eval16_ref, eval16_gen, "[Epu8][000]") +// TEST_Epu8::AGREES(eval16_ref, eval16, "[Epu8][000]") + +TEST_CASE("Epu8::popcount4", "[Epu8][048]") { + REQUIRE( + equal(popcount4, epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); +} + +TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { + REQUIRE(equal(popcount16(Pv), + epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); + REQUIRE(equal(popcount16(RP), + epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + REQUIRE(equal(popcount16(RP << 1), + epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + REQUIRE(equal(popcount16(RP << 2), + epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + REQUIRE(equal(popcount16(Epu8({0, 1, 5, 0xff, 0xf0, 0x35}, 0x0f)), + Epu8({0, 1, 2, 8}, 4))); +} + +TEST_CASE("random_epu8", "[Epu8][050]") { + for (int i = 0; i < 10; i++) { epu8 r = random_epu8(255); - EPU8_EQUAL(r, r); + REQUIRE(equal(r, r)); } } -//****************************************************************************// -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(EPU8_PermTransf16_test) -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(IsPTransf, Fix) { - BOOST_TEST(is_partial_transformation(zero)); - BOOST_TEST(is_partial_transformation(P01)); - BOOST_TEST(is_partial_transformation(P10)); - BOOST_TEST(not is_partial_transformation(Epu8({16}, 0))); - BOOST_TEST(is_partial_transformation(Epu8({}, 0xff))); - BOOST_TEST(is_partial_transformation(Epu8({2, 0xff, 3}, 0))); - - BOOST_TEST(not is_partial_transformation(zero, 15)); - BOOST_TEST(is_partial_transformation(Pa)); - BOOST_TEST(is_partial_transformation(Pa, 6)); - BOOST_TEST(is_partial_transformation(Pa, 5)); - BOOST_TEST(not is_partial_transformation(Pa, 4)); - BOOST_TEST(not is_partial_transformation(Pa, 1)); - BOOST_TEST(not is_partial_transformation(Pa, 0)); - - BOOST_TEST(is_partial_transformation(RP)); - BOOST_TEST(is_partial_transformation(RP, 16)); - BOOST_TEST(not is_partial_transformation(RP, 15)); - BOOST_TEST(is_partial_transformation(Epu8({1,2,1,0xFF,0,5,0xFF,2}, 0))); - BOOST_TEST(not 
is_partial_transformation(Epu8({1,2,1,0xFF,0,16,0xFF,2}, 0))); -} - -BOOST_FIXTURE_TEST_CASE(IsTransf, Fix) { - BOOST_TEST(is_transformation(zero)); - BOOST_TEST(is_transformation(P01)); - BOOST_TEST(is_transformation(P10)); - BOOST_TEST(not is_transformation(Epu8({16}, 0))); - BOOST_TEST(not is_transformation(Epu8({}, 0xff))); - BOOST_TEST(not is_transformation(Epu8({2, 0xff, 3}, 0))); - - BOOST_TEST(not is_transformation(zero, 15)); - BOOST_TEST(is_transformation(Pa)); - BOOST_TEST(is_transformation(Pa, 6)); - BOOST_TEST(is_transformation(Pa, 5)); - BOOST_TEST(not is_transformation(Pa, 4)); - BOOST_TEST(not is_transformation(Pa, 1)); - BOOST_TEST(not is_transformation(Pa, 0)); - - BOOST_TEST(is_transformation(RP)); - BOOST_TEST(is_transformation(RP, 16)); - BOOST_TEST(not is_transformation(RP, 15)); -} - -BOOST_FIXTURE_TEST_CASE(IsPPerm, Fix) { - BOOST_TEST(not is_partial_permutation(zero)); - BOOST_TEST(not is_partial_permutation(P01)); - BOOST_TEST(not is_partial_permutation(P10)); - BOOST_TEST(not is_partial_permutation(Epu8({16}, 0))); - BOOST_TEST(is_partial_permutation(Epu8({}, 0xff))); - BOOST_TEST(not is_partial_permutation(Epu8({2, 0xff, 3}, 0))); - BOOST_TEST(is_partial_permutation(Epu8({2, 0xff, 3}, 0xff))); - - BOOST_TEST(not is_partial_permutation(zero, 15)); - BOOST_TEST(is_partial_permutation(Pa)); - BOOST_TEST(is_partial_permutation(Pa, 6)); - BOOST_TEST(is_partial_permutation(Pa, 5)); - BOOST_TEST(not is_partial_permutation(Pa, 4)); - BOOST_TEST(not is_partial_permutation(Pa, 1)); - BOOST_TEST(not is_partial_permutation(Pa, 0)); - - BOOST_TEST(is_partial_permutation(RP)); - BOOST_TEST(is_partial_permutation(RP, 16)); - BOOST_TEST(not is_partial_permutation(RP, 15)); - - BOOST_TEST(is_partial_permutation( - epu8 {1,2,0xFF,0xFF,0,5,0xFF,3,8,9,10,11,12,13,14,15})); - BOOST_TEST(not is_partial_permutation( - epu8 {1,2,1,0xFF,0,5,0xFF,2,8,9,10,11,12,13,14,15})); - BOOST_TEST(not is_partial_permutation(Epu8({1,2,1,0xFF,0,5,0xFF,2}, 0))); - BOOST_TEST(not is_partial_permutation(Epu8({1,2,1,0xFF,0,16,0xFF,2}, 0))); -} - -BOOST_FIXTURE_TEST_CASE(IsPerm, Fix) { - BOOST_TEST(not is_permutation(zero)); - BOOST_TEST(not is_permutation(P01)); - BOOST_TEST(not is_permutation(P10)); - BOOST_TEST(not is_permutation(Epu8({16}, 0))); - BOOST_TEST(not is_permutation(Epu8({}, 0xff))); - BOOST_TEST(not is_permutation(Epu8({2, 0xff, 3}, 0))); - - BOOST_TEST(not is_permutation(zero, 15)); - BOOST_TEST(is_permutation(Pa)); - BOOST_TEST(is_permutation(Pa, 6)); - BOOST_TEST(is_permutation(Pa, 5)); - BOOST_TEST(not is_permutation(Pa, 4)); - BOOST_TEST(not is_permutation(Pa, 1)); - BOOST_TEST(not is_permutation(Pa, 0)); - - BOOST_TEST(is_permutation(RP)); - BOOST_TEST(is_permutation(RP, 16)); - BOOST_TEST(not is_permutation(RP, 15)); -} - -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// + +TEST_CASE_METHOD(Fix, "is_partial_transformation", "[Epu8][051]") { + REQUIRE(is_partial_transformation(zero)); + REQUIRE(is_partial_transformation(P01)); + REQUIRE(is_partial_transformation(P10)); + REQUIRE(!is_partial_transformation(Epu8({16}, 0))); + REQUIRE(is_partial_transformation(Epu8({}, 0xff))); + REQUIRE(is_partial_transformation(Epu8({2, 0xff, 3}, 0))); + + REQUIRE(!is_partial_transformation(zero, 15)); + REQUIRE(is_partial_transformation(Pa)); + REQUIRE(is_partial_transformation(Pa, 6)); + REQUIRE(is_partial_transformation(Pa, 5)); + REQUIRE(!is_partial_transformation(Pa, 4)); + REQUIRE(!is_partial_transformation(Pa, 1)); + 
REQUIRE(!is_partial_transformation(Pa, 0)); + + REQUIRE(is_partial_transformation(RP)); + REQUIRE(is_partial_transformation(RP, 16)); + REQUIRE(!is_partial_transformation(RP, 15)); + REQUIRE(is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); + REQUIRE( + !is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); +} + +TEST_CASE_METHOD(Fix, "is_transformation", "[Epu8][052]") { + REQUIRE(is_transformation(zero)); + REQUIRE(is_transformation(P01)); + REQUIRE(is_transformation(P10)); + REQUIRE(!is_transformation(Epu8({16}, 0))); + REQUIRE(!is_transformation(Epu8({}, 0xff))); + REQUIRE(!is_transformation(Epu8({2, 0xff, 3}, 0))); + + REQUIRE(!is_transformation(zero, 15)); + REQUIRE(is_transformation(Pa)); + REQUIRE(is_transformation(Pa, 6)); + REQUIRE(is_transformation(Pa, 5)); + REQUIRE(!is_transformation(Pa, 4)); + REQUIRE(!is_transformation(Pa, 1)); + REQUIRE(!is_transformation(Pa, 0)); + + REQUIRE(is_transformation(RP)); + REQUIRE(is_transformation(RP, 16)); + REQUIRE(!is_transformation(RP, 15)); +} + +TEST_CASE_METHOD(Fix, "is_partial_permutation", "[Epu8][053]") { + REQUIRE(!is_partial_permutation(zero)); + REQUIRE(!is_partial_permutation(P01)); + REQUIRE(!is_partial_permutation(P10)); + REQUIRE(!is_partial_permutation(Epu8({16}, 0))); + REQUIRE(is_partial_permutation(Epu8({}, 0xff))); + REQUIRE(!is_partial_permutation(Epu8({2, 0xff, 3}, 0))); + REQUIRE(is_partial_permutation(Epu8({2, 0xff, 3}, 0xff))); + + REQUIRE(!is_partial_permutation(zero, 15)); + REQUIRE(is_partial_permutation(Pa)); + REQUIRE(is_partial_permutation(Pa, 6)); + REQUIRE(is_partial_permutation(Pa, 5)); + REQUIRE(!is_partial_permutation(Pa, 4)); + REQUIRE(!is_partial_permutation(Pa, 1)); + REQUIRE(!is_partial_permutation(Pa, 0)); + + REQUIRE(is_partial_permutation(RP)); + REQUIRE(is_partial_permutation(RP, 16)); + REQUIRE(!is_partial_permutation(RP, 15)); + + REQUIRE(is_partial_permutation( + epu8{1, 2, 0xFF, 0xFF, 0, 5, 0xFF, 3, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE(!is_partial_permutation( + epu8{1, 2, 1, 0xFF, 0, 5, 0xFF, 2, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); + REQUIRE(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); +} + +TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { + REQUIRE(!is_permutation(zero)); + REQUIRE(!is_permutation(P01)); + REQUIRE(!is_permutation(P10)); + REQUIRE(!is_permutation(Epu8({16}, 0))); + REQUIRE(!is_permutation(Epu8({}, 0xff))); + REQUIRE(!is_permutation(Epu8({2, 0xff, 3}, 0))); + + REQUIRE(!is_permutation(zero, 15)); + REQUIRE(is_permutation(Pa)); + REQUIRE(is_permutation(Pa, 6)); + REQUIRE(is_permutation(Pa, 5)); + REQUIRE(!is_permutation(Pa, 4)); + REQUIRE(!is_permutation(Pa, 1)); + REQUIRE(!is_permutation(Pa, 0)); + + REQUIRE(is_permutation(RP)); + REQUIRE(is_permutation(RP, 16)); + REQUIRE(!is_permutation(RP, 15)); +} +} // namespace HPCombi diff --git a/tests/test_main.cpp b/tests/test_main.cpp new file mode 100644 index 00000000..b1673b8f --- /dev/null +++ b/tests/test_main.cpp @@ -0,0 +1,38 @@ +//****************************************************************************// +// Copyright (C) 2023 James D. Mitchell // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU //
+// General Public License for more details. //
+// //
+// The full text of the GPL is available at: //
+// //
+// http://www.gnu.org/licenses/ //
+//****************************************************************************//
+
+#include 
+
+#include 
+#include 
+#include 
+
+struct HPCombiListener : Catch::EventListenerBase {
+    using EventListenerBase::EventListenerBase; // inherit constructor
+
+    void testCaseStarting(Catch::TestCaseInfo const &testInfo) override {
+        std::cout << testInfo.tagsAsString() << " " << testInfo.name
+                  << std::endl;
+    }
+    void testCaseEnded(Catch::TestCaseStats const &testInfo) override {}
+    void sectionStarting(Catch::SectionInfo const &sectionStats) override {}
+    void sectionEnded(Catch::SectionStats const &sectionStats) override {}
+    void testCasePartialStarting(Catch::TestCaseInfo const &testInfo,
+                                 uint64_t partNumber) override {}
+    void testCasePartialEnded(Catch::TestCaseStats const &testCaseStats,
+                              uint64_t partNumber) override {}
+};
+
+CATCH_REGISTER_LISTENER(HPCombiListener)
diff --git a/tests/test_main.hpp b/tests/test_main.hpp
new file mode 100644
index 00000000..6328ba80
--- /dev/null
+++ b/tests/test_main.hpp
@@ -0,0 +1,42 @@
+//****************************************************************************//
+// Copyright (C) 2023 James D. Mitchell //
+// //
+// Distributed under the terms of the GNU General Public License (GPL) //
+// //
+// This code is distributed in the hope that it will be useful, //
+// but WITHOUT ANY WARRANTY; without even the implied warranty of //
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
+// General Public License for more details. //
+// //
+// The full text of the GPL is available at: //
+// //
+// http://www.gnu.org/licenses/ //
+//****************************************************************************//
+
+#ifndef HPCOMBI_TESTS_TEST_MAIN_HPP_
+#define HPCOMBI_TESTS_TEST_MAIN_HPP_
+
+#define TEST_AGREES(fixture, type, ref, fun, vct, tags) \
+    TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \
+        for (type p : vct) { \
+            REQUIRE(p.fun() == p.ref()); \
+        } \
+    }
+
+#define TEST_AGREES2(fixture, type, ref, fun, vct, tags) \
+    TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \
+        for (type p1 : vct) { \
+            for (type p2 : vct) { \
+                REQUIRE(p1.fun(p2) == p1.ref(p2)); \
+            } \
+        } \
+    }
+
+#define TEST_AGREES_EPU8(fixture, type, ref, fun, vct, tags) \
+    TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \
+        for (type p : vct) { \
+            REQUIRE(equal(p.fun(), p.ref())); \
+        } \
+    }
+
+#endif // HPCOMBI_TESTS_TEST_MAIN_HPP_
diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp
index a976887d..9acea11c 100644
--- a/tests/test_perm16.cpp
+++ b/tests/test_perm16.cpp
@@ -13,51 +13,29 @@
 /* http://www.gnu.org/licenses/ */
 /******************************************************************************/
 
-#define BOOST_TEST_MODULE Perm16Tests
-
 #include "perm16.hpp"
-#include 
-
-using HPCombi::epu8;
-using HPCombi::equal;
-using HPCombi::Epu8;
-using HPCombi::is_partial_transformation;
-using HPCombi::is_transformation;
-using HPCombi::is_permutation;
-using HPCombi::PTransf16;
-using HPCombi::Transf16;
-using HPCombi::PPerm16;
-using HPCombi::Perm16;
+#include "test_main.hpp"
+#include 
 
+namespace HPCombi {
 const uint8_t FF = 0xff;
 
-#define EPU8_EQUAL(p1, p2) BOOST_CHECK_PREDICATE(equal, (p1)(p2))
-#define EPU8_NOT_EQUAL(p1, p2) BOOST_CHECK_PREDICATE(boost::not2(equal), (p1)(p2))
-
-#define TEST_AGREES(type, ref, fun, vct) 
\ - BOOST_FIXTURE_TEST_CASE(type##_agrees_##fun, Fix) { \ - for (type p : vct) BOOST_TEST(p.fun() == p.ref()); \ - } -#define TEST_EPU8_AGREES(type, ref, fun, vct) \ - BOOST_FIXTURE_TEST_CASE(type##_agrees_##fun, Fix) { \ - for (type p : vct) EPU8_EQUAL(p.fun(), p.ref()); \ - } - -std::vector all_perms(uint8_t sz){ - std::vector res {}; +namespace { +std::vector all_perms(uint8_t sz) { + std::vector res{}; epu8 x = HPCombi::epu8id; res.push_back(x); - auto & refx = HPCombi::as_array(x); - while (std::next_permutation(refx.begin(), refx.begin()+sz)) { + auto &refx = HPCombi::as_array(x); + while (std::next_permutation(refx.begin(), refx.begin() + sz)) { res.push_back(x); } return res; }; std::vector all_pperms(std::vector perms, - std::vector masks){ - std::vector res {}; + std::vector masks) { + std::vector res{}; for (epu8 mask : masks) { for (Perm16 p : perms) { res.push_back(p.v | mask); @@ -66,28 +44,21 @@ std::vector all_pperms(std::vector perms, return res; } -struct Fix { - Fix() : zero(Epu8({}, 0)), - P01(Epu8({0, 1}, 0)), - P10(Epu8({1, 0}, 0)), - P11(Epu8({1, 1}, 0)), - P1(Epu8({}, 1)), - RandT({3, 1, 0, 14, 15, 13, 5, 10, 2, 11, 6, 12, 7, 4, 8, 9}), - PPa({1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - PPb({1, 2, 3, 6, 0, 5, 4, 7, 8, 9, 10, 11, 12, 15, 14, 13}), - RandPerm(RandT), - Tlist({zero, P01, P10, P11, P1, RandT, epu8(PPa), epu8(PPb)}), - PlistSmall(all_perms(6)), Plist(all_perms(9)), - PPmasks({ - Epu8(0), Epu8(FF), Epu8({0}, FF), Epu8({0, 0}, FF), - Epu8({0, FF, 0}, FF), Epu8({0, FF, 0}, 0), - Epu8({0, FF, 0, FF, 0, 0, 0, FF, FF}, 0) - }), - PPlist(all_pperms(PlistSmall, PPmasks)) - { - BOOST_TEST_MESSAGE("setup fixture"); - } - ~Fix() { BOOST_TEST_MESSAGE("teardown fixture"); } +struct Perm16Fixture { + Perm16Fixture() + : zero(Epu8({}, 0)), P01(Epu8({0, 1}, 0)), P10(Epu8({1, 0}, 0)), + P11(Epu8({1, 1}, 0)), P1(Epu8({}, 1)), + RandT({3, 1, 0, 14, 15, 13, 5, 10, 2, 11, 6, 12, 7, 4, 8, 9}), + PPa({1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), + PPb({1, 2, 3, 6, 0, 5, 4, 7, 8, 9, 10, 11, 12, 15, 14, 13}), + RandPerm(RandT), + Tlist({zero, P01, P10, P11, P1, RandT, epu8(PPa), epu8(PPb)}), + PlistSmall(all_perms(6)), Plist(all_perms(9)), + PPmasks({Epu8(0), Epu8(FF), Epu8({0}, FF), Epu8({0, 0}, FF), + Epu8({0, FF, 0}, FF), Epu8({0, FF, 0}, 0), + Epu8({0, FF, 0, FF, 0, 0, 0, FF, FF}, 0)}), + PPlist(all_pperms(PlistSmall, PPmasks)) {} + ~Perm16Fixture() = default; const Transf16 zero, P01, P10, P11, P1, RandT; const Perm16 PPa, PPb, RandPerm; @@ -96,424 +67,442 @@ struct Fix { const std::vector PPmasks; const std::vector PPlist; }; +} // namespace - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(PTransf16_test) -//****************************************************************************// - -BOOST_AUTO_TEST_CASE(PTransf16_constructor) { +TEST_CASE("PTransf16::PTransf16", "[PTransf16][000]") { const uint8_t FF = 0xff; - BOOST_TEST(PTransf16({}) == PTransf16::one()); - BOOST_TEST(PTransf16({0,1,2,3}) == PTransf16::one()); - BOOST_TEST(PTransf16({1,0}) == PTransf16({1,0,2})); - BOOST_TEST(PTransf16({2}) == PTransf16({2,1,2})); - BOOST_TEST(PTransf16({4, 5, 0}, {9, 0, 1}) == - PTransf16({ 1,FF,FF,FF, 9, 0,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF})); - BOOST_TEST(PTransf16({4, 5, 0, 8}, {9, 0, 1, 2}) == - PTransf16({ 1,FF,FF,FF, 9, 0,FF,FF,2,FF,FF,FF,FF,FF,FF,FF})); - BOOST_TEST(PTransf16({4, 5, 0, 8}, {9, 0, 2, 2}) == - PTransf16({ 2,FF,FF,FF, 9, 0,FF,FF,2,FF,FF,FF,FF,FF,FF,FF})); -} - - 
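Read together, the assertions in the constructor test above pin down the two-argument form: PTransf16(dom, img) builds the partial transformation sending dom[i] to img[i] and leaving every other point undefined, with 0xff (FF) as the undefined marker. This reading is inferred from the test data itself rather than from separate documentation; the snippet below merely restates the first two-argument assertion as a worked example.

// Worked example: domain {4, 5, 0} with image {9, 0, 1} means 4 -> 9, 5 -> 0
// and 0 -> 1, so entry 0 of the result is 1, entry 4 is 9, entry 5 is 0, and
// every remaining entry is FF (undefined).
PTransf16 t({4, 5, 0}, {9, 0, 1});
// t == PTransf16({1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})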
-BOOST_AUTO_TEST_CASE(PTransf16_hash) { - BOOST_TEST(std::hash()(PTransf16::one()) != 0); - BOOST_TEST(std::hash()(PTransf16(Epu8(1))) != 0); - BOOST_TEST(std::hash()(PTransf16({4, 5, 0}, {9, 0, 1})) != 0); -} - - -BOOST_AUTO_TEST_CASE(PTransf16_image_mask) { - EPU8_EQUAL(PTransf16({}).image_mask(), Epu8(FF)); - EPU8_EQUAL(PTransf16({}).image_mask(false), Epu8(FF)); - EPU8_EQUAL(PTransf16({}).image_mask(true), Epu8(0)); - EPU8_EQUAL(PTransf16({4,4,4,4}).image_mask(), Epu8({0,0,0,0}, FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).image_mask(false), Epu8({0,0,0,0}, FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).image_mask(true), Epu8({FF,FF,FF,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8(1)).image_mask(), Epu8({0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8(2)).image_mask(), Epu8({0,0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({2,2,2,0xf},2)).image_mask(), - Epu8({0,0,FF,0,0,0,0,0,0,0,0,0,0,0,0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).image_mask(), - Epu8({FF,0,FF,0,0,FF,0,0,0,0,0,0,0,0,0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).image_mask(false), - Epu8({FF,0,FF,0,0,FF,0,0,0,0,0,0,0,0,0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).image_mask(true), - Epu8({0,FF,0,FF,FF,0,FF,FF,FF,FF,FF,FF,FF,FF,FF,0}, 0)); -} - -BOOST_AUTO_TEST_CASE(PTransf16_left_one) { - BOOST_TEST(PTransf16({}).left_one() == PTransf16::one()); - BOOST_TEST(PTransf16({4,4,4,4}).left_one() == PTransf16({FF,FF,FF,FF})); - BOOST_TEST(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF,1}, FF))); - BOOST_TEST(PTransf16(Epu8(2)).left_one() == PTransf16(Epu8({FF,FF,2}, FF))); - BOOST_TEST(PTransf16(Epu8({2,2,2,0xf},2)).left_one() == - PTransf16({FF,FF,2,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,15})); - BOOST_TEST(PTransf16(Epu8({FF,2,2,0xf},FF)).left_one() == - PTransf16({FF,FF,2,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,15})); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).left_one() == - PTransf16({0,FF,2,FF,FF,5,FF,FF,FF,FF,FF,FF,FF,FF,FF,15})); - BOOST_TEST(PTransf16(Epu8({0,2,FF,0xf,2,FF,2,FF,5}, FF)).left_one() == - PTransf16({0,FF,2,FF,FF,5,FF,FF,FF,FF,FF,FF,FF,FF,FF,15})); -} - -BOOST_AUTO_TEST_CASE(PTransf16_domain_mask) { - EPU8_EQUAL(PTransf16({}).domain_mask(), Epu8(FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).domain_mask(), Epu8(FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).domain_mask(false), Epu8(FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).domain_mask(true), Epu8(0)); - EPU8_EQUAL(PTransf16(Epu8(1)).domain_mask(), Epu8(FF)); - EPU8_EQUAL(PTransf16(Epu8(2)).domain_mask(), Epu8(FF)); - EPU8_EQUAL(PTransf16(Epu8({2,2,2,0xf}, FF)).domain_mask(), - Epu8({FF,FF,FF,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({FF,2,2,0xf},FF)).domain_mask(), - Epu8({0, FF, FF, FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,FF,0xf,2,FF,2,FF,5}, FF)).domain_mask(), - Epu8({FF,FF,0,FF,FF,0,FF,0,FF},0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,FF,0xf,2,FF,2,FF,5}, FF)).domain_mask(false), - Epu8({FF,FF,0,FF,FF,0,FF,0,FF},0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,FF,0xf,2,FF,2,FF,5}, FF)).domain_mask(true), - Epu8({0,0,FF,0,0,FF, 0,FF,0},FF)); -} - -BOOST_AUTO_TEST_CASE(PTransf16_right_one) { - BOOST_TEST(PTransf16({}).right_one() == PTransf16::one()); - BOOST_TEST(PTransf16({4,4,4,4}).right_one() == PTransf16::one()); - BOOST_TEST(PTransf16(Epu8(1)).right_one() == PTransf16::one()); - BOOST_TEST(PTransf16(Epu8(2)).right_one() == PTransf16::one()); - BOOST_TEST(PTransf16(Epu8({2,2,2,0xf}, FF)).right_one() == - PTransf16(Epu8({0,1,2,3}, FF))); - BOOST_TEST(PTransf16(Epu8({FF,2,2,0xf},FF)).right_one() == - PTransf16({FF, 1, 2, 
3,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF})); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).right_one() == - PTransf16::one()); - BOOST_TEST(PTransf16(Epu8({0,2,FF,0xf,2,FF,2,FF,5}, FF)).right_one() == - PTransf16({0,1,FF,3,4,FF, 6,FF,8,FF,FF,FF,FF,FF,FF,FF})); -} - - -BOOST_AUTO_TEST_CASE(PTransf16_rank_ref) { - BOOST_TEST(PTransf16({}).rank_ref() == 16); - BOOST_TEST(PTransf16({4,4,4,4}).rank() == 12); - BOOST_TEST(PTransf16({1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}).rank_ref() == 1); - BOOST_TEST(PTransf16({2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}).rank_ref() == 1); - BOOST_TEST(PTransf16({2,2,2,0xf,2,2,2,2,2,2,2,2,2,2,2,2}).rank_ref() == 2); - BOOST_TEST(PTransf16({0,2,2,0xf,2,2,2,2,5,2,2,2,2,2,2,2}).rank_ref() == 4); - BOOST_TEST(PTransf16({1,1,1,FF,1,1,FF,1,1,FF,1,FF,1,1,1,1}).rank_ref() == 1); - BOOST_TEST(PTransf16({2,2,2,2,2,FF,2,2,2,FF,2,2,2,FF,2,2}).rank_ref() == 1); - BOOST_TEST(PTransf16({2,2,2,0xf,2,FF,2,2,2,2,2,2,2,2,2,2}).rank_ref() == 2); - BOOST_TEST(PTransf16({0,2,2,0xf,2,2,FF,2,5,2,FF,2,2,2,2,2}).rank_ref() == 4); -} - -BOOST_AUTO_TEST_CASE(PTransf16_rank) { - BOOST_TEST(PTransf16({}).rank() == 16); - BOOST_TEST(PTransf16({4,4,4,4}).rank() == 12); - BOOST_TEST(PTransf16({1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}).rank() == 1); - BOOST_TEST(PTransf16({2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2}).rank() == 1); - BOOST_TEST(PTransf16({2,2,2,0xf,2,2,2,2,2,2,2,2,2,2,2,2}).rank() == 2); - BOOST_TEST(PTransf16({0,2,2,0xf,2,2,2,2,5,2,2,2,2,2,2,2}).rank() == 4); -} - -BOOST_AUTO_TEST_CASE(PTransf16_fix_points_mask) { - EPU8_EQUAL(PTransf16({}).fix_points_mask(), Epu8(FF)); - EPU8_EQUAL(PTransf16({}).fix_points_mask(false), Epu8(FF)); - EPU8_EQUAL(PTransf16({}).fix_points_mask(true), Epu8(0)); - EPU8_EQUAL(PTransf16({4,4,4,4}).fix_points_mask(), Epu8({0,0,0,0}, FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).fix_points_mask(false), Epu8({0,0,0,0}, FF)); - EPU8_EQUAL(PTransf16({4,4,4,4}).fix_points_mask(true), Epu8({FF,FF,FF,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8(1)).fix_points_mask(), Epu8({0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8(2)).fix_points_mask(), Epu8({0,0,FF}, 0)); - EPU8_EQUAL(PTransf16(Epu8({2,2,2,0xf},7)).fix_points_mask(), - Epu8({0,0,FF,0,0,0,0,FF,0,0,0,0,0,0,0,0}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,2,0xf,2,2,2,14,5,2}, 2)).fix_points_mask(), - Epu8({FF,0,FF,0,0,0,0,0,0,0,0,0,0,0,0,0}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,8,2}, 14)).fix_points_mask(false), - Epu8({FF,0,FF,0,0,0,0,0,FF,0,0,0,0,0,FF,0}, 0)); - EPU8_EQUAL(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).fix_points_mask(true), - Epu8({0,FF,0},FF)); -} -BOOST_AUTO_TEST_CASE(PTransf16_fix_points_bitset) { - BOOST_TEST(PTransf16({}).fix_points_bitset() == 0xFFFF); - BOOST_TEST(PTransf16({}).fix_points_bitset(false) == 0xFFFF); - BOOST_TEST(PTransf16({}).fix_points_bitset(true) == 0); - BOOST_TEST(PTransf16({4,4,4,4}).fix_points_bitset() == 0xFFF0); - BOOST_TEST(PTransf16({4,4,4,4}).fix_points_bitset(false) == 0xFFF0); - BOOST_TEST(PTransf16({4,4,4,4}).fix_points_bitset(true) == 0x000F); - BOOST_TEST(PTransf16(Epu8(1)).fix_points_bitset() == 0x0002); - BOOST_TEST(PTransf16(Epu8(2)).fix_points_bitset() == 0x0004); - BOOST_TEST(PTransf16(Epu8({2,2,2,0xf},7)).fix_points_bitset() == 0x0084); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,14,5,2}, 2)).fix_points_bitset() - == 0x5); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,8,2}, 14)).fix_points_bitset(false) - == 0x4105); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,5,2}, 2)).fix_points_bitset(true) - == 0xFFFA); -} -BOOST_AUTO_TEST_CASE(PTransf16_nb_fix_points) { - 
BOOST_TEST(PTransf16({}).nb_fix_points() == 16); - BOOST_TEST(PTransf16({4,4,4,4}).nb_fix_points() == 12); - BOOST_TEST(PTransf16(Epu8(1)).nb_fix_points() == 1); - BOOST_TEST(PTransf16(Epu8(2)).nb_fix_points() == 1); - BOOST_TEST(PTransf16(Epu8({2,2,2,0xf},7)).nb_fix_points() == 2); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,14,5,2}, 2)).nb_fix_points() - == 2); - BOOST_TEST(PTransf16(Epu8({0,2,2,0xf,2,2,2,2,8,2}, 14)).nb_fix_points() - == 4); - -} - -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(Transf16_test) -//****************************************************************************// - -BOOST_FIXTURE_TEST_CASE(Transf16OperatorUInt64, Fix) { - BOOST_TEST(static_cast(Transf16::one()) == 0xf7e6d5c4b3a29180); - BOOST_TEST(static_cast(zero) == 0x0); - BOOST_TEST(static_cast(P10) == 0x1); - BOOST_TEST(static_cast(P01) == 0x100); - BOOST_TEST(static_cast(P11) == 0x101); - BOOST_TEST(static_cast(P1) == 0x1111111111111111); - BOOST_TEST(static_cast(RandT) == 0x9a854d7fce60b123); -} - -BOOST_FIXTURE_TEST_CASE(Transf16ConstrUInt64, Fix) { - BOOST_TEST(static_cast(0x0) == zero); - BOOST_TEST(static_cast(0x1) == P10); - BOOST_TEST(static_cast(0x100) == P01); - for (auto p : Tlist) - BOOST_TEST(static_cast(static_cast(p)) == p); + REQUIRE(PTransf16({}) == PTransf16::one()); + REQUIRE(PTransf16({0, 1, 2, 3}) == PTransf16::one()); + REQUIRE(PTransf16({1, 0}) == PTransf16({1, 0, 2})); + REQUIRE(PTransf16({2}) == PTransf16({2, 1, 2})); + REQUIRE(PTransf16({4, 5, 0}, {9, 0, 1}) == + PTransf16( + {1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); + REQUIRE(PTransf16({4, 5, 0, 8}, {9, 0, 1, 2}) == + PTransf16( + {1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); + REQUIRE(PTransf16({4, 5, 0, 8}, {9, 0, 2, 2}) == + PTransf16( + {2, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); +} + +TEST_CASE("PTransf16::hash", "[PTransf16][001]") { + REQUIRE(std::hash()(PTransf16::one()) != 0); + REQUIRE(std::hash()(PTransf16(Epu8(1))) != 0); + REQUIRE(std::hash()(PTransf16({4, 5, 0}, {9, 0, 1})) != 0); +} + +// TODO uncomment +// TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { +// REQUIRE(equal(PTransf16({}).image_mask(), Epu8(FF)); +// REQUIRE(equal(PTransf16({}).image_mask(false), Epu8(FF)); +// REQUIRE(equal(PTransf16({}).image_mask(true), Epu8(0)); +// REQUIRE(equal(PTransf16({4, 4, 4, 4}).image_mask(), Epu8({0, 0, 0, 0}, +// FF)); REQUIRE(equal(PTransf16({4, 4, 4, 4}).image_mask(false), +// Epu8({0, 0, 0, 0}, FF)); +// REQUIRE(equal(PTransf16({4, 4, 4, 4}).image_mask(true), +// Epu8({FF, FF, FF, FF}, 0)); +// REQUIRE(equal(PTransf16(Epu8(1)).image_mask(), Epu8({0, FF}, 0)); +// REQUIRE(equal(PTransf16(Epu8(2)).image_mask(), Epu8({0, 0, FF}, 0)); +// REQUIRE(equal(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask(), +// Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0)); +// REQUIRE(equal( +// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(), +// Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0)); +// REQUIRE(equal( +// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, +// 2)).image_mask(false), Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, +// 0, 0, 0, FF}, 0)); +// REQUIRE(equal( +// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, +// 2)).image_mask(true), Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, +// FF, FF, FF, FF, 0}, 0)); +// } + +// TODO uncomment 
+// TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { +// REQUIRE(PTransf16({}).left_one() == PTransf16::one()); +// REQUIRE(PTransf16({4, 4, 4, 4}).left_one() == +// PTransf16({FF, FF, FF, FF})); +// REQUIRE(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, +// FF))); REQUIRE(PTransf16(Epu8(2)).left_one() == +// PTransf16(Epu8({FF, FF, 2}, FF))); +// REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).left_one() == +// PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, +// FF, +// FF, 15})); +// REQUIRE(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).left_one() == +// PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, +// FF, +// FF, 15})); +// REQUIRE( +// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).left_one() == +// PTransf16( +// {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15 })); +// REQUIRE( +// PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).left_one() == +// PTransf16( +// {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15 })); +// } + +TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { + REQUIRE(equal(PTransf16({}).domain_mask(), Epu8(FF))); + REQUIRE(equal(PTransf16({4, 4, 4, 4}).domain_mask(), Epu8(FF))); + REQUIRE(equal(PTransf16({4, 4, 4, 4}).domain_mask(false), Epu8(FF))); + REQUIRE(equal(PTransf16({4, 4, 4, 4}).domain_mask(true), Epu8(0))); + REQUIRE(equal(PTransf16(Epu8(1)).domain_mask(), Epu8(FF))); + REQUIRE(equal(PTransf16(Epu8(2)).domain_mask(), Epu8(FF))); + REQUIRE(equal(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).domain_mask(), + Epu8({FF, FF, FF, FF}, 0))); + REQUIRE(equal(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).domain_mask(), + Epu8({0, FF, FF, FF}, 0))); + REQUIRE(equal( + PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(), + Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); + REQUIRE(equal(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) + .domain_mask(false), + Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); + REQUIRE(equal( + PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(true), + Epu8({0, 0, FF, 0, 0, FF, 0, FF, 0}, FF))); +} + +TEST_CASE("PTransf16::right_one", "[PTransf16][005]") { + REQUIRE(PTransf16({}).right_one() == PTransf16::one()); + REQUIRE(PTransf16({4, 4, 4, 4}).right_one() == PTransf16::one()); + REQUIRE(PTransf16(Epu8(1)).right_one() == PTransf16::one()); + REQUIRE(PTransf16(Epu8(2)).right_one() == PTransf16::one()); + REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).right_one() == + PTransf16(Epu8({0, 1, 2, 3}, FF))); + REQUIRE(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).right_one() == + PTransf16( + {FF, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); + REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).right_one() == + PTransf16::one()); + REQUIRE( + PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).right_one() == + PTransf16({0, 1, FF, 3, 4, FF, 6, FF, 8, FF, FF, FF, FF, FF, FF, FF})); +} + +TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { + REQUIRE(PTransf16({}).rank_ref() == 16); + REQUIRE(PTransf16({4, 4, 4, 4}).rank_ref() == 12); + REQUIRE(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) + .rank_ref() == 1); + REQUIRE(PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) + .rank_ref() == 1); + REQUIRE(PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) + .rank_ref() == 2); + REQUIRE(PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}) + .rank_ref() == 4); + REQUIRE(PTransf16({1, 1, 1, FF, 1, 1, FF, 1, 1, FF, 1, FF, 1, 1, 1, 1}) + .rank_ref() == 1); + REQUIRE(PTransf16({2, 2, 2, 2, 2, FF, 2, 2, 2, FF, 
2, 2, 2, FF, 2, 2}) + .rank_ref() == 1); + REQUIRE(PTransf16({2, 2, 2, 0xf, 2, FF, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) + .rank_ref() == 2); + REQUIRE(PTransf16({0, 2, 2, 0xf, 2, 2, FF, 2, 5, 2, FF, 2, 2, 2, 2, 2}) + .rank_ref() == 4); +} + +// TODO uncomment +// TEST_CASE("PTransf16::rank", "[PTransf16][007]") { +// REQUIRE(PTransf16({}).rank() == 16); +// REQUIRE(PTransf16({4, 4, 4, 4}).rank() == 12); +// REQUIRE( +// PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == +// 1); +// REQUIRE( +// PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == +// 1); +// REQUIRE( +// PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() +// == 2); +// REQUIRE( +// PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}).rank() +// == 4); +// } + +TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { + REQUIRE(equal(PTransf16({}).fix_points_mask(), Epu8(FF))); + REQUIRE(equal(PTransf16({}).fix_points_mask(false), Epu8(FF))); + REQUIRE(equal(PTransf16({}).fix_points_mask(true), Epu8(0))); + REQUIRE(equal(PTransf16({4, 4, 4, 4}).fix_points_mask(), + Epu8({0, 0, 0, 0}, FF))); + REQUIRE(equal(PTransf16({4, 4, 4, 4}).fix_points_mask(false), + Epu8({0, 0, 0, 0}, FF))); + REQUIRE(equal(PTransf16({4, 4, 4, 4}).fix_points_mask(true), + Epu8({FF, FF, FF, FF}, 0))); + REQUIRE(equal(PTransf16(Epu8(1)).fix_points_mask(), Epu8({0, FF}, 0))); + REQUIRE(equal(PTransf16(Epu8(2)).fix_points_mask(), Epu8({0, 0, FF}, 0))); + REQUIRE(equal(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_mask(), + Epu8({0, 0, FF, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); + REQUIRE(equal( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)).fix_points_mask(), + Epu8({FF, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); + REQUIRE( + equal(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) + .fix_points_mask(false), + Epu8({FF, 0, FF, 0, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, FF, 0}, 0))); + REQUIRE(equal(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + .fix_points_mask(true), + Epu8({0, FF, 0}, FF))); +} + +TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { + REQUIRE(PTransf16({}).fix_points_bitset() == 0xFFFF); + REQUIRE(PTransf16({}).fix_points_bitset(false) == 0xFFFF); + REQUIRE(PTransf16({}).fix_points_bitset(true) == 0); + REQUIRE(PTransf16({4, 4, 4, 4}).fix_points_bitset() == 0xFFF0); + REQUIRE(PTransf16({4, 4, 4, 4}).fix_points_bitset(false) == 0xFFF0); + REQUIRE(PTransf16({4, 4, 4, 4}).fix_points_bitset(true) == 0x000F); + REQUIRE(PTransf16(Epu8(1)).fix_points_bitset() == 0x0002); + REQUIRE(PTransf16(Epu8(2)).fix_points_bitset() == 0x0004); + REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_bitset() == 0x0084); + REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)) + .fix_points_bitset() == 0x5); + REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) + .fix_points_bitset(false) == 0x4105); + REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + .fix_points_bitset(true) == 0xFFFA); +} + +TEST_CASE("PTransf16::nb_fix_points", "[PTransf16][010]") { + REQUIRE(PTransf16({}).nb_fix_points() == 16); + REQUIRE(PTransf16({4, 4, 4, 4}).nb_fix_points() == 12); + REQUIRE(PTransf16(Epu8(1)).nb_fix_points() == 1); + REQUIRE(PTransf16(Epu8(2)).nb_fix_points() == 1); + REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).nb_fix_points() == 2); + REQUIRE( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)).nb_fix_points() == + 2); + REQUIRE( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)).nb_fix_points() == + 4); +} + 
+TEST_CASE_METHOD(Perm16Fixture, "Transf16::operator uint64", + "[Transf16][011]") { + REQUIRE(static_cast(Transf16::one()) == 0xf7e6d5c4b3a29180); + REQUIRE(static_cast(zero) == 0x0); + REQUIRE(static_cast(P10) == 0x1); + REQUIRE(static_cast(P01) == 0x100); + REQUIRE(static_cast(P11) == 0x101); + REQUIRE(static_cast(P1) == 0x1111111111111111); + REQUIRE(static_cast(RandT) == 0x9a854d7fce60b123); +} + +TEST_CASE_METHOD(Perm16Fixture, "Transf16::Transf16(uint64_t)", + "[Transf16][012]") { + REQUIRE(static_cast(0x0) == zero); + REQUIRE(static_cast(0x1) == P10); + REQUIRE(static_cast(0x100) == P01); + for (auto p : Tlist) { + REQUIRE(static_cast(static_cast(p)) == p); + } } -BOOST_FIXTURE_TEST_CASE(Transf16_hash, Fix) { - BOOST_TEST(std::hash()(Transf16::one()) != 0); - BOOST_TEST(std::hash()(Transf16(Epu8(1))) != 0); - BOOST_TEST(std::hash()(RandT) != 0); +TEST_CASE_METHOD(Perm16Fixture, "Transf16::hash", "[Transf16][013]") { + REQUIRE(std::hash()(Transf16::one()) != 0); + REQUIRE(std::hash()(Transf16(Epu8(1))) != 0); + REQUIRE(std::hash()(RandT) != 0); } -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(Perm16_constr) -//****************************************************************************// - -BOOST_FIXTURE_TEST_CASE(Perm16OperatorUInt64, Fix) { - BOOST_TEST(static_cast(Perm16::one()) == 0xf7e6d5c4b3a29180); - BOOST_TEST(static_cast(PPa) == 0xf7e6d5c0b4a39281); - BOOST_TEST(static_cast(PPb) == 0xd7e4f5c0b6a39281); - BOOST_TEST(static_cast(RandPerm) == 0x9a854d7fce60b123); +TEST_CASE_METHOD(Perm16Fixture, "Perm16::operator uint64_t", "[Perm16][014]") { + REQUIRE(static_cast(Perm16::one()) == 0xf7e6d5c4b3a29180); + REQUIRE(static_cast(PPa) == 0xf7e6d5c0b4a39281); + REQUIRE(static_cast(PPb) == 0xd7e4f5c0b6a39281); + REQUIRE(static_cast(RandPerm) == 0x9a854d7fce60b123); - for (auto p : { Perm16::one(), PPa, PPb, RandPerm }) - BOOST_TEST(static_cast(static_cast(p)) == p); + for (auto p : {Perm16::one(), PPa, PPb, RandPerm}) { + REQUIRE(static_cast(static_cast(p)) == p); + } } - -BOOST_AUTO_TEST_CASE(Perm16TestEq) { - BOOST_TEST(Perm16::one() * Perm16::one() == Perm16::one()); +TEST_CASE("Perm::operator==", "[Perm16][015]") { + REQUIRE(Perm16::one() * Perm16::one() == Perm16::one()); } -BOOST_FIXTURE_TEST_CASE(Perm16_hash, Fix) { - BOOST_TEST(std::hash()(Transf16::one()) != 0); - BOOST_TEST(std::hash()(PPa) != 0); - BOOST_TEST(std::hash()(RandPerm) != 0); +TEST_CASE_METHOD(Perm16Fixture, "Perm16::hash", "[Perm16][016]") { + REQUIRE(std::hash()(Transf16::one()) != 0); + REQUIRE(std::hash()(PPa) != 0); + REQUIRE(std::hash()(RandPerm) != 0); } -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - - -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(PPerm16_test) -//****************************************************************************// - -BOOST_AUTO_TEST_CASE(PPerm16_constructor) { +TEST_CASE("PPerm16::PPerm16", "[PPerm16][017]") { const uint8_t FF = 0xff; - BOOST_TEST(PPerm16({4, 5, 0}, {9, 0, 1}) == - PPerm16({ 1,FF,FF,FF, 9, 0,FF,FF,FF,FF,FF,FF,FF,FF,FF,FF})); - BOOST_TEST(PPerm16({4, 5, 0, 8}, {9, 0, 1, 2}) == - PPerm16({ 1,FF,FF,FF, 9, 0,FF,FF,2,FF,FF,FF,FF,FF,FF,FF})); -} - -BOOST_AUTO_TEST_CASE(PPerm16_hash) { - BOOST_TEST(std::hash()(PPerm16::one()) != 0); - BOOST_TEST(std::hash()(PPerm16({4, 5, 0}, 
{9, 0, 1})) != 0); -} - - -BOOST_FIXTURE_TEST_CASE(PPerm16_left_one, Fix) { - BOOST_TEST(PPerm16({}).left_one() == PPerm16::one()); - BOOST_TEST(PPerm16({FF,FF,FF,4}).left_one() == PPerm16({FF,FF,FF,FF})); - BOOST_TEST(PPerm16({FF,4,FF,FF}).left_one() == PPerm16({FF,FF,FF,FF})); + REQUIRE( + PPerm16({4, 5, 0}, {9, 0, 1}) == + PPerm16({1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); + REQUIRE( + PPerm16({4, 5, 0, 8}, {9, 0, 1, 2}) == + PPerm16({1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); +} + +TEST_CASE("PPerm16::hash", "[PPerm16][018]") { + REQUIRE(std::hash()(PPerm16::one()) != 0); + REQUIRE(std::hash()(PPerm16({4, 5, 0}, {9, 0, 1})) != 0); +} +// TODO uncomment +// TEST_CASE_METHOD(Perm16Fixture, "PPerm16::left_one", "[PPerm16][019]") { +// REQUIRE(PPerm16({}).left_one() == PPerm16::one()); +// REQUIRE(PPerm16({FF, FF, FF, 4}).left_one() == +// PPerm16({FF, FF, FF, FF})); +// REQUIRE(PPerm16({FF, 4, FF, FF}).left_one() == +// PPerm16({FF, FF, FF, FF})); +// for (auto pp : PPlist) { +// REQUIRE(pp.left_one() * pp == pp); +// } +// } + +TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { + REQUIRE(PPerm16({}).right_one() == PPerm16::one()); + REQUIRE(PPerm16({FF, FF, FF, 4}).right_one() == PPerm16({FF, FF, FF})); + REQUIRE(PPerm16({FF, 4, FF, FF}).right_one() == PPerm16({FF, 1, FF, FF})); for (auto pp : PPlist) { - BOOST_TEST(pp.left_one() * pp == pp); + REQUIRE(pp * pp.right_one() == pp); } } +// TODO uncomment +// TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, +// "[PPerm16][021]"); + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { + REQUIRE(equal(PTransf16::one().fix_points_mask(), Epu8(FF))); + REQUIRE(equal(Perm16::one().fix_points_mask(), Epu8(FF))); + REQUIRE(equal(PPa.fix_points_mask(), Epu8({0, 0, 0, 0, 0}, FF))); + REQUIRE(equal(PPb.fix_points_mask(), (epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, + FF, FF, FF, 0, FF, 0}))); + REQUIRE(equal(RandPerm.fix_points_mask(), Epu8({0, FF}, 0))); + + REQUIRE(equal(Perm16::one().fix_points_mask(false), Epu8(FF))); + REQUIRE(equal(PPa.fix_points_mask(false), Epu8({0, 0, 0, 0, 0}, FF))); + REQUIRE( + equal(PPb.fix_points_mask(false), + (epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0}))); + REQUIRE(equal(RandPerm.fix_points_mask(false), Epu8({0, FF}, 0))); + + REQUIRE(equal(Perm16::one().fix_points_mask(true), Epu8(0))); + REQUIRE(equal(PPa.fix_points_mask(true), Epu8({FF, FF, FF, FF, FF}, 0))); + REQUIRE(equal(PPb.fix_points_mask(true), (epu8{FF, FF, FF, FF, FF, 0, FF, 0, + 0, 0, 0, 0, 0, FF, 0, FF}))); + REQUIRE(equal(RandPerm.fix_points_mask(true), Epu8({FF, 0}, FF))); +} + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][023]") { + REQUIRE(Perm16::one().smallest_fix_point() == 0); + REQUIRE(PPa.smallest_fix_point() == 5); + REQUIRE(PPb.smallest_fix_point() == 5); + REQUIRE(RandPerm.smallest_fix_point() == 1); +} + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_moved_point", + "[Perm16][024]") { + REQUIRE(Perm16::one().smallest_moved_point() == FF); + REQUIRE(PPa.smallest_moved_point() == 0); + REQUIRE(PPb.smallest_moved_point() == 0); + REQUIRE(RandPerm.smallest_moved_point() == 0); + REQUIRE(Perm16({0, 1, 3, 2}).smallest_moved_point() == 2); +} + +// TODO broken test +// TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") +// { +// REQUIRE(Perm16::one().largest_fix_point() == 15); +// REQUIRE(PPa.largest_fix_point() == 15); +// REQUIRE(PPb.largest_fix_point() == 14); 
+// REQUIRE(RandPerm.largest_fix_point() == 1); +// } + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][026]") { + REQUIRE(Perm16::one().nb_fix_points() == 16); + REQUIRE(PPa.nb_fix_points() == 11); + REQUIRE(PPb.nb_fix_points() == 8); + REQUIRE(RandPerm.nb_fix_points() == 1); + REQUIRE(Perm16({0, 1, 3, 2}).nb_fix_points() == 14); +} + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { + REQUIRE(PPa * PPa.inverse() == Perm16::one()); + REQUIRE(PPa.inverse() * PPa == Perm16::one()); + REQUIRE(PPb * PPb.inverse() == Perm16::one()); + REQUIRE(PPb.inverse() * PPb == Perm16::one()); + REQUIRE(RandPerm * RandPerm.inverse() == Perm16::one()); + REQUIRE(RandPerm.inverse() * RandPerm == Perm16::one()); -BOOST_FIXTURE_TEST_CASE(PPerm16_right_one, Fix) { - BOOST_TEST(PPerm16({}).right_one() == PPerm16::one()); - BOOST_TEST(PPerm16({FF,FF,FF,4}).right_one() == PPerm16({FF,FF,FF})); - BOOST_TEST(PPerm16({FF,4,FF,FF}).right_one() == PPerm16({FF,1,FF,FF})); - for (auto pp : PPlist) { - BOOST_TEST(pp * pp.right_one() == pp); - } -} - -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(PPerm16_inverse_ref, Fix) { - for (epu8 mask : PPmasks) { - for (Perm16 p : Plist) { - PPerm16 pp (p.v | mask); - PPerm16 pi = pp.inverse_ref(); - BOOST_TEST(pp * pi * pp == pp); - BOOST_TEST(pi * pp * pi == pi); - BOOST_TEST(pp.inverse_ref().inverse_ref() == pp); - } + for (Perm16 p : Plist) { + REQUIRE(p * p.inverse() == Perm16::one()); + REQUIRE(p.inverse() * p == Perm16::one()); } } -TEST_AGREES(PPerm16, inverse_ref, inverse_find, PPlist); - -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// - +// TODO uncomment +// TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_find, Plist, +// "[Perm16][028]"); +TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_pow, Plist, + "[Perm16][029]"); +TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_cycl, Plist, + "[Perm16][030]"); +TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse, Plist, + "[Perm16][031]"); -//****************************************************************************// -BOOST_AUTO_TEST_SUITE(Perm16_mathematical_methods) -//****************************************************************************// - -BOOST_FIXTURE_TEST_CASE(Perm16_fix_points_mask, Fix) { - EPU8_EQUAL(PTransf16::one().fix_points_mask(), Epu8(FF)); - EPU8_EQUAL(Perm16::one().fix_points_mask(), Epu8(FF)); - EPU8_EQUAL(PPa.fix_points_mask(), Epu8({0, 0, 0, 0, 0}, FF)); - EPU8_EQUAL(PPb.fix_points_mask(), - (epu8{ 0, 0, 0, 0, 0,FF, 0,FF,FF,FF,FF,FF,FF, 0,FF, 0})); - EPU8_EQUAL(RandPerm.fix_points_mask(), Epu8({0,FF}, 0)); - - EPU8_EQUAL(Perm16::one().fix_points_mask(false), Epu8(FF)); - EPU8_EQUAL(PPa.fix_points_mask(false), Epu8({0, 0, 0, 0, 0}, FF)); - EPU8_EQUAL(PPb.fix_points_mask(false), - (epu8{ 0, 0, 0, 0, 0,FF, 0,FF,FF,FF,FF,FF,FF, 0,FF, 0})); - EPU8_EQUAL(RandPerm.fix_points_mask(false), Epu8({0,FF}, 0)); - - EPU8_EQUAL(Perm16::one().fix_points_mask(true), Epu8(0)); - EPU8_EQUAL(PPa.fix_points_mask(true), Epu8({FF,FF,FF,FF,FF}, 0)); - EPU8_EQUAL(PPb.fix_points_mask(true), - (epu8{FF,FF,FF,FF,FF, 0,FF, 0, 0, 0, 0, 0, 0,FF, 0,FF})); - EPU8_EQUAL(RandPerm.fix_points_mask(true), Epu8({FF, 0}, FF)); +TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { + REQUIRE(equal(Perm16::one().lehmer(), zero)); + REQUIRE(equal(PPa.lehmer(), + (epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}))); + 
REQUIRE(equal(PPb.lehmer(), + (epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0}))); } -BOOST_FIXTURE_TEST_CASE(Perm16_smallest_fix_point, Fix) { - BOOST_TEST(Perm16::one().smallest_fix_point() == 0); - BOOST_TEST(PPa.smallest_fix_point() == 5); - BOOST_TEST(PPb.smallest_fix_point() == 5); - BOOST_TEST(RandPerm.smallest_fix_point() == 1); -} -BOOST_FIXTURE_TEST_CASE(Perm16_smallest_moved_point, Fix) { - BOOST_TEST(Perm16::one().smallest_moved_point() == FF); - BOOST_TEST(PPa.smallest_moved_point() == 0); - BOOST_TEST(PPb.smallest_moved_point() == 0); - BOOST_TEST(RandPerm.smallest_moved_point() == 0); - BOOST_TEST(Perm16({0,1,3,2}).smallest_moved_point() == 2); -} +TEST_AGREES_EPU8(Perm16Fixture, Perm16, lehmer_ref, lehmer_arr, Plist, + "[Perm16][033]"); +TEST_AGREES_EPU8(Perm16Fixture, Perm16, lehmer_ref, lehmer, Plist, + "[Perm16][034]"); -BOOST_FIXTURE_TEST_CASE(Perm16_largest_fix_point, Fix) { - BOOST_TEST(Perm16::one().largest_fix_point() == 15); - BOOST_TEST(PPa.largest_fix_point() == 15); - BOOST_TEST(PPb.largest_fix_point() == 14); - BOOST_TEST(RandPerm.largest_fix_point() == 1); -} -BOOST_FIXTURE_TEST_CASE(Perm16_nb_fix_points, Fix) { - BOOST_TEST(Perm16::one().nb_fix_points() == 16); - BOOST_TEST(PPa.nb_fix_points() == 11); - BOOST_TEST(PPb.nb_fix_points() == 8); - BOOST_TEST(RandPerm.nb_fix_points() == 1); - BOOST_TEST(Perm16({0,1,3,2}).nb_fix_points() == 14); +TEST_CASE_METHOD(Perm16Fixture, "Perm16::length_ref", "[Perm16][035]") { + REQUIRE(Perm16::one().length() == 0); + REQUIRE(PPa.length() == 4); + REQUIRE(PPb.length() == 10); } -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(Perm16_inverse_ref, Fix) { - BOOST_TEST(PPa * PPa.inverse() == Perm16::one()); - BOOST_TEST(PPa.inverse() * PPa == Perm16::one()); - BOOST_TEST(PPb * PPb.inverse() == Perm16::one()); - BOOST_TEST(PPb.inverse() * PPb == Perm16::one()); - BOOST_TEST(RandPerm * RandPerm.inverse() == Perm16::one()); - BOOST_TEST(RandPerm.inverse() * RandPerm == Perm16::one()); +TEST_AGREES(Perm16Fixture, Perm16, length_ref, length_arr, Plist, + "[Perm16][036]"); +TEST_AGREES(Perm16Fixture, Perm16, length_ref, length, Plist, "[Perm16][037]"); - for (Perm16 p : Plist) { - BOOST_TEST(p * p.inverse() == Perm16::one()); - BOOST_TEST(p.inverse() * p == Perm16::one()); - } -} -TEST_AGREES(Perm16, inverse_ref, inverse_arr, Plist); -TEST_AGREES(Perm16, inverse_ref, inverse_sort, Plist); -TEST_AGREES(Perm16, inverse_ref, inverse_find, Plist); -TEST_AGREES(Perm16, inverse_ref, inverse_pow, Plist); -TEST_AGREES(Perm16, inverse_ref, inverse_cycl, Plist); -TEST_AGREES(Perm16, inverse_ref, inverse, Plist); - - -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(Perm16_lehmer_ref, Fix) { - EPU8_EQUAL(Perm16::one().lehmer(), zero); - EPU8_EQUAL(PPa.lehmer(), - (epu8 { 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); - EPU8_EQUAL(PPb.lehmer(), - (epu8 { 1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); +TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][038]") { + REQUIRE(Perm16::one().nb_descents_ref() == 0); + REQUIRE(PPa.nb_descents_ref() == 1); + REQUIRE(PPb.nb_descents_ref() == 4); + REQUIRE(Perm16::one().nb_descents() == 0); } -TEST_EPU8_AGREES(Perm16, lehmer_ref, lehmer_arr, Plist); -TEST_EPU8_AGREES(Perm16, lehmer_ref, lehmer, Plist); - -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(Perm16_length_ref, Fix) { - 
BOOST_TEST(Perm16::one().length() == 0); - BOOST_TEST(PPa.length() == 4); - BOOST_TEST(PPb.length() == 10); -} -TEST_AGREES(Perm16, length_ref, length_arr, Plist); -TEST_AGREES(Perm16, length_ref, length, Plist); - -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(Perm16_nb_descents_ref, Fix) { - BOOST_TEST(Perm16::one().nb_descents_ref() == 0); - BOOST_TEST(PPa.nb_descents_ref() == 1); - BOOST_TEST(PPb.nb_descents_ref() == 4); - BOOST_TEST(Perm16::one().nb_descents() == 0); -} -TEST_AGREES(Perm16, nb_descents_ref, nb_descents, Plist); -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(Perm16_nb_cycles_ref, Fix) { - BOOST_TEST(Perm16::one().nb_cycles_ref() == 16); - BOOST_TEST(PPa.nb_cycles_ref() == 12); - BOOST_TEST(PPb.nb_cycles_ref() == 10); +TEST_AGREES(Perm16Fixture, Perm16, nb_descents_ref, nb_descents, Plist, + "[Perm16][039]"); + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_cycles_ref", "[Perm16][040]") { + REQUIRE(Perm16::one().nb_cycles_ref() == 16); + REQUIRE(PPa.nb_cycles_ref() == 12); + REQUIRE(PPb.nb_cycles_ref() == 10); } -TEST_AGREES(Perm16, nb_cycles_ref, nb_cycles, Plist); +TEST_AGREES(Perm16Fixture, Perm16, nb_cycles_ref, nb_cycles, Plist, + "[Perm16][041]"); -//****************************************************************************// -BOOST_FIXTURE_TEST_CASE(Perm16_left_weak_leq_ref, Fix) { - BOOST_TEST(Perm16::one().left_weak_leq_ref(Perm16::one())); - BOOST_TEST(Perm16::one().left_weak_leq_ref(PPa)); - BOOST_TEST(Perm16::one().left_weak_leq_ref(PPb)); - BOOST_TEST(PPa.left_weak_leq_ref(PPa)); - BOOST_TEST(PPb.left_weak_leq_ref(PPb)); +TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq_ref", "[Perm16][042]") { + REQUIRE(Perm16::one().left_weak_leq_ref(Perm16::one())); + REQUIRE(Perm16::one().left_weak_leq_ref(PPa)); + REQUIRE(Perm16::one().left_weak_leq_ref(PPb)); + REQUIRE(PPa.left_weak_leq_ref(PPa)); + REQUIRE(PPb.left_weak_leq_ref(PPb)); } -BOOST_FIXTURE_TEST_CASE(Perm16_left_weak_leq, Fix) { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq", "[Perm16][043]") { for (auto u : PlistSmall) { for (auto v : PlistSmall) { - BOOST_TEST(u.left_weak_leq(v) == u.left_weak_leq_ref(v)); - BOOST_TEST(u.left_weak_leq_length(v) == u.left_weak_leq_ref(v)); + REQUIRE(u.left_weak_leq(v) == u.left_weak_leq_ref(v)); + REQUIRE(u.left_weak_leq_length(v) == u.left_weak_leq_ref(v)); } } } - -BOOST_AUTO_TEST_SUITE_END() -//****************************************************************************// +} // namespace HPCombi diff --git a/tests/test_perm_all.cpp b/tests/test_perm_all.cpp index 2267761c..9895e201 100644 --- a/tests/test_perm_all.cpp +++ b/tests/test_perm_all.cpp @@ -13,44 +13,32 @@ /* http://www.gnu.org/licenses/ */ /******************************************************************************/ -#define BOOST_TEST_MODULE perm_generic - #include +#include +#include #include -#include -#include -#include - -//____________________________________________________________________________// - -template struct IsPermFunctions { - static bool is_perm(const VectType a) { return a.is_permutation(); }; - static bool is_perm2(const VectType a, size_t i) { - return a.is_permutation(i); - }; -}; - #include "perm16.hpp" #include "perm_generic.hpp" -//____________________________________________________________________________// +#include "test_main.hpp" +#include -template -struct Fixture : public IsPermFunctions { +namespace HPCombi { +namespace { - using 
VectType = typename _PermType::vect; - using PermType = _PermType; +template struct Fixture1 { - Fixture() + using VectType = typename Perm_::vect; + using PermType = Perm_; + + Fixture1() : zero({0}), V01({0, 1}), V10({1, 0}), V11({1, 1}), V1({}, 1), PPa({1, 2, 3, 4, 0, 5}), PPb({1, 2, 3, 6, 0, 5}), czero(zero), cV01(V01), RandPerm({3, 1, 0, 5, 10, 2, 6, 7, 4, 8, 9}), Plist({PPa, PPb, RandPerm}), - Vlist({zero, V01, V10, V11, V1, PPa, PPb, RandPerm}) { - BOOST_TEST_MESSAGE("setup fixture"); - } - ~Fixture() { BOOST_TEST_MESSAGE("teardown fixture"); } + Vlist({zero, V01, V10, V11, V1, PPa, PPb, RandPerm}) {} + ~Fixture1() {} VectType zero, V01, V10, V11, V1; PermType PPa, PPb; @@ -59,314 +47,351 @@ struct Fixture : public IsPermFunctions { const std::vector Plist; const std::vector Vlist; - static bool less(const VectType a, const VectType b) { return a < b; }; - static bool not_less(const VectType a, const VectType b) { - return not(a < b); - }; - // some tests assume that the size is at least 6 static_assert(VectType::Size() >= 6, "Minimum size for tests"); }; -//____________________________________________________________________________// - -typedef boost::mpl::list< - Fixture, Fixture>, - Fixture>, Fixture>, - Fixture>, Fixture>, - Fixture>> - Fixtures; +} // namespace -//____________________________________________________________________________// +// Better than std::tuple because we can see the actual types in the output +// with a macro but not with the tuple. +#define PermTypes \ + Perm16, PermGeneric<12>, PermGeneric<16>, PermGeneric<32>, \ + PermGeneric<42>, PermGeneric<49>, (PermGeneric<350, uint32_t>) -BOOST_AUTO_TEST_SUITE(VectType_test) -//____________________________________________________________________________// - -BOOST_FIXTURE_TEST_CASE_TEMPLATE(sizeof_test, F, Fixtures, F) { - BOOST_TEST(sizeof(F::zero) == F::VectType::Size() * sizeof(F::zero[0])); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "sizeof", "[PermAll][000]", PermTypes) { + REQUIRE(sizeof(Fixture1::zero) == + Fixture1::VectType::Size() * + sizeof(Fixture1::zero[0])); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(equal_test, F, Fixtures, F) { - BOOST_TEST(F::zero == F::zero); - BOOST_TEST(F::zero != F::V01); - for (unsigned i = 0; i < F::Plist.size(); i++) - for (unsigned j = 0; j < F::Plist.size(); j++) - if (i == j) - BOOST_TEST(F::Plist[i] == F::Plist[j]); - else - BOOST_TEST(F::Plist[i] != F::Plist[j]); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator==", "[PermAll][001]", PermTypes) { + REQUIRE(Fixture1::zero == Fixture1::zero); + REQUIRE(Fixture1::zero != Fixture1::V01); + for (unsigned i = 0; i < Fixture1::Plist.size(); i++) { + for (unsigned j = 0; j < Fixture1::Plist.size(); j++) { + if (i == j) { + REQUIRE(Fixture1::Plist[i] == + Fixture1::Plist[j]); + } else { + REQUIRE(Fixture1::Plist[i] != + Fixture1::Plist[j]); + } + } + } } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(operator_bracket_const_test, F, Fixtures, F) { - BOOST_TEST(F::czero[0] == 0u); - BOOST_TEST(F::czero[1] == 0u); - if (F::czero.Size() > 12) - BOOST_TEST(F::czero[12] == 0u); - if (F::czero.Size() > 15) - BOOST_TEST(F::czero[15] == 0u); - BOOST_TEST(F::cV01[0] == 0u); - BOOST_TEST(F::cV01[1] == 1u); - BOOST_TEST(F::cV01[2] == 0u); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[] const", "[PermAll][002]", + PermTypes) { + REQUIRE(Fixture1::czero[0] == 0u); + REQUIRE(Fixture1::czero[1] == 0u); + if (Fixture1::czero.Size() > 12) { + REQUIRE(Fixture1::czero[12] == 0u); + } + if (Fixture1::czero.Size() > 15) { + REQUIRE(Fixture1::czero[15] == 0u); + } + 
REQUIRE(Fixture1::cV01[0] == 0u); + REQUIRE(Fixture1::cV01[1] == 1u); + REQUIRE(Fixture1::cV01[2] == 0u); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(operator_bracket_test, F, Fixtures, F) { - BOOST_TEST(F::zero[0] == 0u); - BOOST_TEST(F::zero[1] == 0u); - if (F::czero.Size() > 12) - BOOST_TEST(F::zero[12] == 0u); - if (F::czero.Size() > 15) - BOOST_TEST(F::zero[15] == 0u); - BOOST_TEST(F::V01[0] == 0u); - BOOST_TEST(F::V01[1] == 1u); - BOOST_TEST(F::V01[2] == 0u); - BOOST_TEST(F::PPa[4] == 0u); - BOOST_TEST(F::PPa[5] == 5u); - F::zero[0] = 3; - BOOST_TEST(F::zero[0] == 3u); - BOOST_TEST(F::zero[1] == 0u); - if (F::czero.Size() > 12) - BOOST_TEST(F::zero[12] == 0u); - if (F::czero.Size() > 15) - BOOST_TEST(F::zero[15] == 0u); - F::PPa[2] = 0; - BOOST_TEST(F::PPa[1] == 2u); - BOOST_TEST(F::PPa[2] == 0u); - BOOST_TEST(F::PPa[3] == 4u); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[]", "[PermAll][003]", PermTypes) { + REQUIRE(Fixture1::zero[0] == 0u); + REQUIRE(Fixture1::zero[1] == 0u); + if (Fixture1::czero.Size() > 12) { + REQUIRE(Fixture1::zero[12] == 0u); + } + if (Fixture1::czero.Size() > 15) { + REQUIRE(Fixture1::zero[15] == 0u); + } + REQUIRE(Fixture1::V01[0] == 0u); + REQUIRE(Fixture1::V01[1] == 1u); + REQUIRE(Fixture1::V01[2] == 0u); + REQUIRE(Fixture1::PPa[4] == 0u); + REQUIRE(Fixture1::PPa[5] == 5u); + Fixture1::zero[0] = 3; + REQUIRE(Fixture1::zero[0] == 3u); + REQUIRE(Fixture1::zero[1] == 0u); + if (Fixture1::czero.Size() > 12) { + REQUIRE(Fixture1::zero[12] == 0u); + } + if (Fixture1::czero.Size() > 15) { + REQUIRE(Fixture1::zero[15] == 0u); + } + Fixture1::PPa[2] = 0; + REQUIRE(Fixture1::PPa[1] == 2u); + REQUIRE(Fixture1::PPa[2] == 0u); + REQUIRE(Fixture1::PPa[3] == 4u); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(operator_less_test, F, Fixtures, F) { - for (unsigned i = 0; i < F::Plist.size(); i++) - for (unsigned j = 0; j < F::Plist.size(); j++) - if (i < j) - BOOST_CHECK_PREDICATE(F::less, (F::Plist[i])(F::Plist[j])); - else - BOOST_CHECK_PREDICATE(F::not_less, (F::Plist[i])(F::Plist[j])); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator<", "[AllPerm][004]", PermTypes) { + for (unsigned i = 0; i < Fixture1::Plist.size(); i++) { + for (unsigned j = 0; j < Fixture1::Plist.size(); j++) { + if (i < j) { + REQUIRE(Fixture1::Plist[i] < + Fixture1::Plist[j]); + } else { + REQUIRE(!(Fixture1::Plist[i] < + Fixture1::Plist[j])); + } + } + } } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(operator_less_partial_test, F, Fixtures, F) { - for (auto p : F::Plist) - for (unsigned k = 0; k < F::PermType::size(); k++) - BOOST_TEST(p.less_partial(p, k) == 0); - for (auto p : F::Plist) - for (auto q : F::Plist) - BOOST_TEST(p.less_partial(q, 0) == 0); - - BOOST_TEST(F::zero.less_partial(F::V01, 1) == 0); - BOOST_TEST(F::V01.less_partial(F::zero, 1) == 0); - BOOST_TEST(F::zero.less_partial(F::V01, 2) < 0); - BOOST_TEST(F::V01.less_partial(F::zero, 2) > 0); - - BOOST_TEST(F::zero.less_partial(F::V10, 1) < 0); - BOOST_TEST(F::zero.less_partial(F::V10, 2) < 0); - BOOST_TEST(F::V10.less_partial(F::zero, 1) > 0); - BOOST_TEST(F::V10.less_partial(F::zero, 2) > 0); - - BOOST_TEST(F::PPa.less_partial(F::PPb, 1) == 0); - BOOST_TEST(F::PPa.less_partial(F::PPb, 2) == 0); - BOOST_TEST(F::PPa.less_partial(F::PPb, 3) == 0); - BOOST_TEST(F::PPa.less_partial(F::PPb, 4) < 0); - BOOST_TEST(F::PPa.less_partial(F::PPb, 5) < 0); - BOOST_TEST(F::PPb.less_partial(F::PPa, 4) > 0); - BOOST_TEST(F::PPb.less_partial(F::PPa, 5) > 0); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "less_partial", "[AllPerm][005]", + PermTypes) { + for (auto p : Fixture1::Plist) 
{ + for (unsigned k = 0; k < Fixture1::PermType::size(); k++) { + REQUIRE(p.less_partial(p, k) == 0); + } + } + for (auto p : Fixture1::Plist) { + for (auto q : Fixture1::Plist) { + REQUIRE(p.less_partial(q, 0) == 0); + } + } + + REQUIRE(Fixture1::zero.less_partial(Fixture1::V01, 1) == + 0); + REQUIRE(Fixture1::V01.less_partial(Fixture1::zero, 1) == + 0); + REQUIRE(Fixture1::zero.less_partial(Fixture1::V01, 2) < + 0); + REQUIRE(Fixture1::V01.less_partial(Fixture1::zero, 2) > + 0); + + REQUIRE(Fixture1::zero.less_partial(Fixture1::V10, 1) < + 0); + REQUIRE(Fixture1::zero.less_partial(Fixture1::V10, 2) < + 0); + REQUIRE(Fixture1::V10.less_partial(Fixture1::zero, 1) > + 0); + REQUIRE(Fixture1::V10.less_partial(Fixture1::zero, 2) > + 0); + + REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 1) == + 0); + REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 2) == + 0); + REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 3) == + 0); + REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 4) < + 0); + REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 5) < + 0); + REQUIRE(Fixture1::PPb.less_partial(Fixture1::PPa, 4) > + 0); + REQUIRE(Fixture1::PPb.less_partial(Fixture1::PPa, 5) > + 0); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(first_zero_test, F, Fixtures, F) { - BOOST_TEST(F::zero.first_zero() == 0u); - BOOST_TEST(F::V01.first_zero() == 0u); - BOOST_TEST(F::PPa.first_zero() == 4u); - BOOST_TEST(F::V10.first_zero() == 1u); - BOOST_TEST(F::V1.first_zero() == F::VectType::Size()); - BOOST_TEST(F::V10.first_zero(1) == F::VectType::Size()); - BOOST_TEST(F::PPa.first_zero(5) == 4u); - BOOST_TEST(F::PPa.first_zero(3) == F::VectType::Size()); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_zero", "[AllPerm][006]", PermTypes) { + REQUIRE(Fixture1::zero.first_zero() == 0u); + REQUIRE(Fixture1::V01.first_zero() == 0u); + REQUIRE(Fixture1::PPa.first_zero() == 4u); + REQUIRE(Fixture1::V10.first_zero() == 1u); + REQUIRE(Fixture1::V1.first_zero() == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::V10.first_zero(1) == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::PPa.first_zero(5) == 4u); + REQUIRE(Fixture1::PPa.first_zero(3) == + Fixture1::VectType::Size()); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(last_zero_test, F, Fixtures, F) { - BOOST_TEST(F::zero.last_zero() == F::VectType::Size() - 1); - BOOST_TEST(F::V01.last_zero() == F::VectType::Size() - 1); - BOOST_TEST(F::PPa.last_zero() == 4u); - BOOST_TEST(F::V1.last_zero() == F::VectType::Size()); - BOOST_TEST(F::V01.last_zero(1) == 0u); - BOOST_TEST(F::V10.last_zero(1) == F::VectType::Size()); - BOOST_TEST(F::PPa.last_zero(5) == 4u); - BOOST_TEST(F::PPa.last_zero(3) == F::VectType::Size()); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "last_zero", "[AllPerm][007]", PermTypes) { + REQUIRE(Fixture1::zero.last_zero() == + Fixture1::VectType::Size() - 1); + REQUIRE(Fixture1::V01.last_zero() == + Fixture1::VectType::Size() - 1); + REQUIRE(Fixture1::PPa.last_zero() == 4u); + REQUIRE(Fixture1::V1.last_zero() == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::V01.last_zero(1) == 0u); + REQUIRE(Fixture1::V10.last_zero(1) == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::PPa.last_zero(5) == 4u); + REQUIRE(Fixture1::PPa.last_zero(3) == + Fixture1::VectType::Size()); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(first_non_zero_test, F, Fixtures, F) { - BOOST_TEST(F::zero.first_non_zero() == F::VectType::Size()); - BOOST_TEST(F::V01.first_non_zero() == 1u); - BOOST_TEST(F::PPa.first_non_zero() == 0u); - BOOST_TEST(F::V01.first_non_zero() == 1u); - BOOST_TEST(F::V01.first_non_zero(1) == 
F::VectType::Size()); - BOOST_TEST(F::PPa.first_non_zero(5) == 0u); - BOOST_TEST(F::PPa.first_non_zero(3) == 0u); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_non_zero", "[AllPerm][008]", + PermTypes) { + REQUIRE(Fixture1::zero.first_non_zero() == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::V01.first_non_zero() == 1u); + REQUIRE(Fixture1::PPa.first_non_zero() == 0u); + REQUIRE(Fixture1::V01.first_non_zero() == 1u); + REQUIRE(Fixture1::V01.first_non_zero(1) == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::PPa.first_non_zero(5) == 0u); + REQUIRE(Fixture1::PPa.first_non_zero(3) == 0u); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(last_non_zero_test, F, Fixtures, F) { - BOOST_TEST(F::zero.last_non_zero() == F::VectType::Size()); - BOOST_TEST(F::V01.last_non_zero() == 1u); - BOOST_TEST(F::PPa.last_non_zero() == F::VectType::Size() - 1); - BOOST_TEST(F::V01.last_non_zero() == 1u); - BOOST_TEST(F::V01.last_non_zero(1) == F::VectType::Size()); - BOOST_TEST(F::PPa.last_non_zero(5) == 3u); - BOOST_TEST(F::PPa.last_non_zero(3) == 2u); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "last_non_zero", "[AllPerm][009]", + PermTypes) { + REQUIRE(Fixture1::zero.last_non_zero() == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::V01.last_non_zero() == 1u); + REQUIRE(Fixture1::PPa.last_non_zero() == + Fixture1::VectType::Size() - 1); + REQUIRE(Fixture1::V01.last_non_zero() == 1u); + REQUIRE(Fixture1::V01.last_non_zero(1) == + Fixture1::VectType::Size()); + REQUIRE(Fixture1::PPa.last_non_zero(5) == 3u); + REQUIRE(Fixture1::PPa.last_non_zero(3) == 2u); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(permuted_test, F, Fixtures, F) { - BOOST_TEST(F::zero.permuted(F::zero) == F::zero); - BOOST_TEST(F::V01.permuted(F::V01) == F::V01); - BOOST_TEST(F::V10.permuted(F::V10) == typename F::VectType({0, 1}, 1)); - BOOST_TEST(F::V10.permuted(F::V01) == typename F::VectType({1, 0}, 1)); - BOOST_TEST(F::V01.permuted(F::V10) == F::V10); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "permuted", "[AllPerm][010]", PermTypes) { + REQUIRE(Fixture1::zero.permuted(Fixture1::zero) == + Fixture1::zero); + REQUIRE(Fixture1::V01.permuted(Fixture1::V01) == + Fixture1::V01); + REQUIRE(Fixture1::V10.permuted(Fixture1::V10) == + typename Fixture1::VectType({0, 1}, 1)); + REQUIRE(Fixture1::V10.permuted(Fixture1::V01) == + typename Fixture1::VectType({1, 0}, 1)); + REQUIRE(Fixture1::V01.permuted(Fixture1::V10) == + Fixture1::V10); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(operator_insert_test, F, Fixtures, F) { +TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", + PermTypes) { std::ostringstream out, out2; - out << F::zero; + out << Fixture1::zero; out2 << "[ 0"; - for (size_t i = 1; i < F::VectType::Size(); i++) + for (size_t i = 1; i < Fixture1::VectType::Size(); i++) out2 << ", 0"; out2 << "]"; - BOOST_TEST(out.str() == out2.str()); + REQUIRE(out.str() == out2.str()); out.str(""); out2.str(""); - out << F::V01; + out << Fixture1::V01; out2 << "[ 0, 1"; - for (size_t i = 2; i < F::VectType::Size(); i++) + for (size_t i = 2; i < Fixture1::VectType::Size(); i++) out2 << ", 0"; out2 << "]"; - BOOST_TEST(out.str() == out2.str()); + REQUIRE(out.str() == out2.str()); out.str(""); out2.str(""); - out << F::PPa; + out << Fixture1::PPa; out2 << "[ 1, 2, 3, 4, 0"; - for (size_t i = 5; i < F::VectType::Size(); i++) + for (size_t i = 5; i < Fixture1::VectType::Size(); i++) out2 << "," << std::setw(2) << i; out2 << "]"; - BOOST_TEST(out.str() == out2.str()); + REQUIRE(out.str() == out2.str()); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(is_permutation_test, F, Fixtures, 
F) { - BOOST_CHECK_PREDICATE(boost::not1(F::is_perm), (F::zero)); - BOOST_CHECK_PREDICATE(F::is_perm, (F::PPa)); - BOOST_CHECK_PREDICATE(boost::not1(F::is_perm), (F::PPb)); - BOOST_CHECK_PREDICATE(F::is_perm, (F::RandPerm)); - BOOST_CHECK_PREDICATE( - boost::not1(F::is_perm), - (typename F::VectType({3, 1, 0, 9, 3, 10, 2, 11, 6, 7, 4, 8}))); - BOOST_CHECK_PREDICATE(F::is_perm2, (F::PPa)(16)); - BOOST_CHECK_PREDICATE(boost::not2(F::is_perm2), (F::RandPerm)(4)); - BOOST_CHECK_PREDICATE(F::is_perm2, (F::PPa)(5)); - BOOST_CHECK_PREDICATE(boost::not2(F::is_perm2), (F::PPa)(4)); +TEMPLATE_TEST_CASE_METHOD(Fixture1, "is_permutation", "[AllPerm][012]", + PermTypes) { + REQUIRE(!Fixture1::zero.is_permutation()); + REQUIRE(Fixture1::PPa.is_permutation()); + REQUIRE(!Fixture1::PPb.is_permutation()); + REQUIRE(Fixture1::RandPerm.is_permutation()); + REQUIRE(!typename Fixture1::VectType( + {3, 1, 0, 9, 3, 10, 2, 11, 6, 7, 4, 8}) + .is_permutation()); + REQUIRE(Fixture1::PPa.is_permutation(16)); + REQUIRE(!Fixture1::RandPerm.is_permutation(4)); + REQUIRE(Fixture1::PPa.is_permutation(5)); + REQUIRE(!Fixture1::PPa.is_permutation(4)); } -BOOST_AUTO_TEST_SUITE_END() - -//____________________________________________________________________________// -//____________________________________________________________________________// - -template struct PermFixture : public IsPermFunctions<_Perm> { - using PermType = _Perm; - PermFixture() +template struct Fixture2 { + using PermType = Perm_; + Fixture2() : id(PermType::one()), RandPerm({3, 1, 0, 5, 10, 2, 11, 6, 7, 4, 8, 9}), Plist({id, RandPerm}) { for (uint64_t i = 0; i < std::min(PermType::size(), 30) - 1; - i++) + i++) { Plist.push_back(PermType::elementary_transposition(i)); + } for (uint64_t i = std::max(30, PermType::size() - 20); - i < PermType::size() - 1; i++) + i < PermType::size() - 1; i++) { Plist.push_back(PermType::elementary_transposition(i)); - for (uint64_t i = 0; i < 10; i++) + } + for (uint64_t i = 0; i < 10; i++) { Plist.push_back(PermType::random()); - BOOST_TEST_MESSAGE("setup fixture"); + } } - ~PermFixture() { BOOST_TEST_MESSAGE("teardown fixture"); } + ~Fixture2() = default; PermType id, s1, s2, s3; const PermType RandPerm; std::vector Plist; }; -//____________________________________________________________________________// - -typedef boost::mpl::list, - PermFixture>, - PermFixture>, - PermFixture>, - PermFixture>, - PermFixture>, - PermFixture>> - PermFixtures; - -//____________________________________________________________________________// - -BOOST_AUTO_TEST_SUITE(PermType_test) -//____________________________________________________________________________// - -BOOST_FIXTURE_TEST_CASE_TEMPLATE(constructor_is_permutation_test, F, - PermFixtures, F) { - for (auto x : F::Plist) - BOOST_CHECK_PREDICATE(F::is_perm, (x)); - - // Default constructor doesn't initialize - // BOOST_CHECK_PREDICATE(F::is_perm, (typename F::PermType())); - BOOST_CHECK_PREDICATE(F::is_perm, (typename F::PermType({}))); - BOOST_CHECK_PREDICATE(F::is_perm, (typename F::PermType({1, 0}))); - BOOST_CHECK_PREDICATE(F::is_perm, (typename F::PermType({1, 2, 0}))); - BOOST_CHECK_PREDICATE(boost::not1(F::is_perm), - (typename F::PermType({1, 2}))); +TEMPLATE_TEST_CASE_METHOD(Fixture2, + "is_permutation for freshly constructed perms", + "[AllPerm][013]", PermTypes) { + using Perm = typename Fixture2::PermType; + for (auto x : Fixture2::Plist) { + REQUIRE(Perm(x).is_permutation()); + } + REQUIRE(Perm({}).is_permutation()); + REQUIRE(Perm({1, 0}).is_permutation()); + 
REQUIRE(Perm({1, 2, 0}).is_permutation()); + REQUIRE(!Perm({1, 2}).is_permutation()); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(std_hash_test, F, PermFixtures, F) { - for (auto x : F::Plist) - BOOST_TEST(std::hash()(x) != 0); +TEMPLATE_TEST_CASE_METHOD(Fixture2, "std::hash", "[AllPerm][014]", PermTypes) { + for (auto x : Fixture2::Plist) { + REQUIRE(std::hash::PermType>()(x) != 0); + } } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(mult_coxeter_test, F, PermFixtures, F) { - for (uint64_t i = 0; i < F::PermType::Size() - 1; i++) { - auto si = F::PermType::elementary_transposition(i); - BOOST_TEST(si != F::id); - BOOST_TEST(si * si == F::id); - if (i + 1 < F::PermType::Size() - 1) { - auto si1 = F::PermType::elementary_transposition(i + 1); - BOOST_TEST(si * si1 * si == si1 * si * si1); +TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult_coxeter", "[AllPerm][015]", + PermTypes) { + const size_t n = TestType::Size() - 1; + + for (uint64_t i = 0; i < n; i++) { + auto si = TestType::elementary_transposition(i); + REQUIRE(si != Fixture2::id); + REQUIRE(si * si == Fixture2::id); + if (i + 1 < n) { + auto si1 = TestType::elementary_transposition(i + 1); + REQUIRE(si * si1 * si == si1 * si * si1); } - for (uint64_t j = i + 2; j < F::PermType::Size() - 1; j++) { - auto sj = F::PermType::elementary_transposition(j); - BOOST_TEST(sj * si == si * sj); + for (uint64_t j = i + 2; j < n; j++) { + auto sj = TestType::elementary_transposition(j); + REQUIRE(sj * si == si * sj); } } } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(mult_test, F, PermFixtures, F) { - for (auto x : F::Plist) { - BOOST_TEST(F::id * x == x); - BOOST_TEST(x * F::id == x); +TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult", "[AllPerm][016]", PermTypes) { + for (auto x : Fixture2::Plist) { + REQUIRE(Fixture2::id * x == x); + REQUIRE(x * Fixture2::id == x); + } + REQUIRE(Fixture2::RandPerm * Fixture2::RandPerm == + TestType({5, 1, 3, 2, 8, 0, 9, 11, 6, 10, 7, 4})); + + for (auto x : Fixture2::Plist) { + for (auto y : Fixture2::Plist) { + for (auto z : Fixture2::Plist) { + REQUIRE((x * y) * z == x * (y * z)); + } + } } - BOOST_TEST(F::RandPerm * F::RandPerm == - typename F::PermType({5, 1, 3, 2, 8, 0, 9, 11, 6, 10, 7, 4})); - - for (auto x : F::Plist) - for (auto y : F::Plist) - for (auto z : F::Plist) - BOOST_TEST((x * y) * z == x * (y * z)); } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(inverse_test, F, PermFixtures, F) { - for (auto x : F::Plist) { - BOOST_TEST(x.inverse() * x == F::id); - BOOST_TEST(x * x.inverse() == F::id); - BOOST_TEST(x.inverse().inverse() == x); +TEMPLATE_TEST_CASE_METHOD(Fixture2, "inverse", "[AllPerm][017]", PermTypes) { + for (auto x : Fixture2::Plist) { + REQUIRE(x.inverse() * x == Fixture2::id); + REQUIRE(x * x.inverse() == Fixture2::id); + REQUIRE(x.inverse().inverse() == x); } } -BOOST_FIXTURE_TEST_CASE_TEMPLATE(random_test, F, PermFixtures, F) { +TEMPLATE_TEST_CASE_METHOD(Fixture2, "random", "[AllPerm][018]", PermTypes) { for (int i = 0; i < 100; i++) { - BOOST_CHECK_PREDICATE(F::is_perm, (F::PermType::random())); + REQUIRE(Fixture2::PermType::random().is_permutation()); } } -BOOST_AUTO_TEST_SUITE_END() +} // namespace HPCombi From 4ab972a820a4967084ba722e9edd6a76e1575ce1 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Tue, 24 Oct 2023 14:59:40 +0100 Subject: [PATCH 010/113] Update ci to use catch2 --- .github/workflows/runtests.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index f056d764..fb826f69 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -28,8 +28,7 @@ jobs: sudo apt-get --yes update sudo apt-get install --yes ccache sudo apt-get install --yes libbenchmark-dev - sudo apt-get install --yes libgtest-dev - sudo apt-get install --yes libboost-all-dev + sudo apt-get install --yes catch2 - name: "Build + run HPCombi tests . . ." run: | mkdir build From a0a1c8ab0d579527608a43077012c231022f3b9a Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Thu, 26 Oct 2023 13:31:17 +0100 Subject: [PATCH 011/113] Remove -mavx flag + bump cmake to 3.8 --- CMakeLists.txt | 15 ++++++--------- tests/test_epu.cpp | 1 + 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bcfcc986..d48153d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,8 @@ # http://www.gnu.org/licenses/ # #****************************************************************************# -cmake_minimum_required(VERSION 2.8) +# Require at least 3.8 so that we can set cmake_policy CMP0067 below +cmake_minimum_required(VERSION 3.8) ##################### # Project description @@ -41,12 +42,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) # -std=c++14 instead of -std=gnu++14 add_definitions(-DHPCOMBI_HAVE_CONFIG) message(STATUS "*** Compiler id is ${CMAKE_CXX_COMPILER_ID}") -if ( CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" ) - # Workaround of CMAKE bug https://stackoverflow.com/questions/47213356/ - set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} -std=c++14) - add_compile_options(-std=c++14 -Wall -g -pg) -endif() - +# See https://stackoverflow.com/questions/47213356/ for discussion +cmake_policy(SET CMP0067 NEW) ################################## # Checks for compiler capabilities @@ -78,7 +75,7 @@ check_cxx_source_compiles( " HPCOMBI_CONSTEXPR_FUN_ARGS) -check_cxx_compiler_flag('-mavx' HPCOMBI_HAVE_FLAG_AVX) +# check_cxx_compiler_flag('-mavx' HPCOMBI_HAVE_FLAG_AVX) #check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_NATIVE) #check_cxx_compiler_flag('-mavx2' HPCOMBI_HAVE_FLAG_AVX2) #check_cxx_compiler_flag('-mavx512bw' HPCOMBI_HAVE_FLAG_AVX512BW) @@ -113,7 +110,7 @@ check_cxx_compiler_flag('-mavx' HPCOMBI_HAVE_FLAG_AVX) # endif() # endforeach() -add_compile_options(-mavx -mtune=native -funroll-loops -flax-vector-conversions) +add_compile_options(-mtune=native -funroll-loops -flax-vector-conversions) ################### # Project Structure diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 5eb5e5e0..f5ea53fa 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -21,6 +21,7 @@ #include "test_main.hpp" #include +// TODO uncomment // #define TEST_AGREES(ref, fun) \ // BOOST_FIXTURE_TEST_CASE(Epu8::agrees_##fun, Fix) { \ // for (auto x : v) \ From cfa7d030e820b0e5b5620316d81fbfbbf7484d21 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Thu, 26 Oct 2023 13:33:38 +0100 Subject: [PATCH 012/113] Add some static cast for compilation on arm with gcc --- include/bmat8_impl.hpp | 2 +- include/perm16_impl.hpp | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index 913e94f2..3b2b0da0 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -253,7 +253,7 @@ constexpr std::array masks {{ static const epu8 shiftres {1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80}; inline void update_bitset(epu8 block, epu8 &set0, epu8 &set1) { - static const epu8 bound08 = simde_mm_slli_epi32(epu8id, 3); // shift for *8 + static const epu8 bound08 = simde_mm_slli_epi32(static_cast(epu8id), 3); // shift for *8 static const epu8 bound18 = bound08 + Epu8(0x80); for (size_t slice8 = 0; slice8 < 16; slice8++) { epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */ diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 0408fc4d..b2d74a45 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -129,7 +129,8 @@ inline Transf16::Transf16(uint64_t compressed) { } inline Transf16::operator uint64_t() const { - epu8 res = static_cast(simde_mm_slli_epi32(v, 4)); + epu8 res = + static_cast(simde_mm_slli_epi32(static_cast(v), 4)); res = HPCombi::permuted(res, hilo_exchng) + v; return simde_mm_extract_epi64(res, 0); } @@ -218,7 +219,9 @@ inline Perm16 Perm16::inverse_sort() const { // G++-7 compile this shift by 3 additions. // epu8 res = (v << 4) + one().v; // I call directly the shift intrinsic - epu8 res = static_cast(simde_mm_slli_epi32(v, 4)) + one().v; + epu8 res = static_cast( + simde_mm_slli_epi32(static_cast(v), 4)) + + one().v; res = sorted(res) & Epu8(0x0F); return res; } From 9b861982e2a018f4c2dc972f825dbf4c5776473f Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Thu, 26 Oct 2023 15:06:08 +0100 Subject: [PATCH 013/113] ci: remove apt install of catch2 --- .github/workflows/runtests.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index fb826f69..19c1e79e 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -28,12 +28,11 @@ jobs: sudo apt-get --yes update sudo apt-get install --yes ccache sudo apt-get install --yes libbenchmark-dev - sudo apt-get install --yes catch2 - name: "Build + run HPCombi tests . . ." run: | mkdir build cd build cmake -DBUILD_TESTING=1 -DCMAKE_BUILD_TYPE=Release .. + cd tests make - make test - + ./test_all From cd485f71d58ff812ad27896192d8b4a2d16901a9 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Thu, 26 Oct 2023 15:55:53 +0100 Subject: [PATCH 014/113] Test for and cond. 
use -march/tune=native --- CMakeLists.txt | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d48153d5..adb8bf25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,8 +75,8 @@ check_cxx_source_compiles( " HPCOMBI_CONSTEXPR_FUN_ARGS) -# check_cxx_compiler_flag('-mavx' HPCOMBI_HAVE_FLAG_AVX) -#check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_NATIVE) +check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_ARCH_NATIVE) +check_cxx_compiler_flag('-mtune=native' HPCOMBI_HAVE_FLAG_TUNE_NATIVE) #check_cxx_compiler_flag('-mavx2' HPCOMBI_HAVE_FLAG_AVX2) #check_cxx_compiler_flag('-mavx512bw' HPCOMBI_HAVE_FLAG_AVX512BW) @@ -110,7 +110,14 @@ check_cxx_source_compiles( # endif() # endforeach() -add_compile_options(-mtune=native -funroll-loops -flax-vector-conversions) +add_compile_options(-funroll-loops -flax-vector-conversions) +if (HPCOMBI_HAVE_FLAG_ARCH_NATIVE) + add_compile_options(-march=native) +endif() +if (HPCOMBI_HAVE_FLAG_TUNE_NATIVE) + add_compile_options(-mtune=native) +endif() + ################### # Project Structure From fe873bd494adfd2ffad76d79ada2c04d0e5adb79 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 12:18:34 +0100 Subject: [PATCH 015/113] Arch checking --- include/arch.hpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 include/arch.hpp diff --git a/include/arch.hpp b/include/arch.hpp new file mode 100644 index 00000000..239da6fb --- /dev/null +++ b/include/arch.hpp @@ -0,0 +1,23 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HPCOMBI_ARCH_HPP_INCLUDED +#define HPCOMBI_ARCH_HPP_INCLUDED + +#if defined(SIMDE_ARCH_AMD64) && !defined(SIMDE_ARCH_X86_SSE4_1) +# error("x86_64 architecture without required compiler flags for SSE-4.1 instruction set. Did you forget to provide the flag -march=(native,avx,sse4.1) flag ?") +#endif + +#endif // HPCOMBI_ARCH_HPP_INCLUDED From e760ee8c6e6d4e32f83c3b40e914b0f73d1ed1ae Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Fri, 27 Oct 2023 12:19:03 +0100 Subject: [PATCH 016/113] benchmarks: convert to catch2 --- CMakeLists.txt | 4 + benchmark/CMakeLists.txt | 27 +++-- benchmark/bench_bmat8.cpp | 212 +++++++++++++++++++------------------- 3 files changed, 131 insertions(+), 112 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index adb8bf25..96b8a15e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,10 @@ check_cxx_source_compiles( check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_ARCH_NATIVE) check_cxx_compiler_flag('-mtune=native' HPCOMBI_HAVE_FLAG_TUNE_NATIVE) +# TODO check for -funroll-loops + -flax-vector-constexpr +# TODO only check for and set the flags required to make HPCombi work + + #check_cxx_compiler_flag('-mavx2' HPCOMBI_HAVE_FLAG_AVX2) #check_cxx_compiler_flag('-mavx512bw' HPCOMBI_HAVE_FLAG_AVX512BW) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index d0a43bad..61615ee8 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -13,8 +13,24 @@ # http://www.gnu.org/licenses/ # #****************************************************************************# -find_package(PkgConfig REQUIRED) -pkg_check_modules(BENCHMARK REQUIRED IMPORTED_TARGET benchmark) +find_package(Catch2 3 QUIET) + +if(NOT Catch2_FOUND) + message(STATUS "Cloning Catch2 from GIT_REPOSITORY https://github.com/catchorg/Catch2.git") + Include(FetchContent) + + FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.4.0 # or a later release + ) + + FetchContent_MakeAvailable(Catch2) +else() + message(STATUS "Using system Catch2") +endif() + +message(STATUS "Building tests") include_directories( ${CMAKE_SOURCE_DIR}/include @@ -26,10 +42,5 @@ set(benchmark_src foreach(f ${benchmark_src}) get_filename_component(benchName ${f} NAME_WE) add_executable (${benchName} ${f}) - target_include_directories(${benchName} PUBLIC ${BENCHMARK_INCLUDE_DIRS}) - target_compile_options(${benchName} PUBLIC ${BENCHMARK_CFLAGS_OTHER}) - target_link_libraries(${benchName} benchmark pthread) - # install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/${benchName} - # DESTINATION bin - # RENAME ${CMAKE_PROJECT_NAME}-${benchName}) + target_link_libraries(${benchName} PRIVATE Catch2::Catch2WithMain) endforeach(f) diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index 57c82a53..c15010d1 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -13,144 +13,148 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// +#include #include -#include -#include -#include +#include + +#include +#include -#include "compilerinfo.hpp" -#include "cpu_x86_impl.hpp" #include "bench_fixture.hpp" +// #include "compilerinfo.hpp" +// #include "cpu_x86_impl.hpp" #include "bmat8.hpp" -using namespace FeatureDetector; -using namespace std; -using HPCombi::epu8; +// using namespace FeatureDetector; +// using namespace std; +// using HPCombi::epu8; -// const Fix_perm16 sample; -const std::string SIMDSET = cpu_x86::get_highest_SIMD(); -const std::string PROCID = cpu_x86::get_proc_string(); +namespace HPCombi { -using namespace HPCombi; +// const Fix_perm16 sample; +const std::string PROCID = "TODO"; std::vector make_sample(size_t n) { - std::vector res {}; - for (size_t i=0; i < n; i++) { + std::vector res{}; + for (size_t i = 0; i < n; i++) { res.push_back(BMat8::random()); } return res; } std::vector> make_pair_sample(size_t n) { - std::vector> res {}; - for (size_t i=0; i < n; i++) 
{ - res.push_back(make_pair(BMat8::random(),BMat8::random())); + std::vector> res{}; + for (size_t i = 0; i < n; i++) { + res.push_back(std::make_pair(BMat8::random(), BMat8::random())); } return res; } std::vector sample = make_sample(1000); std::vector> pair_sample = make_pair_sample(1000); -// std::vector sample = {BMat8::one()}; -// std::vector sample = {BMat8(0)}; - -//################################################################################## -template -void myBench(const string &name, TF pfunc, Sample &sample) { - string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; - benchmark::RegisterBenchmark(fullname.c_str(), - [pfunc, sample](benchmark::State& st) { - for (auto _ : st) { - for (auto elem : sample) { - benchmark::DoNotOptimize(pfunc(elem)); - } - } - }); -} - -#define myBenchMeth(descr, methname, smp) \ - myBench(descr, [](BMat8 p) { return p.methname(); }, smp) -//################################################################################## -int Bench_row_space_size() { - myBenchMeth("row_space_size_ref", row_space_size_ref, sample); - myBenchMeth("row_space_size_bitset", row_space_size_bitset, sample); - myBenchMeth("row_space_size_incl1", row_space_size_incl1, sample); - myBenchMeth("row_space_size_incl", row_space_size_incl, sample); - myBenchMeth("row_space_size", row_space_size, sample); - return 0; +// template +// void myBench(const std::string &name, TF pfunc, Sample &sample) { +// std::string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; +// benchmark::RegisterBenchmark( +// fullname.c_str(), [pfunc, sample](benchmark::State &st) { +// for (auto _ : st) { +// for (auto elem : sample) { +// benchmark::DoNotOptimize(pfunc(elem)); +// } +// } +// }); +// } + +#define BENCHMARK_MEM_FN(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &elem : sample) { \ + REQUIRE_NOTHROW(elem.mem_fn()); \ + } \ + return true; \ + }; + +#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &pair : sample) { \ + REQUIRE_NOTHROW( \ + std::make_pair(pair.first.mem_fn(), pair.second.mem_fn())); \ + } \ + return true; \ + }; + +TEST_CASE("Row space size benchmarks 1000 BMat8", "[BMat8][000]") { + BENCHMARK_MEM_FN(row_space_size_ref, sample); + BENCHMARK_MEM_FN(row_space_size_bitset, sample); + BENCHMARK_MEM_FN(row_space_size_incl1, sample); + BENCHMARK_MEM_FN(row_space_size_incl, sample); + BENCHMARK_MEM_FN(row_space_size, sample); } -//################################################################################## -int Bench_transpose() { - myBenchMeth("transpose_knuth", transpose, sample); - myBenchMeth("transpose_mask", transpose_mask, sample); - myBenchMeth("transpose_maskd", transpose_maskd, sample); - return 0; +TEST_CASE("Transpose benchmarks 1000 BMat8", "[BMat8][000]") { + BENCHMARK_MEM_FN(transpose, sample); + BENCHMARK_MEM_FN(transpose_mask, sample); + BENCHMARK_MEM_FN(transpose_maskd, sample); } -int Bench_transpose2() { - myBench("transpose2_knuth", - [](std::pair p) { - return make_pair(p.first.transpose(), - p.second.transpose()); - }, pair_sample); - myBench("transpose2_mask", - [](std::pair p) { - return make_pair(p.first.transpose_mask(), - p.second.transpose_mask()); - }, pair_sample); - myBench("transpose2_maskd", - [](std::pair p) { - return make_pair(p.first.transpose_maskd(), - p.second.transpose_maskd()); - }, pair_sample); - myBench("transpose2", - [](std::pair p) { - BMat8::transpose2(p.first, p.second); - return p; - }, pair_sample); - return 0; +TEST_CASE("Transpose pairs benchmarks 1000 BMat8", 
"[BMat8][000]") { + BENCHMARK_MEM_FN_PAIR(transpose, pair_sample); + BENCHMARK_MEM_FN_PAIR(transpose_mask, pair_sample); + BENCHMARK_MEM_FN_PAIR(transpose_maskd, pair_sample); + BENCHMARK("transpose2") { + for (auto &pair : pair_sample) { + REQUIRE_NOTHROW(BMat8::transpose2(pair.first, pair.second)); + } + return true; + }; } +/* + + int Bench_row_space_included() { - myBench("row_space_incl_ref", - [](std::pair p) { - return p.first.row_space_included_ref(p.second); - }, pair_sample); - myBench("row_space_incl_bitset", - [](std::pair p) { - return p.first.row_space_included_bitset(p.second); - }, pair_sample); - myBench("row_space_incl_rotate", - [](std::pair p) { - return p.first.row_space_included(p.second); - }, pair_sample); + myBench( + "row_space_incl_ref", + [](std::pair p) { + return p.first.row_space_included_ref(p.second); + }, + pair_sample); + myBench( + "row_space_incl_bitset", + [](std::pair p) { + return p.first.row_space_included_bitset(p.second); + }, + pair_sample); + myBench( + "row_space_incl_rotate", + [](std::pair p) { + return p.first.row_space_included(p.second); + }, + pair_sample); return 0; } int Bench_row_space_included2() { - myBench("row_space_incl2_rotate", - [](std::pair p) { - return p.first.row_space_included(p.second) - == p.second.row_space_included(p.first); - }, pair_sample); - myBench("row_space_incl2", - [](std::pair p) { - auto res = BMat8::row_space_included2( - p.first, p.second, p.second, p.first); - return res.first == res.second; - }, pair_sample); + myBench( + "row_space_incl2_rotate", + [](std::pair p) { + return p.first.row_space_included(p.second) == + p.second.row_space_included(p.first); + }, + pair_sample); + myBench( + "row_space_incl2", + [](std::pair p) { + auto res = BMat8::row_space_included2(p.first, p.second, +p.second, p.first); return res.first == res.second; + }, + pair_sample); return 0; } -auto dummy = { - Bench_row_space_size(), - Bench_transpose(), - Bench_transpose2(), - Bench_row_space_included(), - Bench_row_space_included2() -}; - -BENCHMARK_MAIN(); +auto dummy = {Bench_row_space_size(), Bench_transpose(), +Bench_transpose2(), Bench_row_space_included(), +Bench_row_space_included2()}; +*/ +} // namespace HPCombi From 494998579ceb4a7b784819142b28233a5310781b Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Fri, 27 Oct 2023 12:37:57 +0100 Subject: [PATCH 017/113] Fixup bench -> catch2 --- benchmark/bench_bmat8.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index c15010d1..0e0e5dc7 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -46,7 +46,8 @@ std::vector make_sample(size_t n) { std::vector> make_pair_sample(size_t n) { std::vector> res{}; for (size_t i = 0; i < n; i++) { - res.push_back(std::make_pair(BMat8::random(), BMat8::random())); + auto x = BMat8::random(); + res.push_back(std::make_pair(x, x)); } return res; } @@ -70,7 +71,7 @@ std::vector> pair_sample = make_pair_sample(1000); #define BENCHMARK_MEM_FN(mem_fn, sample) \ BENCHMARK(#mem_fn) { \ for (auto &elem : sample) { \ - REQUIRE_NOTHROW(elem.mem_fn()); \ + volatile auto dummy = elem.mem_fn(); \ } \ return true; \ }; @@ -78,8 +79,9 @@ std::vector> pair_sample = make_pair_sample(1000); #define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - REQUIRE_NOTHROW( \ - std::make_pair(pair.first.mem_fn(), pair.second.mem_fn())); \ + auto val = \ + std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \ + REQUIRE(val.first == val.second); \ } \ return true; \ }; @@ -98,13 +100,14 @@ TEST_CASE("Transpose benchmarks 1000 BMat8", "[BMat8][000]") { BENCHMARK_MEM_FN(transpose_maskd, sample); } -TEST_CASE("Transpose pairs benchmarks 1000 BMat8", "[BMat8][000]") { +TEST_CASE("Transpose pairs benchmarks 1000 BMat8", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR(transpose, pair_sample); BENCHMARK_MEM_FN_PAIR(transpose_mask, pair_sample); BENCHMARK_MEM_FN_PAIR(transpose_maskd, pair_sample); BENCHMARK("transpose2") { for (auto &pair : pair_sample) { - REQUIRE_NOTHROW(BMat8::transpose2(pair.first, pair.second)); + BMat8::transpose2(pair.first, pair.second); + REQUIRE(pair.first == pair.second); } return true; }; @@ -153,8 +156,5 @@ p.second, p.first); return res.first == res.second; return 0; } -auto dummy = {Bench_row_space_size(), Bench_transpose(), -Bench_transpose2(), Bench_row_space_included(), -Bench_row_space_included2()}; */ } // namespace HPCombi From 5b482745fa2611e92c2d4b71bfaf3b09a0c87cd1 Mon Sep 17 00:00:00 2001 From: reiniscirpons Date: Fri, 27 Oct 2023 13:53:44 +0100 Subject: [PATCH 018/113] Rollback some changes --- .circleci/config.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .circleci/config.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 00000000..2c4e58d7 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,32 @@ +# .circleci/config.yml + +version: 2.1 + +jobs: + test: + docker: + - image: reiniscirpons/hpcombi-env-arm64v8:v1 + resource_class: arm.medium + steps: + - checkout + - run: + name: Run cmake + command: | + mkdir build + cd build + cmake -DBUILD_TESTING=1 .. + - run: + name: Run make in tests folder + command: | + cd build/tests + make + - run: + name: Run tests + command: | + cd build/tests + ./test_all + +workflows: + test: + jobs: + - test From 82a9d251f20677a5b4eabda0b594c11a3e7669a4 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Fri, 27 Oct 2023 14:12:09 +0100 Subject: [PATCH 019/113] ci: use multiple versions of gcc --- .github/workflows/runtests.yml | 74 ++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml index 19c1e79e..e3ae23b4 100644 --- a/.github/workflows/runtests.yml +++ b/.github/workflows/runtests.yml @@ -1,5 +1,5 @@ name: CI -on: [pull_request, push, workflow_dispatch] +on: [pull_request, workflow_dispatch] concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -7,32 +7,62 @@ concurrency: jobs: tests: - name: "ubuntu-latest | g++" + name: "ubuntu-latest" timeout-minutes: 60 runs-on: ubuntu-latest - env: - CXX: "ccache g++" - CXXFLAGS: "-fdiagnostics-color" - CMAKE_CXX_COMPILER: "ccache g++" + strategy: + fail-fast: false + matrix: + sys: + - { compiler: 'gcc', version: '9'} + - { compiler: 'gcc', version: '10'} + - { compiler: 'gcc', version: '11'} + - { compiler: 'gcc', version: '12'} + - { compiler: 'clang', version: '11'} + - { compiler: 'clang', version: '12'} + - { compiler: 'clang', version: '13'} + - { compiler: 'clang', version: '14'} + - { compiler: 'clang', version: '15'} steps: - name: "Checkout HPCombi repo . . ." uses: actions/checkout@v3 - - name: "Setup ccache . . ." - uses: Chocobo1/setup-ccache-action@v1 - with: - update_packager_index: false - override_cache_key: ${{ runner.os }}-${{ github.ref }} - override_cache_key_fallback: ${{ runner.os }} + - name: "Setup compiler . . ." + if: ${{ matrix.sys.compiler == 'gcc' }} + run: | + GCC_VERSION=${{ matrix.sys.version }} + sudo apt-get --yes update + sudo apt-get install gcc-$GCC_VERSION + CC=gcc-$GCC_VERSION + echo "CC=$CC" >> $GITHUB_ENV + CXX=g++-$GCC_VERSION + echo "CXX=$CXX" >> $GITHUB_ENV + - name: "Setup compiler . . ." + if: ${{ matrix.sys.compiler == 'clang' }} + run: | + LLVM_VERSION=${{ matrix.sys.version }} + sudo apt-get --yes update + sudo apt-get install --yes clang++-$LLVM_VERSION + CC=clang-$LLVM_VERSION + echo "CC=$CC" >> $GITHUB_ENV + CXX=clang++-$LLVM_VERSION + echo "CXX=$CXX" >> $GITHUB_ENV - name: "Install dependencies . . ." run: | - sudo apt-get --yes update - sudo apt-get install --yes ccache - sudo apt-get install --yes libbenchmark-dev - - name: "Build + run HPCombi tests . . ." + sudo apt-get install --yes ccache + sudo apt-get install --yes libbenchmark-dev + - name: "Configure the HPCombi build . . ." + env: + CC: ${{ env.CC }} + CXX: ${{ env.CXX }} + run: | + mkdir build + cd build + cmake -DBUILD_TESTING=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX .. + - name: "Build HPCombi . . ." + run: | + cd build/tests + make -j4 + - name: "Run HPCombi tests . . ." run: | - mkdir build - cd build - cmake -DBUILD_TESTING=1 -DCMAKE_BUILD_TYPE=Release .. 
- cd tests - make - ./test_all + cd build/tests + ./test_all From e9b5eb3d972cb1f0e1de38e2b2130623bd31082b Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 14:46:56 +0100 Subject: [PATCH 020/113] Implemented is_permutation and permutation_of for non x86 arch --- include/epu.hpp | 33 +++++++++++++++++++++--- include/epu_impl.hpp | 40 ++++++++++++++++++++++++----- tests/test_epu.cpp | 61 +++++++++++++++++++++++++++++++++----------- 3 files changed, 109 insertions(+), 25 deletions(-) diff --git a/include/epu.hpp b/include/epu.hpp index d4fe20d6..a5809447 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -266,13 +266,27 @@ inline epu8 sort_perm(epu8 & a); inline epu8 sort8_perm(epu8 & a); -/** Find if a vector is a permutation of one other +/** @class common_permutation_of + * @brief Find if a vector is a permutation of one other * @details * @param a, b: two #HPCombi::epu8 * @returns a #HPCombi::epu8 * For each @f$0 \leq i < 16@f$, \c res[i] is the position in \c a of \c b[i] if \c b[i] appears exactly once in \c a, or undefined if not. */ +#ifdef SIMDE_X86_SSE4_2_NATIVE +/** @copydoc common_permutation_of + @par Algorithm: uses string matching cpmestrm intrisics + */ +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b); +#endif +/** @copydoc common_permutation_of + @par Algorithm: reference implementation + */ +inline epu8 permutation_of_ref(epu8 a, epu8 b); +/** @copydoc common_permutation_of + @par Algorithm: architecture dependant + */ inline epu8 permutation_of(epu8 a, epu8 b); /** A prime number good for hashing */ @@ -610,7 +624,7 @@ inline bool less(epu8 a, epu8 b); * @param k : the bound for the lexicographic comparison * @return a positive, negative or zero char depending on the result */ -inline char less_partial(epu8 a, epu8 b, int k); +inline int8_t less_partial(epu8 a, epu8 b, int k); /** return the index of the first zero entry or 16 if there are none * Only index smaller than bound are taken into account. @@ -685,7 +699,7 @@ inline bool is_transformation(epu8 v, const size_t k = 16); */ inline bool is_partial_permutation(epu8 v, const size_t k = 16); -/** Test for permutations +/** @class common_is_permutation * @details * @returns whether \c *this is a permutation. * @param v the vector to test @@ -700,6 +714,19 @@ inline bool is_partial_permutation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 3 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,3,1,4,6,7,8,9,10,11,12,13,14,15} */ +#ifdef SIMDE_X86_SSE4_2_NATIVE +/** @copydoc common_is_permutation + @par Algorithm: uses string matching cpmestri intrisics + */ +inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16); +#endif +/** @copydoc common_is_permutation + @par Algorithm: sort the vector and compare to identity + */ +inline bool is_permutation_sort(epu8 v, const size_t k = 16); +/** @copydoc common_is_permutation + @par Algorithm: architecture dependant + */ inline bool is_permutation(epu8 v, const size_t k = 16); } // namespace HPCombi diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index ad1128fd..52b48a89 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -89,7 +89,7 @@ inline bool less(epu8 a, epu8 b) { uint64_t diff = first_diff(a, b); return (diff < 16) && (a[diff] < b[diff]); } -inline char less_partial(epu8 a, epu8 b, int k) { +inline int8_t less_partial(epu8 a, epu8 b, int k) { uint64_t diff = first_diff(a, b, k); return (diff == 16) ? 
0 @@ -232,14 +232,13 @@ constexpr std::array inverting_rounds {{ // clang-format on }}; +#ifdef SIMDE_X86_SSE4_2_NATIVE #define FIND_IN_VECT \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK | \ SIMDE_SIDD_NEGATIVE_POLARITY) #define FIND_IN_VECT_COMPL \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) - -inline epu8 permutation_of(epu8 a, epu8 b) { -#ifdef SIMDE_X86_SSE4_2_NATIVE +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { epu8 res = -static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); for (epu8 round : inverting_rounds) { a = permuted(a, round); @@ -247,7 +246,22 @@ inline epu8 permutation_of(epu8 a, epu8 b) { res -= static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); } return res; +} +#endif + +inline epu8 permutation_of_ref(epu8 a, epu8 b) { + auto ar = as_array(a); + epu8 res {}; + for (size_t i = 0; i < 16; i++) { + res[i] = std::distance(ar.begin(), std::find(ar.begin(), ar.end(), b[i])); + } + return res; +} +inline epu8 permutation_of(epu8 a, epu8 b) { +#ifdef SIMDE_X86_SSE4_2_NATIVE + return permutation_of_cmpestrm(a, b); #else + return permutation_of_ref(a, b); #endif } @@ -446,20 +460,32 @@ inline bool is_partial_permutation(epu8 v, const size_t k) { && (diff == 16 || diff < k); } -inline bool is_permutation(epu8 v, const size_t k) { +#ifdef SIMDE_X86_SSE4_2_NATIVE +inline bool is_permutation_cmpestri(epu8 v, const size_t k) { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x in Perm16::one()) and // (forall x in Perm16::one(), x in v) and // (v = Perm16::one() or last diff index < 16) -#ifdef SIMDE_X86_SSE4_2_NATIVE return _mm_cmpestri(epu8id, 16, v, 16, FIRST_NON_ZERO) == 16 && _mm_cmpestri(v, 16, epu8id, 16, FIRST_NON_ZERO) == 16 && (diff == 16 || diff < k); -#else +} +#endif + +inline bool is_permutation_sort(epu8 v, const size_t k) { + uint64_t diff = last_diff(v, epu8id, 16); return equal(sorted(v), epu8id) && (diff == 16 || diff < k); +} + +inline bool is_permutation(epu8 v, const size_t k) { +#ifdef SIMDE_X86_SSE4_2_NATIVE + return is_permutation_cmpestri(v, k); +#else + return is_permutation_sort(v, k); #endif } + } // namespace HPCombi namespace std { diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index f5ea53fa..2bb9778a 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -338,21 +338,33 @@ TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { } } -// TODO uncomment -// TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { -// REQUIRE(equal(permutation_of(epu8id, epu8id), epu8id)); -// REQUIRE(equal(permutation_of(Pa, Pa), epu8id)); -// REQUIRE(equal(permutation_of(epu8rev, epu8id), epu8rev)); -// REQUIRE(equal(permutation_of(epu8id, epu8rev), epu8rev)); -// REQUIRE(equal(permutation_of(epu8rev, epu8rev), epu8id)); -// REQUIRE(equal(permutation_of(epu8id, RP), RP)); -// const uint8_t FF = 0xff; -// REQUIRE(equal( -// (permutation_of(Pv, Pv) | -// epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0})), -// epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15}); -// } -// +TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { + REQUIRE(equal(permutation_of(epu8id, epu8id), epu8id)); + REQUIRE(equal(permutation_of(Pa, Pa), epu8id)); + REQUIRE(equal(permutation_of(epu8rev, epu8id), epu8rev)); + REQUIRE(equal(permutation_of(epu8id, epu8rev), epu8rev)); + REQUIRE(equal(permutation_of(epu8rev, epu8rev), epu8id)); + REQUIRE(equal(permutation_of(epu8id, RP), RP)); + const uint8_t FF = 0xff; + REQUIRE(equal( + (permutation_of(Pv, Pv) | + epu8{FF, FF, 
FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); +} +TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { + REQUIRE(equal(permutation_of_ref(epu8id, epu8id), epu8id)); + REQUIRE(equal(permutation_of_ref(Pa, Pa), epu8id)); + REQUIRE(equal(permutation_of_ref(epu8rev, epu8id), epu8rev)); + REQUIRE(equal(permutation_of_ref(epu8id, epu8rev), epu8rev)); + REQUIRE(equal(permutation_of_ref(epu8rev, epu8rev), epu8id)); + REQUIRE(equal(permutation_of_ref(epu8id, RP), RP)); + const uint8_t FF = 0xff; + REQUIRE(equal( + (permutation_of_ref(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); +} + TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { REQUIRE(equal(remove_dups(P1), P10)); REQUIRE(equal(remove_dups(P11), P10)); @@ -716,4 +728,23 @@ TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { REQUIRE(is_permutation(RP, 16)); REQUIRE(!is_permutation(RP, 15)); } + +#ifdef SIMDE_X86_SSE4_2_NATIVE +TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { + for (auto x : v) { + for (size_t i = 0; i < 16; i++) { + REQUIRE(is_permutation(x, i) == is_permutation_cmpestri(x, i)); + } + } +} +#endif + +TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][080]") { + for (auto x : v) { + for (size_t i = 0; i < 16; i++) { + REQUIRE(is_permutation(x, i) == is_permutation_sort(x, i)); + } + } +} + } // namespace HPCombi From 3c9d041f97c918ddf8742f89a37be6a33863d72f Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 14:52:58 +0100 Subject: [PATCH 021/113] Changed char -> int8_t --- include/epu.hpp | 2 +- include/epu_impl.hpp | 2 +- include/vect16.hpp | 2 +- include/vect_generic.hpp | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/epu.hpp b/include/epu.hpp index a5809447..bc024b44 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -622,7 +622,7 @@ inline bool less(epu8 a, epu8 b); /** Partial lexicographic comparison between two #HPCombi::epu8 * @param a, b : the vectors to compare * @param k : the bound for the lexicographic comparison - * @return a positive, negative or zero char depending on the result + * @return a positive, negative or zero int8_t depending on the result */ inline int8_t less_partial(epu8 a, epu8 b, int k); diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 52b48a89..79e7b8ea 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -93,7 +93,7 @@ inline int8_t less_partial(epu8 a, epu8 b, int k) { uint64_t diff = first_diff(a, b, k); return (diff == 16) ? 
0 - : static_cast(a[diff]) - static_cast(b[diff]); + : static_cast(a[diff]) - static_cast(b[diff]); } diff --git a/include/vect16.hpp b/include/vect16.hpp index c660392d..8f0eb8b6 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -83,7 +83,7 @@ struct alignas(16) Vect16 { }; bool operator<(const Vect16 &b) const { return less(v, b.v); }; - char less_partial(const Vect16 &b, int k) const { + int8_t less_partial(const Vect16 &b, int k) const { return HPCombi::less_partial(v, b.v, k); }; Vect16 permuted(const Vect16 &b) const { diff --git a/include/vect_generic.hpp b/include/vect_generic.hpp index e7496a80..594a14ea 100644 --- a/include/vect_generic.hpp +++ b/include/vect_generic.hpp @@ -93,9 +93,9 @@ template struct VectGeneric { return (diff != _Size) and v[diff] < u[diff]; } - char less_partial(const VectGeneric &u, int k) const { + int8_t less_partial(const VectGeneric &u, int k) const { uint64_t diff = first_diff(u, k); - return (diff == _Size) ? 0 : char(v[diff]) - char(u[diff]); + return (diff == _Size) ? 0 : int8_t(v[diff]) - int8_t(u[diff]); } VectGeneric permuted(const VectGeneric &u) const { From 71f704b33d74403487cac8de960ad33a100eff0b Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 15:36:48 +0100 Subject: [PATCH 022/113] Inverse find form PPerm16 and Perm16 --- include/perm16.hpp | 2 ++ include/perm16_impl.hpp | 5 ++--- tests/test_perm16.cpp | 13 +++++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/perm16.hpp b/include/perm16.hpp index 6eb8216a..fdc505d0 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -179,12 +179,14 @@ struct PPerm16 : public PTransf16 { * @f$O(n)@f$ algorithm using reference cast to arrays */ PPerm16 inverse_ref() const; +#ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_inverse_pperm * @par Algorithm: * @f$O(\log n)@f$ algorithm using some kind of vectorized dichotomic * search. 
*/ PPerm16 inverse_find() const; +#endif PPerm16 right_one() const { return PTransf16::right_one(); } PPerm16 left_one() const { return PTransf16::left_one(); } diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index b2d74a45..634c264a 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -143,13 +143,12 @@ inline PPerm16 PPerm16::inverse_ref() const { return res; } -inline PPerm16 PPerm16::inverse_find() const { #ifdef SIMDE_X86_SSE4_2_NATIVE +inline PPerm16 PPerm16::inverse_find() const { epu8 mask = _mm_cmpestrm(v, 16, one(), 16, FIND_IN_VECT); return permutation_of(v, one()) | mask; -#else -#endif } +#endif inline Perm16 Perm16::random(uint64_t n) { static std::random_device rd; diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 9acea11c..52e40a8b 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -364,9 +364,10 @@ TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { } } -// TODO uncomment -// TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, -// "[PPerm16][021]"); +#ifdef SIMDE_X86_SSE4_2_NATIVE +TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, + "[PPerm16][021]"); +#endif TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { REQUIRE(equal(PTransf16::one().fix_points_mask(), Epu8(FF))); @@ -437,9 +438,9 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { } } -// TODO uncomment -// TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_find, Plist, -// "[Perm16][028]"); + +TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_find, Plist, + "[Perm16][028]"); TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_pow, Plist, "[Perm16][029]"); TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_cycl, Plist, From f69a3afbf9bc8abf9b796d1e12c066db63da0d7a Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Fri, 27 Oct 2023 15:42:47 +0100 Subject: [PATCH 023/113] Formatting --- benchmark/bench_epu8.cpp | 93 ++++--- benchmark/bench_fixture.hpp | 33 ++- benchmark/bench_perm16.cpp | 79 +++--- benchmark/compilerinfo.hpp | 31 ++- benchmark/cpu_x86.h | 16 +- benchmark/cpu_x86_Linux.ipp | 18 +- benchmark/cpu_x86_Windows.ipp | 36 ++- benchmark/cpu_x86_impl.hpp | 194 +++++++-------- benchmark/cycle.cpp | 321 +++++++++++++------------ benchmark/inverse.cpp | 96 ++++---- benchmark/length.cpp | 66 ++--- benchmark/sort.cpp | 73 +++--- benchmark/sum.cpp | 54 +++-- examples/CF.cpp | 82 +++---- examples/RD.cpp | 135 ++++++----- examples/Renner.cpp | 169 ++++++------- examples/Trans.cpp | 67 +++--- examples/image.cpp | 110 ++++----- examples/stringmonoid.cpp | 25 +- examples/timer.h | 81 +++---- experiments/Perm32/demovect.cpp | 42 ++-- experiments/Perm32/demovect32_avx2.cpp | 38 +-- experiments/Perm32/perm32.cpp | 174 +++++++------- experiments/Perm32/perm64.cpp | 294 +++++++++++----------- experiments/Perm32/permbig.cpp | 296 ++++++++++++----------- experiments/Perm32/permdesc_red.cpp | 189 +++++++-------- include/arch.hpp | 2 +- include/bmat8.hpp | 15 +- include/bmat8_impl.hpp | 18 +- include/epu.hpp | 24 +- include/epu_impl.hpp | 92 ++++--- include/hpcombi.hpp | 2 +- include/perm16.hpp | 32 +-- include/perm16_impl.hpp | 11 +- include/perm_generic.hpp | 5 +- include/perm_generic_impl.hpp | 7 +- include/power.hpp | 10 +- include/vect16.hpp | 11 +- tests/test_epu.cpp | 28 +-- tests/test_perm16.cpp | 3 +- 40 files changed, 1521 insertions(+), 1551 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index b6f3866f..6db5c472 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -13,14 +13,14 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include #include -#include +#include #include +#include +#include "bench_fixture.hpp" #include "compilerinfo.hpp" #include "cpu_x86_impl.hpp" -#include "bench_fixture.hpp" using namespace FeatureDetector; using namespace std; @@ -31,7 +31,7 @@ const std::string SIMDSET = cpu_x86::get_highest_SIMD(); const std::string PROCID = cpu_x86::get_proc_string(); struct RoundsMask { - // commented out due to a bug in gcc + // commented out due to a bug in gcc /* constexpr */ RoundsMask() : arr() { for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) arr[i] = HPCombi::sorting_rounds[i] < HPCombi::epu8id; @@ -53,12 +53,12 @@ inline epu8 sort_pair(epu8 a) { inline epu8 sort_odd_even(epu8 a) { const uint8_t FF = 0xff; - static const epu8 even = - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; - static const epu8 odd = - {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; - static const epu8 mask = - {0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF}; + static const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, + 9, 8, 11, 10, 13, 12, 15, 14}; + static const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, + 7, 10, 9, 12, 11, 14, 13, 15}; + static const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, + 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { b = HPCombi::permuted(a, even); @@ -83,7 +83,7 @@ inline epu8 insertion_sort(epu8 p) { inline epu8 radix_sort(epu8 p) { auto &a = HPCombi::as_array(p); - std::array stat {}; + std::array stat{}; for (int i = 0; i < 16; i++) stat[a[i]]++; int c = 0; @@ -109,12 +109,11 @@ inline epu8 gen_sort(epu8 p) { return p; } - -template +template void myBench(const string &name, TF pfunc, 
Sample &sample) { string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; - benchmark::RegisterBenchmark(fullname.c_str(), - [pfunc, sample](benchmark::State& st) { + benchmark::RegisterBenchmark( + fullname.c_str(), [pfunc, sample](benchmark::State &st) { for (auto _ : st) { for (auto elem : sample) { benchmark::DoNotOptimize(pfunc(elem)); @@ -125,12 +124,14 @@ void myBench(const string &name, TF pfunc, Sample &sample) { static const epu8 bla = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; -#define MYBENCH(nm, fun, smp) \ - myBench(nm, [](epu8 p) { return fun(p); }, smp) -#define MYBENCH2(nm, fun, smp) \ - myBench(nm, [](epu8 p) { return fun(p,bla); }, smp) +#define MYBENCH(nm, fun, smp) \ + myBench( \ + nm, [](epu8 p) { return fun(p); }, smp) +#define MYBENCH2(nm, fun, smp) \ + myBench( \ + nm, [](epu8 p) { return fun(p, bla); }, smp) -//################################################################################## +// ################################################################################## int Bench_sort() { myBench("sort_std1_nolmbd", std_sort, sample.perms); myBench("sort_std2_nolmbd", std_sort, sample.perms); @@ -165,7 +166,7 @@ int Bench_sort() { return 0; } -//################################################################################## +// ################################################################################## int Bench_hsum() { myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms); myBench("hsum_ref2_nolmbd", HPCombi::horiz_sum_ref, sample.perms); @@ -182,7 +183,7 @@ int Bench_hsum() { MYBENCH("hsum_sum3_lmbd", HPCombi::horiz_sum3, sample.perms); return 0; } -//################################################################################## +// ################################################################################## int Bench_psum() { myBench("psum_ref1_nolmbd", HPCombi::partial_sums_ref, sample.perms); myBench("psum_ref2_nolmbd", HPCombi::partial_sums_ref, sample.perms); @@ -198,76 +199,73 @@ int Bench_psum() { return 0; } - -//################################################################################## +// ################################################################################## int Bench_hmax() { myBench("hmax_ref1_nolmbd", HPCombi::horiz_max_ref, sample.perms); myBench("hmax_ref2_nolmbd", HPCombi::horiz_max_ref, sample.perms); myBench("hmax_ref3_nolmbd", HPCombi::horiz_max_ref, sample.perms); myBench("hmax_ref_nolmbd", HPCombi::horiz_max_ref, sample.perms); -// myBench("hmax_gen_nolmbd", HPCombi::horiz_max_gen, sample.perms); + // myBench("hmax_gen_nolmbd", HPCombi::horiz_max_gen, sample.perms); myBench("hmax_max4_nolmbd", HPCombi::horiz_max4, sample.perms); myBench("hmax_max3_nolmbd", HPCombi::horiz_max3, sample.perms); MYBENCH("hmax_ref_lmbd", HPCombi::horiz_max_ref, sample.perms); -// MYBENCH("hmax_gen_lmbd", HPCombi::horiz_max_gen, sample.perms); + // MYBENCH("hmax_gen_lmbd", HPCombi::horiz_max_gen, sample.perms); MYBENCH("hmax_max4_lmbd", HPCombi::horiz_max4, sample.perms); MYBENCH("hmax_max3_lmbd", HPCombi::horiz_max3, sample.perms); return 0; } -//################################################################################## +// ################################################################################## int Bench_pmax() { myBench("pmax_ref1_nolmbd", HPCombi::partial_max_ref, sample.perms); myBench("pmax_ref2_nolmbd", HPCombi::partial_max_ref, sample.perms); myBench("pmax_ref3_nolmbd", HPCombi::partial_max_ref, sample.perms); myBench("pmax_ref_nolmbd", HPCombi::partial_max_ref, 
sample.perms); -// myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, sample.perms); + // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, sample.perms); myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, sample.perms); MYBENCH("pmax_ref_lmbd", HPCombi::partial_max_ref, sample.perms); -// MYBENCH("pmax_gen_lmbd", HPCombi::partial_max_gen, sample.perms); + // MYBENCH("pmax_gen_lmbd", HPCombi::partial_max_gen, sample.perms); MYBENCH("pmax_rnd_lmbd", HPCombi::partial_max_round, sample.perms); return 0; } - -//################################################################################## +// ################################################################################## int Bench_hmin() { myBench("hmin_ref1_nolmbd", HPCombi::horiz_min_ref, sample.perms); myBench("hmin_ref2_nolmbd", HPCombi::horiz_min_ref, sample.perms); myBench("hmin_ref3_nolmbd", HPCombi::horiz_min_ref, sample.perms); myBench("hmin_ref_nolmbd", HPCombi::horiz_min_ref, sample.perms); -// myBench("hmin_gen_nolmbd", HPCombi::horiz_min_gen, sample.perms); + // myBench("hmin_gen_nolmbd", HPCombi::horiz_min_gen, sample.perms); myBench("hmin_min4_nolmbd", HPCombi::horiz_min4, sample.perms); myBench("hmin_min3_nolmbd", HPCombi::horiz_min3, sample.perms); MYBENCH("hmin_ref_lmbd", HPCombi::horiz_min_ref, sample.perms); -// MYBENCH("hmin_gen_lmbd", HPCombi::horiz_min_gen, sample.perms); + // MYBENCH("hmin_gen_lmbd", HPCombi::horiz_min_gen, sample.perms); MYBENCH("hmin_min4_lmbd", HPCombi::horiz_min4, sample.perms); MYBENCH("hmin_min3_lmbd", HPCombi::horiz_min3, sample.perms); return 0; } -//################################################################################## +// ################################################################################## int Bench_pmin() { myBench("pmin_ref1_nolmbd", HPCombi::partial_min_ref, sample.perms); myBench("pmin_ref2_nolmbd", HPCombi::partial_min_ref, sample.perms); myBench("pmin_ref3_nolmbd", HPCombi::partial_min_ref, sample.perms); myBench("pmin_ref_nolmbd", HPCombi::partial_min_ref, sample.perms); -// myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, sample.perms); + // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, sample.perms); myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, sample.perms); MYBENCH("pmin_ref_lmbd", HPCombi::partial_min_ref, sample.perms); -// MYBENCH("pmin_gen_lmbd", HPCombi::partial_min_gen, sample.perms); + // MYBENCH("pmin_gen_lmbd", HPCombi::partial_min_gen, sample.perms); MYBENCH("pmin_rnd_lmbd", HPCombi::partial_min_round, sample.perms); return 0; } - -//################################################################################## +// ################################################################################## int Bench_eval() { myBench("eval_ref1_nolmbd", HPCombi::eval16_ref, sample.perms); myBench("eval_ref2_nolmbd", HPCombi::eval16_ref, sample.perms); @@ -287,7 +285,7 @@ int Bench_eval() { return 0; } -//################################################################################## +// ################################################################################## int Bench_first_diff() { MYBENCH2("firstDiff_ref_lmbd", HPCombi::first_diff_ref, sample.perms); MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, sample.perms); @@ -295,7 +293,7 @@ int Bench_first_diff() { return 0; } -//################################################################################## +// ################################################################################## int Bench_last_diff() { 
MYBENCH2("lastDiff_ref_lmbd", HPCombi::last_diff_ref, sample.perms); MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, sample.perms); @@ -303,17 +301,8 @@ int Bench_last_diff() { return 0; } -auto dummy = { - Bench_sort(), - Bench_hsum(), - Bench_psum(), - Bench_hmax(), - Bench_pmax(), - Bench_hmin(), - Bench_pmin(), - Bench_eval(), - Bench_first_diff(), - Bench_last_diff() -}; +auto dummy = {Bench_sort(), Bench_hsum(), Bench_psum(), Bench_hmax(), + Bench_pmax(), Bench_hmin(), Bench_pmin(), Bench_eval(), + Bench_first_diff(), Bench_last_diff()}; BENCHMARK_MAIN(); diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index b4538387..c0dd8378 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -22,39 +22,34 @@ using HPCombi::epu8; constexpr uint_fast64_t size = 1000; // constexpr uint_fast64_t repeat = 100; - std::vector rand_epu8(size_t sz) { std::vector res; - for (size_t i=0; i < sz; i++) + for (size_t i = 0; i < sz; i++) res.push_back(HPCombi::random_epu8(256)); return res; } inline epu8 rand_perm() { - epu8 res = HPCombi::epu8id; - auto &ar = HPCombi::as_array(res); - std::random_shuffle(ar.begin(), ar.end()); - return res; + epu8 res = HPCombi::epu8id; + auto &ar = HPCombi::as_array(res); + std::random_shuffle(ar.begin(), ar.end()); + return res; } std::vector rand_perms(int sz) { - std::vector res(sz); - std::srand(std::time(0)); - for (int i = 0; i < sz; i++) - res[i] = rand_perm(); - return res; + std::vector res(sz); + std::srand(std::time(0)); + for (int i = 0; i < sz; i++) + res[i] = rand_perm(); + return res; } class Fix_epu8 { -public : - Fix_epu8() : - vects(rand_epu8(size)), - perms(rand_perms(size)) - {} + public: + Fix_epu8() : vects(rand_epu8(size)), perms(rand_perms(size)) {} ~Fix_epu8() {} - const std::vector vects; - const std::vector perms; + const std::vector vects; + const std::vector perms; }; - #endif // BENCH_FIXTURE diff --git a/benchmark/bench_perm16.cpp b/benchmark/bench_perm16.cpp index ff4d3910..851f792e 100644 --- a/benchmark/bench_perm16.cpp +++ b/benchmark/bench_perm16.cpp @@ -13,14 +13,14 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include #include -#include +#include #include +#include +#include "bench_fixture.hpp" #include "compilerinfo.hpp" #include "cpu_x86_impl.hpp" -#include "bench_fixture.hpp" #include "perm16.hpp" #include "perm_generic.hpp" @@ -35,17 +35,17 @@ const std::string SIMDSET = cpu_x86::get_highest_SIMD(); const std::string PROCID = cpu_x86::get_proc_string(); using HPCombi::epu8; -using HPCombi::Vect16; +using HPCombi::Perm16; using HPCombi::PTransf16; using HPCombi::Transf16; -using HPCombi::Perm16; +using HPCombi::Vect16; -//################################################################################## -template +// ################################################################################## +template void myBench(const string &name, TF pfunc, Sample &sample) { string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; - benchmark::RegisterBenchmark(fullname.c_str(), - [pfunc, sample](benchmark::State& st) { + benchmark::RegisterBenchmark( + fullname.c_str(), [pfunc, sample](benchmark::State &st) { for (auto _ : st) { for (auto elem : sample) { benchmark::DoNotOptimize(pfunc(elem)); @@ -54,25 +54,36 @@ void myBench(const string &name, TF pfunc, Sample &sample) { }); } -#define myBenchLoop(descr, methname, smp) \ - myBench(descr, [](Perm16 p) { \ - for (int i = 0; i < 100; i++) p = p.methname(); 
\ - return p; }, smp) -#define myBenchMeth(descr, methname, smp) \ - myBench(descr, [](Perm16 p) { \ - for (int i = 0; i < 100; i++) benchmark::DoNotOptimize(p.methname()); \ - return p.methname(); }, smp) - -#define myBenchMeth2(descr, methname, smp) \ - myBench(descr, \ - [](Perm16 p) { \ - for (Perm16 p1 : smp) benchmark::DoNotOptimize(p.methname(p1)); \ - return 1; \ - }, smp); - - - -//################################################################################## +#define myBenchLoop(descr, methname, smp) \ + myBench( \ + descr, \ + [](Perm16 p) { \ + for (int i = 0; i < 100; i++) \ + p = p.methname(); \ + return p; \ + }, \ + smp) +#define myBenchMeth(descr, methname, smp) \ + myBench( \ + descr, \ + [](Perm16 p) { \ + for (int i = 0; i < 100; i++) \ + benchmark::DoNotOptimize(p.methname()); \ + return p.methname(); \ + }, \ + smp) + +#define myBenchMeth2(descr, methname, smp) \ + myBench( \ + descr, \ + [](Perm16 p) { \ + for (Perm16 p1 : smp) \ + benchmark::DoNotOptimize(p.methname(p1)); \ + return 1; \ + }, \ + smp); + +// ################################################################################## int Bench_inverse() { myBenchMeth("inverse_ref1", inverse_ref, sample.perms); myBenchMeth("inverse_ref2", inverse_ref, sample.perms); @@ -131,14 +142,8 @@ int Bench_rank() { return 0; } -auto dummy = { - Bench_inverse(), - Bench_lehmer(), - Bench_length(), - Bench_nb_descents(), - Bench_nb_cycles(), - Bench_left_weak_leq(), - Bench_rank() -}; +auto dummy = {Bench_inverse(), Bench_lehmer(), Bench_length(), + Bench_nb_descents(), Bench_nb_cycles(), Bench_left_weak_leq(), + Bench_rank()}; BENCHMARK_MAIN(); diff --git a/benchmark/compilerinfo.hpp b/benchmark/compilerinfo.hpp index 4438012b..0728fa45 100644 --- a/benchmark/compilerinfo.hpp +++ b/benchmark/compilerinfo.hpp @@ -1,40 +1,39 @@ #include - + std::string ver_string(std::string a, int b, int c, int d) { - std::ostringstream ss; - ss << a << '-' << b << '.' << c << '.' << d; - return ss.str(); + std::ostringstream ss; + ss << a << '-' << b << '.' << c << '.' 
<< d; + return ss.str(); } std::string ver_string(std::string a, int b) { - std::ostringstream ss; - ss << a << '-' << b; - return ss.str(); + std::ostringstream ss; + ss << a << '-' << b; + return ss.str(); } - std::string CXX_VER = #if defined(__clang__) - ver_string("clang", __clang_major__, __clang_minor__, __clang_patchlevel__); + ver_string("clang", __clang_major__, __clang_minor__, __clang_patchlevel__); #elif defined(__ICC) || defined(__INTEL_COMPILER) - ver_string("icc", __INTEL_COMPILER); + ver_string("icc", __INTEL_COMPILER); #elif defined(__GNUC__) || defined(__GNUG__) - ver_string("g++", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); + ver_string("g++", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); #elif defined(__HP_cc) || defined(__HP_aCC) - ver_string("hp", __HP_cc); + ver_string("hp", __HP_cc); #elif defined(__IBMC__) || defined(__IBMCPP__) - ver_string("xl", __IBMCPP__); + ver_string("xl", __IBMCPP__); #elif defined(_MSC_VER) - ver_string("mvs", _MSC_FULL_VER); + ver_string("mvs", _MSC_FULL_VER); #elif defined(__PGI) - ver_string("pgi", __PGIC__, __PGIC_MINOR, __PGIC_PATCHLEVEL__); + ver_string("pgi", __PGIC__, __PGIC_MINOR, __PGIC_PATCHLEVEL__); #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) - ver_string("sun", __SUNPRO_CC); + ver_string("sun", __SUNPRO_CC); #endif diff --git a/benchmark/cpu_x86.h b/benchmark/cpu_x86.h index fa242736..abe673cb 100644 --- a/benchmark/cpu_x86.h +++ b/benchmark/cpu_x86.h @@ -1,9 +1,9 @@ /* cpu_x86.h - * + * * Author : Alexander J. Yee * Date Created : 04/12/2014 * Last Modified : 04/12/2014 - * + * */ #pragma once @@ -16,12 +16,12 @@ // Dependencies #include #include -namespace FeatureDetector{ +namespace FeatureDetector { //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -struct cpu_x86{ +struct cpu_x86 { // Vendor bool Vendor_AMD; bool Vendor_Intel; @@ -71,7 +71,7 @@ struct cpu_x86{ bool HW_AVX512_IFMA; bool HW_AVX512_VBMI; -public: + public: cpu_x86(); void detect_host(); @@ -83,8 +83,8 @@ struct cpu_x86{ static std::string get_proc_string(); static std::string get_highest_SIMD(); -private: - static void print(const char* label, bool yes); + private: + static void print(const char *label, bool yes); static bool detect_OS_x64(); static bool detect_OS_AVX(); @@ -94,5 +94,5 @@ struct cpu_x86{ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -} +} // namespace FeatureDetector #endif diff --git a/benchmark/cpu_x86_Linux.ipp b/benchmark/cpu_x86_Linux.ipp index d192e2d6..70a2a2e9 100644 --- a/benchmark/cpu_x86_Linux.ipp +++ b/benchmark/cpu_x86_Linux.ipp @@ -1,9 +1,9 @@ /* cpu_x86_Linux.ipp - * + * * Author : Alexander J. 
Yee * Date Created : 04/12/2014 * Last Modified : 04/12/2014 - * + * */ //////////////////////////////////////////////////////////////////////////////// @@ -11,26 +11,26 @@ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Dependencies -#include #include "cpu_x86.h" -namespace FeatureDetector{ +#include +namespace FeatureDetector { //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -void cpu_x86::cpuid(int32_t out[4], int32_t x){ +void cpu_x86::cpuid(int32_t out[4], int32_t x) { __cpuid_count(x, 0, out[0], out[1], out[2], out[3]); } -uint64_t xgetbv(unsigned int index){ +uint64_t xgetbv(unsigned int index) { uint32_t eax, edx; __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index)); return ((uint64_t)edx << 32) | eax; } -#define _XCR_XFEATURE_ENABLED_MASK 0 +#define _XCR_XFEATURE_ENABLED_MASK 0 //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Detect 64-bit -bool cpu_x86::detect_OS_x64(){ +bool cpu_x86::detect_OS_x64() { // We only support x64 on Linux. return true; } @@ -38,4 +38,4 @@ bool cpu_x86::detect_OS_x64(){ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -} +} // namespace FeatureDetector diff --git a/benchmark/cpu_x86_Windows.ipp b/benchmark/cpu_x86_Windows.ipp index ad0ad414..5acab817 100644 --- a/benchmark/cpu_x86_Windows.ipp +++ b/benchmark/cpu_x86_Windows.ipp @@ -1,9 +1,9 @@ /* cpu_x86_Windows.ipp - * + * * Author : Alexander J. Yee * Date Created : 04/12/2014 * Last Modified : 04/12/2014 - * + * */ //////////////////////////////////////////////////////////////////////////////// @@ -11,35 +11,29 @@ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Dependencies +#include "cpu_x86.h" #include #include -#include "cpu_x86.h" -namespace FeatureDetector{ +namespace FeatureDetector { //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -void cpu_x86::cpuid(int32_t out[4], int32_t x){ - __cpuidex(out, x, 0); -} -__int64 xgetbv(unsigned int x){ - return _xgetbv(x); -} +void cpu_x86::cpuid(int32_t out[4], int32_t x) { __cpuidex(out, x, 0); } +__int64 xgetbv(unsigned int x) { return _xgetbv(x); } //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -// Detect 64-bit - Note that this snippet of code for detecting 64-bit has been copied from MSDN. 
-typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); -BOOL IsWow64() -{ +// Detect 64-bit - Note that this snippet of code for detecting 64-bit has been +// copied from MSDN. +typedef BOOL(WINAPI *LPFN_ISWOW64PROCESS)(HANDLE, PBOOL); +BOOL IsWow64() { BOOL bIsWow64 = FALSE; - LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS) GetProcAddress( + LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress( GetModuleHandle(TEXT("kernel32")), "IsWow64Process"); - if (NULL != fnIsWow64Process) - { - if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) - { + if (NULL != fnIsWow64Process) { + if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) { printf("Error Detecting Operating System.\n"); printf("Defaulting to 32-bit OS.\n\n"); bIsWow64 = FALSE; @@ -47,7 +41,7 @@ BOOL IsWow64() } return bIsWow64; } -bool cpu_x86::detect_OS_x64(){ +bool cpu_x86::detect_OS_x64() { #ifdef _M_X64 return true; #else @@ -58,4 +52,4 @@ bool cpu_x86::detect_OS_x64(){ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -} +} // namespace FeatureDetector diff --git a/benchmark/cpu_x86_impl.hpp b/benchmark/cpu_x86_impl.hpp index 4e4d999e..5c6e8167 100644 --- a/benchmark/cpu_x86_impl.hpp +++ b/benchmark/cpu_x86_impl.hpp @@ -1,9 +1,9 @@ /* cpu_x86.cpp - * + * * Author : Alexander J. Yee * Date Created : 04/12/2014 * Last Modified : 04/12/2014 - * + * */ //////////////////////////////////////////////////////////////////////////////// @@ -11,35 +11,36 @@ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // Dependencies +#include "cpu_x86.h" #include #include -#include "cpu_x86.h" //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -# if _WIN32 -# include "cpu_x86_Windows.ipp" -# elif defined(__GNUC__) || defined(__clang__) -# include "cpu_x86_Linux.ipp" -# else -# error "No cpuid intrinsic defined for compiler." -# endif +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \ + defined(_M_IX86) +#if _WIN32 +#include "cpu_x86_Windows.ipp" +#elif defined(__GNUC__) || defined(__clang__) +#include "cpu_x86_Linux.ipp" +#else +#error "No cpuid intrinsic defined for compiler." +#endif #else -# error "No cpuid intrinsic defined for processor architecture." +#error "No cpuid intrinsic defined for processor architecture." 
#endif -namespace FeatureDetector{ - using std::cout; - using std::endl; - using std::memcpy; - using std::memset; +namespace FeatureDetector { +using std::cout; +using std::endl; +using std::memcpy; +using std::memset; //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -void cpu_x86::print(const char* label, bool yes){ +void cpu_x86::print(const char *label, bool yes) { cout << label; cout << (yes ? "Yes" : "No") << endl; } @@ -47,10 +48,8 @@ void cpu_x86::print(const char* label, bool yes){ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -cpu_x86::cpu_x86(){ - memset(this, 0, sizeof(*this)); -} -bool cpu_x86::detect_OS_AVX(){ +cpu_x86::cpu_x86() { memset(this, 0, sizeof(*this)); } +bool cpu_x86::detect_OS_AVX() { // Copied from: http://stackoverflow.com/a/22521619/922184 bool avxSupported = false; @@ -61,22 +60,21 @@ bool cpu_x86::detect_OS_AVX(){ bool osUsesXSAVE_XRSTORE = (cpuInfo[2] & (1 << 27)) != 0; bool cpuAVXSuport = (cpuInfo[2] & (1 << 28)) != 0; - if (osUsesXSAVE_XRSTORE && cpuAVXSuport) - { + if (osUsesXSAVE_XRSTORE && cpuAVXSuport) { uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK); avxSupported = (xcrFeatureMask & 0x6) == 0x6; } return avxSupported; } -bool cpu_x86::detect_OS_AVX512(){ +bool cpu_x86::detect_OS_AVX512() { if (!detect_OS_AVX()) return false; uint64_t xcrFeatureMask = xgetbv(_XCR_XFEATURE_ENABLED_MASK); return (xcrFeatureMask & 0xe6) == 0xe6; } -std::string cpu_x86::get_vendor_string(){ +std::string cpu_x86::get_vendor_string() { int32_t CPUInfo[4]; char name[13]; @@ -89,27 +87,29 @@ std::string cpu_x86::get_vendor_string(){ return name; } -std::string cpu_x86::get_proc_string(){ +std::string cpu_x86::get_proc_string() { int32_t CPUInfo[4]; std::string name; cpuid(CPUInfo, 1); - //~ printf("stepping %d\n", CPUInfo[0] & 0xF); - //~ printf("model %d\n", (CPUInfo[0] >> 4) & 0xF); - //~ printf("family %d\n", (CPUInfo[0] >> 8) & 0xF); - //~ printf("processor type %d\n", (CPUInfo[0] >> 12) & 0x3); - //~ printf("extended model %d\n", (CPUInfo[0] >> 16) & 0xF); - //~ printf("extended family %d\n", (CPUInfo[0] >> 20) & 0xFF); - int family = ((CPUInfo[0] >> 8) & 0xF) + ((CPUInfo[0] >> 20) & 0xFF); - int model = ((CPUInfo[0] >> 4) & 0xF) + ((CPUInfo[0] >> 16) & 0xF); // Looks like model is not the right number - name = std::to_string( family ) + "." + std::to_string( model ); + //~ printf("stepping %d\n", CPUInfo[0] & 0xF); + //~ printf("model %d\n", (CPUInfo[0] >> 4) & 0xF); + //~ printf("family %d\n", (CPUInfo[0] >> 8) & 0xF); + //~ printf("processor type %d\n", (CPUInfo[0] >> 12) & 0x3); + //~ printf("extended model %d\n", (CPUInfo[0] >> 16) & 0xF); + //~ printf("extended family %d\n", (CPUInfo[0] >> 20) & 0xFF); + int family = ((CPUInfo[0] >> 8) & 0xF) + ((CPUInfo[0] >> 20) & 0xFF); + int model = + ((CPUInfo[0] >> 4) & 0xF) + + ((CPUInfo[0] >> 16) & 0xF); // Looks like model is not the right number + name = std::to_string(family) + "." 
+ std::to_string(model); return name; } //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -void cpu_x86::detect_host(){ +void cpu_x86::detect_host() { // OS Features OS_x64 = detect_OS_x64(); OS_AVX = detect_OS_AVX(); @@ -117,9 +117,9 @@ void cpu_x86::detect_host(){ // Vendor std::string vendor(get_vendor_string()); - if (vendor == "GenuineIntel"){ + if (vendor == "GenuineIntel") { Vendor_Intel = true; - }else if (vendor == "AuthenticAMD"){ + } else if (vendor == "AuthenticAMD") { Vendor_AMD = true; } @@ -131,54 +131,54 @@ void cpu_x86::detect_host(){ uint32_t nExIds = info[0]; // Detect Features - if (nIds >= 0x00000001){ + if (nIds >= 0x00000001) { cpuid(info, 0x00000001); - HW_MMX = (info[3] & ((int)1 << 23)) != 0; - HW_SSE = (info[3] & ((int)1 << 25)) != 0; - HW_SSE2 = (info[3] & ((int)1 << 26)) != 0; - HW_SSE3 = (info[2] & ((int)1 << 0)) != 0; + HW_MMX = (info[3] & ((int)1 << 23)) != 0; + HW_SSE = (info[3] & ((int)1 << 25)) != 0; + HW_SSE2 = (info[3] & ((int)1 << 26)) != 0; + HW_SSE3 = (info[2] & ((int)1 << 0)) != 0; - HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0; - HW_SSE41 = (info[2] & ((int)1 << 19)) != 0; - HW_SSE42 = (info[2] & ((int)1 << 20)) != 0; - HW_AES = (info[2] & ((int)1 << 25)) != 0; + HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0; + HW_SSE41 = (info[2] & ((int)1 << 19)) != 0; + HW_SSE42 = (info[2] & ((int)1 << 20)) != 0; + HW_AES = (info[2] & ((int)1 << 25)) != 0; - HW_AVX = (info[2] & ((int)1 << 28)) != 0; - HW_FMA3 = (info[2] & ((int)1 << 12)) != 0; + HW_AVX = (info[2] & ((int)1 << 28)) != 0; + HW_FMA3 = (info[2] & ((int)1 << 12)) != 0; HW_RDRAND = (info[2] & ((int)1 << 30)) != 0; } - if (nIds >= 0x00000007){ + if (nIds >= 0x00000007) { cpuid(info, 0x00000007); - HW_AVX2 = (info[1] & ((int)1 << 5)) != 0; + HW_AVX2 = (info[1] & ((int)1 << 5)) != 0; - HW_BMI1 = (info[1] & ((int)1 << 3)) != 0; - HW_BMI2 = (info[1] & ((int)1 << 8)) != 0; - HW_ADX = (info[1] & ((int)1 << 19)) != 0; - HW_MPX = (info[1] & ((int)1 << 14)) != 0; - HW_SHA = (info[1] & ((int)1 << 29)) != 0; - HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0; + HW_BMI1 = (info[1] & ((int)1 << 3)) != 0; + HW_BMI2 = (info[1] & ((int)1 << 8)) != 0; + HW_ADX = (info[1] & ((int)1 << 19)) != 0; + HW_MPX = (info[1] & ((int)1 << 14)) != 0; + HW_SHA = (info[1] & ((int)1 << 29)) != 0; + HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0; - HW_AVX512_F = (info[1] & ((int)1 << 16)) != 0; - HW_AVX512_CD = (info[1] & ((int)1 << 28)) != 0; - HW_AVX512_PF = (info[1] & ((int)1 << 26)) != 0; - HW_AVX512_ER = (info[1] & ((int)1 << 27)) != 0; - HW_AVX512_VL = (info[1] & ((int)1 << 31)) != 0; - HW_AVX512_BW = (info[1] & ((int)1 << 30)) != 0; - HW_AVX512_DQ = (info[1] & ((int)1 << 17)) != 0; - HW_AVX512_IFMA = (info[1] & ((int)1 << 21)) != 0; - HW_AVX512_VBMI = (info[2] & ((int)1 << 1)) != 0; + HW_AVX512_F = (info[1] & ((int)1 << 16)) != 0; + HW_AVX512_CD = (info[1] & ((int)1 << 28)) != 0; + HW_AVX512_PF = (info[1] & ((int)1 << 26)) != 0; + HW_AVX512_ER = (info[1] & ((int)1 << 27)) != 0; + HW_AVX512_VL = (info[1] & ((int)1 << 31)) != 0; + HW_AVX512_BW = (info[1] & ((int)1 << 30)) != 0; + HW_AVX512_DQ = (info[1] & ((int)1 << 17)) != 0; + HW_AVX512_IFMA = (info[1] & ((int)1 << 21)) != 0; + HW_AVX512_VBMI = (info[2] & ((int)1 << 1)) != 0; } - if (nExIds >= 0x80000001){ + if 
(nExIds >= 0x80000001) { cpuid(info, 0x80000001); - HW_x64 = (info[3] & ((int)1 << 29)) != 0; - HW_ABM = (info[2] & ((int)1 << 5)) != 0; - HW_SSE4a = (info[2] & ((int)1 << 6)) != 0; - HW_FMA4 = (info[2] & ((int)1 << 16)) != 0; - HW_XOP = (info[2] & ((int)1 << 11)) != 0; + HW_x64 = (info[3] & ((int)1 << 29)) != 0; + HW_ABM = (info[2] & ((int)1 << 5)) != 0; + HW_SSE4a = (info[2] & ((int)1 << 6)) != 0; + HW_FMA4 = (info[2] & ((int)1 << 16)) != 0; + HW_XOP = (info[2] & ((int)1 << 11)) != 0; } } -void cpu_x86::print() const{ +void cpu_x86::print() const { cout << "CPU Vendor:" << endl; print(" AMD = ", Vendor_AMD); print(" Intel = ", Vendor_Intel); @@ -241,39 +241,39 @@ void cpu_x86::print() const{ print(" Safe to use AVX512: ", HW_AVX512_F && OS_AVX512); cout << endl; } -void cpu_x86::print_host(){ +void cpu_x86::print_host() { cpu_x86 features; features.detect_host(); features.print(); } -std::string cpu_x86::get_highest_SIMD(){ +std::string cpu_x86::get_highest_SIMD() { cpu_x86 features; features.detect_host(); std::string out; - if(features.HW_AVX512_F) - out = "AVX512"; - else if(features.HW_AVX2) - out = "AVX2"; - else if(features.HW_AVX) - out = "AVX"; - else if(features.HW_SSE42) - out = "SSE42"; - else if(features.HW_SSE41) - out = "SSE41"; - else if(features.HW_SSE4a) - out = "SSE4a"; - else if(features.HW_SSSE3) - out = "SSSE3"; - else if(features.HW_SSE3) - out = "SSE3"; - else if(features.HW_SSE2) - out = "SSE2"; - else if(features.HW_SSE) - out = "SSE"; + if (features.HW_AVX512_F) + out = "AVX512"; + else if (features.HW_AVX2) + out = "AVX2"; + else if (features.HW_AVX) + out = "AVX"; + else if (features.HW_SSE42) + out = "SSE42"; + else if (features.HW_SSE41) + out = "SSE41"; + else if (features.HW_SSE4a) + out = "SSE4a"; + else if (features.HW_SSSE3) + out = "SSSE3"; + else if (features.HW_SSE3) + out = "SSE3"; + else if (features.HW_SSE2) + out = "SSE2"; + else if (features.HW_SSE) + out = "SSE"; return out; } //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -} +} // namespace FeatureDetector diff --git a/benchmark/cycle.cpp b/benchmark/cycle.cpp index 19787a8a..e4d5cf2c 100644 --- a/benchmark/cycle.cpp +++ b/benchmark/cycle.cpp @@ -30,121 +30,124 @@ using namespace std::chrono; using namespace HPCombi; uint8_t nb_cycles_ref(Perm16 p) { - Vect16 v{}; - int i, j, c = 0; - for (i = 0; i < 16; i++) { - if (v[i] == 0) { - for (j = i; v[j] == 0; j = p[j]) - v[j] = 1; - c++; + Vect16 v{}; + int i, j, c = 0; + for (i = 0; i < 16; i++) { + if (v[i] == 0) { + for (j = i; v[j] == 0; j = p[j]) + v[j] = 1; + c++; + } } - } - return c; + return c; } uint8_t nb_cycles(Perm16 p) { - Vect16 x0, x1 = Perm16::one(); - Perm16 pp = p; - do { - x0 = x1; - x1 = _mm_min_epi8(x0, x0.permuted(pp)); - pp = pp * pp; - } while (x0 != x1); - x0.v = (Perm16::one().v == x1.v); - return _mm_popcnt_u32(_mm_movemask_epi8(x0)); + Vect16 x0, x1 = Perm16::one(); + Perm16 pp = p; + do { + x0 = x1; + x1 = _mm_min_epi8(x0, x0.permuted(pp)); + pp = pp * pp; + } while (x0 != x1); + x0.v = (Perm16::one().v == x1.v); + return _mm_popcnt_u32(_mm_movemask_epi8(x0)); } uint8_t nb_cycles2(Perm16 p) { - Vect16 x0, x1 = Perm16::one(); - Perm16 pp = p; - do { - x0 = _mm_min_epi8(x1, x1.permuted(pp)); - pp = pp * pp; - x1 = _mm_min_epi8(x0, x0.permuted(pp)); 
- pp = pp * pp; - } while (x0 != x1); - x0.v = (Perm16::one().v == x1.v); - return _mm_popcnt_u32(_mm_movemask_epi8(x0)); + Vect16 x0, x1 = Perm16::one(); + Perm16 pp = p; + do { + x0 = _mm_min_epi8(x1, x1.permuted(pp)); + pp = pp * pp; + x1 = _mm_min_epi8(x0, x0.permuted(pp)); + pp = pp * pp; + } while (x0 != x1); + x0.v = (Perm16::one().v == x1.v); + return _mm_popcnt_u32(_mm_movemask_epi8(x0)); } /** This is by far the fastest implementation ! * 42 the default implem up there **/ inline Vect16 cycles_mask_unroll(Perm16 p) { - Vect16 x0, x1 = Perm16::one(); - x0 = _mm_min_epi8(x1, x1.permuted(p)); - p = p * p; - x1 = _mm_min_epi8(x0, x0.permuted(p)); - p = p * p; - x0 = _mm_min_epi8(x1, x1.permuted(p)); - p = p * p; - x1 = _mm_min_epi8(x0, x0.permuted(p)); - return x1; + Vect16 x0, x1 = Perm16::one(); + x0 = _mm_min_epi8(x1, x1.permuted(p)); + p = p * p; + x1 = _mm_min_epi8(x0, x0.permuted(p)); + p = p * p; + x0 = _mm_min_epi8(x1, x1.permuted(p)); + p = p * p; + x1 = _mm_min_epi8(x0, x0.permuted(p)); + return x1; } inline uint8_t nb_cycles_unroll(Perm16 p) { - Perm16 res; - res.v = (Perm16::one().v == cycles_mask_unroll(p).v); - return _mm_popcnt_u32(_mm_movemask_epi8(res)); + Perm16 res; + res.v = (Perm16::one().v == cycles_mask_unroll(p).v); + return _mm_popcnt_u32(_mm_movemask_epi8(res)); } Vect16 cycle_type_ref(Perm16 p) { - Vect16 v{}, res{}; - int i, j, c = 0; - for (i = 0; i < 16; i++) { - if (v[i] == 0) { - uint8_t lc = 0; - for (j = i; v[j] == 0; j = p[j]) { - v[j] = 1; - lc++; - } - res[c] = lc; - c++; + Vect16 v{}, res{}; + int i, j, c = 0; + for (i = 0; i < 16; i++) { + if (v[i] == 0) { + uint8_t lc = 0; + for (j = i; v[j] == 0; j = p[j]) { + v[j] = 1; + lc++; + } + res[c] = lc; + c++; + } } - } - sort(p.begin(), p.begin() + c); - return res; + sort(p.begin(), p.begin() + c); + return res; } Vect16 evaluation(Vect16 v) { - Vect16 res; - res.v = -(Perm16::one().v == v.v); - for (int i = 0; i < 15; i++) { - v = v.permuted(Perm16::left_cycle()); - res.v -= (Perm16::one().v == v.v); - } - return res; + Vect16 res; + res.v = -(Perm16::one().v == v.v); + for (int i = 0; i < 15; i++) { + v = v.permuted(Perm16::left_cycle()); + res.v -= (Perm16::one().v == v.v); + } + return res; } Vect16 cycle_type(Perm16 p) { - return (evaluation(cycles_mask_unroll(p))).revsorted(); + return (evaluation(cycles_mask_unroll(p))).revsorted(); } inline Vect16 cycle_type_unroll(Perm16 p) { - Perm16 pp = p; - Vect16 one16 = Perm16::one().v * cst_epu8_0x0F; - Vect16 res = one16; - - res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x01).permuted(pp)); - pp = pp * pp; - res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x02).permuted(pp)); - pp = pp * pp; - res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x04).permuted(pp)); - pp = pp * pp; - res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x08).permuted(pp)); - res = res.permuted(p); - - res = (res.v - one16.v + cst_epu8_0x01) & ((res.v & cst_epu8_0xF0) == one16.v); - return res.revsorted(); + Perm16 pp = p; + Vect16 one16 = Perm16::one().v * cst_epu8_0x0F; + Vect16 res = one16; + + res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x01).permuted(pp)); + pp = pp * pp; + res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x02).permuted(pp)); + pp = pp * pp; + res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x04).permuted(pp)); + pp = pp * pp; + res = _mm_min_epu8(res, Vect16(res.v + cst_epu8_0x08).permuted(pp)); + res = res.permuted(p); + + res = (res.v - one16.v + cst_epu8_0x01) & + ((res.v & cst_epu8_0xF0) == one16.v); + return res.revsorted(); } inline uint8_t 
nb_cycles_type_ref(Perm16 p) { - return _mm_popcnt_u32(_mm_movemask_epi8(cycle_type_ref(p).v != cst_epu8_0x00)); + return _mm_popcnt_u32( + _mm_movemask_epi8(cycle_type_ref(p).v != cst_epu8_0x00)); } inline uint8_t nb_cycles_type_mask(Perm16 p) { - return _mm_popcnt_u32(_mm_movemask_epi8(cycle_type(p).v != cst_epu8_0x00)); + return _mm_popcnt_u32(_mm_movemask_epi8(cycle_type(p).v != cst_epu8_0x00)); } inline uint8_t nb_cycles_type_unroll(Perm16 p) { - return _mm_popcnt_u32(_mm_movemask_epi8(cycle_type_unroll(p).v != cst_epu8_0x00)); + return _mm_popcnt_u32( + _mm_movemask_epi8(cycle_type_unroll(p).v != cst_epu8_0x00)); } auto func = {nb_cycles_ref, nb_cycles, nb_cycles2, nb_cycles_unroll, @@ -153,107 +156,107 @@ auto func = {nb_cycles_ref, nb_cycles, nb_cycles2, nb_cycles_unroll, using Statistic = array; std::ostream &operator<<(std::ostream &stream, Statistic const &term) { - stream << "[" << unsigned(term[0]); - for (unsigned i = 1; i < 17; i++) - stream << "," << unsigned(term[i]); - stream << "]"; - return stream; + stream << "[" << unsigned(term[0]); + for (unsigned i = 1; i < 17; i++) + stream << "," << unsigned(term[i]); + stream << "]"; + return stream; } template double timef(const vector &v, double reftime) { - high_resolution_clock::time_point tstart, tfin; - Statistic stat = {}; - uint_fast64_t sz = v.size(); - - tstart = high_resolution_clock::now(); - for (uint_fast64_t i = 0; i < sz; i++) - stat[ncycles(v[i])]++; - tfin = high_resolution_clock::now(); - - auto tm = duration_cast>(tfin - tstart); - cout << stat << endl; - cout << "time = " << tm.count() << "s"; - if (reftime != 0) - cout << ", speedup = " << reftime / tm.count(); - cout << endl; - return tm.count(); + high_resolution_clock::time_point tstart, tfin; + Statistic stat = {}; + uint_fast64_t sz = v.size(); + + tstart = high_resolution_clock::now(); + for (uint_fast64_t i = 0; i < sz; i++) + stat[ncycles(v[i])]++; + tfin = high_resolution_clock::now(); + + auto tm = duration_cast>(tfin - tstart); + cout << stat << endl; + cout << "time = " << tm.count() << "s"; + if (reftime != 0) + cout << ", speedup = " << reftime / tm.count(); + cout << endl; + return tm.count(); } void timeit(vector v) { - double sp_ref; - - cout << "Reference: " << endl; - sp_ref = timef(v, 0.); - cout << "Loop 1 : " << endl; - timef(v, sp_ref); - cout << "Loop 2 : " << endl; - timef(v, sp_ref); - cout << "Unroll : " << endl; - timef(v, sp_ref); - cout << endl; - cout << "RefType : " << endl; - sp_ref = timef(v, 0.); - cout << "MaskType : " << endl; - timef(v, sp_ref); - cout << "UnrollTyp: " << endl; - timef(v, sp_ref); + double sp_ref; + + cout << "Reference: " << endl; + sp_ref = timef(v, 0.); + cout << "Loop 1 : " << endl; + timef(v, sp_ref); + cout << "Loop 2 : " << endl; + timef(v, sp_ref); + cout << "Unroll : " << endl; + timef(v, sp_ref); + cout << endl; + cout << "RefType : " << endl; + sp_ref = timef(v, 0.); + cout << "MaskType : " << endl; + timef(v, sp_ref); + cout << "UnrollTyp: " << endl; + timef(v, sp_ref); } void democycle(Perm16 p) { - Vect16 x0, x1 = Perm16::one(); - cout << "one " << x1 << endl; - cout << "sig " << p << endl; - cout << "perm" << x1.permuted(p) << endl; - x0 = _mm_min_epi8(x1, x1.permuted(p)); - cout << "min " << x0 << endl; - p = p * p; - cout << "p^2 " << p << endl; - cout << "pe^2" << x0.permuted(p) << endl; - x1 = _mm_min_epi8(x0, x0.permuted(p)); - cout << "min " << x1 << endl; - p = p * p; - cout << "p^4 " << p << endl; - cout << "pe^4" << x1.permuted(p) << endl; - x0 = _mm_min_epi8(x1, x1.permuted(p)); 
- cout << "min " << x0 << endl; - p = p * p; - cout << "p^8 " << p << endl; - cout << "pe^8" << x0.permuted(p) << endl; - x1 = _mm_min_epi8(x0, x0.permuted(p)); - cout << "min " << x1 << endl; + Vect16 x0, x1 = Perm16::one(); + cout << "one " << x1 << endl; + cout << "sig " << p << endl; + cout << "perm" << x1.permuted(p) << endl; + x0 = _mm_min_epi8(x1, x1.permuted(p)); + cout << "min " << x0 << endl; + p = p * p; + cout << "p^2 " << p << endl; + cout << "pe^2" << x0.permuted(p) << endl; + x1 = _mm_min_epi8(x0, x0.permuted(p)); + cout << "min " << x1 << endl; + p = p * p; + cout << "p^4 " << p << endl; + cout << "pe^4" << x1.permuted(p) << endl; + x0 = _mm_min_epi8(x1, x1.permuted(p)); + cout << "min " << x0 << endl; + p = p * p; + cout << "p^8 " << p << endl; + cout << "pe^8" << x0.permuted(p) << endl; + x1 = _mm_min_epi8(x0, x0.permuted(p)); + cout << "min " << x1 << endl; } // (-1)**(len(self)-len(self.to_cycles())) inline uint8_t sign_nb_cycles_unroll(Perm16 p, uint8_t n = 16) { - return (n - nb_cycles_unroll(p)) & 1; + return (n - nb_cycles_unroll(p)) & 1; } int main() { - std::srand(std::time(0)); + std::srand(std::time(0)); - Perm16 p = {5, 4, 12, 15, 10, 8, 9, 2, 3, 13, 14, 0, 1, 7, 11, 6}; - // p = { 13, 6, 11, 14, 5, 2, 12, 4, 9, 1, 7, 0, 8, 10, 3, 15}; + Perm16 p = {5, 4, 12, 15, 10, 8, 9, 2, 3, 13, 14, 0, 1, 7, 11, 6}; + // p = { 13, 6, 11, 14, 5, 2, 12, 4, 9, 1, 7, 0, 8, 10, 3, 15}; - p = Perm16::random(); - cout << Perm16::one() << endl - << p << endl - << cycles_mask_unroll(p) << endl - << evaluation(cycles_mask_unroll(p)) - << " #= " << unsigned(nb_cycles_unroll(p)) << endl - << cycle_type(p) << endl - << cycle_type_unroll(p) << endl; + p = Perm16::random(); + cout << Perm16::one() << endl + << p << endl + << cycles_mask_unroll(p) << endl + << evaluation(cycles_mask_unroll(p)) + << " #= " << unsigned(nb_cycles_unroll(p)) << endl + << cycle_type(p) << endl + << cycle_type_unroll(p) << endl; - cout << "Sign = " << int(sign_nb_cycles_unroll(p)) << endl; + cout << "Sign = " << int(sign_nb_cycles_unroll(p)) << endl; - for (auto f : func) - cout << f(p) << " "; - cout << endl; + for (auto f : func) + cout << f(p) << " "; + cout << endl; - timeit(rand_perms(10000000)); - cout << endl; + timeit(rand_perms(10000000)); + cout << endl; - timeit(all_perms(11)); + timeit(all_perms(11)); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/benchmark/inverse.cpp b/benchmark/inverse.cpp index 340abbca..2ca6547d 100644 --- a/benchmark/inverse.cpp +++ b/benchmark/inverse.cpp @@ -30,63 +30,61 @@ using namespace std; using namespace std::chrono; using namespace HPCombi; - template -double timecheck(Func fun, Sample &sample, - SampleRef &ref, double reftime = 0) { - std::vector cur(sample.size()); - double time = timethat( - [&sample, &cur, fun]() { - std::transform(sample.begin(), sample.end(), cur.begin(), - [fun](Perm16 p) -> Perm16 { - for (int i = 0; i < 100; i++) - p = fun(p); - return p; - }); - }, - 100, reftime); - if (ref.size() == 0) - ref = std::move(cur); - else - assert(cur == ref); - return time; +double timecheck(Func fun, Sample &sample, SampleRef &ref, double reftime = 0) { + std::vector cur(sample.size()); + double time = timethat( + [&sample, &cur, fun]() { + std::transform(sample.begin(), sample.end(), cur.begin(), + [fun](Perm16 p) -> Perm16 { + for (int i = 0; i < 100; i++) + p = fun(p); + return p; + }); + }, + 100, reftime); + if (ref.size() == 0) + ref = std::move(cur); + else + assert(cur == ref); + return time; } int main() { - std::srand(std::time(0)); + 
std::srand(std::time(0)); - // Perm16 p = {5, 4, 12, 15, 10, 8, 9, 2, 3, 13, 14, 0, 1, 7, 11, 6}; + // Perm16 p = {5, 4, 12, 15, 10, 8, 9, 2, 3, 13, 14, 0, 1, 7, 11, 6}; - Perm16 p = Perm16::random(); + Perm16 p = Perm16::random(); - cout << p << endl << p.inverse_ref() << endl; - cout << p.inverse_pow() << endl; + cout << p << endl << p.inverse_ref() << endl; + cout << p.inverse_pow() << endl; - assert(p.inverse_ref() == p.inverse_sort()); - assert(p.inverse_ref() == p.inverse_find()); - assert(p.inverse_ref() == p.inverse_cycl()); - assert(p.inverse_ref() == p.inverse_pow()); - assert(p * p.inverse_find() == Perm16::one()); - assert(p.inverse_find() * p == Perm16::one()); - assert(p * p.inverse_cycl() == Perm16::one()); - assert(p.inverse_cycl() * p == Perm16::one()); + assert(p.inverse_ref() == p.inverse_sort()); + assert(p.inverse_ref() == p.inverse_find()); + assert(p.inverse_ref() == p.inverse_cycl()); + assert(p.inverse_ref() == p.inverse_pow()); + assert(p * p.inverse_find() == Perm16::one()); + assert(p.inverse_find() * p == Perm16::one()); + assert(p * p.inverse_cycl() == Perm16::one()); + assert(p.inverse_cycl() * p == Perm16::one()); - uint_fast64_t sz = 10000; - auto sample = rand_perms(sz); - std::vector refres; - cout << "Ref : "; - double tmref = - timecheck([](Perm16 p) { return p.inverse_ref(); }, sample, refres); - cout << "Arr : "; - timecheck([](Perm16 p) { return p.inverse_arr(); }, sample, refres, tmref); - cout << "Sort : "; - timecheck([](Perm16 p) { return p.inverse_sort(); }, sample, refres, tmref); - cout << "Find : "; - timecheck([](Perm16 p) { return p.inverse_find(); }, sample, refres, tmref); - cout << "Pow : "; - timecheck([](Perm16 p) { return p.inverse_pow(); }, sample, refres, tmref); - cout << "Cycl : "; - timecheck([](Perm16 p) { return p.inverse_cycl(); }, sample, refres, tmref); + uint_fast64_t sz = 10000; + auto sample = rand_perms(sz); + std::vector refres; + cout << "Ref : "; + double tmref = + timecheck([](Perm16 p) { return p.inverse_ref(); }, sample, refres); + cout << "Arr : "; + timecheck([](Perm16 p) { return p.inverse_arr(); }, sample, refres, tmref); + cout << "Sort : "; + timecheck([](Perm16 p) { return p.inverse_sort(); }, sample, refres, tmref); + cout << "Find : "; + timecheck([](Perm16 p) { return p.inverse_find(); }, sample, refres, tmref); + cout << "Pow : "; + timecheck([](Perm16 p) { return p.inverse_pow(); }, sample, refres, tmref); + cout << "Cycl : "; + timecheck([](Perm16 p) { return p.inverse_cycl(); }, sample, refres, tmref); - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/benchmark/length.cpp b/benchmark/length.cpp index a25aca91..2e0e5bc5 100644 --- a/benchmark/length.cpp +++ b/benchmark/length.cpp @@ -34,48 +34,48 @@ using Statistic = array; template double timef(const vector &v, double reftime, int nloop = 1) { - high_resolution_clock::time_point tstart, tfin; - Statistic stat = {}; - uint_fast64_t sz = v.size(); - tstart = high_resolution_clock::now(); - for (int loop = 0; loop < nloop; loop++) - for (uint_fast64_t i = 0; i < sz; i++) - stat[(v[i].*fun)()]++; - tfin = high_resolution_clock::now(); + high_resolution_clock::time_point tstart, tfin; + Statistic stat = {}; + uint_fast64_t sz = v.size(); + tstart = high_resolution_clock::now(); + for (int loop = 0; loop < nloop; loop++) + for (uint_fast64_t i = 0; i < sz; i++) + stat[(v[i].*fun)()]++; + tfin = high_resolution_clock::now(); - for (int i = 0; i <= 120; i++) - cout << stat[i] / nloop << " "; - cout << endl; - auto tm = duration_cast>(tfin - 
tstart); - cout << "time = " << tm.count() << "s"; - if (reftime != 0) - cout << ", speedup = " << reftime / tm.count(); - cout << endl; - return tm.count(); + for (int i = 0; i <= 120; i++) + cout << stat[i] / nloop << " "; + cout << endl; + auto tm = duration_cast>(tfin - tstart); + cout << "time = " << tm.count() << "s"; + if (reftime != 0) + cout << ", speedup = " << reftime / tm.count(); + cout << endl; + return tm.count(); } void timeit(const vector &v, int nloop = 1) { - double ref; + double ref; - cout << "Reference: "; - ref = timef<&Perm16::length_ref>(v, 0., nloop); - cout << "Fast : "; - ref = timef<&Perm16::length>(v, ref, nloop); + cout << "Reference: "; + ref = timef<&Perm16::length_ref>(v, 0., nloop); + cout << "Fast : "; + ref = timef<&Perm16::length>(v, ref, nloop); } int main() { - std::srand(std::time(0)); + std::srand(std::time(0)); - Perm16 p = {5, 4, 12, 15, 10, 8, 9, 2, 3, 13, 14, 0, 1, 7, 11, 6}; + Perm16 p = {5, 4, 12, 15, 10, 8, 9, 2, 3, 13, 14, 0, 1, 7, 11, 6}; - assert(&p[0] == &(p.as_array()[0])); + assert(&p[0] == &(p.as_array()[0])); - cout << Perm16::one() << endl; - cout << p << endl << endl; - cout << int(p.length()) << endl; - cout << int(p.length_ref()) << endl; - auto vv = rand_perms(10000); - timeit(vv, 100); - cout << endl; - return EXIT_SUCCESS; + cout << Perm16::one() << endl; + cout << p << endl << endl; + cout << int(p.length()) << endl; + cout << int(p.length_ref()) << endl; + auto vv = rand_perms(10000); + timeit(vv, 100); + cout << endl; + return EXIT_SUCCESS; } diff --git a/benchmark/sort.cpp b/benchmark/sort.cpp index 78afa89b..56778f47 100644 --- a/benchmark/sort.cpp +++ b/benchmark/sort.cpp @@ -29,52 +29,55 @@ using namespace std; using namespace std::chrono; using namespace HPCombi; -#define ASSERT(test) if (!(test)) cout << "Test failed in file " << __FILE__ \ - << " line " << __LINE__ << ": " #test << endl +#define ASSERT(test) \ + if (!(test)) \ + cout << "Test failed in file " << __FILE__ << " line " << __LINE__ \ + << ": " #test << endl std::vector rand_sample(size_t sz) { std::vector res; - for (size_t i=0; i < sz; i++) + for (size_t i = 0; i < sz; i++) res.push_back(random_epu8(256)); return res; } inline epu8 rand_perm() { - epu8 res = epu8id; - auto &ar = as_array(res); - std::random_shuffle(ar.begin(), ar.end()); - return res; + epu8 res = epu8id; + auto &ar = as_array(res); + std::random_shuffle(ar.begin(), ar.end()); + return res; } std::vector rand_perms(int sz) { - std::vector res(sz); - std::srand(std::time(0)); - for (int i = 0; i < sz; i++) - res[i] = rand_perm(); - return res; + std::vector res(sz); + std::srand(std::time(0)); + for (int i = 0; i < sz; i++) + res[i] = rand_perm(); + return res; } template double timethat(Func fun, int rep = 1, double reftime = 0) { - using std::chrono::duration; - using std::chrono::duration_cast; - using std::chrono::high_resolution_clock; - auto tstart = high_resolution_clock::now(); - for (int i = 0; i < rep; i++) - fun(); - auto tfin = high_resolution_clock::now(); - - auto tm = duration_cast>(tfin - tstart); - std::cout << "time = " << std::fixed << std::setprecision(6) << tm.count() - << "s"; - if (reftime != 0) - std::cout << ", speedup = " << std::setprecision(3) << reftime / tm.count(); - std::cout << std::endl; - return tm.count(); + using std::chrono::duration; + using std::chrono::duration_cast; + using std::chrono::high_resolution_clock; + auto tstart = high_resolution_clock::now(); + for (int i = 0; i < rep; i++) + fun(); + auto tfin = high_resolution_clock::now(); + + auto 
tm = duration_cast>(tfin - tstart); + std::cout << "time = " << std::fixed << std::setprecision(6) << tm.count() + << "s"; + if (reftime != 0) + std::cout << ", speedup = " << std::setprecision(3) + << reftime / tm.count(); + std::cout << std::endl; + return tm.count(); } struct RoundsMask { - // commented out due to a bug in gcc + // commented out due to a bug in gcc /* constexpr */ RoundsMask() : arr() { for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) arr[i] = HPCombi::sorting_rounds[i] < epu8id; @@ -96,12 +99,12 @@ inline epu8 sort_pair(epu8 a) { inline epu8 sort_odd_even(epu8 a) { const uint8_t FF = 0xff; - static const epu8 even = - {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; - static const epu8 odd = - {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; - static const epu8 mask = - {0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF}; + static const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, + 9, 8, 11, 10, 13, 12, 15, 14}; + static const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, + 7, 10, 9, 12, 11, 14, 13, 15}; + static const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, + 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { b = permuted(a, even); @@ -126,7 +129,7 @@ inline epu8 insertion_sort(epu8 p) { inline epu8 radix_sort(epu8 p) { auto &a = HPCombi::as_array(p); - std::array stat {}; + std::array stat{}; for (int i = 0; i < 16; i++) stat[a[i]]++; int c = 0; diff --git a/benchmark/sum.cpp b/benchmark/sum.cpp index 5904c21c..bdf8e7f3 100644 --- a/benchmark/sum.cpp +++ b/benchmark/sum.cpp @@ -21,33 +21,35 @@ using namespace std; using namespace HPCombi; -#define ASSERT(test) if (!(test)) cout << "Test failed in file " << __FILE__ \ - << " line " << __LINE__ << ": " #test << endl +#define ASSERT(test) \ + if (!(test)) \ + cout << "Test failed in file " << __FILE__ << " line " << __LINE__ \ + << ": " #test << endl int main() { - auto vrand = rand_perms(1000); - auto rep = 10000; + auto vrand = rand_perms(1000); + auto rep = 10000; - cout << "Loop : "; - double reftime = timethat( - [vrand]() { - for (Perm16 v : vrand) - ASSERT(v.sum_ref() == 120); - }, - rep); - cout << "4 rnds : "; - timethat( - [vrand]() { - for (Perm16 v : vrand) - ASSERT(v.sum4() == 120); - }, - rep, reftime); - cout << "3 rnds : "; - timethat( - [vrand]() { - for (Perm16 v : vrand) - ASSERT(v.sum3() == 120); - }, - rep, reftime); - return EXIT_SUCCESS; + cout << "Loop : "; + double reftime = timethat( + [vrand]() { + for (Perm16 v : vrand) + ASSERT(v.sum_ref() == 120); + }, + rep); + cout << "4 rnds : "; + timethat( + [vrand]() { + for (Perm16 v : vrand) + ASSERT(v.sum4() == 120); + }, + rep, reftime); + cout << "3 rnds : "; + timethat( + [vrand]() { + for (Perm16 v : vrand) + ASSERT(v.sum3() == 120); + }, + rep, reftime); + return EXIT_SUCCESS; } diff --git a/examples/CF.cpp b/examples/CF.cpp index c5390290..ab964c84 100644 --- a/examples/CF.cpp +++ b/examples/CF.cpp @@ -25,55 +25,55 @@ using namespace HPCombi; class EqEpu8 { - const epu8 elem; - const uint64_t size; - class EqEpu8Iterator; + const epu8 elem; + const uint64_t size; + class EqEpu8Iterator; -public: - - EqEpu8(epu8 x, uint64_t sz = 16) : elem(x), size(sz) {}; - - EqEpu8Iterator begin() { return { *this, 0, 1}; } - EqEpu8Iterator end() { return { *this, 0, size}; } - -private: + public: + EqEpu8(epu8 x, uint64_t sz = 16) : elem(x), size(sz){}; - class EqEpu8Iterator { + EqEpu8Iterator begin() { return {*this, 0, 1}; } + EqEpu8Iterator end() { return {*this, 0, size}; } - const EqEpu8 &eqv; - 
uint64_t i, j; + private: + class EqEpu8Iterator { - public: + const EqEpu8 &eqv; + uint64_t i, j; - EqEpu8Iterator(const EqEpu8 &_eqv, uint64_t _i, uint64_t _j) : - eqv(_eqv), i(_i), j(_j) { find_eq(); } + public: + EqEpu8Iterator(const EqEpu8 &_eqv, uint64_t _i, uint64_t _j) + : eqv(_eqv), i(_i), j(_j) { + find_eq(); + } - void find_eq() { - for (; j < eqv.size; j++) { - for (; i < j; i++) - if (eqv.elem[i] == eqv.elem[j]) - return; - i = 0; - } - } - EqEpu8Iterator &operator++() { - assert( j < eqv.size ); - ++i; find_eq(); - return *this; - } - std::pair operator*() const { return {i, j}; } - bool operator!=(const EqEpu8Iterator &it) const { - return i != it.i or j != it.j; - } - }; + void find_eq() { + for (; j < eqv.size; j++) { + for (; i < j; i++) + if (eqv.elem[i] == eqv.elem[j]) + return; + i = 0; + } + } + EqEpu8Iterator &operator++() { + assert(j < eqv.size); + ++i; + find_eq(); + return *this; + } + std::pair operator*() const { return {i, j}; } + bool operator!=(const EqEpu8Iterator &it) const { + return i != it.i or j != it.j; + } + }; }; int main() { - epu8 a{0,2,3,4,0,6,2,2,0}; + epu8 a{0, 2, 3, 4, 0, 6, 2, 2, 0}; - for (auto p : EqEpu8(a, 9)) { - cout << "(" << p.first << ", " << p.second << ")" << endl; - } - cout << endl; - exit(0); + for (auto p : EqEpu8(a, 9)) { + cout << "(" << p.first << ", " << p.second << ")" << endl; + } + cout << endl; + exit(0); } diff --git a/examples/RD.cpp b/examples/RD.cpp index 0a417909..78ed1486 100644 --- a/examples/RD.cpp +++ b/examples/RD.cpp @@ -27,93 +27,92 @@ #endif #include -using HPCombi::PTransf16; using HPCombi::epu8; +using HPCombi::PTransf16; -const PTransf16 id {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}; +const PTransf16 id{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -const PTransf16 s0 {0, 1, 2, 3, 4, 5, 6, 8, 7, 9,10,11,12,13,14,15}; +const PTransf16 s0{0, 1, 2, 3, 4, 5, 6, 8, 7, 9, 10, 11, 12, 13, 14, 15}; -const PTransf16 s1e{0, 1, 2, 3, 4, 5, 7, 6, 9, 8,10,11,12,13,14,15}; -const PTransf16 s1f{0, 1, 2, 3, 4, 5, 8, 9, 6, 7,10,11,12,13,14,15}; +const PTransf16 s1e{0, 1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13, 14, 15}; +const PTransf16 s1f{0, 1, 2, 3, 4, 5, 8, 9, 6, 7, 10, 11, 12, 13, 14, 15}; -const PTransf16 s2 {0, 1, 2, 3, 4, 6, 5, 7, 8,10, 9,11,12,13,14,15}; -const PTransf16 s3 {0, 1, 2, 3, 5, 4, 6, 7, 8, 9,11,10,12,13,14,15}; -const PTransf16 s4 {0, 1, 2, 4, 3, 5, 6, 7, 8, 9,10,12,11,13,14,15}; -const PTransf16 s5 {0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,13,12,14,15}; -const PTransf16 s6 {0, 2, 1, 3, 4, 5, 6, 7, 8, 9,10,11,12,14,13,15}; -const PTransf16 s7 {1, 0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,15,14}; +const PTransf16 s2{0, 1, 2, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 13, 14, 15}; +const PTransf16 s3{0, 1, 2, 3, 5, 4, 6, 7, 8, 9, 11, 10, 12, 13, 14, 15}; +const PTransf16 s4{0, 1, 2, 4, 3, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, 15}; +const PTransf16 s5{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 12, 14, 15}; +const PTransf16 s6{0, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 13, 15}; +const PTransf16 s7{1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 14}; const uint8_t FF = 0xff; const uint8_t FE = 0xfe; -const PTransf16 gene {FF,FF,FF,FF,FF,FF,FF,FF, 8, 9,10,11,12,13,14,15}; -const PTransf16 genf {FF,FF,FF,FF,FF,FF,FF, 7,FF, 9,10,11,12,13,14,15}; +const PTransf16 gene{FF, FF, FF, FF, FF, FF, FF, FF, + 8, 9, 10, 11, 12, 13, 14, 15}; +const PTransf16 genf{FF, FF, FF, FF, FF, FF, FF, 7, + FF, 9, 10, 11, 12, 13, 14, 15}; -inline PTransf16 act1(PTransf16 x, PTransf16 y) { - return x * y; -} +inline PTransf16 
act1(PTransf16 x, PTransf16 y) { return x * y; } inline PTransf16 act0(PTransf16 x, PTransf16 y) { - PTransf16 minab, maxab, mask, b = x * y; - mask = _mm_cmplt_epi8(y, PTransf16::one()); - minab = _mm_min_epi8(x, b); - maxab = _mm_max_epi8(x, b); - return static_cast(_mm_blendv_epi8(maxab, minab, mask)) | - (y.v == HPCombi::Epu8(0xFF)); + PTransf16 minab, maxab, mask, b = x * y; + mask = _mm_cmplt_epi8(y, PTransf16::one()); + minab = _mm_min_epi8(x, b); + maxab = _mm_max_epi8(x, b); + return static_cast(_mm_blendv_epi8(maxab, minab, mask)) | + (y.v == HPCombi::Epu8(0xFF)); } int main() { - using namespace std; - // vector gens {gene, genf, s1e, s1f, s2, s3, s4, s5}; - // vector gens {gene, genf, s1e, s1f}; - vector gens{gene, genf, s1e, s1f, s2, s3, s4, s5, s6}; - // vector gens {gene, s1e, s2, s3, s4, s5, s6}; - // const PTransf16 toFind = - // {FF,FF,FF,FF,FF,FF,FF,FF, FF, FF, FF, FF, FF, 13, 14, 15}; - // cout << act0(s2,genf) << endl; - int lg = 0; - + using namespace std; + // vector gens {gene, genf, s1e, s1f, s2, s3, s4, s5}; + // vector gens {gene, genf, s1e, s1f}; + vector gens{gene, genf, s1e, s1f, s2, s3, s4, s5, s6}; + // vector gens {gene, s1e, s2, s3, s4, s5, s6}; + // const PTransf16 toFind = + // {FF,FF,FF,FF,FF,FF,FF,FF, FF, FF, FF, FF, FF, 13, 14, 15}; + // cout << act0(s2,genf) << endl; + int lg = 0; #ifdef HPCOMBI_HAVE_DENSEHASHSET - // using google::sparse_hash_set; - // sparse_hash_set, equal_to> res; - - using google::dense_hash_set; - dense_hash_set, equal_to> res; - res.set_empty_key( - {FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE}); - res.resize(250000000); + // using google::sparse_hash_set; + // sparse_hash_set, equal_to> res; + + using google::dense_hash_set; + dense_hash_set, equal_to> res; + res.set_empty_key( + {FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE}); + res.resize(250000000); #else - unordered_set res; - res.reserve(250000000); + unordered_set res; + res.reserve(250000000); #endif - res.insert(id); - - vector todo, newtodo; - todo.push_back(id); - while (todo.size()) { - newtodo.clear(); - lg++; - for (auto v : todo) { - for (auto g : gens) { - auto el = act0(v, g); - if (res.insert(el).second) - newtodo.push_back(el); - // if (el == toFind) cout << v << endl; - } + res.insert(id); + + vector todo, newtodo; + todo.push_back(id); + while (todo.size()) { + newtodo.clear(); + lg++; + for (auto v : todo) { + for (auto g : gens) { + auto el = act0(v, g); + if (res.insert(el).second) + newtodo.push_back(el); + // if (el == toFind) cout << v << endl; + } + } + std::swap(todo, newtodo); + cout << lg << ", todo = " << todo.size() << ", res = " << res.size() + << ", #Bucks = " << res.bucket_count() << endl; + // cout << "Trouve " << (res.find(toFind) != res.end()) << endl; + // if (res.find(toFind) != res.end()) break; } - std::swap(todo, newtodo); - cout << lg << ", todo = " << todo.size() << ", res = " << res.size() - << ", #Bucks = " << res.bucket_count() << endl; - // cout << "Trouve " << (res.find(toFind) != res.end()) << endl; - // if (res.find(toFind) != res.end()) break; - } - cout << "res = " << res.size() << endl; - assert (res.size() == 248318309); -// 1 2 3 4 5 6 7 8 -// 4 37 541 10625 258661 7464625 248318309 9339986689 - - exit(0); + cout << "res = " << res.size() << endl; + assert(res.size() == 248318309); + // 1 2 3 4 5 6 7 8 + // 4 37 541 10625 258661 7464625 248318309 9339986689 + + exit(0); } diff --git a/examples/Renner.cpp b/examples/Renner.cpp index 0557e395..bb6007b6 100644 --- a/examples/Renner.cpp 
+++ b/examples/Renner.cpp @@ -31,48 +31,50 @@ template std::ostream &operator<<(std::ostream &out, const std::vector &v) { - out << '['; - if (!v.empty()) { - auto i = v.begin(); - for (; i != --v.end(); ++i) - out << std::setw(2) << *i << ","; - out << std::setw(2) << *i; - } - out << "]"; - return out; + out << '['; + if (!v.empty()) { + auto i = v.begin(); + for (; i != --v.end(); ++i) + out << std::setw(2) << *i << ","; + out << std::setw(2) << *i; + } + out << "]"; + return out; } using namespace std; using namespace HPCombi; -const PTransf16 id {0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}; +const PTransf16 id{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; -const PTransf16 s0 {0, 1, 2, 3, 4, 5, 6, 8, 7, 9,10,11,12,13,14,15}; +const PTransf16 s0{0, 1, 2, 3, 4, 5, 6, 8, 7, 9, 10, 11, 12, 13, 14, 15}; -const PTransf16 s1e {0, 1, 2, 3, 4, 5, 7, 6, 9, 8,10,11,12,13,14,15}; -const PTransf16 s1f {0, 1, 2, 3, 4, 5, 8, 9, 6, 7,10,11,12,13,14,15}; +const PTransf16 s1e{0, 1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13, 14, 15}; +const PTransf16 s1f{0, 1, 2, 3, 4, 5, 8, 9, 6, 7, 10, 11, 12, 13, 14, 15}; -const PTransf16 s2 {0, 1, 2, 3, 4, 6, 5, 7, 8,10, 9,11,12,13,14,15}; -const PTransf16 s3 {0, 1, 2, 3, 5, 4, 6, 7, 8, 9,11,10,12,13,14,15}; -const PTransf16 s4 {0, 1, 2, 4, 3, 5, 6, 7, 8, 9,10,12,11,13,14,15}; -const PTransf16 s5 {0, 1, 3, 2, 4, 5, 6, 7, 8, 9,10,11,13,12,14,15}; -const PTransf16 s6 {0, 2, 1, 3, 4, 5, 6, 7, 8, 9,10,11,12,14,13,15}; -const PTransf16 s7 {1, 0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,15,14}; +const PTransf16 s2{0, 1, 2, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 13, 14, 15}; +const PTransf16 s3{0, 1, 2, 3, 5, 4, 6, 7, 8, 9, 11, 10, 12, 13, 14, 15}; +const PTransf16 s4{0, 1, 2, 4, 3, 5, 6, 7, 8, 9, 10, 12, 11, 13, 14, 15}; +const PTransf16 s5{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 12, 14, 15}; +const PTransf16 s6{0, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 13, 15}; +const PTransf16 s7{1, 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 14}; const uint8_t FF = 0xff; const uint8_t FE = 0xfe; -const PTransf16 gene {FF,FF,FF,FF,FF,FF,FF,FF, 8, 9,10,11,12,13,14,15}; -const PTransf16 genf {FF,FF,FF,FF,FF,FF,FF, 7,FF, 9,10,11,12,13,14,15}; +const PTransf16 gene{FF, FF, FF, FF, FF, FF, FF, FF, + 8, 9, 10, 11, 12, 13, 14, 15}; +const PTransf16 genf{FF, FF, FF, FF, FF, FF, FF, 7, + FF, 9, 10, 11, 12, 13, 14, 15}; // const vector gens {gene, genf, s1e, s1f}; const vector gens{gene, genf, s1e, s1f, s2, s3, s4, s5}; const int nprint = 6; - #ifdef HPCOMBI_HAVE_DENSEHASHMAP -google::dense_hash_map, - hash, equal_to> elems; +google::dense_hash_map, hash, + equal_to> + elems; #else unordered_map> elems; #endif @@ -81,81 +83,82 @@ inline PTransf16 act1(PTransf16 x, PTransf16 y) { return x * y; } inline PTransf16 mult1(PTransf16 x, PTransf16 y) { return x * y; } inline PTransf16 act0(PTransf16 x, PTransf16 y) { - PTransf16 minab, maxab, mask, b = x * y; - mask = _mm_cmplt_epi8(y, Perm16::one()); - minab = _mm_min_epi8(x, b); - maxab = _mm_max_epi8(x, b); - return static_cast(_mm_blendv_epi8(maxab, minab, mask)) | - (y.v == Epu8(0xFF)); + PTransf16 minab, maxab, mask, b = x * y; + mask = _mm_cmplt_epi8(y, Perm16::one()); + minab = _mm_min_epi8(x, b); + maxab = _mm_max_epi8(x, b); + return static_cast(_mm_blendv_epi8(maxab, minab, mask)) | + (y.v == Epu8(0xFF)); } std::vector reduced_word(PTransf16 x) { - std::vector res{}; - while (x != id) { - auto p = elems[x]; - res.push_back(p.second); - x = p.first; - } - std::reverse(res.begin(), res.end()); - return res; + std::vector res{}; + while (x 
!= id) { + auto p = elems[x]; + res.push_back(p.second); + x = p.first; + } + std::reverse(res.begin(), res.end()); + return res; } inline PTransf16 mult0(PTransf16 x, PTransf16 y) { - for (auto i : reduced_word(y)) - x = act0(x, gens[i]); - return x; + for (auto i : reduced_word(y)) + x = act0(x, gens[i]); + return x; } std::vector sym_renner(PTransf16 v, int n) { - std::vector res; - for (int i = 8 - n; i < 8 + n; i++) { - if (v[i] == 0xff) - res.push_back(0); - else if (v[i] < 8) - res.push_back(v[i] - 8); - else - res.push_back(v[i] - 7); - } - return res; + std::vector res; + for (int i = 8 - n; i < 8 + n; i++) { + if (v[i] == 0xff) + res.push_back(0); + else if (v[i] < 8) + res.push_back(v[i] - 8); + else + res.push_back(v[i] - 7); + } + return res; } int main() { - int lg = 0; + int lg = 0; #ifdef HPCOMBI_HAVE_DENSEHASHMAP - elems.set_empty_key( - {FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE}); + elems.set_empty_key( + {FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE}); #endif - elems.insert({id, {{}, -1}}); - // elems.resize(250000000); - - int nidemp = 1; - cout << "Idemp : " << setw(3) << nidemp << " " << sym_renner(id, nprint) - << " " << reduced_word(id) << endl; - - vector todo, newtodo; - todo.push_back(id); - while (todo.size()) { - newtodo.clear(); - lg++; - for (auto v : todo) { - for (uint8_t i = 0; i < gens.size(); i++) { - PTransf16 el = act0(v, gens[i]); - if (elems.insert({el, {v, i}}).second) { - newtodo.push_back(el); - if (mult0(el, el) == el) { - nidemp++; - cout << "Idemp : " << setw(3) << nidemp << " " - << sym_renner(el, nprint) << " " << reduced_word(el) << endl; - } + elems.insert({id, {{}, -1}}); + // elems.resize(250000000); + + int nidemp = 1; + cout << "Idemp : " << setw(3) << nidemp << " " << sym_renner(id, nprint) + << " " << reduced_word(id) << endl; + + vector todo, newtodo; + todo.push_back(id); + while (todo.size()) { + newtodo.clear(); + lg++; + for (auto v : todo) { + for (uint8_t i = 0; i < gens.size(); i++) { + PTransf16 el = act0(v, gens[i]); + if (elems.insert({el, {v, i}}).second) { + newtodo.push_back(el); + if (mult0(el, el) == el) { + nidemp++; + cout << "Idemp : " << setw(3) << nidemp << " " + << sym_renner(el, nprint) << " " + << reduced_word(el) << endl; + } + } + } } - } + std::swap(todo, newtodo); + cout << lg << ", todo = " << todo.size() << ", elems = " << elems.size() + << ", #Bucks = " << elems.bucket_count() << endl; } - std::swap(todo, newtodo); - cout << lg << ", todo = " << todo.size() << ", elems = " << elems.size() - << ", #Bucks = " << elems.bucket_count() << endl; - } - cout << "elems = " << elems.size() << endl; - exit(0); + cout << "elems = " << elems.size() << endl; + exit(0); } diff --git a/examples/Trans.cpp b/examples/Trans.cpp index 776e93c3..f199001e 100644 --- a/examples/Trans.cpp +++ b/examples/Trans.cpp @@ -28,21 +28,20 @@ using HPCombi::Transf16; -// Full transformation semigroup on 7 points -const Transf16 s {1, 0, 2, 3, 4, 5, 6}; -const Transf16 cy {1, 2, 3, 4, 5, 6, 0}; -const Transf16 pi {0, 0, 2, 3, 4, 5, 6}; +// Full transformation semigroup on 7 points +const Transf16 s{1, 0, 2, 3, 4, 5, 6}; +const Transf16 cy{1, 2, 3, 4, 5, 6, 0}; +const Transf16 pi{0, 0, 2, 3, 4, 5, 6}; const std::vector gens{s, cy, pi}; - -/* Full transformation semigroup on 9 points +/* Full transformation semigroup on 9 points const Transf16 s {1, 0, 2, 3, 4, 5, 6, 7, 8}; const Transf16 cy {1, 2, 3, 4, 5, 6, 7, 8, 0}; const Transf16 pi {0, 0, 2, 3, 4, 5, 6, 7, 8}; const std::vector gens{s, cy, pi}; 
*/ -/* James favourite +/* James favourite const Transf16 a1 {1, 7, 2, 6, 0, 4, 1, 5}; const Transf16 a2 {2, 4, 6, 1, 4, 5, 2, 7}; const Transf16 a3 {3, 0, 7, 2, 4, 6, 2, 4}; @@ -57,37 +56,39 @@ const vector gens{a1,a2,a3,a4,a5,a6,a7,a8}; const uint8_t FE = 0xfe; int main() { - int lg = 0; + int lg = 0; #ifdef HPCOMBI_HAVE_DENSEHASHSET - using google::dense_hash_set; - dense_hash_set, std::equal_to> res; - res.set_empty_key({FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE}); - // res.resize(500000000); + using google::dense_hash_set; + dense_hash_set, std::equal_to> res; + res.set_empty_key( + {FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE, FE}); + // res.resize(500000000); #else - using std::unordered_set; - unordered_set res; - // res.reserve(500000000); + using std::unordered_set; + unordered_set res; + // res.reserve(500000000); #endif - res.insert(Transf16::one()); + res.insert(Transf16::one()); - std::vector todo, newtodo; - todo.push_back(Transf16::one()); - while (todo.size()) { - newtodo.clear(); - lg++; - for (auto v : todo) { - for (auto g : gens) { - auto el = v * g; - if (res.insert(el).second) - newtodo.push_back(el); - } + std::vector todo, newtodo; + todo.push_back(Transf16::one()); + while (todo.size()) { + newtodo.clear(); + lg++; + for (auto v : todo) { + for (auto g : gens) { + auto el = v * g; + if (res.insert(el).second) + newtodo.push_back(el); + } + } + std::swap(todo, newtodo); + std::cout << lg << ", todo = " << todo.size() + << ", res = " << res.size() + << ", #Bucks = " << res.bucket_count() << std::endl; } - std::swap(todo, newtodo); - std::cout << lg << ", todo = " << todo.size() << ", res = " << res.size() - << ", #Bucks = " << res.bucket_count() << std::endl; - } - std::cout << "res = " << res.size() << std::endl; - exit(0); + std::cout << "res = " << res.size() << std::endl; + exit(0); } diff --git a/examples/image.cpp b/examples/image.cpp index 742acf1b..03e8895a 100644 --- a/examples/image.cpp +++ b/examples/image.cpp @@ -14,30 +14,30 @@ //****************************************************************************// #include "perm16.hpp" +#include "timer.h" #include #include #include #include // less<> #include #include -#include // pair #include #include +#include // pair #include #include -#include "timer.h" template std::ostream &operator<<(std::ostream &out, const std::vector &v) { - out << '['; - if (!v.empty()) { - auto i = v.begin(); - for (; i != --v.end(); ++i) - out << std::setw(2) << *i << ","; - out << std::setw(2) << *i; - } - out << "]"; - return out; + out << '['; + if (!v.empty()) { + auto i = v.begin(); + for (; i != --v.end(); ++i) + out << std::setw(2) << *i << ","; + out << std::setw(2) << *i; + } + out << "]"; + return out; } using namespace std; @@ -45,55 +45,57 @@ using namespace HPCombi; // James favourite -const Transf16 a1 {1, 7, 2, 6, 0, 4, 1, 5}; -const Transf16 a2 {2, 4, 6, 1, 4, 5, 2, 7}; -const Transf16 a3 {3, 0, 7, 2, 4, 6, 2, 4}; -const Transf16 a4 {3, 2, 3, 4, 5, 3, 0, 1}; -const Transf16 a5 {4, 3, 7, 7, 4, 5, 0, 4}; -const Transf16 a6 {5, 6, 3, 0, 3, 0, 5, 1}; -const Transf16 a7 {6, 0, 1, 1, 1, 6, 3, 4}; -const Transf16 a8 {7, 7, 4, 0, 6, 4, 1, 7}; -const array gens{{a1,a2,a3,a4,a5,a6,a7,a8}}; -//const vector gens{{a1, a2}}; +const Transf16 a1{1, 7, 2, 6, 0, 4, 1, 5}; +const Transf16 a2{2, 4, 6, 1, 4, 5, 2, 7}; +const Transf16 a3{3, 0, 7, 2, 4, 6, 2, 4}; +const Transf16 a4{3, 2, 3, 4, 5, 3, 0, 1}; +const Transf16 a5{4, 3, 7, 7, 4, 5, 0, 4}; +const Transf16 a6{5, 6, 3, 0, 3, 0, 5, 1}; +const 
Transf16 a7{6, 0, 1, 1, 1, 6, 3, 4}; +const Transf16 a8{7, 7, 4, 0, 6, 4, 1, 7}; +const array gens{{a1, a2, a3, a4, a5, a6, a7, a8}}; +// const vector gens{{a1, a2}}; // std::array, 65536> res {}; -std::array>, 65536> res; +std::array>, + 65536> + res; int main() { - int lg = 0; - int total = 0; + int lg = 0; + int total = 0; - vector todo, newtodo; - // res[Transf16::one().image_bitset()] = make_tuple(0xFFFF, 0xFFFF, {}); - get<0>(res[Transf16::one().image_bitset()]) = 0xFFFF; - get<1>(res[Transf16::one().image_bitset()]) = 0xFFFF; - cout << "start" << endl; + vector todo, newtodo; + // res[Transf16::one().image_bitset()] = make_tuple(0xFFFF, 0xFFFF, {}); + get<0>(res[Transf16::one().image_bitset()]) = 0xFFFF; + get<1>(res[Transf16::one().image_bitset()]) = 0xFFFF; + cout << "start" << endl; - libsemigroups::Timer t; - todo.push_back(Transf16::one()); - while (todo.size()) { - newtodo.clear(); - lg++; - for (auto v : todo) { - total++; - uint32_t vim = v.image_bitset(); - for (uint8_t i = 0; i < gens.size(); i++) { - Transf16 el = gens[i] * v; - uint32_t im = el.image_bitset(); - get<2>(res[vim])[i] = im; - if (get<0>(res[im]) == 0) { - // cout << el.sorted() << endl; - newtodo.push_back(el); - get<0>(res[im]) = gens[i].image_bitset(); - get<1>(res[im]) = i; + libsemigroups::Timer t; + todo.push_back(Transf16::one()); + while (todo.size()) { + newtodo.clear(); + lg++; + for (auto v : todo) { + total++; + uint32_t vim = v.image_bitset(); + for (uint8_t i = 0; i < gens.size(); i++) { + Transf16 el = gens[i] * v; + uint32_t im = el.image_bitset(); + get<2>(res[vim])[i] = im; + if (get<0>(res[im]) == 0) { + // cout << el.sorted() << endl; + newtodo.push_back(el); + get<0>(res[im]) = gens[i].image_bitset(); + get<1>(res[im]) = i; + } + } } - } + swap(todo, newtodo); + // cout << lg << ", todo = " << todo.size() << ", total = " << total << + // endl; } - swap(todo, newtodo); - // cout << lg << ", todo = " << todo.size() << ", total = " << total << endl; - } - cout << t << endl; - cout << "lg = " << lg << ", total = " << total << endl; - exit(0); + cout << t << endl; + cout << "lg = " << lg << ", total = " << total << endl; + exit(0); } diff --git a/examples/stringmonoid.cpp b/examples/stringmonoid.cpp index 5a80f417..f1333ddc 100644 --- a/examples/stringmonoid.cpp +++ b/examples/stringmonoid.cpp @@ -27,23 +27,22 @@ namespace power_helper { // Algebraic monoid for string with concatenation template <> struct Monoid { - // The one of the string monoid - static std::string one() { return {}; }; - - /* The product of two strings that is their concatenation - * @param a the first string to be concatenated - * @param b the second string to be concatenated - * @return the concatenation of \a a and \a b - */ - static std::string prod(std::string a, std::string b) { return a + b; } + // The one of the string monoid + static std::string one() { return {}; }; + + /* The product of two strings that is their concatenation + * @param a the first string to be concatenated + * @param b the second string to be concatenated + * @return the concatenation of \a a and \a b + */ + static std::string prod(std::string a, std::string b) { return a + b; } }; } // namespace power_helper } // namespace HPCombi - int main() { - assert(HPCombi::pow<0>(std::string("ab")) == ""); - assert(HPCombi::pow<4>(std::string("ab")) == "abababab"); - assert(HPCombi::pow<5>(std::string("abc")) == "abcabcabcabcabc"); + assert(HPCombi::pow<0>(std::string("ab")) == ""); + assert(HPCombi::pow<4>(std::string("ab")) == "abababab"); + 
assert(HPCombi::pow<5>(std::string("abc")) == "abcabcabcabcabc"); } diff --git a/examples/timer.h b/examples/timer.h index 2dd4991c..410fa891 100644 --- a/examples/timer.h +++ b/examples/timer.h @@ -27,75 +27,70 @@ namespace libsemigroups { - // This is a simple class which can be used to send timing information in a - // somewhat human readable format to the standard output. - class Timer { - public: +// This is a simple class which can be used to send timing information in a +// somewhat human readable format to the standard output. +class Timer { + public: // Default constructor, timer starts when object is created Timer() : _start(std::chrono::high_resolution_clock::now()) {} // Reset the timer (i.e. time from this point on) - void reset() { - _start = std::chrono::high_resolution_clock::now(); - } + void reset() { _start = std::chrono::high_resolution_clock::now(); } // The elapsed time in nanoseconds since last reset std::chrono::nanoseconds elapsed() const { - return std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - _start); + return std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - _start); } // String containing the somewhat human readable amount of time, this is // primarily intended for testing purposes std::string string(std::chrono::nanoseconds elapsed) const { - std::string out; - if (string_it(out, elapsed, "h ", 0)) { - string_it(out, elapsed, "m", 0); - return out; - } else if (string_it(out, elapsed, "m ", 0)) { - string_it(out, elapsed, "s", 0); - return out; - } else if (string_it(out, elapsed, "ms", 9)) { + std::string out; + if (string_it(out, elapsed, "h ", 0)) { + string_it(out, elapsed, "m", 0); + return out; + } else if (string_it(out, elapsed, "m ", 0)) { + string_it(out, elapsed, "s", 0); + return out; + } else if (string_it(out, elapsed, "ms", + 9)) { + return out; + } else if (string_it(out, elapsed, "\u03BCs", + 9)) { + return out; + } else if (string_it(out, elapsed, "ns", 0)) { + return out; + } return out; - } else if (string_it( - out, elapsed, "\u03BCs", 9)) { - return out; - } else if (string_it(out, elapsed, "ns", 0)) { - return out; - } - return out; } // String containing the somewhat human readable amount of time since the // last reset - std::string string() const { - return string(elapsed()); - } + std::string string() const { return string(elapsed()); } // Left shift the string containing the somewhat human readable amount of // time since last reset to an ostream - friend std::ostream& operator<<(std::ostream& os, Timer const& t) { - os << t.string(); - return os; + friend std::ostream &operator<<(std::ostream &os, Timer const &t) { + os << t.string(); + return os; } - private: + private: std::chrono::high_resolution_clock::time_point _start; template - bool string_it(std::string& str, - std::chrono::nanoseconds& elapsed, - std::string unit, - size_t threshold) const { - T x = std::chrono::duration_cast(elapsed); - if (x > T(threshold)) { - str += std::to_string(x.count()) + unit; - elapsed -= x; - return true; - } - return false; + bool string_it(std::string &str, std::chrono::nanoseconds &elapsed, + std::string unit, size_t threshold) const { + T x = std::chrono::duration_cast(elapsed); + if (x > T(threshold)) { + str += std::to_string(x.count()) + unit; + elapsed -= x; + return true; + } + return false; } - }; +}; } // namespace libsemigroups #endif // LIBSEMIGROUPS_SRC_TIMER_H_ diff --git a/experiments/Perm32/demovect.cpp b/experiments/Perm32/demovect.cpp index 1952a67b..81cf2a6a 100644 --- 
a/experiments/Perm32/demovect.cpp +++ b/experiments/Perm32/demovect.cpp @@ -40,11 +40,11 @@ using perm = uint8_t __attribute__((vector_size(16), __may_alias__)); * Définition de l'opérateur d'affichage << pour le type perm **/ ostream &operator<<(ostream &stream, perm const &p) { - stream << "[" << setw(2) << hex << unsigned(p[0]); - for (unsigned i = 1; i < 16; ++i) - stream << "," << setw(2) << hex << unsigned(p[i]) << dec; - stream << "]"; - return stream; + stream << "[" << setw(2) << hex << unsigned(p[0]); + for (unsigned i = 1; i < 16; ++i) + stream << "," << setw(2) << hex << unsigned(p[i]) << dec; + stream << "]"; + return stream; } /**********************************************************************/ @@ -58,23 +58,27 @@ const perm permid{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; const perm decal{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15}; int main() { - const perm v1{2, 1, 7, 4, 9, 15, 12, 0, 5, 3, 6, 8, 11, 10, 14, 13}; - const perm v2{2, 1, 32, 4, 8, 1, 12, 0, 4, 4, 4, 4, 41, 10, 14, 13}; - perm v3; - v3 = v1 <= v2; + const perm v1{2, 1, 7, 4, 9, 15, 12, 0, 5, 3, 6, 8, 11, 10, 14, 13}; + const perm v2{2, 1, 32, 4, 8, 1, 12, 0, 4, 4, 4, 4, 41, 10, 14, 13}; + perm v3; + v3 = v1 <= v2; - cout << v1 << endl; - cout << v2 << endl; + cout << v1 << endl; + cout << v2 << endl; -#define FIND_IN_VECT_MASK (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_UNIT_MASK) +#define FIND_IN_VECT_MASK \ + (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_UNIT_MASK) #define FIND_IN_VECT (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY) - cout << _mm_cmpestrm(v1, 16, v2, 16, FIND_IN_VECT_MASK) << endl; - cout << hex << _mm_movemask_epi8(_mm_cmpestrm(v1, 16, v2, 16, FIND_IN_VECT_MASK)) << endl; - cout << _mm_cmpestrm(v1, 16, v2, 16, FIND_IN_VECT) << endl; - cout << "=====" << endl; + cout << _mm_cmpestrm(v1, 16, v2, 16, FIND_IN_VECT_MASK) << endl; + cout << hex + << _mm_movemask_epi8(_mm_cmpestrm(v1, 16, v2, 16, FIND_IN_VECT_MASK)) + << endl; + cout << _mm_cmpestrm(v1, 16, v2, 16, FIND_IN_VECT) << endl; + cout << "=====" << endl; - cout << _mm_cmpistrm(v1, v2, FIND_IN_VECT_MASK) << endl; - cout << hex << _mm_movemask_epi8(_mm_cmpistrm(v1, v2, FIND_IN_VECT_MASK)) << endl; - cout << _mm_cmpistrm(v1, v2, FIND_IN_VECT) << endl; + cout << _mm_cmpistrm(v1, v2, FIND_IN_VECT_MASK) << endl; + cout << hex << _mm_movemask_epi8(_mm_cmpistrm(v1, v2, FIND_IN_VECT_MASK)) + << endl; + cout << _mm_cmpistrm(v1, v2, FIND_IN_VECT) << endl; } diff --git a/experiments/Perm32/demovect32_avx2.cpp b/experiments/Perm32/demovect32_avx2.cpp index 83ae7315..4c8dbb4f 100644 --- a/experiments/Perm32/demovect32_avx2.cpp +++ b/experiments/Perm32/demovect32_avx2.cpp @@ -41,11 +41,11 @@ using perm = uint8_t __attribute__((vector_size(SZ), __may_alias__)); * Définition de l'opérateur d'affichage << pour le type perm **/ ostream &operator<<(ostream &stream, perm const &p) { - stream << "[" << setw(2) << hex << unsigned(p[0]); - for (unsigned i = 1; i < SZ; ++i) - stream << "," << setw(2) << hex << unsigned(p[i]) << dec; - stream << "]"; - return stream; + stream << "[" << setw(2) << hex << unsigned(p[0]); + for (unsigned i = 1; i < SZ; ++i) + stream << "," << setw(2) << hex << unsigned(p[i]) << dec; + stream << "]"; + return stream; } /**********************************************************************/ @@ -59,21 +59,21 @@ const perm permid{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; const perm decal{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15}; int main() { - const perm v1{2, 1, 7, 4, 9, 15, 12, 0, 5, 
3, 6, 8, 11, 10, 14, 13}; - const perm v2{2, 1, 32, 4, 8, 1, 12, 0, 4, 4, 4, 4, 41, 10, 14, 13}; - perm v3; - v3 = v2 == v1; + const perm v1{2, 1, 7, 4, 9, 15, 12, 0, 5, 3, 6, 8, 11, 10, 14, 13}; + const perm v2{2, 1, 32, 4, 8, 1, 12, 0, 4, 4, 4, 4, 41, 10, 14, 13}; + perm v3; + v3 = v2 == v1; - cout << permid << endl; - cout << v1 << endl; - cout << v2 << endl; - cout << v3 << endl; + cout << permid << endl; + cout << v1 << endl; + cout << v2 << endl; + cout << v3 << endl; - cout << int(v1[0]) << " " << int(v1[1]) << endl; + cout << int(v1[0]) << " " << int(v1[1]) << endl; - long int b = _mm256_movemask_epi8(v3); - cout << "Application du masque : positions égales : " << hex << unsigned(b) - << dec << endl; - cout << "On compte les 1 avec une opération du processeur" << endl; - cout << _mm_popcnt_u32(b) << endl; + long int b = _mm256_movemask_epi8(v3); + cout << "Application du masque : positions égales : " << hex << unsigned(b) + << dec << endl; + cout << "On compte les 1 avec une opération du processeur" << endl; + cout << _mm_popcnt_u32(b) << endl; } diff --git a/experiments/Perm32/perm32.cpp b/experiments/Perm32/perm32.cpp index 08a3e663..e738dbe1 100644 --- a/experiments/Perm32/perm32.cpp +++ b/experiments/Perm32/perm32.cpp @@ -52,12 +52,12 @@ inline uint8_t get(perm32 p, uint64_t i) { return *(&p[0][0] + i); } * Définition de l'opérateur d'affichage << pour le type perm32 **/ std::ostream &operator<<(std::ostream &stream, perm32 const &p) { - using namespace std; - stream << "[" << setw(2) << hex << unsigned(get(p, 0)); - for (unsigned i = 1; i < 32; ++i) - stream << "," << setw(2) << unsigned(get(p, i)); - stream << dec << "]"; - return stream; + using namespace std; + stream << "[" << setw(2) << hex << unsigned(get(p, 0)); + for (unsigned i = 1; i < 32; ++i) + stream << "," << setw(2) << unsigned(get(p, i)); + stream << dec << "]"; + return stream; } /**********************************************************************/ @@ -74,9 +74,9 @@ const perm32 permid{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, /**********************************************************************/ perm32 random_perm32() { - perm32 res = permid; - std::random_shuffle(&set(res, 0), &set(res, 32)); - return res; + perm32 res = permid; + std::random_shuffle(&set(res, 0), &set(res, 32)); + return res; } /** Construit un vecteurs d'ar16 au hasard @@ -84,11 +84,11 @@ perm32 random_perm32() { * @return le vecteur correspondant **/ std::vector rand_perms(int sz) { - std::vector res(sz); - std::srand(std::time(0)); - for (int i = 0; i < sz; i++) - res[i] = random_perm32(); - return res; + std::vector res(sz); + std::srand(std::time(0)); + for (int i = 0; i < sz; i++) + res[i] = random_perm32(); + return res; } /** Calcul et affiche le temps de calcul d'une fonction @@ -97,17 +97,17 @@ std::vector rand_perms(int sz) { * @return le temps d'exécution **/ template double timethat(Func fun, double reftime = 0) { - using namespace std::chrono; - auto tstart = high_resolution_clock::now(); - fun(); - auto tfin = high_resolution_clock::now(); - - auto tm = duration_cast>(tfin - tstart); - std::cout << "time = " << std::setprecision(3) << tm.count() << "s"; - if (reftime != 0) - std::cout << ", speedup = " << reftime / tm.count(); - std::cout << std::endl; - return tm.count(); + using namespace std::chrono; + auto tstart = high_resolution_clock::now(); + fun(); + auto tfin = high_resolution_clock::now(); + + auto tm = duration_cast>(tfin - tstart); + std::cout << "time = " << std::setprecision(3) << tm.count() << "s"; + if (reftime 
!= 0) + std::cout << ", speedup = " << reftime / tm.count(); + std::cout << std::endl; + return tm.count(); } /**********************************************************************/ @@ -115,73 +115,77 @@ template double timethat(Func fun, double reftime = 0) { /**********************************************************************/ inline bool eqperm32(perm32 p1, perm32 p2) { - return (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[0], p2[0])) == 0xffff) & - (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[1], p2[1])) == 0xffff); + return (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[0], p2[0])) == 0xffff) & + (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[1], p2[1])) == 0xffff); } perm32 permute(perm32 v1, perm32 v2) { - return {_mm_blendv_epi8(_mm_shuffle_epi8(v1[1], v2[0]), - _mm_shuffle_epi8(v1[0], v2[0]), v2[0] < 16), - _mm_blendv_epi8(_mm_shuffle_epi8(v1[1], v2[1]), - _mm_shuffle_epi8(v1[0], v2[1]), v2[1] < 16)}; + return {_mm_blendv_epi8(_mm_shuffle_epi8(v1[1], v2[0]), + _mm_shuffle_epi8(v1[0], v2[0]), v2[0] < 16), + _mm_blendv_epi8(_mm_shuffle_epi8(v1[1], v2[1]), + _mm_shuffle_epi8(v1[0], v2[1]), v2[1] < 16)}; } perm32 permute_ref(perm32 v1, perm32 v2) { - perm32 res; - for (uint64_t i = 0; i < 32; i++) - set(res, i) = get(v1, get(v2, i)); - return res; + perm32 res; + for (uint64_t i = 0; i < 32; i++) + set(res, i) = get(v1, get(v2, i)); + return res; } int main() { - using namespace std; - srand(time(0)); - perm32 v1 = random_perm32(); - perm32 v2 = random_perm32(); - cout << permid << endl; - cout << v1 << endl; - cout << v2 << endl; - cout << permute(v1, v2) << endl; - cout << permute_ref(v1, v2) << endl; - - cout << from_xpu8(to_xpu8(v1) & to_xpu8(v2)) << endl; -// cout << from_xpu8(_mm256_and_pd((__m256d) to_xpu8(v1), (__m256d) to_xpu8(v2))) << endl; - cout << from_m256d(_mm256_and_pd((__m256d) to_m256d(v1), (__m256d) to_m256d(v2))) << endl; - - cout << "Sampling : "; - cout.flush(); - auto vrand = rand_perms(100000); - cout << "Done !" << endl; - std::vector check_ref(vrand.size()); - std::vector check(vrand.size()); - - cout << "Ref : "; - double sp_ref = timethat( - [&vrand, &check_ref]() { - std::transform(vrand.begin(), vrand.end(), check_ref.begin(), - [](perm32 p) { - for (int i = 0; i < 800; i++) - p = permute_ref(p, p); - return p; - }); - }, - 0.0); - - cout << "Fast : "; - timethat( - [&vrand, &check]() { - std::transform(vrand.begin(), vrand.end(), check.begin(), [](perm32 p) { - for (int i = 0; i < 800; i++) - p = permute(p, p); - return p; - }); - }, - sp_ref); - - cout << "Checking : "; - cout.flush(); - assert(std::mismatch(check_ref.begin(), check_ref.end(), check.begin(), - eqperm32) == - std::make_pair(check_ref.end(), check.end())); - cout << "Ok !" << endl; + using namespace std; + srand(time(0)); + perm32 v1 = random_perm32(); + perm32 v2 = random_perm32(); + cout << permid << endl; + cout << v1 << endl; + cout << v2 << endl; + cout << permute(v1, v2) << endl; + cout << permute_ref(v1, v2) << endl; + + cout << from_xpu8(to_xpu8(v1) & to_xpu8(v2)) << endl; + // cout << from_xpu8(_mm256_and_pd((__m256d) to_xpu8(v1), (__m256d) + // to_xpu8(v2))) << endl; + cout << from_m256d( + _mm256_and_pd((__m256d)to_m256d(v1), (__m256d)to_m256d(v2))) + << endl; + + cout << "Sampling : "; + cout.flush(); + auto vrand = rand_perms(100000); + cout << "Done !" 
<< endl; + std::vector check_ref(vrand.size()); + std::vector check(vrand.size()); + + cout << "Ref : "; + double sp_ref = timethat( + [&vrand, &check_ref]() { + std::transform(vrand.begin(), vrand.end(), check_ref.begin(), + [](perm32 p) { + for (int i = 0; i < 800; i++) + p = permute_ref(p, p); + return p; + }); + }, + 0.0); + + cout << "Fast : "; + timethat( + [&vrand, &check]() { + std::transform(vrand.begin(), vrand.end(), check.begin(), + [](perm32 p) { + for (int i = 0; i < 800; i++) + p = permute(p, p); + return p; + }); + }, + sp_ref); + + cout << "Checking : "; + cout.flush(); + assert(std::mismatch(check_ref.begin(), check_ref.end(), check.begin(), + eqperm32) == + std::make_pair(check_ref.end(), check.end())); + cout << "Ok !" << endl; } diff --git a/experiments/Perm32/perm64.cpp b/experiments/Perm32/perm64.cpp index b8d217df..22163b8d 100644 --- a/experiments/Perm32/perm64.cpp +++ b/experiments/Perm32/perm64.cpp @@ -44,12 +44,12 @@ inline uint8_t get(perm64 p, uint64_t i) { return *(&p[0][0] + i); } * Définition de l'opérateur d'affichage << pour le type perm64 **/ std::ostream &operator<<(std::ostream &stream, perm64 const &p) { - using namespace std; - stream << "[" << setw(2) << hex << unsigned(get(p, 0)); - for (unsigned i = 1; i < 32; ++i) - stream << "," << setw(2) << unsigned(get(p, i)); - stream << dec << "...]"; - return stream; + using namespace std; + stream << "[" << setw(2) << hex << unsigned(get(p, 0)); + for (unsigned i = 1; i < 32; ++i) + stream << "," << setw(2) << unsigned(get(p, i)); + stream << dec << "...]"; + return stream; } /**********************************************************************/ @@ -68,9 +68,9 @@ const perm64 permid{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, /**********************************************************************/ perm64 random_perm64() { - perm64 res = permid; - std::random_shuffle(&set(res, 0), &set(res, 64)); - return res; + perm64 res = permid; + std::random_shuffle(&set(res, 0), &set(res, 64)); + return res; } /** Construit un vecteurs d'ar16 au hasard @@ -78,11 +78,11 @@ perm64 random_perm64() { * @return le vecteur correspondant **/ std::vector rand_perms(int sz) { - std::vector res(sz); - std::srand(std::time(0)); - for (int i = 0; i < sz; i++) - res[i] = random_perm64(); - return res; + std::vector res(sz); + std::srand(std::time(0)); + for (int i = 0; i < sz; i++) + res[i] = random_perm64(); + return res; } /** Calcul et affiche le temps de calcul d'une fonction @@ -91,17 +91,17 @@ std::vector rand_perms(int sz) { * @return le temps d'exécution **/ template double timethat(Func fun, double reftime = 0) { - using namespace std::chrono; - auto tstart = high_resolution_clock::now(); - fun(); - auto tfin = high_resolution_clock::now(); - - auto tm = duration_cast>(tfin - tstart); - std::cout << "time = " << std::setprecision(3) << tm.count() << "s"; - if (reftime != 0) - std::cout << ", speedup = " << reftime / tm.count(); - std::cout << std::endl; - return tm.count(); + using namespace std::chrono; + auto tstart = high_resolution_clock::now(); + fun(); + auto tfin = high_resolution_clock::now(); + + auto tm = duration_cast>(tfin - tstart); + std::cout << "time = " << std::setprecision(3) << tm.count() << "s"; + if (reftime != 0) + std::cout << ", speedup = " << reftime / tm.count(); + std::cout << std::endl; + return tm.count(); } /**********************************************************************/ @@ -109,144 +109,144 @@ template double timethat(Func fun, double reftime = 0) { 
/**********************************************************************/ inline bool eqperm64(perm64 p1, perm64 p2) { - for (uint64_t i = 0; i < 4; i++) - if (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[i], p2[i])) != 0xffff) - return false; - return true; + for (uint64_t i = 0; i < 4; i++) + if (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[i], p2[i])) != 0xffff) + return false; + return true; } perm64 permute_1(perm64 v1, perm64 v2) { - perm64 res = {}; - for (uint64_t i = 0; i < 4; i++) { - for (uint64_t j = 0; j < 4; j++) { - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), v2[j] <= 15); - v2[j] -= 16; + perm64 res = {}; + for (uint64_t i = 0; i < 4; i++) { + for (uint64_t j = 0; j < 4; j++) { + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), + v2[j] <= 15); + v2[j] -= 16; + } } - } - return res; + return res; } perm64 permute_2(perm64 v1, perm64 v2) { - perm64 res; - for (uint64_t j = 0; j < 4; j++) { - res[j] = _mm_shuffle_epi8(v1[0], v2[j]); - v2[j] -= 16; - } - for (uint64_t i = 1; i < 4; i++) { + perm64 res; for (uint64_t j = 0; j < 4; j++) { - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), v2[j] <= 15); - v2[j] -= 16; + res[j] = _mm_shuffle_epi8(v1[0], v2[j]); + v2[j] -= 16; + } + for (uint64_t i = 1; i < 4; i++) { + for (uint64_t j = 0; j < 4; j++) { + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), + v2[j] <= 15); + v2[j] -= 16; + } } - } - return res; + return res; } perm64 permute_3(perm64 v1, perm64 v2) { - perm64 res; - for (uint64_t j = 0; j < 4; j++) { - res[j] = _mm_shuffle_epi8(v1[0], v2[j]); - v2[j] -= 16; - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[1], v2[j]), v2[j] <= 15); - v2[j] -= 16; - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[2], v2[j]), v2[j] <= 15); - v2[j] -= 16; - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[3], v2[j]), v2[j] <= 15); - } - return res; + perm64 res; + for (uint64_t j = 0; j < 4; j++) { + res[j] = _mm_shuffle_epi8(v1[0], v2[j]); + v2[j] -= 16; + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[1], v2[j]), + v2[j] <= 15); + v2[j] -= 16; + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[2], v2[j]), + v2[j] <= 15); + v2[j] -= 16; + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[3], v2[j]), + v2[j] <= 15); + } + return res; } perm64 permute_ref(perm64 v1, perm64 v2) { - perm64 res; - for (uint64_t i = 0; i < 64; i++) - set(res, i) = get(v1, get(v2, i)); - return res; + perm64 res; + for (uint64_t i = 0; i < 64; i++) + set(res, i) = get(v1, get(v2, i)); + return res; } int main() { - using namespace std; - srand(time(0)); - perm64 v1 = random_perm64(); - perm64 v2 = random_perm64(); - cout << permid << endl; - cout << v1 << endl; - cout << v2 << endl << endl; - cout << permute_ref(v1, v2) << endl << endl; - cout << permute_1(v1, v2) << endl; - cout << permute_2(v1, v2) << endl; - cout << permute_3(v1, v2) << endl; - - cout << "Sampling : "; - cout.flush(); - auto vrand = rand_perms(100000); - cout << "Done !" 
<< endl; - std::vector check_ref(vrand.size()); - std::vector check_1(vrand.size()); - std::vector check_2(vrand.size()); - std::vector check_3(vrand.size()); - - cout << "Ref : "; - double sp_ref = timethat( - [&vrand, &check_ref]() { - std::transform(vrand.begin(), vrand.end(), check_ref.begin(), - [](perm64 p) { - for (int i = 0; i < 800; i++) - p = permute_ref(p, p); - return p; - }); - }, - 0.0); - - cout << "Fast : "; - timethat( - [&vrand, &check_1]() { - std::transform(vrand.begin(), vrand.end(), check_1.begin(), - [](perm64 p) { - for (int i = 0; i < 800; i++) - p = permute_1(p, p); - return p; - }); - }, - sp_ref); - - cout << "Fast2: "; - timethat( - [&vrand, &check_2]() { - std::transform(vrand.begin(), vrand.end(), check_2.begin(), - [](perm64 p) { - for (int i = 0; i < 800; i++) - p = permute_2(p, p); - return p; - }); - }, - sp_ref); - - cout << "Fast3: "; - timethat( - [&vrand, &check_3]() { - std::transform(vrand.begin(), vrand.end(), check_3.begin(), - [](perm64 p) { - for (int i = 0; i < 800; i++) - p = permute_3(p, p); - return p; - }); - }, - sp_ref); - - cout << "Checking : "; - cout.flush(); - assert(std::mismatch(check_ref.begin(), check_ref.end(), check_1.begin(), - eqperm64) == - std::make_pair(check_ref.end(), check_1.end())); - assert(std::mismatch(check_ref.begin(), check_ref.end(), check_2.begin(), - eqperm64) == - std::make_pair(check_ref.end(), check_2.end())); - assert(std::mismatch(check_ref.begin(), check_ref.end(), check_3.begin(), - eqperm64) == - std::make_pair(check_ref.end(), check_3.end())); - cout << "Ok !" << endl; + using namespace std; + srand(time(0)); + perm64 v1 = random_perm64(); + perm64 v2 = random_perm64(); + cout << permid << endl; + cout << v1 << endl; + cout << v2 << endl << endl; + cout << permute_ref(v1, v2) << endl << endl; + cout << permute_1(v1, v2) << endl; + cout << permute_2(v1, v2) << endl; + cout << permute_3(v1, v2) << endl; + + cout << "Sampling : "; + cout.flush(); + auto vrand = rand_perms(100000); + cout << "Done !" 
<< endl; + std::vector check_ref(vrand.size()); + std::vector check_1(vrand.size()); + std::vector check_2(vrand.size()); + std::vector check_3(vrand.size()); + + cout << "Ref : "; + double sp_ref = timethat( + [&vrand, &check_ref]() { + std::transform(vrand.begin(), vrand.end(), check_ref.begin(), + [](perm64 p) { + for (int i = 0; i < 800; i++) + p = permute_ref(p, p); + return p; + }); + }, + 0.0); + + cout << "Fast : "; + timethat( + [&vrand, &check_1]() { + std::transform(vrand.begin(), vrand.end(), check_1.begin(), + [](perm64 p) { + for (int i = 0; i < 800; i++) + p = permute_1(p, p); + return p; + }); + }, + sp_ref); + + cout << "Fast2: "; + timethat( + [&vrand, &check_2]() { + std::transform(vrand.begin(), vrand.end(), check_2.begin(), + [](perm64 p) { + for (int i = 0; i < 800; i++) + p = permute_2(p, p); + return p; + }); + }, + sp_ref); + + cout << "Fast3: "; + timethat( + [&vrand, &check_3]() { + std::transform(vrand.begin(), vrand.end(), check_3.begin(), + [](perm64 p) { + for (int i = 0; i < 800; i++) + p = permute_3(p, p); + return p; + }); + }, + sp_ref); + + cout << "Checking : "; + cout.flush(); + assert(std::mismatch(check_ref.begin(), check_ref.end(), check_1.begin(), + eqperm64) == + std::make_pair(check_ref.end(), check_1.end())); + assert(std::mismatch(check_ref.begin(), check_ref.end(), check_2.begin(), + eqperm64) == + std::make_pair(check_ref.end(), check_2.end())); + assert(std::mismatch(check_ref.begin(), check_ref.end(), check_3.begin(), + eqperm64) == + std::make_pair(check_ref.end(), check_3.end())); + cout << "Ok !" << endl; } diff --git a/experiments/Perm32/permbig.cpp b/experiments/Perm32/permbig.cpp index 8550667f..5424b119 100644 --- a/experiments/Perm32/permbig.cpp +++ b/experiments/Perm32/permbig.cpp @@ -47,12 +47,12 @@ inline uint8_t get(perm p, uint64_t i) { return *(&p[0][0] + i); } * Définition de l'opérateur d'affichage << pour le type perm **/ std::ostream &operator<<(std::ostream &stream, perm const &p) { - using namespace std; - stream << "[" << setw(2) << hex << unsigned(get(p, 0)); - for (unsigned i = 1; i < 16; ++i) - stream << "," << setw(2) << unsigned(get(p, i)); - stream << dec << "...]"; - return stream; + using namespace std; + stream << "[" << setw(2) << hex << unsigned(get(p, 0)); + for (unsigned i = 1; i < 16; ++i) + stream << "," << setw(2) << unsigned(get(p, i)); + stream << dec << "...]"; + return stream; } /**********************************************************************/ @@ -60,10 +60,10 @@ std::ostream &operator<<(std::ostream &stream, perm const &p) { /**********************************************************************/ const perm make_permid() { - perm res; - for (uint64_t i = 0; i < Size; i++) - set(res, i) = i; - return res; + perm res; + for (uint64_t i = 0; i < Size; i++) + set(res, i) = i; + return res; } /** Permutation identité **/ const perm permid = make_permid(); @@ -74,9 +74,9 @@ const perm permid = make_permid(); /**********************************************************************/ perm random_perm() { - perm res = permid; - std::random_shuffle(&set(res, 0), &set(res, Size)); - return res; + perm res = permid; + std::random_shuffle(&set(res, 0), &set(res, Size)); + return res; } /** Construit un vecteurs d'ar16 au hasard @@ -84,11 +84,11 @@ perm random_perm() { * @return le vecteur correspondant **/ std::vector rand_perms(int sz) { - std::vector res(sz); - std::srand(std::time(0)); - for (int i = 0; i < sz; i++) - res[i] = random_perm(); - return res; + std::vector res(sz); + 
std::srand(std::time(0)); + for (int i = 0; i < sz; i++) + res[i] = random_perm(); + return res; } /** Calcul et affiche le temps de calcul d'une fonction @@ -97,17 +97,17 @@ std::vector rand_perms(int sz) { * @return le temps d'exécution **/ template double timethat(Func fun, double reftime = 0) { - using namespace std::chrono; - auto tstart = high_resolution_clock::now(); - fun(); - auto tfin = high_resolution_clock::now(); - - auto tm = duration_cast>(tfin - tstart); - std::cout << "time = " << std::setprecision(3) << tm.count() << "s"; - if (reftime != 0) - std::cout << ", speedup = " << reftime / tm.count(); - std::cout << std::endl; - return tm.count(); + using namespace std::chrono; + auto tstart = high_resolution_clock::now(); + fun(); + auto tfin = high_resolution_clock::now(); + + auto tm = duration_cast>(tfin - tstart); + std::cout << "time = " << std::setprecision(3) << tm.count() << "s"; + if (reftime != 0) + std::cout << ", speedup = " << reftime / tm.count(); + std::cout << std::endl; + return tm.count(); } /**********************************************************************/ @@ -115,141 +115,143 @@ template double timethat(Func fun, double reftime = 0) { /**********************************************************************/ inline bool eqperm(perm p1, perm p2) { - for (uint64_t i = 0; i < NBlock; i++) - if (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[i], p2[i])) != 0xffff) - return false; - return true; + for (uint64_t i = 0; i < NBlock; i++) + if (_mm_movemask_epi8(_mm_cmpeq_epi8(p1[i], p2[i])) != 0xffff) + return false; + return true; } perm permute_1(const perm &v1, perm v2) { - perm res = {}; - for (uint64_t i = 0; i < NBlock; i++) { - for (uint64_t j = 0; j < NBlock; j++) { - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), v2[j] <= 15); - v2[j] -= 16; + perm res = {}; + for (uint64_t i = 0; i < NBlock; i++) { + for (uint64_t j = 0; j < NBlock; j++) { + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), + v2[j] <= 15); + v2[j] -= 16; + } } - } - return res; + return res; } perm permute_2(const perm &v1, perm v2) { - perm res; - for (uint64_t j = 0; j < NBlock; j++) { - res[j] = _mm_shuffle_epi8(v1[0], v2[j]); - v2[j] -= 16; - } - for (uint64_t i = 1; i < NBlock; i++) { + perm res; for (uint64_t j = 0; j < NBlock; j++) { - res[j] = - _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), v2[j] <= 15); - v2[j] -= 16; + res[j] = _mm_shuffle_epi8(v1[0], v2[j]); + v2[j] -= 16; + } + for (uint64_t i = 1; i < NBlock; i++) { + for (uint64_t j = 0; j < NBlock; j++) { + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2[j]), + v2[j] <= 15); + v2[j] -= 16; + } } - } - return res; + return res; } perm permute_3(const perm &v1, const perm &v2) { - perm res; - for (uint64_t j = 0; j < NBlock; j++) { - epu8 v2j = v2[j]; - res[j] = _mm_shuffle_epi8(v1[0], v2j); - for (uint64_t i = 1; i < NBlock; i++) { - v2j -= 16; - res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2j), v2j <= 15); + perm res; + for (uint64_t j = 0; j < NBlock; j++) { + epu8 v2j = v2[j]; + res[j] = _mm_shuffle_epi8(v1[0], v2j); + for (uint64_t i = 1; i < NBlock; i++) { + v2j -= 16; + res[j] = _mm_blendv_epi8(res[j], _mm_shuffle_epi8(v1[i], v2j), + v2j <= 15); + } } - } - return res; + return res; } perm permute_ref(const perm &v1, const perm &v2) { - perm res; - for (uint64_t i = 0; i < Size; i++) - set(res, i) = get(v1, get(v2, i)); - return res; + perm res; + for (uint64_t i = 0; i < Size; i++) + set(res, i) = get(v1, get(v2, i)); + return res; } int main() { - using 
namespace std; - - const int repl = 256; - const int samplesz = 100000 / NBlock; - - srand(time(0)); - - /* - perm v1 = random_perm(); - perm v2 = random_perm(); - cout << permid << endl; - cout << v1 << endl; - cout << v2 << endl << endl; - cout << permute_ref(v1, v2) << endl << endl; - cout << permute_1(v1, v2) << endl; - cout << permute_2(v1, v2) << endl; - */ - - cout << "NBlock = " << NBlock << endl; - - cout << "Sampling : "; - cout.flush(); - auto vrand = rand_perms(samplesz); - cout << "Done !" << endl; - vector check_ref(vrand.size()); - vector check_1(vrand.size()); - vector check_2(vrand.size()); - vector check_3(vrand.size()); - - cout << "Ref : "; - double sp_ref = timethat( - [&vrand, &check_ref]() { - transform(vrand.begin(), vrand.end(), check_ref.begin(), [](perm p) { - for (int i = 0; i < repl; i++) - p = permute_ref(p, p); - return p; - }); - }, - 0.0); - - cout << "Fast : "; - timethat( - [&vrand, &check_1]() { - transform(vrand.begin(), vrand.end(), check_1.begin(), [](perm p) { - for (int i = 0; i < repl; i++) - p = permute_1(p, p); - return p; - }); - }, - sp_ref); - - cout << "Fast2: "; - timethat( - [&vrand, &check_2]() { - transform(vrand.begin(), vrand.end(), check_2.begin(), [](perm p) { - for (int i = 0; i < repl; i++) - p = permute_2(p, p); - return p; - }); - }, - sp_ref); - - cout << "Fast3: "; - timethat( - [&vrand, &check_3]() { - transform(vrand.begin(), vrand.end(), check_3.begin(), [](perm p) { - for (int i = 0; i < repl; i++) - p = permute_3(p, p); - return p; - }); - }, - sp_ref); - - cout << "Checking : "; - cout.flush(); - assert(mismatch(check_ref.begin(), check_ref.end(), check_1.begin(), - eqperm) == make_pair(check_ref.end(), check_1.end())); - assert(mismatch(check_ref.begin(), check_ref.end(), check_2.begin(), - eqperm) == make_pair(check_ref.end(), check_2.end())); - assert(mismatch(check_ref.begin(), check_ref.end(), check_3.begin(), - eqperm) == make_pair(check_ref.end(), check_3.end())); - cout << "Ok !" << endl; + using namespace std; + + const int repl = 256; + const int samplesz = 100000 / NBlock; + + srand(time(0)); + + /* + perm v1 = random_perm(); + perm v2 = random_perm(); + cout << permid << endl; + cout << v1 << endl; + cout << v2 << endl << endl; + cout << permute_ref(v1, v2) << endl << endl; + cout << permute_1(v1, v2) << endl; + cout << permute_2(v1, v2) << endl; + */ + + cout << "NBlock = " << NBlock << endl; + + cout << "Sampling : "; + cout.flush(); + auto vrand = rand_perms(samplesz); + cout << "Done !" 
<< endl; + vector check_ref(vrand.size()); + vector check_1(vrand.size()); + vector check_2(vrand.size()); + vector check_3(vrand.size()); + + cout << "Ref : "; + double sp_ref = timethat( + [&vrand, &check_ref]() { + transform(vrand.begin(), vrand.end(), check_ref.begin(), + [](perm p) { + for (int i = 0; i < repl; i++) + p = permute_ref(p, p); + return p; + }); + }, + 0.0); + + cout << "Fast : "; + timethat( + [&vrand, &check_1]() { + transform(vrand.begin(), vrand.end(), check_1.begin(), [](perm p) { + for (int i = 0; i < repl; i++) + p = permute_1(p, p); + return p; + }); + }, + sp_ref); + + cout << "Fast2: "; + timethat( + [&vrand, &check_2]() { + transform(vrand.begin(), vrand.end(), check_2.begin(), [](perm p) { + for (int i = 0; i < repl; i++) + p = permute_2(p, p); + return p; + }); + }, + sp_ref); + + cout << "Fast3: "; + timethat( + [&vrand, &check_3]() { + transform(vrand.begin(), vrand.end(), check_3.begin(), [](perm p) { + for (int i = 0; i < repl; i++) + p = permute_3(p, p); + return p; + }); + }, + sp_ref); + + cout << "Checking : "; + cout.flush(); + assert(mismatch(check_ref.begin(), check_ref.end(), check_1.begin(), + eqperm) == make_pair(check_ref.end(), check_1.end())); + assert(mismatch(check_ref.begin(), check_ref.end(), check_2.begin(), + eqperm) == make_pair(check_ref.end(), check_2.end())); + assert(mismatch(check_ref.begin(), check_ref.end(), check_3.begin(), + eqperm) == make_pair(check_ref.end(), check_3.end())); + cout << "Ok !" << endl; } diff --git a/experiments/Perm32/permdesc_red.cpp b/experiments/Perm32/permdesc_red.cpp index 3ad483d5..fa2e6a41 100644 --- a/experiments/Perm32/permdesc_red.cpp +++ b/experiments/Perm32/permdesc_red.cpp @@ -33,33 +33,33 @@ using perm = uint8_t __attribute__((vector_size(16), __may_alias__)); // conversion ar16 <-> perm using converter = union { - ar16 p; - perm v8; + ar16 p; + perm v8; }; ar16 ar16_perm(perm v) { - converter c; - c.v8 = v; - return c.p; + converter c; + c.v8 = v; + return c.p; } perm perm_ar16(ar16 p) { - converter c; - c.p = p; - return c.v8; + converter c; + c.p = p; + return c.v8; } // Affichage ostream &operator<<(ostream &stream, ar16 const &ar) { - stream << "[" << setw(2) << hex << unsigned(ar[0]); - for (unsigned i = 1; i < 16; ++i) - stream << "," << setw(2) << hex << unsigned(ar[i]) << dec; - stream << "]"; - return stream; + stream << "[" << setw(2) << hex << unsigned(ar[0]); + for (unsigned i = 1; i < 16; ++i) + stream << "," << setw(2) << hex << unsigned(ar[i]) << dec; + stream << "]"; + return stream; } ostream &operator<<(ostream &stream, perm const &p) { - stream << ar16_perm(p); - return stream; + stream << ar16_perm(p); + return stream; } // permutation identique @@ -67,36 +67,36 @@ const ar16 ar16id = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; const perm permid = perm_ar16(ar16id); ar16 transposition(uint64_t i, uint64_t j) { - assert(i < 16); - assert(j < 16); - ar16 res = ar16id; - std::swap(res[i], res[j]); - return res; + assert(i < 16); + assert(j < 16); + ar16 res = ar16id; + std::swap(res[i], res[j]); + return res; } ar16 randomar16() { - ar16 res = ar16id; - random_shuffle(res.begin(), res.end()); - return res; + ar16 res = ar16id; + random_shuffle(res.begin(), res.end()); + return res; } perm decal = perm_ar16({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15}); int nb_descent(perm p) { - perm pdec = _mm_shuffle_epi8((__m128i)p, (__m128i)decal); - pdec = (p > pdec); - return _mm_popcnt_u32(_mm_movemask_epi8((__m128i)pdec)); + perm pdec = 
_mm_shuffle_epi8((__m128i)p, (__m128i)decal); + pdec = (p > pdec); + return _mm_popcnt_u32(_mm_movemask_epi8((__m128i)pdec)); } array, 16> transp; void init_transp() { - for (int i = 0; i < 16; i++) - for (int j = 0; j < 16; j++) - transp[i][j] = perm_ar16(transposition(i, j)); + for (int i = 0; i < 16; i++) + for (int j = 0; j < 16; j++) + transp[i][j] = perm_ar16(transposition(i, j)); } perm permuteij(perm p, int i, int j) { - return _mm_shuffle_epi8((__m128i)p, (__m128i)transp[i][j]); + return _mm_shuffle_epi8((__m128i)p, (__m128i)transp[i][j]); } cilkpub::creducer_opadd_array res_red(16); @@ -104,85 +104,86 @@ cilkpub::creducer_opadd_array res_red(16); /* parcours l'ensemble de toutes les permutations obtenues en permutant les n premières valeurs de perm */ void allperm(perm p, int n) { - if (n == 0) - res_red[nb_descent(p)] += 1; - for (int i = 0; i < n; i++) { - cilk_spawn allperm(permuteij(p, i, n - 1), n - 1); - } + if (n == 0) + res_red[nb_descent(p)] += 1; + for (int i = 0; i < n; i++) { + cilk_spawn allperm(permuteij(p, i, n - 1), n - 1); + } } void allperm_iter7(perm p7) { - unsigned long int res[16]; - for (int i = 0; i < 16; i++) - res[i] = 0; - for (int i6 = 0; i6 <= 6; i6++) { - perm p6 = permuteij(p7, i6, 6); - for (int i5 = 0; i5 <= 5; i5++) { - perm p5 = permuteij(p6, i5, 5); - for (int i4 = 0; i4 <= 4; i4++) { - perm p4 = permuteij(p5, i4, 4); - for (int i3 = 0; i3 <= 3; i3++) { - perm p3 = permuteij(p4, i3, 3); - for (int i2 = 0; i2 <= 2; i2++) { - perm p2 = permuteij(p3, i2, 2); - for (int i1 = 0; i1 <= 1; i1++) - res[nb_descent(permuteij(p2, i1, 1))]++; - } + unsigned long int res[16]; + for (int i = 0; i < 16; i++) + res[i] = 0; + for (int i6 = 0; i6 <= 6; i6++) { + perm p6 = permuteij(p7, i6, 6); + for (int i5 = 0; i5 <= 5; i5++) { + perm p5 = permuteij(p6, i5, 5); + for (int i4 = 0; i4 <= 4; i4++) { + perm p4 = permuteij(p5, i4, 4); + for (int i3 = 0; i3 <= 3; i3++) { + perm p3 = permuteij(p4, i3, 3); + for (int i2 = 0; i2 <= 2; i2++) { + perm p2 = permuteij(p3, i2, 2); + for (int i1 = 0; i1 <= 1; i1++) + res[nb_descent(permuteij(p2, i1, 1))]++; + } + } + } } - } } - } - for (int i = 0; i < 16; i++) - res_red[i] += res[i]; + for (int i = 0; i < 16; i++) + res_red[i] += res[i]; } /* parcours l'ensemble de toutes les permutations obtenues en permutant les n premières valeurs de perm */ void allperm_derec(perm p, int n) { - if (n == 7) - allperm_iter7(p); - else - for (int i = 0; i < n; i++) { - cilk_spawn allperm_derec(permuteij(p, i, n - 1), n - 1); - } + if (n == 7) + allperm_iter7(p); + else + for (int i = 0; i < n; i++) { + cilk_spawn allperm_derec(permuteij(p, i, n - 1), n - 1); + } } void show_usage(std::string name) { - cerr << "Usage: " << name << " [-n ] size " << endl; + cerr << "Usage: " << name << " [-n ] size " << endl; } int main(int argc, char *argv[]) { - int n; - - if (argc != 2 and argc != 4) { - show_usage(argv[0]); - return 1; - } - if (argc == 4) { - if (std::string(argv[1]) != "-n") { - show_usage(argv[0]); - return 1; + int n; + + if (argc != 2 and argc != 4) { + show_usage(argv[0]); + return 1; } - std::string nproc = std::string(argv[2]); - if (__cilkrts_set_param("nworkers", nproc.c_str()) != - __CILKRTS_SET_PARAM_SUCCESS) - std::cerr << "Failed to set the number of Cilk workers" << std::endl; - } - - init_transp(); - n = atoi(argv[argc - 1]); - - unsigned long int res[16]; - for (int i = 0; i < 16; i++) - res[i] = 0; - - res_red.move_in(res); - allperm_derec(permid, n); - res_red.move_out(res); - - std::cout << "Result: "; - for (int i = 
0; i < 16; i++) - std::cout << res[i] << " "; - std::cout << std::endl; - return 0; + if (argc == 4) { + if (std::string(argv[1]) != "-n") { + show_usage(argv[0]); + return 1; + } + std::string nproc = std::string(argv[2]); + if (__cilkrts_set_param("nworkers", nproc.c_str()) != + __CILKRTS_SET_PARAM_SUCCESS) + std::cerr << "Failed to set the number of Cilk workers" + << std::endl; + } + + init_transp(); + n = atoi(argv[argc - 1]); + + unsigned long int res[16]; + for (int i = 0; i < 16; i++) + res[i] = 0; + + res_red.move_in(res); + allperm_derec(permid, n); + res_red.move_out(res); + + std::cout << "Result: "; + for (int i = 0; i < 16; i++) + std::cout << res[i] << " "; + std::cout << std::endl; + return 0; } diff --git a/include/arch.hpp b/include/arch.hpp index 239da6fb..addd724b 100644 --- a/include/arch.hpp +++ b/include/arch.hpp @@ -17,7 +17,7 @@ #define HPCOMBI_ARCH_HPP_INCLUDED #if defined(SIMDE_ARCH_AMD64) && !defined(SIMDE_ARCH_X86_SSE4_1) -# error("x86_64 architecture without required compiler flags for SSE-4.1 instruction set. Did you forget to provide the flag -march=(native,avx,sse4.1) flag ?") +#error("x86_64 architecture without required compiler flags for SSE-4.1 instruction set. Did you forget to provide the flag -march=(native,avx,sse4.1) flag ?") #endif #endif // HPCOMBI_ARCH_HPP_INCLUDED diff --git a/include/bmat8.hpp b/include/bmat8.hpp index cd984bf2..4d764d00 100644 --- a/include/bmat8.hpp +++ b/include/bmat8.hpp @@ -293,15 +293,10 @@ class BMat8 { //! This method returns the 8 x 8 BMat8 with 1s on the main diagonal. static BMat8 one(size_t dim = 8) { HPCOMBI_ASSERT(dim <= 8); - static std::array const ones = {0x0000000000000000, - 0x8000000000000000, - 0x8040000000000000, - 0x8040200000000000, - 0x8040201000000000, - 0x8040201008000000, - 0x8040201008040000, - 0x8040201008040200, - 0x8040201008040201}; + static std::array const ones = { + 0x0000000000000000, 0x8000000000000000, 0x8040000000000000, + 0x8040200000000000, 0x8040201000000000, 0x8040201008000000, + 0x8040201008040000, 0x8040201008040200, 0x8040201008040201}; return BMat8(ones[dim]); } @@ -320,7 +315,7 @@ class BMat8 { void swap(BMat8 &that) { std::swap(this->_data, that._data); } //! 
Write \c this on \c os - std::ostream & write(std::ostream &os) const; + std::ostream &write(std::ostream &os) const; #ifdef LIBSEMIGROUPS_DENSEHASHMAP // FIXME do this another way diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index 3b2b0da0..8dbd27a7 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -194,13 +194,13 @@ inline void BMat8::transpose2(BMat8 &a, BMat8 &b) { b._data = simde_mm_extract_epi64(x, 0); } -static constexpr epu8 rotlow { 7, 0, 1, 2, 3, 4, 5, 6}; -static constexpr epu8 rothigh - { 0, 1, 2, 3, 4, 5, 6, 7,15, 8, 9,10,11,12,13,14}; -static constexpr epu8 rotboth - { 7, 0, 1, 2, 3, 4, 5, 6,15, 8, 9,10,11,12,13,14}; -static constexpr epu8 rot2 - { 6, 7, 0, 1, 2, 3, 4, 5,14,15, 8, 9,10,11,12,13}; +static constexpr epu8 rotlow{7, 0, 1, 2, 3, 4, 5, 6}; +static constexpr epu8 rothigh{0, 1, 2, 3, 4, 5, 6, 7, + 15, 8, 9, 10, 11, 12, 13, 14}; +static constexpr epu8 rotboth{7, 0, 1, 2, 3, 4, 5, 6, + 15, 8, 9, 10, 11, 12, 13, 14}; +static constexpr epu8 rot2{6, 7, 0, 1, 2, 3, 4, 5, + 14, 15, 8, 9, 10, 11, 12, 13}; inline BMat8 BMat8::mult_transpose(BMat8 const &that) const { epu8 x = simde_mm_set_epi64x(_data, _data); @@ -241,8 +241,8 @@ inline BMat8 BMat8::row_space_basis() const { #endif /* FF */ #define FF 0xff -constexpr std::array masks {{ -// clang-format off +constexpr std::array masks{ + {// clang-format off {FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0}, {FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1}, {FF,FF,FF,FF, 2, 2, 2, 2,FF,FF,FF,FF, 2, 2, 2, 2}, diff --git a/include/epu.hpp b/include/epu.hpp index bc024b44..1c7420ee 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -33,11 +33,9 @@ #include "vect_generic.hpp" - #include "simde/x86/sse4.1.h" #include "simde/x86/sse4.2.h" - #ifdef HPCOMBI_CONSTEXPR_FUN_ARGS #define HPCOMBI_CONSTEXPR constexpr #define HPCOMBI_CONSTEXPR_CONSTRUCTOR constexpr @@ -48,7 +46,6 @@ #define HPCOMBI_CONSTEXPR_CONSTRUCTOR #endif - namespace HPCombi { /// Unsigned 8 bits int constant. @@ -136,15 +133,12 @@ uint8_t right_dup_fun(uint8_t i) { return i == 0 ? 0 : i - 1; } HPCOMBI_CONSTEXPR uint8_t complement_fun(uint8_t i) { return 15 - i; } HPCOMBI_CONSTEXPR uint8_t popcount4_fun(uint8_t i) { - return ((i & 1) != 0 ? 1 : 0) - + ((i & 2) != 0 ? 1 : 0) - + ((i & 4) != 0 ? 1 : 0) - + ((i & 8) != 0 ? 1 : 0); + return ((i & 1) != 0 ? 1 : 0) + ((i & 2) != 0 ? 1 : 0) + + ((i & 4) != 0 ? 1 : 0) + ((i & 8) != 0 ? 
1 : 0); } } // Anonymous namespace - /// Factory object for various SIMD constants in particular constexpr TPUBuild Epu8; @@ -207,7 +201,9 @@ inline bool is_all_zero(epu8 a) { return simde_mm_testz_si128(a, a); } inline bool is_all_one(epu8 a) { return simde_mm_testc_si128(a, Epu8(0xFF)); } /** Equality of #HPCombi::epu8 */ -inline bool equal(epu8 a, epu8 b) { return is_all_zero(simde_mm_xor_si128(a, b)); } +inline bool equal(epu8 a, epu8 b) { + return is_all_zero(simde_mm_xor_si128(a, b)); +} /** Non equality of #HPCombi::epu8 */ inline bool not_equal(epu8 a, epu8 b) { return not equal(a, b); } @@ -258,13 +254,12 @@ inline epu8 revsorted8(epu8 a); * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort_perm(epu8 & a); +inline epu8 sort_perm(epu8 &a); /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort8_perm(epu8 & a); - +inline epu8 sort8_perm(epu8 &a); /** @class common_permutation_of * @brief Find if a vector is a permutation of one other @@ -372,7 +367,6 @@ inline epu8 partial_sums_round(epu8); /** @copydoc common_partial_sums */ inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } - /** @class common_horiz_max * @brief Horizontal sum of a #HPCombi::epu8 * @details @@ -436,7 +430,6 @@ inline epu8 partial_max_round(epu8); /** @copydoc common_partial_max */ inline epu8 partial_max(epu8 v) { return partial_max_round(v); } - /** @class common_horiz_min * @brief Horizontal sum of a #HPCombi::epu8 * @details @@ -500,7 +493,6 @@ inline epu8 partial_min_round(epu8); /** @copydoc common_partial_min */ inline epu8 partial_min(epu8 v) { return partial_min_round(v); } - /** @class common_eval16 * @brief Evaluation of a #HPCombi::epu8 * @details @@ -741,7 +733,7 @@ inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a); * - std::hash * - std::less */ -} +} // namespace std #include "epu_impl.hpp" diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 79e7b8ea..c3eb80c7 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -24,18 +24,21 @@ // Comparison mode for _mm_cmpestri #define FIRST_DIFF \ - (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | SIMDE_SIDD_NEGATIVE_POLARITY) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \ + SIMDE_SIDD_NEGATIVE_POLARITY) #define LAST_DIFF \ - (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | SIMDE_SIDD_NEGATIVE_POLARITY | \ - SIMDE_SIDD_MOST_SIGNIFICANT) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \ + SIMDE_SIDD_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT) #define FIRST_ZERO (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY) #define LAST_ZERO \ - (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_MOST_SIGNIFICANT) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \ + SIMDE_SIDD_MOST_SIGNIFICANT) #define FIRST_NON_ZERO \ - (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_MASKED_NEGATIVE_POLARITY) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \ + SIMDE_SIDD_MASKED_NEGATIVE_POLARITY) #define LAST_NON_ZERO \ - (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_MASKED_NEGATIVE_POLARITY | \ - SIMDE_SIDD_MOST_SIGNIFICANT) + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \ + SIMDE_SIDD_MASKED_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT) namespace HPCombi { @@ -96,18 +99,17 @@ inline int8_t less_partial(epu8 a, epu8 b, int k) { : static_cast(a[diff]) - static_cast(b[diff]); } - inline uint64_t 
first_zero(epu8 v, int bnd) { - return first_mask(v == epu8 {}, bnd); + return first_mask(v == epu8{}, bnd); } inline uint64_t last_zero(epu8 v, int bnd) { - return last_mask(v == epu8 {}, bnd); + return last_mask(v == epu8{}, bnd); } inline uint64_t first_non_zero(epu8 v, int bnd) { - return first_mask(v != epu8 {}, bnd); + return first_mask(v != epu8{}, bnd); } inline uint64_t last_non_zero(epu8 v, int bnd) { - return last_mask(v != epu8 {}, bnd); + return last_mask(v != epu8{}, bnd); } /// Apply a sorting network @@ -184,15 +186,9 @@ constexpr std::array sorting_rounds8 inline bool is_sorted(epu8 a) { return simde_mm_movemask_epi8(shifted_right(a) > a) == 0; } -inline epu8 sorted(epu8 a) { - return network_sort(a, sorting_rounds); -} -inline epu8 sorted8(epu8 a) { - return network_sort(a, sorting_rounds8); -} -inline epu8 revsorted(epu8 a) { - return network_sort(a, sorting_rounds); -} +inline epu8 sorted(epu8 a) { return network_sort(a, sorting_rounds); } +inline epu8 sorted8(epu8 a) { return network_sort(a, sorting_rounds8); } +inline epu8 revsorted(epu8 a) { return network_sort(a, sorting_rounds); } inline epu8 revsorted8(epu8 a) { return network_sort(a, sorting_rounds8); } @@ -204,7 +200,6 @@ inline epu8 sort8_perm(epu8 &a) { return network_sort_perm(a, sorting_rounds8); } - inline epu8 random_epu8(uint16_t bnd) { epu8 res; std::random_device rd; @@ -223,7 +218,7 @@ inline epu8 remove_dups(epu8 v, uint8_t repl) { } // Gather at the front numbers with (3-i)-th bit not set. -constexpr std::array inverting_rounds {{ +constexpr std::array inverting_rounds{{ // clang-format off // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 epu8 { 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15}, @@ -234,7 +229,7 @@ constexpr std::array inverting_rounds {{ #ifdef SIMDE_X86_SSE4_2_NATIVE #define FIND_IN_VECT \ - (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK | \ + (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK | \ SIMDE_SIDD_NEGATIVE_POLARITY) #define FIND_IN_VECT_COMPL \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) @@ -250,29 +245,29 @@ inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { #endif inline epu8 permutation_of_ref(epu8 a, epu8 b) { - auto ar = as_array(a); - epu8 res {}; - for (size_t i = 0; i < 16; i++) { - res[i] = std::distance(ar.begin(), std::find(ar.begin(), ar.end(), b[i])); - } - return res; + auto ar = as_array(a); + epu8 res{}; + for (size_t i = 0; i < 16; i++) { + res[i] = + std::distance(ar.begin(), std::find(ar.begin(), ar.end(), b[i])); + } + return res; } inline epu8 permutation_of(epu8 a, epu8 b) { #ifdef SIMDE_X86_SSE4_2_NATIVE - return permutation_of_cmpestrm(a, b); + return permutation_of_cmpestrm(a, b); #else - return permutation_of_ref(a, b); + return permutation_of_ref(a, b); #endif } - #if defined(FF) #error FF is defined ! 
#endif /* FF */ #define FF 0xff /// Permutation Round for partial and horizontal sums -constexpr std::array summing_rounds {{ +constexpr std::array summing_rounds{{ // clang-format off // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 epu8 { FF, 0, FF, 2, FF, 4, FF, 6, FF, 8, FF, 10, FF, 12, FF, 14}, @@ -282,7 +277,7 @@ constexpr std::array summing_rounds {{ // clang-format on }}; -constexpr std::array mining_rounds {{ +constexpr std::array mining_rounds{{ // clang-format off // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 epu8 { 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, @@ -327,7 +322,6 @@ inline epu8 partial_sums_round(epu8 v) { return v; } - inline uint8_t horiz_max_ref(epu8 v) { uint8_t res = 0; for (size_t i = 0; i < 16; i++) @@ -361,7 +355,6 @@ inline epu8 partial_max_round(epu8 v) { return v; } - inline uint8_t horiz_min_ref(epu8 v) { uint8_t res = 255; for (size_t i = 0; i < 16; i++) @@ -382,7 +375,7 @@ inline epu8 partial_min_ref(epu8 v) { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) - res[i] = std::min(res[i - 1], v[i]) ; + res[i] = std::min(res[i - 1], v[i]); return res; } inline epu8 partial_min_gen(epu8 v) { @@ -395,7 +388,6 @@ inline epu8 partial_min_round(epu8 v) { return v; } - inline epu8 eval16_ref(epu8 v) { epu8 res{}; for (size_t i = 0; i < 16; i++) @@ -425,29 +417,28 @@ inline epu8 eval16_cycle(epu8 v) { inline epu8 eval16_popcount(epu8 v) { epu8 res{}; for (size_t i = 0; i < 16; i++) { - res[i] = __builtin_popcountl(simde_mm_movemask_epi8(v == Epu8(uint8_t(i)))); + res[i] = + __builtin_popcountl(simde_mm_movemask_epi8(v == Epu8(uint8_t(i)))); } return res; } - -inline epu8 popcount16(epu8 v){ +inline epu8 popcount16(epu8 v) { return permuted(popcount4, (v & Epu8(0x0f))) + permuted(popcount4, v >> 4); } - inline bool is_partial_transformation(epu8 v, const size_t k) { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) - return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) - && (diff == 16 || diff < k); + return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) && + (diff == 16 || diff < k); } inline bool is_transformation(epu8 v, const size_t k) { uint64_t diff = last_diff(v, epu8id, 16); - return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) - && (diff == 16 || diff < k); + return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && + (diff == 16 || diff < k); } inline bool is_partial_permutation(epu8 v, const size_t k) { @@ -455,9 +446,9 @@ inline bool is_partial_permutation(epu8 v, const size_t k) { // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 // (v = Perm16::one() or last diff index < 16) - return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) - && (simde_mm_movemask_epi8(eval16(v) <= Epu8(1)) == 0xffff) - && (diff == 16 || diff < k); + return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) && + (simde_mm_movemask_epi8(eval16(v) <= Epu8(1)) == 0xffff) && + (diff == 16 || diff < k); } #ifdef SIMDE_X86_SSE4_2_NATIVE @@ -485,7 +476,6 @@ inline bool is_permutation(epu8 v, const size_t k) { #endif } - } // namespace HPCombi namespace std { diff --git a/include/hpcombi.hpp b/include/hpcombi.hpp index 043bb8b3..89b4743f 100644 --- a/include/hpcombi.hpp +++ b/include/hpcombi.hpp @@ -16,8 +16,8 @@ #ifndef HPCOMBI_HPCOMBI_HPP_INCLUDED #define HPCOMBI_HPCOMBI_HPP_INCLUDED +#include "bmat8.hpp" #include "epu.hpp" #include "perm16.hpp" -#include "bmat8.hpp" #endif // 
HPCOMBI_HPCOMBI_HPP_INCLUDED diff --git a/include/perm16.hpp b/include/perm16.hpp index fdc505d0..f4b3d92b 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -55,7 +55,10 @@ struct alignas(16) PTransf16 : public Vect16 { PTransf16(std::initializer_list il); PTransf16 &operator=(const PTransf16 &) = default; - PTransf16 &operator=(const epu8 &vv) { v = vv; return *this; } + PTransf16 &operator=(const epu8 &vv) { + v = vv; + return *this; + } //! Return whether \c *this is a well constructed object bool validate(size_t k = 16) const { @@ -70,13 +73,13 @@ struct alignas(16) PTransf16 : public Vect16 { } /** Returns a mask for the image of \c *this */ - epu8 image_mask(bool complement=false) const; + epu8 image_mask(bool complement = false) const; /** Returns a bit mask for the image of \c *this */ - uint32_t image_bitset(bool complement=false) const; + uint32_t image_bitset(bool complement = false) const; /** Returns a mask for the domain of \c *this */ - epu8 domain_mask(bool complement=false) const; + epu8 domain_mask(bool complement = false) const; /** Returns a bit mask for the domain of \c *this */ - uint32_t domain_bitset(bool complement=false) const; + uint32_t domain_bitset(bool complement = false) const; /** Returns the partial right identity for \c *this */ PTransf16 right_one() const; @@ -89,9 +92,9 @@ struct alignas(16) PTransf16 : public Vect16 { uint32_t rank() const; /** Returns a mask for the fix point of \c *this */ - epu8 fix_points_mask(bool complement=false) const; + epu8 fix_points_mask(bool complement = false) const; /** Returns a bit mask for the fix point of \c *this */ - uint32_t fix_points_bitset(bool complement=false) const; + uint32_t fix_points_bitset(bool complement = false) const; /** Returns the smallest fix point of \c *this */ uint8_t smallest_fix_point() const; /** Returns the smallest non fix point of \c *this */ @@ -144,7 +147,8 @@ struct PPerm16 : public PTransf16 { HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const vect v) : PTransf16(v) {} HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const epu8 x) : PTransf16(x) {} PPerm16(std::vector dom, std::vector rng, - size_t = 0 /* unused */) : PTransf16(dom, rng) {} + size_t = 0 /* unused */) + : PTransf16(dom, rng) {} PPerm16(std::initializer_list il) : PTransf16(il) {} PPerm16 &operator=(const PPerm16 &) = default; @@ -172,7 +176,8 @@ struct PPerm16 : public PTransf16 { * x.inverse() * @endcode * Returns - * @verbatim {0,0xFF,2,1,3,5,6,0xFF,8,9,0xFF,10,12,0xFF,0xFF,0xFF} @endverbatim + * @verbatim {0,0xFF,2,1,3,5,6,0xFF,8,9,0xFF,10,12,0xFF,0xFF,0xFF} + * @endverbatim */ /** @copydoc common_inverse_pperm * @par Algorithm: @@ -205,9 +210,7 @@ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { Perm16(std::initializer_list il) : Transf16(il) {} //! Return whether \c *this is a well constructed object - bool validate(size_t k = 16) const { - return HPCombi::is_permutation(v, k); - } + bool validate(size_t k = 16) const { return HPCombi::is_permutation(v, k); } // It's not possible to have a static constexpr member of same type as class // being defined (see https://stackoverflow.com/questions/11928089/) @@ -428,7 +431,6 @@ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { * @f$O(n)@f$ algorithm using length */ bool left_weak_leq(Perm16 other) const; - }; /*****************************************************************************/ @@ -469,9 +471,7 @@ template <> struct hash { template <> struct hash { //! 
A hash operator for #HPCombi::Perm16 - size_t operator()(const HPCombi::Perm16 &ar) const { - return uint64_t(ar); - } + size_t operator()(const HPCombi::Perm16 &ar) const { return uint64_t(ar); } }; } // namespace std diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 634c264a..6e6bde42 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -22,7 +22,7 @@ #include // lcm until c++17 #else #include "fallback/gcdlcm.hpp" // lcm until c++17 -#endif // HAVE_EXPERIMENTAL_NUMERIC_LCM +#endif // HAVE_EXPERIMENTAL_NUMERIC_LCM namespace HPCombi { @@ -30,14 +30,15 @@ namespace HPCombi { /** Implementation part for inline functions *********************************/ /*****************************************************************************/ -inline PTransf16::PTransf16(std::initializer_list il) : Vect16(epu8id) { +inline PTransf16::PTransf16(std::initializer_list il) + : Vect16(epu8id) { assert(il.size() <= 16); std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin()); } -inline PTransf16::PTransf16(std::vector dom, - std::vector rng, size_t /*unused */) : - Vect16(Epu8(0xFF)) { +inline PTransf16::PTransf16(std::vector dom, std::vector rng, + size_t /*unused */) + : Vect16(Epu8(0xFF)) { assert(dom.size() == rng.size()); assert(dom.size() <= 16); for (size_t i = 0; i < dom.size(); ++i) { diff --git a/include/perm_generic.hpp b/include/perm_generic.hpp index a9fa2125..6f868de0 100644 --- a/include/perm_generic.hpp +++ b/include/perm_generic.hpp @@ -32,7 +32,7 @@ struct PermGeneric : public VectGeneric<_Size, Expo> { static constexpr size_t size() { return _Size; } PermGeneric() = default; - PermGeneric(const vect v) : vect(v) {}; + PermGeneric(const vect v) : vect(v){}; PermGeneric(std::initializer_list il); PermGeneric operator*(const PermGeneric &p) const { @@ -41,7 +41,7 @@ struct PermGeneric : public VectGeneric<_Size, Expo> { static PermGeneric one() { return PermGeneric({}); } static PermGeneric elementary_transposition(uint64_t i); - PermGeneric inverse() const ; + PermGeneric inverse() const; static PermGeneric random(); vect lehmer() const; @@ -50,7 +50,6 @@ struct PermGeneric : public VectGeneric<_Size, Expo> { uint64_t nb_cycles() const; bool left_weak_leq(PermGeneric other) const; - }; /*****************************************************************************/ diff --git a/include/perm_generic_impl.hpp b/include/perm_generic_impl.hpp index 8eb37e54..4a4c5b6d 100644 --- a/include/perm_generic_impl.hpp +++ b/include/perm_generic_impl.hpp @@ -34,10 +34,10 @@ PermGeneric<_Size, Expo>::elementary_transposition(uint64_t i) { } template -PermGeneric<_Size, Expo> -PermGeneric<_Size, Expo>::inverse() const { +PermGeneric<_Size, Expo> PermGeneric<_Size, Expo>::inverse() const { PermGeneric res; - for (uint64_t i = 0; i < _Size; i++) res[this->v[i]] = i; + for (uint64_t i = 0; i < _Size; i++) + res[this->v[i]] = i; return res; } @@ -118,4 +118,3 @@ struct hash> { }; } // namespace std - diff --git a/include/power.hpp b/include/power.hpp index 68cb0943..e24f67cb 100644 --- a/include/power.hpp +++ b/include/power.hpp @@ -76,12 +76,10 @@ const T square(const T x) { */ template > const T pow(const T x) { - return (exp == 0) - ? M::one() - : (exp % 2 == 0) - ? square(pow(x)) - : M::prod(x, - square(pow(x))); + return (exp == 0) ? M::one() + : (exp % 2 == 0) + ? 
square(pow(x)) + : M::prod(x, square(pow(x))); } namespace power_helper { diff --git a/include/vect16.hpp b/include/vect16.hpp index 8f0eb8b6..76113ff6 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -36,7 +36,10 @@ struct alignas(16) Vect16 { HPCOMBI_CONSTEXPR_CONSTRUCTOR operator epu8() const { return v; } Vect16 &operator=(const Vect16 &) = default; - Vect16 &operator=(const epu8 &vv) { v = vv; return *this; } + Vect16 &operator=(const epu8 &vv) { + v = vv; + return *this; + } array &as_array() { return HPCombi::as_array(v); } const array &as_array() const { return HPCombi::as_array(v); } @@ -86,9 +89,7 @@ struct alignas(16) Vect16 { int8_t less_partial(const Vect16 &b, int k) const { return HPCombi::less_partial(v, b.v, k); }; - Vect16 permuted(const Vect16 &b) const { - return HPCombi::permuted(v, b.v); - } + Vect16 permuted(const Vect16 &b) const { return HPCombi::permuted(v, b.v); } uint8_t sum() const { return HPCombi::horiz_sum(v); } Vect16 partial_sums() const { return HPCombi::partial_sums(v); }; Vect16 eval16() const { return HPCombi::eval16(v); }; @@ -97,8 +98,6 @@ struct alignas(16) Vect16 { bool is_permutation(size_t k) const { return HPCombi::is_permutation(v, k); } - - }; static_assert(std::is_trivial(), "Vect16 is not a trivial class !"); diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 2bb9778a..7f8fe36d 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -346,10 +346,10 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { REQUIRE(equal(permutation_of(epu8rev, epu8rev), epu8id)); REQUIRE(equal(permutation_of(epu8id, RP), RP)); const uint8_t FF = 0xff; - REQUIRE(equal( - (permutation_of(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); + REQUIRE( + equal((permutation_of(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { REQUIRE(equal(permutation_of_ref(epu8id, epu8id), epu8id)); @@ -359,10 +359,10 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { REQUIRE(equal(permutation_of_ref(epu8rev, epu8rev), epu8id)); REQUIRE(equal(permutation_of_ref(epu8id, RP), RP)); const uint8_t FF = 0xff; - REQUIRE(equal( - (permutation_of_ref(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); + REQUIRE( + equal((permutation_of_ref(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { @@ -732,18 +732,18 @@ TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { #ifdef SIMDE_X86_SSE4_2_NATIVE TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { for (auto x : v) { - for (size_t i = 0; i < 16; i++) { - REQUIRE(is_permutation(x, i) == is_permutation_cmpestri(x, i)); - } + for (size_t i = 0; i < 16; i++) { + REQUIRE(is_permutation(x, i) == is_permutation_cmpestri(x, i)); + } } } #endif TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][080]") { for (auto x : v) { - for (size_t i = 0; i < 16; i++) { - REQUIRE(is_permutation(x, i) == is_permutation_sort(x, i)); - } + for (size_t i = 0; i < 16; i++) { + REQUIRE(is_permutation(x, i) == is_permutation_sort(x, i)); + } } } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 
52e40a8b..d75e3146 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -366,7 +366,7 @@ TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { #ifdef SIMDE_X86_SSE4_2_NATIVE TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, - "[PPerm16][021]"); + "[PPerm16][021]"); #endif TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { @@ -438,7 +438,6 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { } } - TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_find, Plist, "[Perm16][028]"); TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_pow, Plist, From b3d55aa1b41cd07711d813a70614f29ab33e33e0 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Fri, 27 Oct 2023 16:38:58 +0100 Subject: [PATCH 024/113] Linting --- CPPLINT.cfg | 3 + include/arch.hpp | 6 +- include/bmat8.hpp | 33 +++-- include/bmat8_impl.hpp | 233 +++++++++++++++++----------------- include/epu.hpp | 8 +- include/epu_impl.hpp | 22 ++-- include/perm16.hpp | 6 +- include/perm16_impl.hpp | 4 +- include/perm_generic.hpp | 7 +- include/perm_generic_impl.hpp | 4 +- include/power.hpp | 1 - include/vect16.hpp | 20 +-- include/vect_generic.hpp | 9 +- 13 files changed, 175 insertions(+), 181 deletions(-) create mode 100644 CPPLINT.cfg diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 00000000..903aa4b8 --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,3 @@ +set noparent +filter=-build/c++14,-build/include_subdir,-runtime/indentation_namespace,-runtime/references,-build/include,-readability/todo,-runtime/printf +linelength=80 diff --git a/include/arch.hpp b/include/arch.hpp index addd724b..e43a6f6e 100644 --- a/include/arch.hpp +++ b/include/arch.hpp @@ -17,7 +17,11 @@ #define HPCOMBI_ARCH_HPP_INCLUDED #if defined(SIMDE_ARCH_AMD64) && !defined(SIMDE_ARCH_X86_SSE4_1) -#error("x86_64 architecture without required compiler flags for SSE-4.1 instruction set. Did you forget to provide the flag -march=(native,avx,sse4.1) flag ?") +char const msg[] = + R("x86_64 architecture without required compiler flags for SSE-4.1 " + "instruction set. Did you forget to provide the flag -march=" + "(native,avx,sse4.1) flag ?"); +#error(msg) #endif #endif // HPCOMBI_ARCH_HPP_INCLUDED diff --git a/include/bmat8.hpp b/include/bmat8.hpp index 4d764d00..95ebce2f 100644 --- a/include/bmat8.hpp +++ b/include/bmat8.hpp @@ -1,19 +1,19 @@ -/******************************************************************************/ -/* Copyright (C) 2018 Finn Smith */ -/* Copyright (C) 2018 James Mitchell */ -/* Copyright (C) 2018 Florent Hivert , */ -/* */ -/* Distributed under the terms of the GNU General Public License (GPL) */ -/* */ -/* This code is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* General Public License for more details. 
*/ -/* */ -/* The full text of the GPL is available at: */ -/* */ -/* http://www.gnu.org/licenses/ */ -/******************************************************************************/ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2018 Finn Smith // +// Copyright (C) 2018 James Mitchell // +// Copyright (C) 2018 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// // This file contains a declaration of fast boolean matrices up to dimension 8. @@ -32,7 +32,6 @@ #include // for vector #include "epu.hpp" - #include "perm16.hpp" #ifndef HPCOMBI_ASSERT diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index 8dbd27a7..f17d0263 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -1,19 +1,19 @@ -/******************************************************************************/ -/* Copyright (C) 2018 Finn Smith */ -/* Copyright (C) 2018 James Mitchell */ -/* Copyright (C) 2018 Florent Hivert , */ -/* */ -/* Distributed under the terms of the GNU General Public License (GPL) */ -/* */ -/* This code is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* General Public License for more details. */ -/* */ -/* The full text of the GPL is available at: */ -/* */ -/* http://www.gnu.org/licenses/ */ -/******************************************************************************/ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2018 Finn Smith // +// Copyright (C) 2018 James Mitchell // +// Copyright (C) 2018 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// // This file contains an implementation of fast boolean matrices up to // dimension 8 x 8. 
@@ -21,82 +21,79 @@ namespace HPCombi { static_assert(std::is_trivial(), "BMat8 is not a trivial class!"); -// clang-format off -static const constexpr std::array ROW_MASK = {{ - 0xff00000000000000, 0xff000000000000, 0xff0000000000, 0xff00000000, - 0xff000000, 0xff0000, 0xff00, 0xff}}; - -static const constexpr std::array COL_MASK = {{ - 0x8080808080808080, 0x4040404040404040, 0x2020202020202020, 0x1010101010101010, - 0x808080808080808, 0x404040404040404, 0x202020202020202, 0x101010101010101}}; - -static const constexpr std::array BIT_MASK = {{ - 0x8000000000000000, - 0x4000000000000000, - 0x2000000000000000, - 0x1000000000000000, - 0x800000000000000, - 0x400000000000000, - 0x200000000000000, - 0x100000000000000, - 0x80000000000000, - 0x40000000000000, - 0x20000000000000, - 0x10000000000000, - 0x8000000000000, - 0x4000000000000, - 0x2000000000000, - 0x1000000000000, - 0x800000000000, - 0x400000000000, - 0x200000000000, - 0x100000000000, - 0x80000000000, - 0x40000000000, - 0x20000000000, - 0x10000000000, - 0x8000000000, - 0x4000000000, - 0x2000000000, - 0x1000000000, - 0x800000000, - 0x400000000, - 0x200000000, - 0x100000000, - 0x80000000, - 0x40000000, - 0x20000000, - 0x10000000, - 0x8000000, - 0x4000000, - 0x2000000, - 0x1000000, - 0x800000, - 0x400000, - 0x200000, - 0x100000, - 0x80000, - 0x40000, - 0x20000, - 0x10000, - 0x8000, - 0x4000, - 0x2000, - 0x1000, - 0x800, - 0x400, - 0x200, - 0x100, - 0x80, - 0x40, - 0x20, - 0x10, - 0x8, - 0x4, - 0x2, - 0x1}}; - -// clang-format on +static const constexpr std::array ROW_MASK = { + {0xff00000000000000, 0xff000000000000, 0xff0000000000, 0xff00000000, + 0xff000000, 0xff0000, 0xff00, 0xff}}; + +static const constexpr std::array COL_MASK = { + 0x8080808080808080, 0x4040404040404040, 0x2020202020202020, + 0x1010101010101010, 0x808080808080808, 0x404040404040404, + 0x202020202020202, 0x101010101010101}; + +static const constexpr std::array BIT_MASK = {{0x8000000000000000, + 0x4000000000000000, + 0x2000000000000000, + 0x1000000000000000, + 0x800000000000000, + 0x400000000000000, + 0x200000000000000, + 0x100000000000000, + 0x80000000000000, + 0x40000000000000, + 0x20000000000000, + 0x10000000000000, + 0x8000000000000, + 0x4000000000000, + 0x2000000000000, + 0x1000000000000, + 0x800000000000, + 0x400000000000, + 0x200000000000, + 0x100000000000, + 0x80000000000, + 0x40000000000, + 0x20000000000, + 0x10000000000, + 0x8000000000, + 0x4000000000, + 0x2000000000, + 0x1000000000, + 0x800000000, + 0x400000000, + 0x200000000, + 0x100000000, + 0x80000000, + 0x40000000, + 0x20000000, + 0x10000000, + 0x8000000, + 0x4000000, + 0x2000000, + 0x1000000, + 0x800000, + 0x400000, + 0x200000, + 0x100000, + 0x80000, + 0x40000, + 0x20000, + 0x10000, + 0x8000, + 0x4000, + 0x2000, + 0x1000, + 0x800, + 0x400, + 0x200, + 0x100, + 0x80, + 0x40, + 0x20, + 0x10, + 0x8, + 0x4, + 0x2, + 0x1}}; inline bool BMat8::operator()(size_t i, size_t j) const { HPCOMBI_ASSERT(i < 8); @@ -238,7 +235,7 @@ inline BMat8 BMat8::row_space_basis() const { #if defined(FF) #error FF is defined ! 
-#endif /* FF */ +#endif // FF #define FF 0xff constexpr std::array masks{ @@ -250,11 +247,12 @@ constexpr std::array masks{ }}; #undef FF -static const epu8 shiftres {1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80}; +static const epu8 shiftres{1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80}; inline void update_bitset(epu8 block, epu8 &set0, epu8 &set1) { - static const epu8 bound08 = simde_mm_slli_epi32(static_cast(epu8id), 3); // shift for *8 -static const epu8 bound18 = bound08 + Epu8(0x80); + static const epu8 bound08 = simde_mm_slli_epi32( + static_cast(epu8id), 3); // shift for *8 + static const epu8 bound18 = bound08 + Epu8(0x80); for (size_t slice8 = 0; slice8 < 16; slice8++) { epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */ epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5); @@ -266,12 +264,13 @@ static const epu8 bound18 = bound08 + Epu8(0x80); inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { epu8 in = simde_mm_set_epi64x(0, _data); - epu8 block0 {}, block1 {}; + epu8 block0{}, block1{}; for (epu8 m : masks) { block0 |= static_cast(simde_mm_shuffle_epi8(in, m)); block1 |= static_cast(simde_mm_shuffle_epi8(in, m | Epu8(4))); } - res0 = epu8 {}; res1 = epu8 {}; + res0 = epu8{}; + res1 = epu8{}; for (size_t r = 0; r < 16; r++) { update_bitset(block0 | block1, res0, res1); block1 = simde_mm_shuffle_epi8(block1, right_cycle); @@ -279,7 +278,7 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { } inline uint64_t BMat8::row_space_size_bitset() const { - epu8 res0 {}, res1 {}; + epu8 res0{}, res1{}; row_space_bitset(res0, res1); return (__builtin_popcountll(simde_mm_extract_epi64(res0, 0)) + __builtin_popcountll(simde_mm_extract_epi64(res1, 0)) + @@ -292,7 +291,7 @@ inline uint64_t BMat8::row_space_size_incl1() const { epu8 block = epu8id; uint64_t res = 0; for (size_t r = 0; r < 16; r++) { - epu8 orincl {}; + epu8 orincl{}; for (int i = 0; i < 8; i++) { orincl |= ((in | block) == block) & in; in = permuted(in, rotboth); @@ -309,7 +308,7 @@ inline uint64_t BMat8::row_space_size_incl() const { uint64_t res = 0; for (size_t r = 0; r < 16; r++) { epu8 orincl = ((in | block) == block) & in; - for (int i = 0; i < 7; i++) { // Only rotating + for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } @@ -330,7 +329,7 @@ inline bool BMat8::row_space_included(BMat8 other) const { epu8 in = simde_mm_set_epi64x(0, other._data); epu8 block = simde_mm_set_epi64x(0, _data); epu8 orincl = ((in | block) == block) & in; - for (int i = 0; i < 7; i++) { // Only rotating + for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotlow); orincl |= ((in | block) == block) & in; } @@ -340,7 +339,7 @@ inline bool BMat8::row_space_included(BMat8 other) const { inline epu8 BMat8::row_space_mask(epu8 block) const { epu8 in = simde_mm_set_epi64x(_data, _data); epu8 orincl = ((in | block) == block) & in; - for (int i = 0; i < 7; i++) { // Only rotating + for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } @@ -352,7 +351,7 @@ inline std::pair BMat8::row_space_included2(BMat8 a0, BMat8 b0, epu8 in = simde_mm_set_epi64x(b1._data, b0._data); epu8 block = simde_mm_set_epi64x(a1._data, a0._data); epu8 orincl = ((in | block) == block) & in; - for (int i = 0; i < 7; i++) { // Only rotating + for (int i = 0; i < 7; i++) { // Only rotating in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } @@ -362,9 +361,9 @@ inline std::pair BMat8::row_space_included2(BMat8 
a0, BMat8 b0, } inline std::bitset<256> BMat8::row_space_bitset_ref() const { - std::bitset<256> lookup; + std::bitset<256> lookup; std::vector row_vec = row_space_basis().rows(); - auto last = std::remove(row_vec.begin(), row_vec.end(), 0); + auto last = std::remove(row_vec.begin(), row_vec.end(), 0); row_vec.erase(last, row_vec.end()); for (uint8_t x : row_vec) { lookup.set(x); @@ -404,11 +403,11 @@ inline std::vector BMat8::rows() const { inline size_t BMat8::nr_rows() const { epu8 x = simde_mm_set_epi64x(_data, 0); - return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8 {})); + return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8{})); } -static HPCOMBI_CONSTEXPR epu8 - rev8 {7,6,5,4,3,2,1,0,8,9,10,11,12,13,14,15}; +static HPCOMBI_CONSTEXPR epu8 rev8{7, 6, 5, 4, 3, 2, 1, 0, + 8, 9, 10, 11, 12, 13, 14, 15}; inline BMat8 BMat8::row_permuted(Perm16 p) const { epu8 x = simde_mm_set_epi64x(0, _data); x = permuted(x, rev8); @@ -429,7 +428,7 @@ inline BMat8 BMat8::col_permutation_matrix(Perm16 p) { inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { // LIBSEMIGROUPS_ASSERT(bm.row_space_basis() == bm); std::vector rows = this->rows(); - BMat8 product = *this * bm; + BMat8 product = *this * bm; std::vector prod_rows = product.rows(); // LIBSEMIGROUPS_ASSERT(product.row_space_basis() == bm); @@ -437,28 +436,26 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { std::vector perm(8); for (size_t i = 0; i < nr_rows(); ++i) { uint8_t row = rows[i]; - perm[i] - = std::distance(prod_rows.begin(), - std::find(prod_rows.begin(), prod_rows.end(), row)); + perm[i] = + std::distance(prod_rows.begin(), + std::find(prod_rows.begin(), prod_rows.end(), row)); } std::iota(perm.begin() + nr_rows(), perm.end(), nr_rows()); Perm16 res = Perm16::one(); - for (size_t i=0; i < 8; i++) + for (size_t i = 0; i < 8; i++) res[i] = perm[i]; return res; - } +} inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const { epu8 x = permuted(simde_mm_set_epi64x(_data, 0), epu8rev); epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), epu8rev); // Vector ternary operator is not supported by clang. // return (x != (epu8 {})) ? permutation_of(y, x) : epu8id; - return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8 {}); + return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8{}); } - - inline std::ostream &BMat8::write(std::ostream &os) const { uint64_t x = _data; uint64_t pow = 1; diff --git a/include/epu.hpp b/include/epu.hpp index 1c7420ee..1a5aa304 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -49,7 +49,8 @@ namespace HPCombi { /// Unsigned 8 bits int constant. 
-inline constexpr uint8_t operator"" _u8(unsigned long long arg) noexcept { +inline constexpr uint8_t +operator"" _u8(unsigned long long arg) noexcept { // NOLINT return static_cast(arg); } @@ -74,7 +75,6 @@ template struct ConstFun { /// Factory object for various SIMD constants in particular constexpr template struct TPUBuild { - using type_elem = typename std::remove_reference::type; static constexpr size_t size_elem = sizeof(type_elem); @@ -205,7 +205,7 @@ inline bool equal(epu8 a, epu8 b) { return is_all_zero(simde_mm_xor_si128(a, b)); } /** Non equality of #HPCombi::epu8 */ -inline bool not_equal(epu8 a, epu8 b) { return not equal(a, b); } +inline bool not_equal(epu8 a, epu8 b) { return !equal(a, b); } /** Permuting a #HPCombi::epu8 */ inline epu8 permuted(epu8 a, epu8 b) { return simde_mm_shuffle_epi8(a, b); } @@ -527,7 +527,7 @@ inline epu8 eval16_cycle(epu8 v); */ inline epu8 eval16_popcount(epu8 v); /** @copydoc common_eval16 */ -inline epu8 eval16(epu8 v) { return eval16_cycle(v); }; +inline epu8 eval16(epu8 v) { return eval16_cycle(v); } /** @class common_first_diff * @brief The first difference between two #HPCombi::epu8 diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index c3eb80c7..1d809c50 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -147,20 +147,16 @@ inline epu8 network_sort_perm(epu8 &v, std::array rounds) { * [AoCP3]: "D. Knuth, The art of computer programming vol. 3" */ constexpr std::array sorting_rounds - // clang-format off // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 -{{ - epu8 { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, - epu8 { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, - epu8 { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, - epu8 { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}, - epu8 { 0, 2, 1, 12, 8, 10, 9, 11, 4, 6, 5, 7, 3, 14, 13, 15}, - epu8 { 0, 4, 8, 10, 1, 9, 12, 13, 2, 5, 3, 14, 6, 7, 11, 15}, - epu8 { 0, 1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 14, 15}, - epu8 { 0, 1, 2, 6, 4, 8, 3, 10, 5, 12, 7, 11, 9, 13, 14, 15}, - epu8 { 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 13, 14, 15} -}}; -// clang-format on + {{epu8{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, + epu8{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, + epu8{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, + epu8{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}, + epu8{0, 2, 1, 12, 8, 10, 9, 11, 4, 6, 5, 7, 3, 14, 13, 15}, + epu8{0, 4, 8, 10, 1, 9, 12, 13, 2, 5, 3, 14, 6, 7, 11, 15}, + epu8{0, 1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 14, 15}, + epu8{0, 1, 2, 6, 4, 8, 3, 10, 5, 12, 7, 11, 9, 13, 14, 15}, + epu8{0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 13, 14, 15}}}; /** A duplicated 8-way sorting network * @details [Batcher odd-Even mergesort] sorting network diff --git a/include/perm16.hpp b/include/perm16.hpp index f4b3d92b..860e61d3 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -40,8 +40,7 @@ struct Transf16; * */ struct alignas(16) PTransf16 : public Vect16 { - - static constexpr size_t size() { return 16; }; + static constexpr size_t size() { return 16; } using vect = HPCombi::Vect16; using array = TPUBuild::array; @@ -111,7 +110,6 @@ struct alignas(16) PTransf16 : public Vect16 { * */ struct Transf16 : public PTransf16 { - Transf16() = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR Transf16(const Transf16 &v) = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR Transf16(const vect v) : PTransf16(v) {} @@ -141,7 +139,6 @@ struct Transf16 : public PTransf16 { * */ struct PPerm16 : 
public PTransf16 { - PPerm16() = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const PPerm16 &v) = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const vect v) : PTransf16(v) {} @@ -201,7 +198,6 @@ struct PPerm16 : public PTransf16 { * */ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { - Perm16() = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR Perm16(const Perm16 &) = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR Perm16(const vect v) : Transf16(v) {} diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 6e6bde42..49e0d408 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -328,8 +328,8 @@ inline uint8_t Perm16::nb_cycles_ref() const { std::array b{}; uint8_t c = 0; for (size_t i = 0; i < 16; i++) { - if (not b[i]) { - for (size_t j = i; not b[j]; j = v[j]) + if (!b[i]) { + for (size_t j = i; !b[j]; j = v[j]) b[j] = true; c++; } diff --git a/include/perm_generic.hpp b/include/perm_generic.hpp index 6f868de0..d9c0b94f 100644 --- a/include/perm_generic.hpp +++ b/include/perm_generic.hpp @@ -26,14 +26,15 @@ namespace HPCombi { template struct PermGeneric : public VectGeneric<_Size, Expo> { - using vect = VectGeneric<_Size, Expo>; static constexpr size_t size() { return _Size; } PermGeneric() = default; - PermGeneric(const vect v) : vect(v){}; - PermGeneric(std::initializer_list il); + PermGeneric(const vect v) : vect(v) {} // NOLINT + // Not marked explicit because we want to be able to pass non-initializer + // lists here + PermGeneric(std::initializer_list il); // NOLINT PermGeneric operator*(const PermGeneric &p) const { return this->permuted(p); diff --git a/include/perm_generic_impl.hpp b/include/perm_generic_impl.hpp index 4a4c5b6d..19f3ee49 100644 --- a/include/perm_generic_impl.hpp +++ b/include/perm_generic_impl.hpp @@ -86,8 +86,8 @@ uint64_t PermGeneric<_Size, Expo>::nb_cycles() const { std::array b{}; uint64_t c = 0; for (size_t i = 0; i < _Size; i++) { - if (not b[i]) { - for (size_t j = i; not b[j]; j = this->v[j]) + if (!b[i]) { + for (size_t j = i; !b[j]; j = this->v[j]) b[j] = true; c++; } diff --git a/include/power.hpp b/include/power.hpp index e24f67cb..0163a5e1 100644 --- a/include/power.hpp +++ b/include/power.hpp @@ -96,7 +96,6 @@ namespace power_helper { * the template for some specific type \c T. 
*/ template struct Monoid { - /// The one of type T static const T one() { return 1; } diff --git a/include/vect16.hpp b/include/vect16.hpp index 76113ff6..8ee3d53f 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -24,15 +24,14 @@ namespace HPCombi { struct alignas(16) Vect16 { - - static constexpr size_t Size() { return 16; }; + static constexpr size_t Size() { return 16; } using array = TPUBuild::array; epu8 v; Vect16() = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR Vect16(epu8 x) : v(x) {} Vect16(std::initializer_list il, uint8_t def = 0) - : v(Epu8(il, def)){}; + : v(Epu8(il, def)) {} HPCOMBI_CONSTEXPR_CONSTRUCTOR operator epu8() const { return v; } Vect16 &operator=(const Vect16 &) = default; @@ -59,7 +58,7 @@ struct alignas(16) Vect16 { } size_t last_zero(size_t bound = Size()) const { return HPCombi::last_zero(v, bound); - }; + } size_t first_non_zero(size_t bound = Size()) const { return HPCombi::first_non_zero(v, bound); } @@ -80,19 +79,20 @@ struct alignas(16) Vect16 { const_iterator begin() const { return as_array().begin(); } const_iterator end() const { return as_array().end(); } - bool operator==(const Vect16 &b) const { return HPCombi::equal(v, b.v); }; + bool operator==(const Vect16 &b) const { return HPCombi::equal(v, b.v); } + bool operator!=(const Vect16 &b) const { return HPCombi::not_equal(v, b.v); - }; + } - bool operator<(const Vect16 &b) const { return less(v, b.v); }; + bool operator<(const Vect16 &b) const { return less(v, b.v); } int8_t less_partial(const Vect16 &b, int k) const { return HPCombi::less_partial(v, b.v, k); - }; + } Vect16 permuted(const Vect16 &b) const { return HPCombi::permuted(v, b.v); } uint8_t sum() const { return HPCombi::horiz_sum(v); } - Vect16 partial_sums() const { return HPCombi::partial_sums(v); }; - Vect16 eval16() const { return HPCombi::eval16(v); }; + Vect16 partial_sums() const { return HPCombi::partial_sums(v); } + Vect16 eval16() const { return HPCombi::eval16(v); } bool is_permutation() const { return HPCombi::is_permutation(v); } bool is_permutation(size_t k) const { diff --git a/include/vect_generic.hpp b/include/vect_generic.hpp index 594a14ea..b3fabe52 100644 --- a/include/vect_generic.hpp +++ b/include/vect_generic.hpp @@ -35,13 +35,12 @@ std::array sorted_vect(std::array v) { /** A generic class for combinatorial integer vectors. */ template struct VectGeneric { - - static constexpr size_t Size() { return _Size; }; + static constexpr size_t Size() { return _Size; } using array = std::array; array v; VectGeneric() = default; - VectGeneric(std::array _v) : v(_v){}; + VectGeneric(const std::array &_v) : v(_v) {} // NOLINT VectGeneric(std::initializer_list il, Expo def = 0) { assert(il.size() <= _Size); std::copy(il.begin(), il.end(), v.begin()); @@ -90,7 +89,7 @@ template struct VectGeneric { bool operator<(const VectGeneric &u) const { uint64_t diff = first_diff(u); - return (diff != _Size) and v[diff] < u[diff]; + return (diff != _Size) && v[diff] < u[diff]; } int8_t less_partial(const VectGeneric &u, int k) const { @@ -103,7 +102,7 @@ template struct VectGeneric { for (uint64_t i = 0; i < _Size; i++) res[i] = v[u[i]]; return res; - }; + } void sort() { std::sort(v.begin(), v.end()); } From 5ce40c2e1f7151a02f759ce5ca5872df3cd10a8a Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Fri, 27 Oct 2023 17:17:51 +0100 Subject: [PATCH 025/113] Start using C++17 --- CMakeLists.txt | 14 +---- include/epu.hpp | 101 ++++++++++++++---------------------- include/epu_impl.hpp | 14 ++--- include/fallback/gcdlcm.hpp | 35 ------------- include/perm16.hpp | 2 +- include/perm16_impl.hpp | 30 ++++------- include/vect16.hpp | 2 +- 7 files changed, 60 insertions(+), 138 deletions(-) delete mode 100644 include/fallback/gcdlcm.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 96b8a15e..6531bab5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,7 +36,7 @@ message(STATUS "**** Build type = ${CMAKE_BUILD_TYPE}") ################################ # General compiler configuration set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_EXTENSIONS OFF) # -std=c++14 instead of -std=gnu++14 add_definitions(-DHPCOMBI_HAVE_CONFIG) @@ -52,18 +52,6 @@ include(CheckIncludeFileCXX) include(CheckCXXSourceCompiles) include(CheckCXXCompilerFlag) -## Check for static lcm -check_include_file_cxx("experimental/numeric" HPCOMBI_HAVE_EXPERIMENTAL_NUMERIC) -if (HPCOMBI_HAVE_EXPERIMENTAL_NUMERIC) - check_cxx_source_compiles( - " - #include - static_assert(std::experimental::lcm(4, 6) == 12, \"Buggy lcm\"); - int main() { } - " - HPCOMBI_HAVE_EXPERIMENTAL_NUMERIC_LCM) -endif (HPCOMBI_HAVE_EXPERIMENTAL_NUMERIC) - ## Check for buggy constexpr support G++ 5.0 check_cxx_source_compiles( " diff --git a/include/epu.hpp b/include/epu.hpp index 1a5aa304..43828fb5 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -22,6 +22,7 @@ #include // less<>, equal_to<> #include #include +#include #ifdef HPCOMBI_HAVE_CONFIG #include "HPCombi-config.h" @@ -61,33 +62,26 @@ static_assert(alignof(epu8) == 16, "epu8 type is not properly aligned by the compiler !"); /// SIMD vector of 32 unsigned bytes +/// Currently not really used except in experiments using xpu8 = uint8_t __attribute__((vector_size(32))); -namespace { // Implementation detail code - -/// A handmade C++11 constexpr lambda -template struct ConstFun { - HPCOMBI_CONSTEXPR_CONSTRUCTOR ConstFun(T cc) : cst(cc) {} - HPCOMBI_CONSTEXPR T operator()(T) const { return cst; } - /// constant value for constexpr lambda - T cst; -}; +namespace detail { // Implementation detail code /// Factory object for various SIMD constants in particular constexpr template struct TPUBuild { - using type_elem = - typename std::remove_reference::type; + using type_elem = typename std::remove_reference_t; static constexpr size_t size_elem = sizeof(type_elem); static constexpr size_t size = sizeof(TPU) / size_elem; + using array = std::array; - template - static HPCOMBI_CONSTEXPR TPU make_helper(Fun f, - std::index_sequence) { + template + static constexpr TPU make_helper(Fun f, std::index_sequence) { + static_assert(std::is_invocable_v); return TPU{f(Is)...}; } - inline TPU operator()(const std::initializer_list il, + inline TPU operator()(std::initializer_list il, type_elem def) const { assert(il.size() <= size); array res; @@ -96,86 +90,67 @@ template struct TPUBuild { return reinterpret_cast(res); } - template inline HPCOMBI_CONSTEXPR TPU operator()(Fun f) const { + template inline constexpr TPU operator()(Fun f) const { + static_assert(std::is_invocable_v); return make_helper(f, std::make_index_sequence{}); } - inline HPCOMBI_CONSTEXPR TPU operator()(type_elem c) const { - return make_helper(ConstFun(c), + inline constexpr TPU operator()(type_elem c) const { + return make_helper([c](auto) { return c; }, 
std::make_index_sequence{}); } // explicit overloading for int constants - inline HPCOMBI_CONSTEXPR TPU operator()(int c) const { - return operator()(uint8_t(c)); + inline constexpr TPU operator()(int c) const { + return operator()(type_elem(c)); } - inline HPCOMBI_CONSTEXPR TPU operator()(size_t c) const { - return operator()(uint8_t(c)); + inline constexpr TPU operator()(size_t c) const { + return operator()(type_elem(c)); } }; -// The following functions should be constexpr lambdas writen directly in -// their corresponding methods. However until C++17, constexpr lambda are -// forbidden. So we put them here. -/// The image of i by the identity function -HPCOMBI_CONSTEXPR uint8_t id_fun(uint8_t i) { return i; } -/// The image of i by the left cycle function -HPCOMBI_CONSTEXPR uint8_t left_cycle_fun(uint8_t i) { return (i + 15) % 16; } -/// The image of i by the right cycle function -HPCOMBI_CONSTEXPR -uint8_t right_cycle_fun(uint8_t i) { return (i + 1) % 16; } -/// The image of i by a left shift duplicating the hole -HPCOMBI_CONSTEXPR -uint8_t left_dup_fun(uint8_t i) { return i == 15 ? 15 : i + 1; } -/// The image of i by a right shift duplicating the hole -HPCOMBI_CONSTEXPR -uint8_t right_dup_fun(uint8_t i) { return i == 0 ? 0 : i - 1; } -/// The complement of i to 15 -HPCOMBI_CONSTEXPR -uint8_t complement_fun(uint8_t i) { return 15 - i; } -HPCOMBI_CONSTEXPR uint8_t popcount4_fun(uint8_t i) { - return ((i & 1) != 0 ? 1 : 0) + ((i & 2) != 0 ? 1 : 0) + - ((i & 4) != 0 ? 1 : 0) + ((i & 8) != 0 ? 1 : 0); -} - -} // Anonymous namespace +} // namespace detail -/// Factory object for various SIMD constants in particular constexpr -TPUBuild Epu8; +// Single instance of the TPUBuild factory object +static constexpr detail::TPUBuild Epu8; -/// The indentity #HPCombi::epu8 -HPCOMBI_CONSTEXPR epu8 epu8id = Epu8(id_fun); +/// The identity #HPCombi::epu8 +/// The image of i by the identity function +constexpr epu8 epu8id = Epu8([](uint8_t i) { return i; }); /// The reverted identity #HPCombi::epu8 -HPCOMBI_CONSTEXPR epu8 epu8rev = Epu8(complement_fun); +constexpr epu8 epu8rev = Epu8([](uint8_t i) { return 15 - i; }); /// Left cycle #HPCombi::epu8 permutation -HPCOMBI_CONSTEXPR epu8 left_cycle = Epu8(left_cycle_fun); +constexpr epu8 left_cycle = Epu8([](uint8_t i) { return (i + 15) % 16; }); /// Right cycle #HPCombi::epu8 permutation -HPCOMBI_CONSTEXPR epu8 right_cycle = Epu8(right_cycle_fun); +constexpr epu8 right_cycle = Epu8([](uint8_t i) { return (i + 1) % 16; }); /// Left shift #HPCombi::epu8, duplicating the rightmost entry -HPCOMBI_CONSTEXPR epu8 left_dup = Epu8(left_dup_fun); +constexpr epu8 left_dup = Epu8([](uint8_t i) { return i == 15 ? 15 : i + 1; }); /// Right shift #HPCombi::epu8, duplicating the leftmost entry -HPCOMBI_CONSTEXPR epu8 right_dup = Epu8(right_dup_fun); +constexpr epu8 right_dup = Epu8([](uint8_t i) { return i == 0 ? 0 : i - 1; }); /// Popcount #HPCombi::epu8: the ith entry contains the number of bits set in i -HPCOMBI_CONSTEXPR epu8 popcount4 = Epu8(popcount4_fun); +constexpr epu8 popcount4 = Epu8([](uint8_t i) { + return ((i & 1) != 0 ? 1 : 0) + ((i & 2) != 0 ? 1 : 0) + + ((i & 4) != 0 ? 1 : 0) + ((i & 8) != 0 ? 1 : 0); +}); /** Cast a #HPCombi::epu8 to a c++ \c std::array * * This is usually faster for algorithm using a lot of indexed acces. 
*/ -inline TPUBuild::array &as_array(epu8 &v) { - return reinterpret_cast::array &>(v); +inline decltype(Epu8)::array &as_array(epu8 &v) { + return reinterpret_cast(v); } /** Cast a constant #HPCombi::epu8 to a C++ \c std::array * * This is usually faster for algorithm using a lot of indexed acces. */ -inline const TPUBuild::array &as_array(const epu8 &v) { - return reinterpret_cast::array &>(v); +inline const decltype(Epu8)::array &as_array(const epu8 &v) { + return reinterpret_cast(v); } /** Cast a C++ \c std::array to a #HPCombi::epu8 */ // Passing the argument by reference triggers a segfault in gcc // Since vector types doesn't belongs to the standard, I didn't manage // to know if I'm using undefined behavior here. -inline epu8 from_array(TPUBuild::array a) { +inline epu8 from_array(decltype(Epu8)::array a) { return reinterpret_cast(a); } @@ -285,7 +260,7 @@ inline epu8 permutation_of_ref(epu8 a, epu8 b); inline epu8 permutation_of(epu8 a, epu8 b); /** A prime number good for hashing */ -const uint64_t prime = 0x9e3779b97f4a7bb9; +constexpr uint64_t prime = 0x9e3779b97f4a7bb9; /** A random #HPCombi::epu8 * @details diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 1d809c50..ab446c22 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -13,7 +13,7 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -// This is the implementation par of epu.hpp this should be seen as +// This is the implementation part of epu.hpp this should be seen as // implementation details and should not be included directly. #include @@ -113,11 +113,11 @@ inline uint64_t last_non_zero(epu8 v, int bnd) { } /// Apply a sorting network -template +template inline epu8 network_sort(epu8 res, std::array rounds) { for (auto round : rounds) { // This conditional should be optimized out by the compiler - epu8 mask = Increassing ? round < epu8id : epu8id < round; + epu8 mask = Increasing ? round < epu8id : epu8id < round; epu8 b = permuted(res, round); // res = mask ? min(res,b) : max(res,b); is not accepted by clang res = simde_mm_blendv_epi8(min(res, b), max(res, b), mask); @@ -126,12 +126,12 @@ inline epu8 network_sort(epu8 res, std::array rounds) { } /// Apply a sorting network in place and return the permutation -template +template inline epu8 network_sort_perm(epu8 &v, std::array rounds) { epu8 res = epu8id; for (auto round : rounds) { // This conditional should be optimized out by the compiler - epu8 mask = Increassing ? round < epu8id : epu8id < round; + epu8 mask = Increasing ? 
round < epu8id : epu8id < round; epu8 b = permuted(v, round); epu8 cmp = simde_mm_blendv_epi8(b < v, v < b, mask); v = simde_mm_blendv_epi8(v, b, cmp); @@ -229,6 +229,7 @@ constexpr std::array inverting_rounds{{ SIMDE_SIDD_NEGATIVE_POLARITY) #define FIND_IN_VECT_COMPL \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) + inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { epu8 res = -static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); for (epu8 round : inverting_rounds) { @@ -391,8 +392,9 @@ inline epu8 eval16_ref(epu8 v) { res[v[i]]++; return res; } + inline epu8 eval16_arr(epu8 v8) { - TPUBuild::array res{}; + decltype(Epu8)::array res{}; auto v = as_array(v8); for (size_t i = 0; i < 16; i++) if (v[i] < 16) diff --git a/include/fallback/gcdlcm.hpp b/include/fallback/gcdlcm.hpp deleted file mode 100644 index 0009328e..00000000 --- a/include/fallback/gcdlcm.hpp +++ /dev/null @@ -1,35 +0,0 @@ -//****************************************************************************// -// Copyright (C) 2017 Florent Hivert , // -// // -// Distributed under the terms of the GNU General Public License (GPL) // -// // -// This code is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // -// General Public License for more details. // -// // -// The full text of the GPL is available at: // -// // -// http://www.gnu.org/licenses/ // -//****************************************************************************// - -#ifndef HPCOMBI_FALLBACK_GCDLCM_HPP_INCLUDED -#define HPCOMBI_FALLBACK_GCDLCM_HPP_INCLUDED - - -// FallBack gcd and lcm in case experimental/numeric is not present - -namespace std { -namespace experimental { - -constexpr unsigned gcd( unsigned m, unsigned n ) -{ return n == 0 ? m : gcd(n, m % n); } - -constexpr unsigned lcm( unsigned m, unsigned n ) -{ return m == 0 or n == 0 ? 
0 : (m / gcd(m,n)) * n; } - -} -} - -#endif // HPCOMBI_FALLBACK_GCDLCM_HPP_INCLUDED - diff --git a/include/perm16.hpp b/include/perm16.hpp index 860e61d3..c3f83a81 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -43,7 +43,7 @@ struct alignas(16) PTransf16 : public Vect16 { static constexpr size_t size() { return 16; } using vect = HPCombi::Vect16; - using array = TPUBuild::array; + using array = decltype(Epu8)::array; PTransf16() = default; HPCOMBI_CONSTEXPR_CONSTRUCTOR PTransf16(const PTransf16 &v) = default; diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 49e0d408..794ae329 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -13,16 +13,12 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "power.hpp" #include #include +#include #include -#ifdef HAVE_EXPERIMENTAL_NUMERIC_LCM -#include // lcm until c++17 -#else -#include "fallback/gcdlcm.hpp" // lcm until c++17 -#endif // HAVE_EXPERIMENTAL_NUMERIC_LCM +#include "power.hpp" namespace HPCombi { @@ -71,8 +67,8 @@ inline PTransf16 PTransf16::left_one() const { return image_mask(true) | epu8id; } inline uint32_t PTransf16::rank_ref() const { - TPUBuild::array tmp{}; - static_assert(TPUBuild::size == 16, "Wrong size of EPU8 array"); + decltype(Epu8)::array tmp{}; + static_assert(decltype(Epu8)::size == 16, "Wrong size of EPU8 array"); for (size_t i = 0; i < 16; i++) { if (v[i] != 0xFF) tmp[v[i]] = 1; @@ -249,15 +245,11 @@ inline Perm16 Perm16::inverse_cycl() const { return res; } -static constexpr unsigned lcm_range(unsigned n) { -#if __cplusplus <= 201103L - return n == 1 ? 1 : std::experimental::lcm(lcm_range(n - 1), n); -#else - unsigned res = 1; - for (unsigned i = 1; i <= n; ++i) - res = std::experimental::lcm(res, i); +static constexpr uint32_t lcm_range(uint8_t n) { + uint32_t res = 1; + for (uint8_t i = 1; i <= n; ++i) + res = std::lcm(res, i); return res; -#endif } inline Perm16 Perm16::inverse_pow() const { @@ -274,8 +266,8 @@ inline epu8 Perm16::lehmer_ref() const { } inline epu8 Perm16::lehmer_arr() const { - TPUBuild::array res{}; - TPUBuild::array ar = as_array(); + decltype(Epu8)::array res{}; + decltype(Epu8)::array ar = as_array(); for (size_t i = 0; i < 16; i++) for (size_t j = i + 1; j < 16; j++) if (ar[i] > ar[j]) @@ -303,7 +295,7 @@ inline uint8_t Perm16::length_ref() const { inline uint8_t Perm16::length_arr() const { uint8_t res = 0; - TPUBuild::array ar = as_array(); + decltype(Epu8)::array ar = as_array(); for (size_t i = 0; i < 16; i++) for (size_t j = i + 1; j < 16; j++) if (ar[i] > ar[j]) diff --git a/include/vect16.hpp b/include/vect16.hpp index 8ee3d53f..8a5a33ff 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -25,7 +25,7 @@ namespace HPCombi { struct alignas(16) Vect16 { static constexpr size_t Size() { return 16; } - using array = TPUBuild::array; + using array = decltype(Epu8)::array; epu8 v; Vect16() = default; From b4f1dda3890c3e03f7861d8c6c29b072328bd095 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 18:03:50 +0100 Subject: [PATCH 026/113] Wrote a catch matcher for equality of epu8 --- tests/test_main.hpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 6328ba80..59f98d60 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -16,6 +16,12 @@ #ifndef HPCOMBI_TESTS_TEST_MAIN_HPP_ #define HPCOMBI_TESTS_TEST_MAIN_HPP_ +#include + +#include "epu.hpp" +#include +#include + 
#define TEST_AGREES(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (type p : vct) { \ @@ -39,4 +45,19 @@ } \ } + +struct Equals : Catch::Matchers::MatcherGenericBase { + Equals(HPCombi::epu8 v) : v(v) {} + + bool match(HPCombi::epu8 w) const { return HPCombi::equal(v, w); } + + std::string describe() const override { + return "\n!=\n" + std::to_string(v); + } + +private: + + const HPCombi::epu8 v; +}; + #endif // HPCOMBI_TESTS_TEST_MAIN_HPP_ From a9151f3cd052b5609a5671df8b33ae70b1ab2ec8 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 18:06:02 +0100 Subject: [PATCH 027/113] Change printing of vectors from [] to {} --- include/epu.hpp | 3 +++ include/epu_impl.hpp | 10 ++++++++-- tests/test_perm_all.cpp | 12 ++++++------ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/include/epu.hpp b/include/epu.hpp index 1c7420ee..3c1e069a 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -17,6 +17,7 @@ #define HPCOMBI_EPU_HPP_INCLUDED #include +#include #include #include #include // less<>, equal_to<> @@ -727,6 +728,8 @@ namespace std { inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a); +inline std::string to_string(HPCombi::epu8 const &a); + /** We also specialize the struct * - std::equal_to * - std::not_equal_to diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index c3eb80c7..8dda8b86 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -481,13 +481,19 @@ inline bool is_permutation(epu8 v, const size_t k) { namespace std { inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a) { - stream << "[" << std::setw(2) << unsigned(a[0]); + stream << "{" << std::setw(2) << unsigned(a[0]); for (unsigned i = 1; i < 16; ++i) stream << "," << std::setw(2) << unsigned(a[i]); - stream << "]"; + stream << "}"; return stream; } +inline std::string to_string(HPCombi::epu8 const &a) { + std::ostringstream ss; + ss << a; + return ss.str(); +} + template <> struct equal_to { bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const { return HPCombi::equal(lhs, rhs); diff --git a/tests/test_perm_all.cpp b/tests/test_perm_all.cpp index 9895e201..33c210d6 100644 --- a/tests/test_perm_all.cpp +++ b/tests/test_perm_all.cpp @@ -260,28 +260,28 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", PermTypes) { std::ostringstream out, out2; out << Fixture1::zero; - out2 << "[ 0"; + out2 << "{ 0"; for (size_t i = 1; i < Fixture1::VectType::Size(); i++) out2 << ", 0"; - out2 << "]"; + out2 << "}"; REQUIRE(out.str() == out2.str()); out.str(""); out2.str(""); out << Fixture1::V01; - out2 << "[ 0, 1"; + out2 << "{ 0, 1"; for (size_t i = 2; i < Fixture1::VectType::Size(); i++) out2 << ", 0"; - out2 << "]"; + out2 << "}"; REQUIRE(out.str() == out2.str()); out.str(""); out2.str(""); out << Fixture1::PPa; - out2 << "[ 1, 2, 3, 4, 0"; + out2 << "{ 1, 2, 3, 4, 0"; for (size_t i = 5; i < Fixture1::VectType::Size(); i++) out2 << "," << std::setw(2) << i; - out2 << "]"; + out2 << "}"; REQUIRE(out.str() == out2.str()); } From a60d5e1a9856d4b0bce18fff3e576e4b5e4bc119 Mon Sep 17 00:00:00 2001 From: reiniscirpons Date: Fri, 27 Oct 2023 16:36:45 +0100 Subject: [PATCH 028/113] Update circleci job --- .circleci/config.yml | 90 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 82 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2c4e58d7..d0fa20f8 100644 --- a/.circleci/config.yml +++ 
b/.circleci/config.yml @@ -4,29 +4,103 @@ version: 2.1 jobs: test: + parameters: + compiler: + type: string + version: + type: string docker: - image: reiniscirpons/hpcombi-env-arm64v8:v1 resource_class: arm.medium steps: - - checkout + - run: + name: "Set up compiler" + environment: + COMPILER_NAME: << parameters.compiler >> + COMPILER_VERSION: << parameters.version >> + command: | + apt-get --yes update + mkdir -p workspace + if [ $COMPILER_NAME = "gcc" ]; then + apt-get install --yes gcc-$COMPILER_VERSION + apt-get install --yes g++-$COMPILER_VERSION + echo "export CC=gcc-$COMPILER_VERSION" >> workspace/new-env-vars + echo "export CXX=g++-$COMPILER_VERSION" >> workspace/new-env-vars + else + apt-get install --yes clang++-$COMPILER_VERSION + echo "export CC=clang-$COMPILER_VERSION" >> workspace/new-env-vars + echo "export CXX=clang++-$COMPILER_VERSION" >> workspace/new-env-vars + fi + - run: + name: Check compiler version + command: | + cat workspace/new-env-vars >> $BASH_ENV + source $BASH_ENV + echo "CC" + echo $CC + echo "CXX" + echo $CXX + - checkout: + path: "./HPCombi" - run: name: Run cmake command: | - mkdir build - cd build - cmake -DBUILD_TESTING=1 .. + cat workspace/new-env-vars >> $BASH_ENV + source $BASH_ENV + mkdir -p ./HPCombi/build + cd ./HPCombi/build + cmake -DBUILD_TESTING=1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX .. - run: name: Run make in tests folder command: | - cd build/tests - make + cat workspace/new-env-vars >> $BASH_ENV + source $BASH_ENV + cd ./HPCombi/build/tests + make -j2 - run: name: Run tests command: | - cd build/tests + cat workspace/new-env-vars >> $BASH_ENV + source $BASH_ENV + cd ./HPCombi/build/tests ./test_all workflows: test: jobs: - - test + - test: + name: "test-gcc-9" + compiler: "gcc" + version: "9" + # - test: + # name: "test-gcc-10" + # compiler: "gcc" + # version: "10" + # - test: + # name: "test-gcc-11" + # compiler: "gcc" + # version: "11" + - test: + name: "test-gcc-12" + compiler: "gcc" + version: "12" + - test: + name: "test-clang-11" + compiler: "clang" + version: "11" + # - test: + # name: "test-clang-12" + # compiler: "clang" + # version: "12" + # - test: + # name: "test-clang-13" + # compiler: "clang" + # version: "13" + # - test: + # name: "test-clang-14" + # compiler: "clang" + # version: "14" + - test: + name: "test-clang-15" + compiler: "clang" + version: "15" From 9fa9d74a3a9243edb62112462de1cd5a6ed64d3f Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 18:44:33 +0100 Subject: [PATCH 029/113] Ref implem of image_mask --- include/perm16.hpp | 11 +++++- include/perm16_impl.hpp | 18 ++++++---- include/vect_generic.hpp | 4 +-- tests/test_epu.cpp | 12 +++---- tests/test_perm16.cpp | 75 ++++++++++++++++++++++++++-------------- 5 files changed, 79 insertions(+), 41 deletions(-) diff --git a/include/perm16.hpp b/include/perm16.hpp index f4b3d92b..da65c158 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -73,7 +73,16 @@ struct alignas(16) PTransf16 : public Vect16 { } /** Returns a mask for the image of \c *this */ - epu8 image_mask(bool complement = false) const; + epu8 image_mask_cmpestrm(bool complement = false) const; + /** Returns a mask for the image of \c *this */ + epu8 image_mask_ref(bool complement = false) const; + epu8 image_mask(bool complement = false) const { +#ifdef SIMDE_X86_SSE4_2_NATIVE + return image_mask_cmpestrm(complement); +#else + return image_mask_ref(complement); +#endif + } /** Returns a bit mask for the image of \c *this */ uint32_t 
image_bitset(bool complement = false) const; /** Returns a mask for the domain of \c *this */ diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 6e6bde42..44ccf42b 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -57,13 +57,20 @@ inline PTransf16 PTransf16::right_one() const { return domain_mask(true) | epu8id; } -inline epu8 PTransf16::image_mask(bool complement) const { #ifdef SIMDE_X86_SSE4_2_NATIVE +inline epu8 PTransf16::image_mask_cmpestrm(bool complement) const { return complement ? _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT) : _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT_COMPL); -#else +} #endif +inline epu8 PTransf16::image_mask_ref(bool complement) const { + epu8 res{}; + for (auto x : *this) + if (x != 0xFF) + res[x] = 0xFF; + return complement ? static_cast(!res) : res; } + inline uint32_t PTransf16::image_bitset(bool complement) const { return simde_mm_movemask_epi8(image_mask(complement)); } @@ -73,10 +80,9 @@ inline PTransf16 PTransf16::left_one() const { inline uint32_t PTransf16::rank_ref() const { TPUBuild::array tmp{}; static_assert(TPUBuild::size == 16, "Wrong size of EPU8 array"); - for (size_t i = 0; i < 16; i++) { - if (v[i] != 0xFF) - tmp[v[i]] = 1; - } + for (auto x : *this) + if (x != 0xFF) + tmp[x] = 1; return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0)); } inline uint32_t PTransf16::rank() const { diff --git a/include/vect_generic.hpp b/include/vect_generic.hpp index 594a14ea..6c98f281 100644 --- a/include/vect_generic.hpp +++ b/include/vect_generic.hpp @@ -222,10 +222,10 @@ namespace std { template std::ostream &operator<<(std::ostream &stream, const HPCombi::VectGeneric<_Size, Expo> &v) { - stream << "[" << std::setw(2) << unsigned(v[0]); + stream << "{" << std::setw(2) << unsigned(v[0]); for (unsigned i = 1; i < _Size; ++i) stream << "," << std::setw(2) << unsigned(v[i]); - stream << "]"; + stream << "}"; return stream; } diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 7f8fe36d..fe80eb77 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -194,18 +194,18 @@ TEST_CASE_METHOD(Fix, "Epu8::less", "[Epu8][010]") { } TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { - REQUIRE(equal( + REQUIRE_THAT( permuted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - epu8{2, 3, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE(equal( + Equals(epu8{2, 3, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE_THAT( permuted(epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - epu8{3, 2, 1, 5, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE(equal( + Equals(epu8{3, 2, 1, 5, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); + REQUIRE_THAT( permuted(epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}), - epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); + Equals(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index d75e3146..8d3278e6 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -92,32 +92,55 @@ TEST_CASE("PTransf16::hash", "[PTransf16][001]") { REQUIRE(std::hash()(PTransf16({4, 5, 0}, {9, 0, 1})) != 0); } -// TODO uncomment -// TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { -// 
REQUIRE(equal(PTransf16({}).image_mask(), Epu8(FF)); -// REQUIRE(equal(PTransf16({}).image_mask(false), Epu8(FF)); -// REQUIRE(equal(PTransf16({}).image_mask(true), Epu8(0)); -// REQUIRE(equal(PTransf16({4, 4, 4, 4}).image_mask(), Epu8({0, 0, 0, 0}, -// FF)); REQUIRE(equal(PTransf16({4, 4, 4, 4}).image_mask(false), -// Epu8({0, 0, 0, 0}, FF)); -// REQUIRE(equal(PTransf16({4, 4, 4, 4}).image_mask(true), -// Epu8({FF, FF, FF, FF}, 0)); -// REQUIRE(equal(PTransf16(Epu8(1)).image_mask(), Epu8({0, FF}, 0)); -// REQUIRE(equal(PTransf16(Epu8(2)).image_mask(), Epu8({0, 0, FF}, 0)); -// REQUIRE(equal(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask(), -// Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0)); -// REQUIRE(equal( -// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(), -// Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0)); -// REQUIRE(equal( -// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, -// 2)).image_mask(false), Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, -// 0, 0, 0, FF}, 0)); -// REQUIRE(equal( -// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, -// 2)).image_mask(true), Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, -// FF, FF, FF, FF, 0}, 0)); -// } +TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { + REQUIRE_THAT(PTransf16({}).image_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({}).image_mask(false), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({}).image_mask(true), Equals(Epu8(0))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask(), + Equals(Epu8({0, 0, 0, 0}, FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask(false), + Equals(Epu8({0, 0, 0, 0}, FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask(true), + Equals(Epu8({FF, FF, FF, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(1)).image_mask(), Equals(Epu8({0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(2)).image_mask(), Equals(Epu8({0, 0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask(), + Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(), + Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(false), + Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(true), + Equals(Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); +} + +TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { + REQUIRE_THAT(PTransf16({}).image_mask_ref(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({}).image_mask_ref(false), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({}).image_mask_ref(true), Equals(Epu8(0))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(), + Equals(Epu8({0, 0, 0, 0}, FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(false), + Equals(Epu8({0, 0, 0, 0}, FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(true), + Equals(Epu8({FF, FF, FF, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(1)).image_mask_ref(), Equals(Epu8({0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(2)).image_mask_ref(), Equals(Epu8({0, 0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask_ref(), + Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(), + Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 
0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(false), + Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(true), + Equals(Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); +} // TODO uncomment // TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { From d59bf8f220bb51a8f1a24eede2088fbd9cda7ea4 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 21:49:38 +0100 Subject: [PATCH 030/113] REQUIRE_THAT(..., Equals(...)) everywhere --- tests/test_epu.cpp | 334 +++++++++++++++++++++--------------------- tests/test_main.hpp | 2 +- tests/test_perm16.cpp | 130 ++++++++-------- 3 files changed, 237 insertions(+), 229 deletions(-) diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index fe80eb77..e53e4f51 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -209,24 +209,24 @@ TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { } TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { - REQUIRE(equal(shifted_left(P01), P10)); - REQUIRE(equal(shifted_left(P112), - epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); - REQUIRE(equal(shifted_left(Pv), - epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15, 0})); + REQUIRE_THAT(shifted_left(P01), Equals(P10)); + REQUIRE_THAT(shifted_left(P112), + Equals(epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); + REQUIRE_THAT(shifted_left(Pv), Equals(epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, + 11, 12, 13, 14, 15, 0})); } TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][013]") { - REQUIRE(equal(shifted_right(P10), P01)); - REQUIRE(equal(shifted_right(P112), Epu8({0, 1, 1}, 2))); - REQUIRE(equal(shifted_right(Pv), - epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14})); + REQUIRE_THAT(shifted_right(P10), Equals(P01)); + REQUIRE_THAT(shifted_right(P112), Equals(Epu8({0, 1, 1}, 2))); + REQUIRE_THAT(shifted_right(Pv), Equals(epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, + 3, 2, 11, 12, 13, 14})); } TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][014]") { - REQUIRE(equal(reverted(epu8id), epu8rev)); + REQUIRE_THAT(reverted(epu8id), Equals(epu8rev)); for (auto x : v) { - REQUIRE(equal(x, reverted(reverted(x)))); + REQUIRE_THAT(x, Equals(reverted(reverted(x)))); } } @@ -234,17 +234,17 @@ TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][015]") { epu8 x = Epu8({4, 2, 5, 1, 2, 7, 7, 3, 4, 2}, 1); auto &refx = as_array(x); refx[2] = 42; - REQUIRE(equal(x, Epu8({4, 2, 42, 1, 2, 7, 7, 3, 4, 2}, 1))); + REQUIRE_THAT(x, Equals(Epu8({4, 2, 42, 1, 2, 7, 7, 3, 4, 2}, 1))); std::fill(refx.begin() + 4, refx.end(), 3); - REQUIRE(equal(x, Epu8({4, 2, 42, 1}, 3))); + REQUIRE_THAT(x, Equals(Epu8({4, 2, 42, 1}, 3))); REQUIRE(av == as_array(Pv)); } TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][016]") { for (auto x : v) { - REQUIRE(equal(x, from_array(as_array(x)))); + REQUIRE_THAT(x, Equals(from_array(as_array(x)))); } - REQUIRE(equal(Pv, from_array(av))); + REQUIRE_THAT(Pv, Equals(from_array(av))); } TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { @@ -278,9 +278,9 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { } TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { - REQUIRE(equal( + REQUIRE_THAT( sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - epu8id)); + Equals(epu8id)); for (auto &x : v) { REQUIRE(is_sorted(sorted(x))); } @@ -293,9 +293,9 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { } TEST_CASE_METHOD(Fix, 
"Epu8::revsorted", "[Epu8][019]") { - REQUIRE(equal( + REQUIRE_THAT( revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - epu8rev)); + Equals(epu8rev)); for (auto &x : v) { REQUIRE(is_sorted(reverted(revsorted(x)))); } @@ -309,24 +309,26 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - REQUIRE(equal(sort_perm(ve), - epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, 2, 13, 7, 4, 14})); - REQUIRE(equal(ve, epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); + REQUIRE_THAT(sort_perm(ve), + Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, 2, 13, 7, 4, 14})); + REQUIRE_THAT(ve, Equals(epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); REQUIRE(is_sorted(xsort)); REQUIRE(is_permutation(psort)); - REQUIRE(equal(permuted(x, psort), xsort)); + REQUIRE_THAT(permuted(x, psort), Equals(xsort)); } } TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - REQUIRE(equal(sort8_perm(ve), - epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, 12, 8, 11, 13, 14})); - REQUIRE(equal(ve, epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); + REQUIRE_THAT( + sort8_perm(ve), + Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, 12, 8, 11, 13, 14})); + REQUIRE_THAT(ve, + Equals(epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); for (auto x : v) { epu8 xsort = x; @@ -334,57 +336,59 @@ TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { REQUIRE(is_sorted(xsort | Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); REQUIRE(is_sorted(xsort & Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); REQUIRE(is_permutation(psort)); - REQUIRE(equal(permuted(x, psort), xsort)); + REQUIRE_THAT(permuted(x, psort), Equals(xsort)); } } TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { - REQUIRE(equal(permutation_of(epu8id, epu8id), epu8id)); - REQUIRE(equal(permutation_of(Pa, Pa), epu8id)); - REQUIRE(equal(permutation_of(epu8rev, epu8id), epu8rev)); - REQUIRE(equal(permutation_of(epu8id, epu8rev), epu8rev)); - REQUIRE(equal(permutation_of(epu8rev, epu8rev), epu8id)); - REQUIRE(equal(permutation_of(epu8id, RP), RP)); + REQUIRE_THAT(permutation_of(epu8id, epu8id), Equals(epu8id)); + REQUIRE_THAT(permutation_of(Pa, Pa), Equals(epu8id)); + REQUIRE_THAT(permutation_of(epu8rev, epu8id), Equals(epu8rev)); + REQUIRE_THAT(permutation_of(epu8id, epu8rev), Equals(epu8rev)); + REQUIRE_THAT(permutation_of(epu8rev, epu8rev), Equals(epu8id)); + REQUIRE_THAT(permutation_of(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; - REQUIRE( - equal((permutation_of(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); + REQUIRE_THAT( + (permutation_of(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { - REQUIRE(equal(permutation_of_ref(epu8id, epu8id), epu8id)); - REQUIRE(equal(permutation_of_ref(Pa, Pa), epu8id)); - REQUIRE(equal(permutation_of_ref(epu8rev, epu8id), epu8rev)); - REQUIRE(equal(permutation_of_ref(epu8id, epu8rev), epu8rev)); - REQUIRE(equal(permutation_of_ref(epu8rev, epu8rev), epu8id)); - REQUIRE(equal(permutation_of_ref(epu8id, RP), RP)); + REQUIRE_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); + 
REQUIRE_THAT(permutation_of_ref(Pa, Pa), Equals(epu8id)); + REQUIRE_THAT(permutation_of_ref(epu8rev, epu8id), Equals(epu8rev)); + REQUIRE_THAT(permutation_of_ref(epu8id, epu8rev), Equals(epu8rev)); + REQUIRE_THAT(permutation_of_ref(epu8rev, epu8rev), Equals(epu8id)); + REQUIRE_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; - REQUIRE( - equal((permutation_of_ref(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); + REQUIRE_THAT( + (permutation_of_ref(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { - REQUIRE(equal(remove_dups(P1), P10)); - REQUIRE(equal(remove_dups(P11), P10)); - REQUIRE(equal(remove_dups(sorted(P10)), - epu8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); - REQUIRE(equal(remove_dups(sorted(Pv)), - epu8{0, 1, 2, 0, 3, 4, 5, 0, 0, 6, 11, 12, 0, 13, 14, 15})); - REQUIRE(equal(remove_dups(P1, 1), P1)); - REQUIRE(equal(remove_dups(P11, 1), Epu8({1, 1, 0}, 1))); - REQUIRE(equal(remove_dups(P11, 42), Epu8({1, 42, 0}, 42))); - REQUIRE(equal(remove_dups(sorted(P10), 1), P1)); - REQUIRE(equal(remove_dups(sorted(Pv), 7), - epu8{7, 1, 2, 7, 3, 4, 5, 7, 7, 6, 11, 12, 7, 13, 14, 15})); + REQUIRE_THAT(remove_dups(P1), Equals(P10)); + REQUIRE_THAT(remove_dups(P11), Equals(P10)); + REQUIRE_THAT(remove_dups(sorted(P10)), + Equals(epu8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); + REQUIRE_THAT( + remove_dups(sorted(Pv)), + Equals(epu8{0, 1, 2, 0, 3, 4, 5, 0, 0, 6, 11, 12, 0, 13, 14, 15})); + REQUIRE_THAT(remove_dups(P1, 1), Equals(P1)); + REQUIRE_THAT(remove_dups(P11, 1), Equals(Epu8({1, 1, 0}, 1))); + REQUIRE_THAT(remove_dups(P11, 42), Equals(Epu8({1, 42, 0}, 42))); + REQUIRE_THAT(remove_dups(sorted(P10), 1), Equals(P1)); + REQUIRE_THAT( + remove_dups(sorted(Pv), 7), + Equals(epu8{7, 1, 2, 7, 3, 4, 5, 7, 7, 6, 11, 12, 7, 13, 14, 15})); for (auto x : v) { x = sorted(remove_dups(sorted(x))); - REQUIRE(equal(x, sorted(remove_dups(x)))); + REQUIRE_THAT(x, Equals(sorted(remove_dups(x)))); } for (auto x : v) { x = sorted(remove_dups(sorted(x), 42)); - REQUIRE(equal(x, sorted(remove_dups(x, 42)))); + REQUIRE_THAT(x, Equals(sorted(remove_dups(x, 42)))); } } @@ -413,48 +417,53 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { // TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { - REQUIRE(equal(partial_sums_ref(zero), zero)); - REQUIRE(equal(partial_sums_ref(P01), Epu8({0}, 1))); - REQUIRE( - equal(partial_sums_ref(epu8id), epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, - 55, 66, 78, 91, 105, 120})); - REQUIRE(equal(partial_sums_ref(P10), P1)); - REQUIRE(equal(partial_sums_ref(P11), Epu8({1}, 2))); - REQUIRE(equal(partial_sums_ref(P1), epu8id + Epu8({}, 1))); - REQUIRE(equal(partial_sums_ref(P112), epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, - 18, 20, 22, 24, 26, 28, 30})); - REQUIRE(equal(partial_sums_ref(Pa1), epu8{4, 6, 11, 12, 14, 21, 28, 31, 35, - 37, 38, 39, 40, 41, 42, 43})); - - REQUIRE(equal(partial_sums_ref(Pa2), epu8{4, 6, 11, 12, 14, 23, 30, 33, 37, - 39, 40, 41, 42, 43, 44, 45})); - REQUIRE(equal(partial_sums_ref(P51), epu8{5, 6, 12, 18, 24, 30, 36, 42, 48, - 54, 60, 66, 72, 78, 84, 90})); - REQUIRE(equal(partial_sums_ref(Pv), epu8{5, 10, 12, 17, 18, 24, 36, 40, 40, - 43, 45, 56, 68, 81, 95, 110})); - 
REQUIRE(equal(partial_sums_ref(P5), epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, - 50, 55, 60, 65, 70, 75, 80})); - REQUIRE(equal(partial_sums_ref(epu8rev), - epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, 117, - 119, 120, 120})); - REQUIRE( - equal(partial_sums_ref(Pc), epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, - 161, 168, 175, 182, 189, 196, 203})); + REQUIRE_THAT(partial_sums_ref(zero), Equals(zero)); + REQUIRE_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); + REQUIRE_THAT(partial_sums_ref(epu8id), + Equals(epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, + 105, 120})); + REQUIRE_THAT(partial_sums_ref(P10), Equals(P1)); + REQUIRE_THAT(partial_sums_ref(P11), Equals(Epu8({1}, 2))); + REQUIRE_THAT(partial_sums_ref(P1), Equals(epu8id + Epu8({}, 1))); + REQUIRE_THAT(partial_sums_ref(P112), + Equals(epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, + 28, 30})); + REQUIRE_THAT(partial_sums_ref(Pa1), + Equals(epu8{4, 6, 11, 12, 14, 21, 28, 31, 35, 37, 38, 39, 40, + 41, 42, 43})); + + REQUIRE_THAT(partial_sums_ref(Pa2), + Equals(epu8{4, 6, 11, 12, 14, 23, 30, 33, 37, 39, 40, 41, 42, + 43, 44, 45})); + REQUIRE_THAT(partial_sums_ref(P51), + Equals(epu8{5, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, + 78, 84, 90})); + REQUIRE_THAT(partial_sums_ref(Pv), + Equals(epu8{5, 10, 12, 17, 18, 24, 36, 40, 40, 43, 45, 56, 68, + 81, 95, 110})); + REQUIRE_THAT(partial_sums_ref(P5), + Equals(epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, + 70, 75, 80})); + REQUIRE_THAT(partial_sums_ref(epu8rev), + Equals(epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, + 117, 119, 120, 120})); + REQUIRE_THAT(partial_sums_ref(Pc), + Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, + 175, 182, 189, 196, 203})); } - TEST_CASE_METHOD(Fix, "Epu8::partial_sum_gen", "[Epu8][030]") { for (auto x : v) { - REQUIRE(equal(partial_sums_gen(x), partial_sums_ref(x))); + REQUIRE_THAT(partial_sums_gen(x), Equals(partial_sums_ref(x))); } } TEST_CASE_METHOD(Fix, "Epu8::partial_sum_round", "[Epu8][031]") { for (auto x : v) { - REQUIRE(equal(partial_sums_round(x), partial_sums_ref(x))); + REQUIRE_THAT(partial_sums_round(x), Equals(partial_sums_ref(x))); } } TEST_CASE_METHOD(Fix, "Epu8::partial_sum", "[Epu8][032]") { for (auto x : v) { - REQUIRE(equal(partial_sums(x), partial_sums_ref(x))); + REQUIRE_THAT(partial_sums(x), Equals(partial_sums_ref(x))); } } @@ -482,38 +491,39 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { // TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { - REQUIRE(equal(partial_max_ref(zero), zero)); - REQUIRE(equal(partial_max_ref(P01), Epu8({0}, 1))); - REQUIRE(equal(partial_max_ref(epu8id), epu8id)); - REQUIRE(equal(partial_max_ref(P10), P1)); - REQUIRE(equal(partial_max_ref(P11), P1)); - REQUIRE(equal(partial_max_ref(P1), P1)); - REQUIRE(equal(partial_max_ref(P112), P112)); - REQUIRE(equal(partial_max_ref(Pa1), Epu8({4, 4, 5, 5, 5}, 7))); - REQUIRE(equal(partial_max_ref(Pa2), Epu8({4, 4, 5, 5, 5}, 9))); - REQUIRE(equal(partial_max_ref(P51), Epu8({5, 5}, 6))); - REQUIRE(equal(partial_max_ref(Pv), epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, - 12, 12, 12, 13, 14, 15})); - REQUIRE(equal(partial_max_ref(P5), P5)); - REQUIRE(equal(partial_max_ref(epu8rev), Epu8({}, 15))); - REQUIRE(equal(partial_max_ref(Pc), Epu8({23, 23, 23, 23}, 43))); + REQUIRE_THAT(partial_max_ref(zero), Equals(zero)); + REQUIRE_THAT(partial_max_ref(P01), Equals(Epu8({0}, 1))); + 
REQUIRE_THAT(partial_max_ref(epu8id), Equals(epu8id)); + REQUIRE_THAT(partial_max_ref(P10), Equals(P1)); + REQUIRE_THAT(partial_max_ref(P11), Equals(P1)); + REQUIRE_THAT(partial_max_ref(P1), Equals(P1)); + REQUIRE_THAT(partial_max_ref(P112), Equals(P112)); + REQUIRE_THAT(partial_max_ref(Pa1), Equals(Epu8({4, 4, 5, 5, 5}, 7))); + REQUIRE_THAT(partial_max_ref(Pa2), Equals(Epu8({4, 4, 5, 5, 5}, 9))); + REQUIRE_THAT(partial_max_ref(P51), Equals(Epu8({5, 5}, 6))); + REQUIRE_THAT( + partial_max_ref(Pv), + Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15})); + REQUIRE_THAT(partial_max_ref(P5), Equals(P5)); + REQUIRE_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); + REQUIRE_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } TEST_CASE_METHOD(Fix, "Epu8::partial_max_gen", "[Epu8][039]") { for (auto x : v) { - REQUIRE(equal(partial_max_gen(x), partial_max_ref(x))); + REQUIRE_THAT(partial_max_gen(x), Equals(partial_max_ref(x))); } } TEST_CASE_METHOD(Fix, "Epu8::partial_max_round", "[Epu8][040]") { for (auto x : v) { - REQUIRE(equal(partial_max_round(x), partial_max_ref(x))); + REQUIRE_THAT(partial_max_round(x), Equals(partial_max_ref(x))); } } TEST_CASE_METHOD(Fix, "Epu8::partial_max", "[Epu8][041]") { for (auto x : v) { - REQUIRE(equal(partial_max(x), partial_max_ref(x))); + REQUIRE_THAT(partial_max(x), Equals(partial_max_ref(x))); } } @@ -540,67 +550,59 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { // TEST_AGREES(horiz_min_ref, horiz_min) TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { - REQUIRE(equal(partial_min_ref(zero), zero)); - REQUIRE(equal(partial_min_ref(P01), zero)); - REQUIRE(equal(partial_min_ref(epu8id), zero)); - REQUIRE(equal(partial_min_ref(P10), P10)); - REQUIRE(equal(partial_min_ref(P11), P11)); - REQUIRE(equal(partial_min_ref(P1), P1)); - REQUIRE(equal(partial_min_ref(P112), P1)); - REQUIRE(equal(partial_min_ref(Pa1), Epu8({4, 2, 2}, 1))); - REQUIRE(equal(partial_min_ref(Pa2), Epu8({4, 2, 2}, 1))); - REQUIRE(equal(partial_min_ref(P51), Epu8({5}, 1))); - REQUIRE(equal(partial_min_ref(Pv), Epu8( - { - 5, - 5, - 2, - 2, - 1, - 1, - 1, - 1, - }, - 0))); - REQUIRE(equal(partial_min_ref(P5), P5)); - REQUIRE(equal(partial_min_ref(epu8rev), epu8rev)); - REQUIRE(equal(partial_min_ref(Pc), Epu8({23}, 5))); + REQUIRE_THAT(partial_min_ref(zero), Equals(zero)); + REQUIRE_THAT(partial_min_ref(P01), Equals(zero)); + REQUIRE_THAT(partial_min_ref(epu8id), Equals(zero)); + REQUIRE_THAT(partial_min_ref(P10), Equals(P10)); + REQUIRE_THAT(partial_min_ref(P11), Equals(P11)); + REQUIRE_THAT(partial_min_ref(P1), Equals(P1)); + REQUIRE_THAT(partial_min_ref(P112), Equals(P1)); + REQUIRE_THAT(partial_min_ref(Pa1), Equals(Epu8({4, 2, 2}, 1))); + REQUIRE_THAT(partial_min_ref(Pa2), Equals(Epu8({4, 2, 2}, 1))); + REQUIRE_THAT(partial_min_ref(P51), Equals(Epu8({5}, 1))); + REQUIRE_THAT( + partial_min_ref(Pv), + Equals(Epu8({5, 5, 2, 2, 1, 1, 1, 1, }, 0))); + REQUIRE_THAT(partial_min_ref(P5), Equals(P5)); + REQUIRE_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); + REQUIRE_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } TEST_CASE_METHOD(Fix, "Epu8::partial_min_gen", "[Epu8][044]") { for (auto x : v) { - REQUIRE(equal(partial_min_gen(x), partial_min_ref(x))); + REQUIRE_THAT(partial_min_gen(x), Equals(partial_min_ref(x))); } } TEST_CASE_METHOD(Fix, "Epu8::partial_min_round", "[Epu8][045]") { for (auto x : v) { - REQUIRE(equal(partial_min_round(x), partial_min_ref(x))); + REQUIRE_THAT(partial_min_round(x), Equals(partial_min_ref(x))); } } 
TEST_CASE_METHOD(Fix, "Epu8::partial_min", "[Epu8][046]") { for (auto x : v) { - REQUIRE(equal(partial_min(x), partial_min_ref(x))); + REQUIRE_THAT(partial_min(x), Equals(partial_min_ref(x))); } } TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { - REQUIRE(equal(eval16_ref(zero), Epu8({16}, 0))); - REQUIRE(equal(eval16_ref(P01), Epu8({15, 1}, 0))); - REQUIRE(equal(eval16_ref(epu8id), Epu8({}, 1))); - REQUIRE(equal(eval16_ref(P10), Epu8({15, 1}, 0))); - REQUIRE(equal(eval16_ref(P11), Epu8({14, 2}, 0))); - REQUIRE(equal(eval16_ref(P1), Epu8({0, 16}, 0))); - REQUIRE(equal(eval16_ref(P112), Epu8({0, 2, 14}, 0))); - REQUIRE(equal(eval16_ref(Pa1), Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0))); - REQUIRE(equal(eval16_ref(Pa2), Epu8({0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0))); - REQUIRE(equal(eval16_ref(P51), Epu8({0, 1, 0, 0, 0, 1, 14}, 0))); - REQUIRE(equal(eval16_ref(Pv), - epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); - REQUIRE(equal(eval16_ref(P5), Epu8({0, 0, 0, 0, 0, 16}, 0))); - REQUIRE(equal(eval16_ref(epu8rev), Epu8({}, 1))); - REQUIRE(equal(eval16_ref(Pc), Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); + REQUIRE_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); + REQUIRE_THAT(eval16_ref(P01), Equals(Epu8({15, 1}, 0))); + REQUIRE_THAT(eval16_ref(epu8id), Equals(Epu8({}, 1))); + REQUIRE_THAT(eval16_ref(P10), Equals(Epu8({15, 1}, 0))); + REQUIRE_THAT(eval16_ref(P11), Equals(Epu8({14, 2}, 0))); + REQUIRE_THAT(eval16_ref(P1), Equals(Epu8({0, 16}, 0))); + REQUIRE_THAT(eval16_ref(P112), Equals(Epu8({0, 2, 14}, 0))); + REQUIRE_THAT(eval16_ref(Pa1), Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0))); + REQUIRE_THAT(eval16_ref(Pa2), + Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0))); + REQUIRE_THAT(eval16_ref(P51), Equals(Epu8({0, 1, 0, 0, 0, 1, 14}, 0))); + REQUIRE_THAT(eval16_ref(Pv), + Equals(epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); + REQUIRE_THAT(eval16_ref(P5), Equals(Epu8({0, 0, 0, 0, 0, 16}, 0))); + REQUIRE_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); + REQUIRE_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } // TODO uncomment // TEST_Epu8::AGREES(eval16_ref, eval16_cycle, "[Epu8][000]") @@ -610,27 +612,27 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { // TEST_Epu8::AGREES(eval16_ref, eval16, "[Epu8][000]") TEST_CASE("Epu8::popcount4", "[Epu8][048]") { - REQUIRE( - equal(popcount4, epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); + REQUIRE_THAT(popcount4, + Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); } TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { - REQUIRE(equal(popcount16(Pv), - epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); - REQUIRE(equal(popcount16(RP), - epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - REQUIRE(equal(popcount16(RP << 1), - epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - REQUIRE(equal(popcount16(RP << 2), - epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - REQUIRE(equal(popcount16(Epu8({0, 1, 5, 0xff, 0xf0, 0x35}, 0x0f)), - Epu8({0, 1, 2, 8}, 4))); + REQUIRE_THAT(popcount16(Pv), + Equals(epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); + REQUIRE_THAT(popcount16(RP), + Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + REQUIRE_THAT(popcount16(RP << 1), + Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + REQUIRE_THAT(popcount16(RP << 2), + Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + REQUIRE_THAT(popcount16(Epu8({0, 1, 5, 0xff, 0xf0, 0x35}, 0x0f)), + Equals(Epu8({0, 1, 2, 8}, 4))); } TEST_CASE("random_epu8", 
"[Epu8][050]") { for (int i = 0; i < 10; i++) { epu8 r = random_epu8(255); - REQUIRE(equal(r, r)); + REQUIRE_THAT(r, Equals(r)); } } diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 59f98d60..dd1d4594 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -41,7 +41,7 @@ #define TEST_AGREES_EPU8(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (type p : vct) { \ - REQUIRE(equal(p.fun(), p.ref())); \ + REQUIRE_THAT(p.fun(), Equals(p.ref())); \ } \ } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 8d3278e6..49bb6f6c 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -169,25 +169,25 @@ TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { // } TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { - REQUIRE(equal(PTransf16({}).domain_mask(), Epu8(FF))); - REQUIRE(equal(PTransf16({4, 4, 4, 4}).domain_mask(), Epu8(FF))); - REQUIRE(equal(PTransf16({4, 4, 4, 4}).domain_mask(false), Epu8(FF))); - REQUIRE(equal(PTransf16({4, 4, 4, 4}).domain_mask(true), Epu8(0))); - REQUIRE(equal(PTransf16(Epu8(1)).domain_mask(), Epu8(FF))); - REQUIRE(equal(PTransf16(Epu8(2)).domain_mask(), Epu8(FF))); - REQUIRE(equal(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).domain_mask(), - Epu8({FF, FF, FF, FF}, 0))); - REQUIRE(equal(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).domain_mask(), - Epu8({0, FF, FF, FF}, 0))); - REQUIRE(equal( + REQUIRE_THAT(PTransf16({}).domain_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).domain_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).domain_mask(false), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).domain_mask(true), Equals(Epu8(0))); + REQUIRE_THAT(PTransf16(Epu8(1)).domain_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16(Epu8(2)).domain_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).domain_mask(), + Equals(Epu8({FF, FF, FF, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).domain_mask(), + Equals(Epu8({0, FF, FF, FF}, 0))); + REQUIRE_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(), - Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); - REQUIRE(equal(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) + Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) .domain_mask(false), - Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); - REQUIRE(equal( + Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); + REQUIRE_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(true), - Epu8({0, 0, FF, 0, 0, FF, 0, FF, 0}, FF))); + Equals(Epu8({0, 0, FF, 0, 0, FF, 0, FF, 0}, FF))); } TEST_CASE("PTransf16::right_one", "[PTransf16][005]") { @@ -247,29 +247,32 @@ TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { // } TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { - REQUIRE(equal(PTransf16({}).fix_points_mask(), Epu8(FF))); - REQUIRE(equal(PTransf16({}).fix_points_mask(false), Epu8(FF))); - REQUIRE(equal(PTransf16({}).fix_points_mask(true), Epu8(0))); - REQUIRE(equal(PTransf16({4, 4, 4, 4}).fix_points_mask(), - Epu8({0, 0, 0, 0}, FF))); - REQUIRE(equal(PTransf16({4, 4, 4, 4}).fix_points_mask(false), - Epu8({0, 0, 0, 0}, FF))); - REQUIRE(equal(PTransf16({4, 4, 4, 4}).fix_points_mask(true), - Epu8({FF, FF, FF, FF}, 0))); - REQUIRE(equal(PTransf16(Epu8(1)).fix_points_mask(), Epu8({0, FF}, 0))); - REQUIRE(equal(PTransf16(Epu8(2)).fix_points_mask(), Epu8({0, 0, FF}, 0))); - 
REQUIRE(equal(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_mask(), - Epu8({0, 0, FF, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); - REQUIRE(equal( + REQUIRE_THAT(PTransf16({}).fix_points_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({}).fix_points_mask(false), Equals(Epu8(FF))); + REQUIRE_THAT(PTransf16({}).fix_points_mask(true), Equals(Epu8(0))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(), + Equals(Epu8({0, 0, 0, 0}, FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(false), + Equals(Epu8({0, 0, 0, 0}, FF))); + REQUIRE_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(true), + Equals(Epu8({FF, FF, FF, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(1)).fix_points_mask(), + Equals(Epu8({0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(2)).fix_points_mask(), + Equals(Epu8({0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_mask(), + Equals(Epu8({0, 0, FF, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); + REQUIRE_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)).fix_points_mask(), - Epu8({FF, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); - REQUIRE( - equal(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) - .fix_points_mask(false), - Epu8({FF, 0, FF, 0, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, FF, 0}, 0))); - REQUIRE(equal(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) - .fix_points_mask(true), - Epu8({0, FF, 0}, FF))); + Equals(Epu8({FF, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) + .fix_points_mask(false), + Equals(Epu8({FF, 0, FF, 0, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, FF, 0}, 0))); + REQUIRE_THAT(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + .fix_points_mask(true), + Equals(Epu8({0, FF, 0}, FF))); } TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { @@ -393,25 +396,28 @@ TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, #endif TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { - REQUIRE(equal(PTransf16::one().fix_points_mask(), Epu8(FF))); - REQUIRE(equal(Perm16::one().fix_points_mask(), Epu8(FF))); - REQUIRE(equal(PPa.fix_points_mask(), Epu8({0, 0, 0, 0, 0}, FF))); - REQUIRE(equal(PPb.fix_points_mask(), (epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, - FF, FF, FF, 0, FF, 0}))); - REQUIRE(equal(RandPerm.fix_points_mask(), Epu8({0, FF}, 0))); - - REQUIRE(equal(Perm16::one().fix_points_mask(false), Epu8(FF))); - REQUIRE(equal(PPa.fix_points_mask(false), Epu8({0, 0, 0, 0, 0}, FF))); - REQUIRE( - equal(PPb.fix_points_mask(false), - (epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0}))); - REQUIRE(equal(RandPerm.fix_points_mask(false), Epu8({0, FF}, 0))); - - REQUIRE(equal(Perm16::one().fix_points_mask(true), Epu8(0))); - REQUIRE(equal(PPa.fix_points_mask(true), Epu8({FF, FF, FF, FF, FF}, 0))); - REQUIRE(equal(PPb.fix_points_mask(true), (epu8{FF, FF, FF, FF, FF, 0, FF, 0, - 0, 0, 0, 0, 0, FF, 0, FF}))); - REQUIRE(equal(RandPerm.fix_points_mask(true), Epu8({FF, 0}, FF))); + REQUIRE_THAT(PTransf16::one().fix_points_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(Perm16::one().fix_points_mask(), Equals(Epu8(FF))); + REQUIRE_THAT(PPa.fix_points_mask(), Equals(Epu8({0, 0, 0, 0, 0}, FF))); + REQUIRE_THAT( + PPb.fix_points_mask(), + Equals(epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0})); + REQUIRE_THAT(RandPerm.fix_points_mask(), Equals(Epu8({0, FF}, 0))); + + REQUIRE_THAT(Perm16::one().fix_points_mask(false), Equals(Epu8(FF))); + REQUIRE_THAT(PPa.fix_points_mask(false), 
Equals(Epu8({0, 0, 0, 0, 0}, FF))); + REQUIRE_THAT( + PPb.fix_points_mask(false), + Equals(epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0})); + REQUIRE_THAT(RandPerm.fix_points_mask(false), Equals(Epu8({0, FF}, 0))); + + REQUIRE_THAT(Perm16::one().fix_points_mask(true), Equals(Epu8(0))); + REQUIRE_THAT(PPa.fix_points_mask(true), + Equals(Epu8({FF, FF, FF, FF, FF}, 0))); + REQUIRE_THAT( + PPb.fix_points_mask(true), + Equals(epu8{FF, FF, FF, FF, FF, 0, FF, 0, 0, 0, 0, 0, 0, FF, 0, FF})); + REQUIRE_THAT(RandPerm.fix_points_mask(true), Equals(Epu8({FF, 0}, FF))); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][023]") { @@ -471,11 +477,11 @@ TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse, Plist, "[Perm16][031]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { - REQUIRE(equal(Perm16::one().lehmer(), zero)); - REQUIRE(equal(PPa.lehmer(), - (epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}))); - REQUIRE(equal(PPb.lehmer(), - (epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0}))); + REQUIRE_THAT(Perm16::one().lehmer(), Equals(zero)); + REQUIRE_THAT(PPa.lehmer(), + Equals(epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); + REQUIRE_THAT(PPb.lehmer(), + Equals(epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); } TEST_AGREES_EPU8(Perm16Fixture, Perm16, lehmer_ref, lehmer_arr, Plist, From 62a38dd9eb7707c92b0dcaf2e81db065d6726af5 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Fri, 27 Oct 2023 22:45:18 +0100 Subject: [PATCH 031/113] Reinstated commented out test in bmat8 --- tests/test_bmat8.cpp | 64 ++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp index e14dd95f..a3b4908f 100644 --- a/tests/test_bmat8.cpp +++ b/tests/test_bmat8.cpp @@ -517,37 +517,37 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") { REQUIRE(BMat8({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2); } -// TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") { -// BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); -// BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); -// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); -// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); -// -// m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); -// m2 = BMat8({{1, 0, 0, 0}, {0, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); -// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); -// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); -// -// m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); -// m2 = BMat8({{0, 0, 0, 0}, {1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); -// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); -// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); -// -// m1 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,1}, {0,0,0,0}}); -// m2 = BMat8({{1,0,0,1}, {0,0,1,0}, {0,1,0,0}, {0,0,0,1}}); -// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); -// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); -// -// m1 = BMat8({{0,0,0,1}, {1,0,0,0}, {0,0,1,0}, {0,1,0,0}}); -// m2 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,0}, {0,0,0,1}}); -// REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({0,2,3,1})); -// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({0,2,3,1})); -// -// -// m1 = BMat8({{0,0,0,1}, {0,0,1,0}, {0,1,0,0}, {1,0,0,0}}); -// m2 = BMat8({{0,1,0,0}, {0,0,0,1}, {1,0,0,0}, {0,0,1,0}}); -// 
REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({2,0,3,1})); -// REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({2,0,3,1})); -// } +TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") { + BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); + BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); + + m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); + m2 = BMat8({{1, 0, 0, 0}, {0, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); + + m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); + m2 = BMat8({{0, 0, 0, 0}, {1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); + + m1 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,1}, {0,0,0,0}}); + m2 = BMat8({{1,0,0,1}, {0,0,1,0}, {0,1,0,0}, {0,0,0,1}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); + + m1 = BMat8({{0,0,0,1}, {1,0,0,0}, {0,0,1,0}, {0,1,0,0}}); + m2 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,0}, {0,0,0,1}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({0,2,3,1})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({0,2,3,1})); + + + m1 = BMat8({{0,0,0,1}, {0,0,1,0}, {0,1,0,0}, {1,0,0,0}}); + m2 = BMat8({{0,1,0,0}, {0,0,0,1}, {1,0,0,0}, {0,0,1,0}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({2,0,3,1})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({2,0,3,1})); +} } // namespace HPCombi From 95b1b63e70a30235f2d4b8c6096bc27102a9efb0 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sat, 28 Oct 2023 11:47:31 +0100 Subject: [PATCH 032/113] All test reinstated & pass on x86_64 --- include/perm16_impl.hpp | 11 ++-- tests/test_epu.cpp | 48 ++++++--------- tests/test_main.hpp | 24 ++++++-- tests/test_perm16.cpp | 132 ++++++++++++++++++++-------------------- 4 files changed, 107 insertions(+), 108 deletions(-) diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 11c23294..92ec5bb8 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -93,24 +93,21 @@ inline uint32_t PTransf16::fix_points_bitset(bool complement) const { } inline uint8_t PTransf16::smallest_fix_point() const { - uint32_t res = fix_points_bitset(false); - return res == 0 ? 0xFF : __builtin_ffsl(res) - 1; + return __builtin_ffs(fix_points_bitset(false)) - 1; } /** Returns the smallest non fix point of \c *this */ inline uint8_t PTransf16::smallest_moved_point() const { - uint32_t res = fix_points_bitset(true); - return res == 0 ? 0xFF : __builtin_ffsl(res) - 1; + return __builtin_ffs(fix_points_bitset(true)) - 1; } /** Returns the largest fix point of \c *this */ inline uint8_t PTransf16::largest_fix_point() const { uint32_t res = fix_points_bitset(false); - - return res == 0 ? 0xFF : 15 - __builtin_ctzl(res); + return res == 0 ? 0xFF : 31 - __builtin_clz(res); } /** Returns the largest non fix point of \c *this */ inline uint8_t PTransf16::largest_moved_point() const { uint32_t res = fix_points_bitset(true); - return res == 0 ? 0xFF : 15 - __builtin_ctzl(res); + return res == 0 ? 
0xFF : 31 - __builtin_clz(res); } /** Returns the number of fix points of \c *this */ inline uint8_t PTransf16::nb_fix_points() const { diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index e53e4f51..6617d3da 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -21,13 +21,6 @@ #include "test_main.hpp" #include -// TODO uncomment -// #define TEST_AGREES(ref, fun) \ -// BOOST_FIXTURE_TEST_CASE(Epu8::agrees_##fun, Fix) { \ -// for (auto x : v) \ -// REQUIRE(fun(x) == ref(x)); \ -// } - namespace HPCombi { struct Fix { @@ -409,12 +402,10 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { REQUIRE(horiz_sum_ref(Pc) == 203); } -// TODO uncomment, compiler complains that Epu8 is an unknown type!? -// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") -// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum4, v, "[Epu8][026]") -// -// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") -// TEST_AGREES(Fix, Epu8, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") +TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") +TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum4, v, "[Epu8][026]") +TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") +TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { REQUIRE_THAT(partial_sums_ref(zero), Equals(zero)); @@ -484,11 +475,10 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { REQUIRE(horiz_max_ref(Pc) == 43); } -// TODO uncomment, compiler complains that Epu8 is an unknown type!? -// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") -// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max4, v, "[Epu8][035]") -// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max3, v, "[Epu8][036]") -// TEST_AGREES(Fix, Epu8, horiz_max_ref, horiz_max, v, "[Epu8][037]") +TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") +TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max4, v, "[Epu8][035]") +TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max3, v, "[Epu8][036]") +TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { REQUIRE_THAT(partial_max_ref(zero), Equals(zero)); @@ -543,11 +533,11 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { REQUIRE(horiz_min_ref(epu8rev) == 0); REQUIRE(horiz_min_ref(Pc) == 5); } -// TODO uncomment -// TEST_AGREES(horiz_min_ref, horiz_min_gen) -// TEST_AGREES(horiz_min_ref, horiz_min4) -// TEST_AGREES(horiz_min_ref, horiz_min3) -// TEST_AGREES(horiz_min_ref, horiz_min) + +TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min_gen, v, "[Epu8][034]") +TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min4, v, "[Epu8][035]") +TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min3, v, "[Epu8][036]") +TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { REQUIRE_THAT(partial_min_ref(zero), Equals(zero)); @@ -604,12 +594,12 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { REQUIRE_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); REQUIRE_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } -// TODO uncomment -// TEST_Epu8::AGREES(eval16_ref, eval16_cycle, "[Epu8][000]") -// TEST_Epu8::AGREES(eval16_ref, eval16_popcount, "[Epu8][000]") -// TEST_Epu8::AGREES(eval16_ref, eval16_arr, "[Epu8][000]") -// TEST_Epu8::AGREES(eval16_ref, eval16_gen, 
"[Epu8][000]") -// TEST_Epu8::AGREES(eval16_ref, eval16, "[Epu8][000]") + +TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_cycle, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_popcount, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_arr, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_gen, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16, v, "[Epu8][034]") TEST_CASE("Epu8::popcount4", "[Epu8][048]") { REQUIRE_THAT(popcount4, diff --git a/tests/test_main.hpp b/tests/test_main.hpp index dd1d4594..473be9cc 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -22,17 +22,31 @@ #include #include +#define TEST_AGREES_FUN(fixture, type, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ + for (auto p : vct) { \ + REQUIRE(fun(p) == ref(p)); \ + } \ + } + +#define TEST_AGREES_FUN_EPU8(fixture, type, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ + for (auto p : vct) { \ + REQUIRE_THAT(fun(p), Equals(ref(p))); \ + } \ + } + #define TEST_AGREES(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ - for (type p : vct) { \ + for (auto p : vct) { \ REQUIRE(p.fun() == p.ref()); \ } \ } #define TEST_AGREES2(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ - for (type p1 : vct) { \ - for (type p2 : vct) { \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ REQUIRE(p1.fun(p2) == p1.ref(p2)); \ } \ } \ @@ -40,8 +54,8 @@ #define TEST_AGREES_EPU8(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ - for (type p : vct) { \ - REQUIRE_THAT(p.fun(), Equals(p.ref())); \ + for (auto p : vct) { \ + REQUIRE_THAT(p.fun(), Equals(p.ref())); \ } \ } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 49bb6f6c..70016a94 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -142,31 +142,25 @@ TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { Equals(Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); } -// TODO uncomment -// TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { -// REQUIRE(PTransf16({}).left_one() == PTransf16::one()); -// REQUIRE(PTransf16({4, 4, 4, 4}).left_one() == -// PTransf16({FF, FF, FF, FF})); -// REQUIRE(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, -// FF))); REQUIRE(PTransf16(Epu8(2)).left_one() == -// PTransf16(Epu8({FF, FF, 2}, FF))); -// REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).left_one() == -// PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, -// FF, -// FF, 15})); -// REQUIRE(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).left_one() == -// PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, -// FF, -// FF, 15})); -// REQUIRE( -// PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).left_one() == -// PTransf16( -// {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15 })); -// REQUIRE( -// PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).left_one() == -// PTransf16( -// {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15 })); -// } + +TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { + REQUIRE(PTransf16({}).left_one() == PTransf16::one()); + REQUIRE(PTransf16({4, 4, 4, 4}).left_one() == PTransf16({FF, FF, FF, FF})); + REQUIRE(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, FF))); + 
REQUIRE(PTransf16(Epu8(2)).left_one() == PTransf16(Epu8({FF, FF, 2}, FF))); + REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).left_one() == + PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, + FF, 15})); + REQUIRE(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).left_one() == + PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, + FF, 15})); + REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).left_one() == + PTransf16( + {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); + REQUIRE(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).left_one() == + PTransf16( + {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); +} TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { REQUIRE_THAT(PTransf16({}).domain_mask(), Equals(Epu8(FF))); @@ -229,22 +223,22 @@ TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { } // TODO uncomment -// TEST_CASE("PTransf16::rank", "[PTransf16][007]") { -// REQUIRE(PTransf16({}).rank() == 16); -// REQUIRE(PTransf16({4, 4, 4, 4}).rank() == 12); -// REQUIRE( -// PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == -// 1); -// REQUIRE( -// PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == -// 1); -// REQUIRE( -// PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() -// == 2); -// REQUIRE( -// PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}).rank() -// == 4); -// } +TEST_CASE("PTransf16::rank", "[PTransf16][007]") { + REQUIRE(PTransf16({}).rank() == 16); + REQUIRE(PTransf16({4, 4, 4, 4}).rank() == 12); + REQUIRE( + PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == + 1); + REQUIRE( + PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == + 1); + REQUIRE( + PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() + == 2); + REQUIRE( + PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}).rank() + == 4); +} TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { REQUIRE_THAT(PTransf16({}).fix_points_mask(), Equals(Epu8(FF))); @@ -369,17 +363,15 @@ TEST_CASE("PPerm16::hash", "[PPerm16][018]") { REQUIRE(std::hash()(PPerm16::one()) != 0); REQUIRE(std::hash()(PPerm16({4, 5, 0}, {9, 0, 1})) != 0); } -// TODO uncomment -// TEST_CASE_METHOD(Perm16Fixture, "PPerm16::left_one", "[PPerm16][019]") { -// REQUIRE(PPerm16({}).left_one() == PPerm16::one()); -// REQUIRE(PPerm16({FF, FF, FF, 4}).left_one() == -// PPerm16({FF, FF, FF, FF})); -// REQUIRE(PPerm16({FF, 4, FF, FF}).left_one() == -// PPerm16({FF, FF, FF, FF})); -// for (auto pp : PPlist) { -// REQUIRE(pp.left_one() * pp == pp); -// } -// } + +TEST_CASE_METHOD(Perm16Fixture, "PPerm16::left_one", "[PPerm16][019]") { + REQUIRE(PPerm16({}).left_one() == PPerm16::one()); + REQUIRE(PPerm16({FF, FF, FF, 4}).left_one() == PPerm16({FF, FF, FF, FF})); + REQUIRE(PPerm16({FF, 4, FF, FF}).left_one() == PPerm16({FF, FF, FF, FF})); + for (auto pp : PPlist) { + REQUIRE(pp.left_one() * pp == pp); + } +} TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { REQUIRE(PPerm16({}).right_one() == PPerm16::one()); @@ -429,21 +421,27 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][023]") { TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_moved_point", "[Perm16][024]") { - REQUIRE(Perm16::one().smallest_moved_point() == FF); - REQUIRE(PPa.smallest_moved_point() == 0); - REQUIRE(PPb.smallest_moved_point() == 0); - REQUIRE(RandPerm.smallest_moved_point() == 0); - REQUIRE(Perm16({0, 1, 3, 2}).smallest_moved_point() == 2); -} - 
-// TODO broken test -// TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") -// { -// REQUIRE(Perm16::one().largest_fix_point() == 15); -// REQUIRE(PPa.largest_fix_point() == 15); -// REQUIRE(PPb.largest_fix_point() == 14); -// REQUIRE(RandPerm.largest_fix_point() == 1); -// } + CHECK(Perm16::one().smallest_moved_point() == int(FF)); + CHECK(PPa.smallest_moved_point() == 0); + CHECK(PPb.smallest_moved_point() == 0); + CHECK(RandPerm.smallest_moved_point() == 0); + CHECK(Perm16({0, 1, 3, 2}).smallest_moved_point() == 2); +} + +TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") { + CHECK(Perm16::one().largest_fix_point() == 15); + CHECK(int(PPa.largest_fix_point()) == 15); + CHECK(PPb.largest_fix_point() == 14); + CHECK(RandPerm.largest_fix_point() == 1); + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + .largest_fix_point() == 2); + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 10)) + .largest_fix_point() == 10); + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) + .largest_fix_point() == 14); + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 15)) + .largest_fix_point() == 15); +} TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][026]") { REQUIRE(Perm16::one().nb_fix_points() == 16); From ad6ac858eda451e7647ddbd06eae7b470d5940e5 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sat, 28 Oct 2023 12:11:16 +0100 Subject: [PATCH 033/113] Reformating --- tests/test_bmat8.cpp | 29 +++++++++++++------------- tests/test_epu.cpp | 48 +++++++++++++++++++++++++------------------ tests/test_main.hpp | 11 +++++----- tests/test_perm16.cpp | 44 ++++++++++++++++++++++----------------- 4 files changed, 72 insertions(+), 60 deletions(-) diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp index a3b4908f..9e04f06d 100644 --- a/tests/test_bmat8.cpp +++ b/tests/test_bmat8.cpp @@ -520,8 +520,8 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") { TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") { BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1, 0})); m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); m2 = BMat8({{1, 0, 0, 0}, {0, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); @@ -533,21 +533,20 @@ TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") { REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); - m1 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,1}, {0,0,0,0}}); - m2 = BMat8({{1,0,0,1}, {0,0,1,0}, {0,1,0,0}, {0,0,0,1}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1,0})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1,0})); + m1 = BMat8({{0, 1, 0, 0}, {0, 0, 1, 0}, {1, 0, 0, 1}, {0, 0, 0, 0}}); + m2 = BMat8({{1, 0, 0, 1}, {0, 0, 1, 0}, {0, 1, 0, 0}, {0, 0, 0, 1}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1, 0})); - m1 = BMat8({{0,0,0,1}, {1,0,0,0}, {0,0,1,0}, {0,1,0,0}}); - m2 = BMat8({{0,1,0,0}, {0,0,1,0}, {1,0,0,0}, {0,0,0,1}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({0,2,3,1})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({0,2,3,1})); + 
m1 = BMat8({{0, 0, 0, 1}, {1, 0, 0, 0}, {0, 0, 1, 0}, {0, 1, 0, 0}}); + m2 = BMat8({{0, 1, 0, 0}, {0, 0, 1, 0}, {1, 0, 0, 0}, {0, 0, 0, 1}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({0, 2, 3, 1})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({0, 2, 3, 1})); - - m1 = BMat8({{0,0,0,1}, {0,0,1,0}, {0,1,0,0}, {1,0,0,0}}); - m2 = BMat8({{0,1,0,0}, {0,0,0,1}, {1,0,0,0}, {0,0,1,0}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({2,0,3,1})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({2,0,3,1})); + m1 = BMat8({{0, 0, 0, 1}, {0, 0, 1, 0}, {0, 1, 0, 0}, {1, 0, 0, 0}}); + m2 = BMat8({{0, 1, 0, 0}, {0, 0, 0, 1}, {1, 0, 0, 0}, {0, 0, 1, 0}}); + REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({2, 0, 3, 1})); + REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({2, 0, 3, 1})); } } // namespace HPCombi diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 6617d3da..f97a283d 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -302,9 +302,10 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - REQUIRE_THAT(sort_perm(ve), - Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, 2, 13, 7, 4, 14})); - REQUIRE_THAT(ve, Equals(epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); + REQUIRE_THAT(sort_perm(ve), Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, + 2, 13, 7, 4, 14})); + REQUIRE_THAT(ve, + Equals(epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); for (auto x : v) { epu8 xsort = x; @@ -317,9 +318,8 @@ TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - REQUIRE_THAT( - sort8_perm(ve), - Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, 12, 8, 11, 13, 14})); + REQUIRE_THAT(sort8_perm(ve), Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, + 12, 8, 11, 13, 14})); REQUIRE_THAT(ve, Equals(epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); @@ -341,10 +341,10 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { REQUIRE_THAT(permutation_of(epu8rev, epu8rev), Equals(epu8id)); REQUIRE_THAT(permutation_of(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; - REQUIRE_THAT( - (permutation_of(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); + REQUIRE_THAT((permutation_of(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, + 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { REQUIRE_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); @@ -354,10 +354,10 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { REQUIRE_THAT(permutation_of_ref(epu8rev, epu8rev), Equals(epu8id)); REQUIRE_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; - REQUIRE_THAT( - (permutation_of_ref(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); + REQUIRE_THAT((permutation_of_ref(Pv, Pv) | + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, + 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { @@ -491,9 +491,8 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", 
"[Epu8][038]") { REQUIRE_THAT(partial_max_ref(Pa1), Equals(Epu8({4, 4, 5, 5, 5}, 7))); REQUIRE_THAT(partial_max_ref(Pa2), Equals(Epu8({4, 4, 5, 5, 5}, 9))); REQUIRE_THAT(partial_max_ref(P51), Equals(Epu8({5, 5}, 6))); - REQUIRE_THAT( - partial_max_ref(Pv), - Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15})); + REQUIRE_THAT(partial_max_ref(Pv), Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, + 12, 12, 12, 12, 13, 14, 15})); REQUIRE_THAT(partial_max_ref(P5), Equals(P5)); REQUIRE_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); REQUIRE_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); @@ -550,9 +549,18 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { REQUIRE_THAT(partial_min_ref(Pa1), Equals(Epu8({4, 2, 2}, 1))); REQUIRE_THAT(partial_min_ref(Pa2), Equals(Epu8({4, 2, 2}, 1))); REQUIRE_THAT(partial_min_ref(P51), Equals(Epu8({5}, 1))); - REQUIRE_THAT( - partial_min_ref(Pv), - Equals(Epu8({5, 5, 2, 2, 1, 1, 1, 1, }, 0))); + REQUIRE_THAT(partial_min_ref(Pv), Equals(Epu8( + { + 5, + 5, + 2, + 2, + 1, + 1, + 1, + 1, + }, + 0))); REQUIRE_THAT(partial_min_ref(P5), Equals(P5)); REQUIRE_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); REQUIRE_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 473be9cc..58dc8800 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -22,10 +22,11 @@ #include #include -#define TEST_AGREES_FUN(fixture, type, ref, fun, vct, tags) \ +// In the followings "type" is only used for documentation +#define TEST_AGREES_FUN(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p : vct) { \ - REQUIRE(fun(p) == ref(p)); \ + REQUIRE(fun(p) == ref(p)); \ } \ } @@ -59,18 +60,16 @@ } \ } - struct Equals : Catch::Matchers::MatcherGenericBase { Equals(HPCombi::epu8 v) : v(v) {} bool match(HPCombi::epu8 w) const { return HPCombi::equal(v, w); } std::string describe() const override { - return "\n!=\n" + std::to_string(v); + return "\n!=\n" + std::to_string(v); } -private: - + private: const HPCombi::epu8 v; }; diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 70016a94..ed2988f8 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -104,8 +104,9 @@ TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { Equals(Epu8({FF, FF, FF, FF}, 0))); REQUIRE_THAT(PTransf16(Epu8(1)).image_mask(), Equals(Epu8({0, FF}, 0))); REQUIRE_THAT(PTransf16(Epu8(2)).image_mask(), Equals(Epu8({0, 0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask(), - Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask(), + Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); REQUIRE_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); @@ -114,7 +115,8 @@ TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); REQUIRE_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(true), - Equals(Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); + Equals(Epu8( + {0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); } TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { @@ -128,21 +130,25 @@ TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { REQUIRE_THAT(PTransf16({4, 4, 
4, 4}).image_mask_ref(true), Equals(Epu8({FF, FF, FF, FF}, 0))); REQUIRE_THAT(PTransf16(Epu8(1)).image_mask_ref(), Equals(Epu8({0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(2)).image_mask_ref(), Equals(Epu8({0, 0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask_ref(), - Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); + REQUIRE_THAT(PTransf16(Epu8(2)).image_mask_ref(), + Equals(Epu8({0, 0, FF}, 0))); + REQUIRE_THAT( + PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask_ref(), + Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); REQUIRE_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); REQUIRE_THAT( - PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(false), + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + .image_mask_ref(false), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); REQUIRE_THAT( - PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(true), - Equals(Epu8({0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); + PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + .image_mask_ref(true), + Equals(Epu8( + {0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); } - TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { REQUIRE(PTransf16({}).left_one() == PTransf16::one()); REQUIRE(PTransf16({4, 4, 4, 4}).left_one() == PTransf16({FF, FF, FF, FF})); @@ -177,7 +183,7 @@ TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(), Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); REQUIRE_THAT(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) - .domain_mask(false), + .domain_mask(false), Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); REQUIRE_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(true), @@ -233,11 +239,11 @@ TEST_CASE("PTransf16::rank", "[PTransf16][007]") { PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == 1); REQUIRE( - PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() - == 2); + PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == + 2); REQUIRE( - PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}).rank() - == 4); + PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}).rank() == + 4); } TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { @@ -434,13 +440,13 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") { CHECK(PPb.largest_fix_point() == 14); CHECK(RandPerm.largest_fix_point() == 1); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) - .largest_fix_point() == 2); + .largest_fix_point() == 2); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 10)) - .largest_fix_point() == 10); + .largest_fix_point() == 10); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) - .largest_fix_point() == 14); + .largest_fix_point() == 14); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 15)) - .largest_fix_point() == 15); + .largest_fix_point() == 15); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][026]") { From 137224a40b46c7107819315b0a7aa04996c31f4d Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sat, 28 Oct 2023 20:08:03 +0100 Subject: [PATCH 034/113] constexpr backward compat cleanup --- include/bmat8_impl.hpp | 2 +- include/epu.hpp | 14 +------ include/fallback/seq.hpp | 85 
---------------------------------------- include/perm16.hpp | 32 +++++++-------- include/perm16_impl.hpp | 8 ++-- include/vect16.hpp | 4 +- 6 files changed, 24 insertions(+), 121 deletions(-) delete mode 100644 include/fallback/seq.hpp diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index f17d0263..ea4c0f77 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -406,7 +406,7 @@ inline size_t BMat8::nr_rows() const { return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8{})); } -static HPCOMBI_CONSTEXPR epu8 rev8{7, 6, 5, 4, 3, 2, 1, 0, +static constexpr epu8 rev8{7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15}; inline BMat8 BMat8::row_permuted(Perm16 p) const { epu8 x = simde_mm_set_epi64x(0, _data); diff --git a/include/epu.hpp b/include/epu.hpp index ca56dad7..00fa8e9b 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -24,29 +24,17 @@ #include #include #include +#include // integer_sequences #ifdef HPCOMBI_HAVE_CONFIG #include "HPCombi-config.h" #endif -#if __cplusplus <= 201103L -#include "fallback/seq.hpp" -#endif - #include "vect_generic.hpp" #include "simde/x86/sse4.1.h" #include "simde/x86/sse4.2.h" -#ifdef HPCOMBI_CONSTEXPR_FUN_ARGS -#define HPCOMBI_CONSTEXPR constexpr -#define HPCOMBI_CONSTEXPR_CONSTRUCTOR constexpr -#else -#pragma message "Using a constexpr broken compiler ! " \ - "Performance may not be optimal" -#define HPCOMBI_CONSTEXPR const -#define HPCOMBI_CONSTEXPR_CONSTRUCTOR -#endif namespace HPCombi { diff --git a/include/fallback/seq.hpp b/include/fallback/seq.hpp deleted file mode 100644 index 58dcf899..00000000 --- a/include/fallback/seq.hpp +++ /dev/null @@ -1,85 +0,0 @@ -//****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // -// // -// Distributed under the terms of the GNU General Public License (GPL) // -// // -// This code is distributed in the hope that it will be useful, // -// but WITHOUT ANY WARRANTY; without even the implied warranty of // -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // -// General Public License for more details. // -// // -// The full text of the GPL is available at: // -// // -// http://www.gnu.org/licenses/ // -//****************************************************************************// - -#ifndef HPCOMBI_FALLBACK_SEQ_HPP_INCLUDED -#define HPCOMBI_FALLBACK_SEQ_HPP_INCLUDED - -namespace bwcompat_details { - // Copy pasted from sources - - // Stores a tuple of indices. Used by tuple and pair, and by bind() to - // extract the elements in a tuple. - template - struct _Index_tuple - { - typedef _Index_tuple<_Indexes..., sizeof...(_Indexes)> __next; - }; - - // Builds an _Index_tuple<0, 1, 2, ..., _Num-1>. 
- template - struct _Build_index_tuple - { - typedef typename _Build_index_tuple<_Num - 1>::__type::__next __type; - }; - - template<> - struct _Build_index_tuple<0> - { - typedef _Index_tuple<> __type; - }; - - /// Class template integer_sequence - template - struct integer_sequence - { - typedef _Tp value_type; - static constexpr size_t size() { return sizeof...(_Idx); } - }; - - template::__type> - struct _Make_integer_sequence; - - template - struct _Make_integer_sequence<_Tp, _Num, _Index_tuple<_Idx...>> - { - static_assert( _Num >= 0, - "Cannot make integer sequence of negative length" ); - - typedef integer_sequence<_Tp, static_cast<_Tp>(_Idx)...> __type; - }; -} // namespace bwcompat_details - -namespace std { - /// Alias template make_integer_sequence - template - using make_integer_sequence - = typename bwcompat_details::_Make_integer_sequence<_Tp, _Num>::__type; - - /// Alias template index_sequence - template - using index_sequence = bwcompat_details::integer_sequence; - - /// Alias template make_index_sequence - template - using make_index_sequence = make_integer_sequence; - - /// Alias template index_sequence_for - template - using index_sequence_for = make_index_sequence; - -} // namespace std - -#endif // HPCOMBI_FALLBACK_SEQ_HPP_INCLUDED diff --git a/include/perm16.hpp b/include/perm16.hpp index fa8d245c..8178a157 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -46,9 +46,9 @@ struct alignas(16) PTransf16 : public Vect16 { using array = decltype(Epu8)::array; PTransf16() = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR PTransf16(const PTransf16 &v) = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR PTransf16(const vect v) : Vect16(v) {} - HPCOMBI_CONSTEXPR_CONSTRUCTOR PTransf16(const epu8 x) : Vect16(x) {} + constexpr PTransf16(const PTransf16 &v) = default; + constexpr PTransf16(const vect v) : Vect16(v) {} + constexpr PTransf16(const epu8 x) : Vect16(x) {} PTransf16(std::vector dom, std::vector rng, size_t = 0 /* unused */); PTransf16(std::initializer_list il); @@ -65,7 +65,7 @@ struct alignas(16) PTransf16 : public Vect16 { } //! The identity partial transformation. - static HPCOMBI_CONSTEXPR PTransf16 one() { return epu8id; } + static constexpr PTransf16 one() { return epu8id; } //! The product of two partial transformations. PTransf16 operator*(const PTransf16 &p) const { return HPCombi::permuted(v, p.v) | (p.v == Epu8(0xFF)); @@ -120,9 +120,9 @@ struct alignas(16) PTransf16 : public Vect16 { */ struct Transf16 : public PTransf16 { Transf16() = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR Transf16(const Transf16 &v) = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR Transf16(const vect v) : PTransf16(v) {} - HPCOMBI_CONSTEXPR_CONSTRUCTOR Transf16(const epu8 x) : PTransf16(x) {} + constexpr Transf16(const Transf16 &v) = default; + constexpr Transf16(const vect v) : PTransf16(v) {} + constexpr Transf16(const epu8 x) : PTransf16(x) {} Transf16(std::initializer_list il) : PTransf16(il) {} Transf16 &operator=(const Transf16 &) = default; @@ -132,7 +132,7 @@ struct Transf16 : public PTransf16 { } //! The identity transformation. - static HPCOMBI_CONSTEXPR Transf16 one() { return epu8id; } + static constexpr Transf16 one() { return epu8id; } //! The product of two transformations. 
Transf16 operator*(const Transf16 &p) const { return HPCombi::permuted(v, p.v); @@ -149,9 +149,9 @@ struct Transf16 : public PTransf16 { */ struct PPerm16 : public PTransf16 { PPerm16() = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const PPerm16 &v) = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const vect v) : PTransf16(v) {} - HPCOMBI_CONSTEXPR_CONSTRUCTOR PPerm16(const epu8 x) : PTransf16(x) {} + constexpr PPerm16(const PPerm16 &v) = default; + constexpr PPerm16(const vect v) : PTransf16(v) {} + constexpr PPerm16(const epu8 x) : PTransf16(x) {} PPerm16(std::vector dom, std::vector rng, size_t = 0 /* unused */) : PTransf16(dom, rng) {} @@ -164,7 +164,7 @@ struct PPerm16 : public PTransf16 { } //! The identity partial permutations. - static HPCOMBI_CONSTEXPR PPerm16 one() { return epu8id; } + static constexpr PPerm16 one() { return epu8id; } //! The product of two partial perrmutations. PPerm16 operator*(const PPerm16 &p) const { return this->PTransf16::operator*(p); @@ -208,9 +208,9 @@ struct PPerm16 : public PTransf16 { */ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { Perm16() = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR Perm16(const Perm16 &) = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR Perm16(const vect v) : Transf16(v) {} - HPCOMBI_CONSTEXPR_CONSTRUCTOR Perm16(const epu8 x) : Transf16(x) {} + constexpr Perm16(const Perm16 &) = default; + constexpr Perm16(const vect v) : Transf16(v) {} + constexpr Perm16(const epu8 x) : Transf16(x) {} Perm16 &operator=(const Perm16 &) = default; Perm16(std::initializer_list il) : Transf16(il) {} @@ -221,7 +221,7 @@ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { // being defined (see https://stackoverflow.com/questions/11928089/) // therefore we chose to have functions. //! The identity partial permutation. - static HPCOMBI_CONSTEXPR Perm16 one() { return epu8id; } + static constexpr Perm16 one() { return epu8id; } //! The product of two permutations Perm16 operator*(const Perm16 &p) const { return HPCombi::permuted(v, p.v); diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 92ec5bb8..78eaefe9 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -114,14 +114,14 @@ inline uint8_t PTransf16::nb_fix_points() const { return __builtin_popcountl(fix_points_bitset()); } -inline static HPCOMBI_CONSTEXPR uint8_t hilo_exchng_fun(uint8_t i) { +inline static constexpr uint8_t hilo_exchng_fun(uint8_t i) { return i < 8 ? i + 8 : i - 8; } -static HPCOMBI_CONSTEXPR epu8 hilo_exchng = Epu8(hilo_exchng_fun); -inline static HPCOMBI_CONSTEXPR uint8_t hilo_mask_fun(uint8_t i) { +static constexpr epu8 hilo_exchng = Epu8(hilo_exchng_fun); +inline static constexpr uint8_t hilo_mask_fun(uint8_t i) { return i < 8 ? 
0x0 : 0xFF; } -static HPCOMBI_CONSTEXPR epu8 hilo_mask = Epu8(hilo_mask_fun); +static constexpr epu8 hilo_mask = Epu8(hilo_mask_fun); inline Transf16::Transf16(uint64_t compressed) { epu8 res = simde_mm_set_epi64x(compressed, compressed); diff --git a/include/vect16.hpp b/include/vect16.hpp index 8a5a33ff..cfc54f26 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -29,10 +29,10 @@ struct alignas(16) Vect16 { epu8 v; Vect16() = default; - HPCOMBI_CONSTEXPR_CONSTRUCTOR Vect16(epu8 x) : v(x) {} + constexpr Vect16(epu8 x) : v(x) {} Vect16(std::initializer_list il, uint8_t def = 0) : v(Epu8(il, def)) {} - HPCOMBI_CONSTEXPR_CONSTRUCTOR operator epu8() const { return v; } + constexpr operator epu8() const { return v; } Vect16 &operator=(const Vect16 &) = default; Vect16 &operator=(const epu8 &vv) { From a9db482aaa19f04c560b9ceada8983d51aeb42f8 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sat, 28 Oct 2023 20:10:58 +0100 Subject: [PATCH 035/113] constexpr backward compat cleanup in build files --- CMakeLists.txt | 18 +++++++++--------- config.h.in | 9 --------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6531bab5..cec744ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,15 +53,15 @@ include(CheckCXXSourceCompiles) include(CheckCXXCompilerFlag) ## Check for buggy constexpr support G++ 5.0 -check_cxx_source_compiles( - " - using T = int; - constexpr int exec(T f()) { return f(); } - constexpr int foo() { return 1; } - static_assert(exec(foo) == 1, \"Failed exec\"); - int main() {} - " - HPCOMBI_CONSTEXPR_FUN_ARGS) +# check_cxx_source_compiles( +# " +# using T = int; +# constexpr int exec(T f()) { return f(); } +# constexpr int foo() { return 1; } +# static_assert(exec(foo) == 1, \"Failed exec\"); +# int main() {} +# " +# HPCOMBI_CONSTEXPR_FUN_ARGS) check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_ARCH_NATIVE) check_cxx_compiler_flag('-mtune=native' HPCOMBI_HAVE_FLAG_TUNE_NATIVE) diff --git a/config.h.in b/config.h.in index f84cbbfa..7a1f2ccc 100644 --- a/config.h.in +++ b/config.h.in @@ -8,19 +8,10 @@ #ifndef HPCOMBI_CONFIG_HPP #define HPCOMBI_CONFIG_HPP -// Whether the compiler provide the "experimental/numeric" include file -#cmakedefine HPCOMBI_HAVE_EXPERIMENTAL_NUMERIC - -// Whether std::experimental::lcm function works -#cmakedefine HPCOMBI_HAVE_EXPERIMENTAL_NUMERIC_LCM - // Google sparse hash map #cmakedefine HPCOMBI_HAVE_DENSEHASHMAP // Google sparse hash set #cmakedefine HPCOMBI_HAVE_DENSEHASHSET -// GCC-4.8 constexpr function argument bug -#cmakedefine HPCOMBI_CONSTEXPR_FUN_ARGS - #endif // HPCOMBI_CONFIG_HPP From 63a54cce5158e65e28502682d9238470a7c6cf7d Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sat, 28 Oct 2023 22:20:40 +0100 Subject: [PATCH 036/113] Removed HPCombi-Config system --- CMakeLists.txt | 6 +----- config.h.in | 17 ----------------- examples/CMakeLists.txt | 4 ++++ include/epu.hpp | 4 ---- 4 files changed, 5 insertions(+), 26 deletions(-) delete mode 100644 config.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index cec744ea..5e9dff9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,8 +132,7 @@ IF (BUILD_TESTING) ENDIF(BUILD_TESTING) ##################### -# config.h file stuff -configure_file(${CMAKE_SOURCE_DIR}/config.h.in ${CMAKE_BINARY_DIR}/HPCombi-config.h) +# Is this needed ? 
configure_file(${CMAKE_SOURCE_DIR}/.VERSION.in ${CMAKE_BINARY_DIR}/.VERSION) set(AUTOGENERATED_WARNING "WARNING: THIS IS A CMAKE AUTO-GENERATED FILE.") @@ -149,9 +148,6 @@ endforeach(f) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/.VERSION DESTINATION ".") -install (FILES ${CMAKE_CURRENT_BINARY_DIR}/HPCombi-config.h - DESTINATION include/${CMAKE_PROJECT_NAME}) - install ( DIRECTORY ${CMAKE_SOURCE_DIR}/include/ DESTINATION include/${CMAKE_PROJECT_NAME} diff --git a/config.h.in b/config.h.in deleted file mode 100644 index 7a1f2ccc..00000000 --- a/config.h.in +++ /dev/null @@ -1,17 +0,0 @@ -//****************************************************************************// -// File : HPCombi-config.h -// Description : Configuration of @PROJECT_NAME@ -// -// @AUTOGENERATED_WARNING@ -//****************************************************************************// - -#ifndef HPCOMBI_CONFIG_HPP -#define HPCOMBI_CONFIG_HPP - -// Google sparse hash map -#cmakedefine HPCOMBI_HAVE_DENSEHASHMAP - -// Google sparse hash set -#cmakedefine HPCOMBI_HAVE_DENSEHASHSET - -#endif // HPCOMBI_CONFIG_HPP diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index fa9540a9..e8e073ee 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -17,10 +17,14 @@ include (CheckIncludeFileCXX) check_include_file_cxx("sparsehash/dense_hash_map" HPCOMBI_HAVE_DENSEHASHMAP) if (NOT HPCOMBI_HAVE_DENSEHASHMAP) message(STATUS "Google 'dense_hash_map' not found, using std::unordered_map") +else() + add_compile_definitions(HPCOMBI_HAVE_DENSEHASHMAP) endif() check_include_file_cxx("sparsehash/dense_hash_set" HPCOMBI_HAVE_DENSEHASHSET) if (NOT HPCOMBI_HAVE_DENSEHASHSET) message(STATUS "Google 'dense_hash_set' not found, using std::unordered_set") +else() + add_compile_definitions(HPCOMBI_HAVE_DENSEHASHSET) endif() include_directories(${CMAKE_SOURCE_DIR}/include ${PROJECT_BINARY_DIR}) diff --git a/include/epu.hpp b/include/epu.hpp index 00fa8e9b..5ea6ab72 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -26,10 +26,6 @@ #include #include // integer_sequences -#ifdef HPCOMBI_HAVE_CONFIG -#include "HPCombi-config.h" -#endif - #include "vect_generic.hpp" #include "simde/x86/sse4.1.h" From 3dd3ad41578cc19704088751c194af8dc68af2d3 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sat, 28 Oct 2023 23:13:48 +0100 Subject: [PATCH 037/113] Fixup --- CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e9dff9c..cd911e33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,8 +39,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_EXTENSIONS OFF) # -std=c++14 instead of -std=gnu++14 -add_definitions(-DHPCOMBI_HAVE_CONFIG) - message(STATUS "*** Compiler id is ${CMAKE_CXX_COMPILER_ID}") # See https://stackoverflow.com/questions/47213356/ for discussion cmake_policy(SET CMP0067 NEW) @@ -180,6 +178,3 @@ if (UNIX) add_custom_target(tags etags --members --declarations `find ${CMAKE_SOURCE_DIR}/ -name *.cpp -or -name *.hpp -or -name *.c -or -name *.h` -o ${CMAKE_SOURCE_DIR}/TAGS) add_custom_target(etags DEPENDS tags) endif (UNIX) - - - From c38cc9bd4a48b02b336c838bd0e7fbb53db0a4c9 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 09:50:46 +0000 Subject: [PATCH 038/113] Updated Doxygen config file --- doc/Doxyfile.in | 436 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 317 insertions(+), 119 deletions(-) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 5d3d9f2b..44a5ec78 100644 --- a/doc/Doxyfile.in +++ 
b/doc/Doxyfile.in @@ -1,4 +1,4 @@ -# Doxyfile 1.8.11 +# Doxyfile 1.9.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -17,11 +17,11 @@ # Project related configuration options #--------------------------------------------------------------------------- -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all text -# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv -# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv -# for the list of possible encodings. +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 @@ -93,6 +93,14 @@ ALLOW_UNICODE_NAMES = NO OUTPUT_LANGUAGE = English +# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all generated output in the proper direction. +# Possible values are: None, LTR, RTL and Context. +# The default value is: None. + +OUTPUT_TEXT_DIRECTION = None + # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -179,6 +187,16 @@ SHORT_NAMES = NO JAVADOC_AUTOBRIEF = YES +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*************** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = NO + # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus @@ -199,6 +217,14 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = NO +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -226,16 +252,15 @@ TAB_SIZE = 4 # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines. +# newlines (in the resulting output). You can put ^^ in the value part of an +# alias to insert a newline as if a physical newline was in the original file. 
+# When you need a literal { or } or , in the value part of an alias you have to +# escape them by means of a backslash (\), this can lead to conflicts with the +# commands \{ and \} for these it is advised to use the version @{ and @} or use +# a double escape (\\{ and \\}) ALIASES = -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all @@ -264,28 +289,40 @@ OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUTPUT_VHDL = NO +# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice +# sources only. Doxygen will then generate output that is more tailored for that +# language. For instance, namespaces will be presented as modules, types will be +# separated into more groups, etc. +# The default value is: NO. + +OPTIMIZE_OUTPUT_SLICE = NO + # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: -# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: -# Fortran. In the later case the parser tries to guess whether the code is fixed -# or free formatted code, this is the default for Fortran type files), VHDL. For -# instance to make doxygen treat .inc files as Fortran files (default is PHP), -# and .f files as C (default is Fortran), use: inc=Fortran f=C. +# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, +# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, +# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# FortranFree, unknown formatted Fortran: Fortran. In the later case the parser +# tries to guess whether the code is fixed or free formatted code, this is the +# default for Fortran type files). For instance to make doxygen treat .inc files +# as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. +# the files are not read by doxygen. When specifying no_extension you should add +# * to the FILE_PATTERNS. +# +# Note see also the list of default file extension mappings. EXTENSION_MAPPING = # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable -# documentation. See http://daringfireball.net/projects/markdown/ for details. +# documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. 
@@ -293,6 +330,15 @@ EXTENSION_MAPPING = MARKDOWN_SUPPORT = YES +# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up +# to that level are automatically included in the table of contents, even if +# they do not have an id attribute. +# Note: This feature currently applies only to Markdown headings. +# Minimum value: 0, maximum value: 99, default value: 5. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +TOC_INCLUDE_HEADINGS = 5 + # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or @@ -318,7 +364,7 @@ BUILTIN_STL_SUPPORT = NO CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen +# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. @@ -404,6 +450,19 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 +# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use +# during processing. When set to 0 doxygen will based this on the number of +# cores available in the system. You can set it explicitly to a value larger +# than 0 to get more control over the balance between CPU load and processing +# speed. At this moment only the input processing can be done using multiple +# threads. Since this is still an experimental feature the default is set to 1, +# which efficively disables parallel processing. Please report any issues you +# encounter. Generating dot graphs in parallel is controlled by the +# DOT_NUM_THREADS setting. +# Minimum value: 0, maximum value: 32, default value: 1. + +NUM_PROC_THREADS = 1 + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -424,6 +483,12 @@ EXTRACT_ALL = YES EXTRACT_PRIVATE = NO +# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual +# methods of a class will be included in the documentation. +# The default value is: NO. + +EXTRACT_PRIV_VIRTUAL = NO + # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. @@ -461,6 +526,13 @@ EXTRACT_LOCAL_METHODS = NO EXTRACT_ANON_NSPACES = NO +# If this flag is set to YES, the name of an unnamed parameter in a declaration +# will be determined by the corresponding definition. By default unnamed +# parameters remain unnamed in the output. +# The default value is: YES. + +RESOLVE_UNNAMED_PARAMS = YES + # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation @@ -478,8 +550,8 @@ HIDE_UNDOC_MEMBERS = NO HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO, these declarations will be -# included in the documentation. +# declarations. If set to NO, these declarations will be included in the +# documentation. # The default value is: NO. 
HIDE_FRIEND_COMPOUNDS = NO @@ -498,11 +570,18 @@ HIDE_IN_BODY_DOCS = NO INTERNAL_DOCS = NO -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. +# With the correct setting of option CASE_SENSE_NAMES doxygen will better be +# able to match the capabilities of the underlying filesystem. In case the +# filesystem is case sensitive (i.e. it supports files in the same directory +# whose names only differ in casing), the option must be set to YES to properly +# deal with such files in case they appear in the input. For filesystems that +# are not case sensitive the option should be be set to NO to properly deal with +# output files written for symbols that only differ in casing, such as for two +# classes, one named CLASS and the other named Class, and to also support +# references to files without having to specify the exact matching casing. On +# Windows (including Cygwin) and MacOS, users should typically set this option +# to NO, whereas on Linux or other Unix flavors it should typically be set to +# YES. # The default value is: system dependent. CASE_SENSE_NAMES = YES @@ -689,7 +768,7 @@ LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. +# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. @@ -734,13 +813,17 @@ WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. +# parameter documentation, but not about the absence of documentation. If +# EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. +# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS +# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but +# at the end of the doxygen process doxygen will return with a non-zero status. +# Possible values are: NO, YES and FAIL_ON_WARNINGS. # The default value is: NO. WARN_AS_ERROR = NO @@ -771,16 +854,16 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. 
-INPUT = ${CMAKE_SOURCE_DIR}/include \ - ${CMAKE_SOURCE_DIR}/src \ - ${CMAKE_SOURCE_DIR}/examples \ - ${CMAKE_SOURCE_DIR}/README.md +INPUT = ${CMAKE_SOURCE_DIR}/include \ + ${CMAKE_SOURCE_DIR}/src \ + ${CMAKE_SOURCE_DIR}/examples \ + ${CMAKE_SOURCE_DIR}/README.md # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: http://www.gnu.org/software/libiconv) for the list of -# possible encodings. +# documentation (see: +# https://www.gnu.org/software/libiconv/) for the list of possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 @@ -793,11 +876,15 @@ INPUT_ENCODING = UTF-8 # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # +# Note the list of default checked file patterns might differ from the list of +# default file extension mappings. +# # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f, *.for, *.tcl, -# *.vhd, *.vhdl, *.ucf, *.qsf, *.as and *.js. +# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), +# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, +# *.ucf, *.qsf and *.ice. FILE_PATTERNS = @@ -841,7 +928,7 @@ EXCLUDE_PATTERNS = # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = common_* +EXCLUDE_SYMBOLS = common_* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -952,7 +1039,7 @@ INLINE_SOURCES = NO STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# function all documented functions referencing it will be listed. +# entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO @@ -984,12 +1071,12 @@ SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system -# (see http://www.gnu.org/software/global/global.html). You will need version +# (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the config file +# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # @@ -1011,6 +1098,44 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES +# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the +# clang parser (see: +# http://clang.llvm.org/) for more accurate parsing at the cost of reduced +# performance. This can be particularly helpful with template rich C++ code for +# which doxygen's built-in parser lacks the necessary type information. 
+# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake. +# The default value is: NO. + +CLANG_ASSISTED_PARSING = NO + +# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to +# YES then doxygen will add the directory of each input to the include path. +# The default value is: YES. + +CLANG_ADD_INC_PATHS = YES + +# If clang assisted parsing is enabled you can provide the compiler with command +# line options that you would normally use when invoking the compiler. Note that +# the include paths will already be set by doxygen for the files and directories +# specified with INPUT and INCLUDE_PATH. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. + +CLANG_OPTIONS = + +# If clang assisted parsing is enabled you can provide the clang parser with the +# path to the directory containing a file called compile_commands.json. This +# file is the compilation database (see: +# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the +# options used when the source files were built. This is equivalent to +# specifying the -p option to a clang tool, such as clang-check. These options +# will then be passed to the parser. Any options specified with CLANG_OPTIONS +# will be added as well. +# Note: The availability of this option depends on whether or not doxygen was +# generated with the -Duse_libclang=ON option for CMake. + +CLANG_DATABASE_PATH = + #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- @@ -1022,13 +1147,6 @@ VERBATIM_HEADERS = YES ALPHABETICAL_INDEX = YES -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored @@ -1129,7 +1247,7 @@ HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see -# http://en.wikipedia.org/wiki/Hue for more information. For instance the value +# https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. @@ -1165,6 +1283,17 @@ HTML_COLORSTYLE_GAMMA = 80 HTML_TIMESTAMP = NO +# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML +# documentation will contain a main index with vertical navigation menus that +# are dynamically created via JavaScript. If disabled, the navigation index will +# consists of multiple levels of tabs that are statically embedded in every HTML +# page. Disable this option to support browsers that do not have JavaScript, +# like the Qt help browser. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. 
+ +HTML_DYNAMIC_MENUS = YES + # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. @@ -1188,13 +1317,14 @@ HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: http://developer.apple.com/tools/xcode/), introduced with -# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in +# environment (see: +# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To +# create a documentation set, doxygen will generate a Makefile in the HTML +# output directory. Running make will produce the docset in that directory and +# running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html -# for more information. +# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy +# genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1233,8 +1363,8 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. +# (see: +# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1264,7 +1394,7 @@ CHM_FILE = HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). +# (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. @@ -1309,7 +1439,8 @@ QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace -# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). +# (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1317,8 +1448,8 @@ QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- -# folders). +# Folders (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. @@ -1326,30 +1457,30 @@ QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. 
For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). +# Filters (see: +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). +# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. +# The QHG_LOCATION tag can be used to specify the location (absolute path +# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to +# run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = @@ -1426,6 +1557,17 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg +# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see +# https://inkscape.org) to generate formulas as SVG images instead of PNGs for +# the HTML output. These images will generally look nicer at scaled resolutions. +# Possible values are: png (the default) and svg (looks nicer but requires the +# pdf2svg or inkscape tool). +# The default value is: png. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_FORMULA_FORMAT = png + # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML @@ -1435,7 +1577,7 @@ EXT_LINKS_IN_WINDOW = NO FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # @@ -1446,8 +1588,14 @@ FORMULA_FONTSIZE = 10 FORMULA_TRANSPARENT = YES +# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands +# to create new LaTeX commands to be used in formulas as building blocks. See +# the section "Including formulas" for details. + +FORMULA_MACROFILE = + # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# http://www.mathjax.org) which uses client side Javascript for the rendering +# https://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want to formulas look prettier in the HTML output. 
When # enabled you may also need to install MathJax separately and configure the path @@ -1459,7 +1607,7 @@ USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. @@ -1474,8 +1622,8 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from http://www.mathjax.org before deployment. -# The default value is: http://cdn.mathjax.org/mathjax/latest. +# MathJax from https://www.mathjax.org before deployment. +# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest @@ -1489,7 +1637,8 @@ MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an +# (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1517,7 +1666,7 @@ MATHJAX_CODEFILE = SEARCHENGINE = YES # When the SERVER_BASED_SEARCH tag is enabled the search engine will be -# implemented using a web server instead of a web client using Javascript. There +# implemented using a web server instead of a web client using JavaScript. There # are two flavors of web server based searching depending on the EXTERNAL_SEARCH # setting. When disabled, doxygen will generate a PHP script for searching and # an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing @@ -1536,7 +1685,8 @@ SERVER_BASED_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). +# Xapian (see: +# https://xapian.org/). # # See the section "External Indexing and Searching" for details. # The default value is: NO. @@ -1549,8 +1699,9 @@ EXTERNAL_SEARCH = NO # # Doxygen ships with an example indexer (doxyindexer) and search engine # (doxysearch.cgi) which are based on the open source search engine library -# Xapian (see: http://xapian.org/). See the section "External Indexing and -# Searching" for details. +# Xapian (see: +# https://xapian.org/). See the section "External Indexing and Searching" for +# details. # This tag requires that the tag SEARCHENGINE is set to YES. SEARCHENGINE_URL = @@ -1601,21 +1752,35 @@ LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be # invoked. # -# Note that when enabling USE_PDFLATEX this option is only used for generating -# bitmaps for formulas in the HTML output, but not in the Makefile that is -# written to the output directory. -# The default file is: latex. 
+# Note that when not enabling USE_PDFLATEX the default is latex when enabling +# USE_PDFLATEX the default is pdflatex and when in the later case latex is +# chosen this is overwritten by pdflatex. For specific output languages the +# default can have been set differently, this depends on the implementation of +# the output language. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate # index for LaTeX. +# Note: This tag is used in the Makefile / make.bat. +# See also: LATEX_MAKEINDEX_CMD for the part in the generated output file +# (.tex). # The default file is: makeindex. # This tag requires that the tag GENERATE_LATEX is set to YES. MAKEINDEX_CMD_NAME = makeindex +# The LATEX_MAKEINDEX_CMD tag can be used to specify the command name to +# generate index for LaTeX. In case there is no backslash (\) as first character +# it will be automatically added in the LaTeX code. +# Note: This tag is used in the generated output file (.tex). +# See also: MAKEINDEX_CMD_NAME for the part in the Makefile / make.bat. +# The default value is: makeindex. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_MAKEINDEX_CMD = makeindex + # If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX # documents. This may be useful for small projects and may help to save some # trees in general. @@ -1700,9 +1865,11 @@ LATEX_EXTRA_FILES = PDF_HYPERLINKS = YES -# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate -# the PDF file directly from the LaTeX files. Set this option to YES, to get a -# higher quality PDF documentation. +# If the USE_PDFLATEX tag is set to YES, doxygen will use the engine as +# specified with LATEX_CMD_NAME to generate the PDF file directly from the LaTeX +# files. Set this option to YES, to get a higher quality PDF documentation. +# +# See also section LATEX_CMD_NAME for selecting the engine. # The default value is: YES. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1736,7 +1903,7 @@ LATEX_SOURCE_CODE = NO # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See -# http://en.wikipedia.org/wiki/BibTeX and \cite for more info. +# https://en.wikipedia.org/wiki/BibTeX and \cite for more info. # The default value is: plain. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1750,6 +1917,14 @@ LATEX_BIB_STYLE = plain LATEX_TIMESTAMP = NO +# The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) +# path from which the emoji images will be read. If a relative path is entered, +# it will be relative to the LATEX_OUTPUT directory. If left blank the +# LATEX_OUTPUT directory will be used. +# This tag requires that the tag GENERATE_LATEX is set to YES. + +LATEX_EMOJI_DIRECTORY = + #--------------------------------------------------------------------------- # Configuration options related to the RTF output #--------------------------------------------------------------------------- @@ -1789,9 +1964,9 @@ COMPACT_RTF = NO RTF_HYPERLINKS = NO -# Load stylesheet definitions from file. Syntax is similar to doxygen's config -# file, i.e. a series of assignments. You only have to provide replacements, -# missing definitions are set to their default value. +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# configuration file, i.e. a series of assignments. 
You only have to provide +# replacements, missing definitions are set to their default value. # # See also section "Doxygen usage" for information on how to generate the # default style sheet that doxygen normally uses. @@ -1800,8 +1975,8 @@ RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an RTF document. Syntax is -# similar to doxygen's config file. A template extensions file can be generated -# using doxygen -e rtf extensionFile. +# similar to doxygen's configuration file. A template extensions file can be +# generated using doxygen -e rtf extensionFile. # This tag requires that the tag GENERATE_RTF is set to YES. RTF_EXTENSIONS_FILE = @@ -1887,6 +2062,13 @@ XML_OUTPUT = xml XML_PROGRAMLISTING = YES +# If the XML_NS_MEMB_FILE_SCOPE tag is set to YES, doxygen will include +# namespace members in file scope as well, matching the HTML output. +# The default value is: NO. +# This tag requires that the tag GENERATE_XML is set to YES. + +XML_NS_MEMB_FILE_SCOPE = NO + #--------------------------------------------------------------------------- # Configuration options related to the DOCBOOK output #--------------------------------------------------------------------------- @@ -1919,9 +2101,9 @@ DOCBOOK_PROGRAMLISTING = NO #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an -# AutoGen Definitions (see http://autogen.sf.net) file that captures the -# structure of the code including all documentation. Note that this feature is -# still experimental and incomplete at the moment. +# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures +# the structure of the code including all documentation. Note that this feature +# is still experimental and incomplete at the moment. # The default value is: NO. GENERATE_AUTOGEN_DEF = NO @@ -2088,12 +2270,6 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH = /usr/bin/perl - #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- @@ -2107,15 +2283,6 @@ PERL_PATH = /usr/bin/perl CLASS_DIAGRAMS = YES -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. @@ -2134,7 +2301,7 @@ HIDE_UNDOC_RELATIONS = YES # http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO -# The default value is: NO. +# The default value is: YES. HAVE_DOT = NO @@ -2213,10 +2380,32 @@ UML_LOOK = NO # but if the number exceeds 15, the total amount of fields shown is limited to # 10. 
# Minimum value: 0, maximum value: 100, default value: 10. -# This tag requires that the tag HAVE_DOT is set to YES. +# This tag requires that the tag UML_LOOK is set to YES. UML_LIMIT_NUM_FIELDS = 10 +# If the DOT_UML_DETAILS tag is set to NO, doxygen will show attributes and +# methods without types and arguments in the UML graphs. If the DOT_UML_DETAILS +# tag is set to YES, doxygen will add type and arguments for attributes and +# methods in the UML graphs. If the DOT_UML_DETAILS tag is set to NONE, doxygen +# will not generate fields with class member information in the UML graphs. The +# class diagrams will look similar to the default class diagrams but using UML +# notation for the relationships. +# Possible values are: NO, YES and NONE. +# The default value is: NO. +# This tag requires that the tag UML_LOOK is set to YES. + +DOT_UML_DETAILS = NO + +# The DOT_WRAP_THRESHOLD tag can be used to set the maximum number of characters +# to display on a single line. If the actual line length exceeds this threshold +# significantly it will wrapped across multiple lines. Some heuristics are apply +# to avoid ugly line breaks. +# Minimum value: 0, maximum value: 1000, default value: 17. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_WRAP_THRESHOLD = 17 + # If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and # collaboration graphs will show the relations between templates and their # instances. @@ -2290,7 +2479,9 @@ DIRECTORY_GRAPH = YES # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). -# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo, +# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, +# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, +# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, # png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and # png:gdiplus:gdiplus. # The default value is: png. @@ -2343,6 +2534,11 @@ DIAFILE_DIRS = PLANTUML_JAR_PATH = +# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a +# configuration file for plantuml. + +PLANTUML_CFG_FILE = + # When using plantuml, the specified paths are searched for files specified by # the !include statement in a plantuml block. @@ -2401,9 +2597,11 @@ DOT_MULTI_TARGETS = NO GENERATE_LEGEND = YES -# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot +# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. +# +# Note: This setting is not only used for dot files but also for msc and +# plantuml temporary files. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. DOT_CLEANUP = YES From 80eedee0fdc2a9161d15f6ad6cf816799f5d7bb4 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 11:54:48 +0000 Subject: [PATCH 039/113] Fix spelling --- include/power.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/power.hpp b/include/power.hpp index 0163a5e1..71edd425 100644 --- a/include/power.hpp +++ b/include/power.hpp @@ -21,7 +21,7 @@ * time to a O(log n) long sequence of multiplication. 
Furthermore such * expression not only works for numbers for for any type where there is a * neutral element and an associative (non necessarily commutative) product, - * namely what mathematician calls a \e monoid. This include for example, + * namely what mathematicians call \e monoids. These include for example, * strings where the neutral element is the empty string and the product is * the concatenation. * From 27ab45e81f9efc70dadd087cc315b8d9b28c350f Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 11:55:25 +0000 Subject: [PATCH 040/113] Doc improvements --- include/epu.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/epu.hpp b/include/epu.hpp index 5ea6ab72..8ea52292 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -249,7 +249,8 @@ constexpr uint64_t prime = 0x9e3779b97f4a7bb9; /** A random #HPCombi::epu8 * @details - * @param bnd : the upper bound for the value of the entries + * @param bnd : the upper bound for the value of the entries. + * \c bnd must verify @f$ 0 < bnd \leq 256 @f$. This is not checked. * @returns a random #HPCombi::epu8 with value in the interval * @f$[0, 1, 2, ..., bnd-1]@f$. */ @@ -259,7 +260,8 @@ inline epu8 random_epu8(uint16_t bnd); * @details * @param a: supposed to be sorted * @param repl: the value replacing the duplicate entries (default to 0) - * @return a where repeated occurences of entries are replaced by \c repl + * @return the vector \c a where repeated occurences of entries are replaced + * by \c repl */ inline epu8 remove_dups(epu8 a, uint8_t repl = 0); From c904286e6fc28cc9d7b1728ae2474dc70aafd7a2 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 11:55:46 +0000 Subject: [PATCH 041/113] Tests improvements (TEST_AGREES_EPU8_FUN) --- tests/test_epu.cpp | 94 ++++++++++++++------------------------------- tests/test_main.hpp | 7 ++++ 2 files changed, 35 insertions(+), 66 deletions(-) diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index f97a283d..fc0c634b 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -442,21 +442,12 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } -TEST_CASE_METHOD(Fix, "Epu8::partial_sum_gen", "[Epu8][030]") { - for (auto x : v) { - REQUIRE_THAT(partial_sums_gen(x), Equals(partial_sums_ref(x))); - } -} -TEST_CASE_METHOD(Fix, "Epu8::partial_sum_round", "[Epu8][031]") { - for (auto x : v) { - REQUIRE_THAT(partial_sums_round(x), Equals(partial_sums_ref(x))); - } -} -TEST_CASE_METHOD(Fix, "Epu8::partial_sum", "[Epu8][032]") { - for (auto x : v) { - REQUIRE_THAT(partial_sums(x), Equals(partial_sums_ref(x))); - } -} +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_sums_ref, partial_sums_round, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_sums_ref, partial_sums, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { REQUIRE(horiz_max_ref(zero) == 0); @@ -497,24 +488,12 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { REQUIRE_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); REQUIRE_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } - -TEST_CASE_METHOD(Fix, "Epu8::partial_max_gen", "[Epu8][039]") { - for (auto x : v) { - REQUIRE_THAT(partial_max_gen(x), Equals(partial_max_ref(x))); - } -} - -TEST_CASE_METHOD(Fix, "Epu8::partial_max_round", "[Epu8][040]") { - for (auto x : v) 
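// A minimal, self-contained sketch of the square-and-multiply idea that the
// power.hpp comment above describes: powering makes sense in any monoid, i.e.
// any type with an associative product and a neutral element (commutativity
// is not required), strings under concatenation being the example the comment
// mentions. The names below (monoid_pow, concat) are purely illustrative and
// are not part of power.hpp's interface.
#include <cstdint>
#include <iostream>
#include <string>

template <typename T, typename Op>
T monoid_pow(T x, uint64_t n, T unit, Op op) {
    T res = unit;              // neutral element of the monoid
    while (n != 0) {
        if (n & 1)
            res = op(res, x);  // pick up the current power of two
        x = op(x, x);          // repeated squaring: O(log n) products overall
        n >>= 1;
    }
    return res;
}

int main() {
    // Strings form a monoid: concatenation is associative, "" is neutral.
    auto concat = [](const std::string &a, const std::string &b) { return a + b; };
    std::cout << monoid_pow<std::string>("ab", 5, "", concat) << "\n";  // prints "ababababab"
}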
{ - REQUIRE_THAT(partial_max_round(x), Equals(partial_max_ref(x))); - } -} - -TEST_CASE_METHOD(Fix, "Epu8::partial_max", "[Epu8][041]") { - for (auto x : v) { - REQUIRE_THAT(partial_max(x), Equals(partial_max_ref(x))); - } -} +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_max_ref, partial_max_gen, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_max_ref, partial_max_round, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_max_ref, partial_max, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { REQUIRE(horiz_min_ref(zero) == 0); @@ -549,40 +528,19 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { REQUIRE_THAT(partial_min_ref(Pa1), Equals(Epu8({4, 2, 2}, 1))); REQUIRE_THAT(partial_min_ref(Pa2), Equals(Epu8({4, 2, 2}, 1))); REQUIRE_THAT(partial_min_ref(P51), Equals(Epu8({5}, 1))); - REQUIRE_THAT(partial_min_ref(Pv), Equals(Epu8( - { - 5, - 5, - 2, - 2, - 1, - 1, - 1, - 1, - }, - 0))); + REQUIRE_THAT(partial_min_ref(Pv), // clang-format off + Equals(Epu8({5, 5, 2, 2, 1, 1, 1, 1, }, 0))); + // clang-format on REQUIRE_THAT(partial_min_ref(P5), Equals(P5)); REQUIRE_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); REQUIRE_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } - -TEST_CASE_METHOD(Fix, "Epu8::partial_min_gen", "[Epu8][044]") { - for (auto x : v) { - REQUIRE_THAT(partial_min_gen(x), Equals(partial_min_ref(x))); - } -} - -TEST_CASE_METHOD(Fix, "Epu8::partial_min_round", "[Epu8][045]") { - for (auto x : v) { - REQUIRE_THAT(partial_min_round(x), Equals(partial_min_ref(x))); - } -} - -TEST_CASE_METHOD(Fix, "Epu8::partial_min", "[Epu8][046]") { - for (auto x : v) { - REQUIRE_THAT(partial_min(x), Equals(partial_min_ref(x))); - } -} +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_min_ref, partial_min_gen, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_min_ref, partial_min_round, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN( + Fix, Epu8, partial_min_ref, partial_min, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { REQUIRE_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); @@ -628,9 +586,13 @@ TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { } TEST_CASE("random_epu8", "[Epu8][050]") { - for (int i = 0; i < 10; i++) { - epu8 r = random_epu8(255); - REQUIRE_THAT(r, Equals(r)); + for (int bnd : {1, 10, 100, 255, 256}) { + for (int i = 0; i < 10; i++) { + epu8 r = random_epu8(bnd); + REQUIRE_THAT(r, Equals(r)); + for (auto v : as_array(r)) + REQUIRE(v < bnd); + } } } diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 58dc8800..2acf32a7 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -60,6 +60,13 @@ } \ } +#define TEST_AGREES_EPU8_FUN(fixture, type, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ + for (auto p : vct) { \ + REQUIRE_THAT(fun(p), Equals(ref(p))); \ + } \ + } + struct Equals : Catch::Matchers::MatcherGenericBase { Equals(HPCombi::epu8 v) : v(v) {} From 5a07f2f0e6657327eeeeee8a524ae171bf422fa9 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Sun, 29 Oct 2023 12:43:48 +0000 Subject: [PATCH 042/113] Formatting --- include/epu.hpp | 5 ++--- include/perm16.hpp | 4 ++-- include/perm16_impl.hpp | 2 +- tests/test_epu.cpp | 34 ++++++++++++++++------------------ 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/include/epu.hpp b/include/epu.hpp index 8ea52292..8e9fb289 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -17,21 +17,20 @@ #define HPCOMBI_EPU_HPP_INCLUDED #include -#include #include #include #include // less<>, equal_to<> #include #include +#include #include -#include // integer_sequences +#include // integer_sequences #include "vect_generic.hpp" #include "simde/x86/sse4.1.h" #include "simde/x86/sse4.2.h" - namespace HPCombi { /// Unsigned 8 bits int constant. diff --git a/include/perm16.hpp b/include/perm16.hpp index 8178a157..6a792619 100644 --- a/include/perm16.hpp +++ b/include/perm16.hpp @@ -77,9 +77,9 @@ struct alignas(16) PTransf16 : public Vect16 { epu8 image_mask_ref(bool complement = false) const; epu8 image_mask(bool complement = false) const { #ifdef SIMDE_X86_SSE4_2_NATIVE - return image_mask_cmpestrm(complement); + return image_mask_cmpestrm(complement); #else - return image_mask_ref(complement); + return image_mask_ref(complement); #endif } /** Returns a bit mask for the image of \c *this */ diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 78eaefe9..f157a32e 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -63,7 +63,7 @@ inline epu8 PTransf16::image_mask_ref(bool complement) const { epu8 res{}; for (auto x : *this) if (x != 0xFF) - res[x] = 0xFF; + res[x] = 0xFF; return complement ? static_cast(!res) : res; } diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index fc0c634b..d4af52b4 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -442,12 +442,12 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_sums_ref, partial_sums_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_sums_ref, partial_sums, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums_gen, v, + "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums_round, v, + "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums, v, + "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { REQUIRE(horiz_max_ref(zero) == 0); @@ -488,12 +488,11 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { REQUIRE_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); REQUIRE_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_max_ref, partial_max_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_max_ref, partial_max_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_max_ref, partial_max, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max_gen, v, + "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max_round, v, + "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { REQUIRE(horiz_min_ref(zero) == 0); @@ -535,12 +534,11 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", 
"[Epu8][043]") { REQUIRE_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); REQUIRE_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_min_ref, partial_min_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_min_ref, partial_min_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN( - Fix, Epu8, partial_min_ref, partial_min, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min_gen, v, + "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min_round, v, + "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { REQUIRE_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); From 0f76708ae16f0f0b8b92882d4d001cac5eacb6c8 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 13:02:54 +0000 Subject: [PATCH 043/113] Add missing include for sstream --- include/epu_impl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index b25ac9b5..136915c6 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "vect_generic.hpp" From 967de58503ff5ff7d7d150b164015f63ea9673e7 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 13:09:19 +0000 Subject: [PATCH 044/113] Remove redundant cmake stuff --- CMakeLists.txt | 55 +++----------------------------------------------- 1 file changed, 3 insertions(+), 52 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd911e33..1d3f535e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ #****************************************************************************# -# Copyright (C) 2016 Florent Hivert , # +# Copyright (C) 2016-2023 Florent Hivert , # # # # Distributed under the terms of the GNU General Public License (GPL) # # # @@ -10,7 +10,7 @@ # # # The full text of the GPL is available at: # # # -# http://www.gnu.org/licenses/ # +# http://www.gnu.org/licenses/ # #****************************************************************************# # Require at least 3.8 so that we can set cmake_policy CMP0067 below @@ -32,7 +32,6 @@ message(STATUS "**** Build type = ${CMAKE_BUILD_TYPE}") # set(CMAKE_BUILD_TYPE Debug) # set(CMAKE_VERBOSE_MAKEFILE 1) - ################################ # General compiler configuration set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -50,56 +49,11 @@ include(CheckIncludeFileCXX) include(CheckCXXSourceCompiles) include(CheckCXXCompilerFlag) -## Check for buggy constexpr support G++ 5.0 -# check_cxx_source_compiles( -# " -# using T = int; -# constexpr int exec(T f()) { return f(); } -# constexpr int foo() { return 1; } -# static_assert(exec(foo) == 1, \"Failed exec\"); -# int main() {} -# " -# HPCOMBI_CONSTEXPR_FUN_ARGS) - check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_ARCH_NATIVE) check_cxx_compiler_flag('-mtune=native' HPCOMBI_HAVE_FLAG_TUNE_NATIVE) # TODO check for -funroll-loops + -flax-vector-constexpr # TODO only check for and set the flags required to make HPCombi work - -#check_cxx_compiler_flag('-mavx2' HPCOMBI_HAVE_FLAG_AVX2) -#check_cxx_compiler_flag('-mavx512bw' HPCOMBI_HAVE_FLAG_AVX512BW) - -## Check for Intel intrisics -# check_include_file_cxx("x86intrin.h" HPCOMBI_HAVE_X86INTRIN) -# if (NOT ${HPCOMBI_HAVE_FLAG_AVX} OR NOT ${HPCOMBI_HAVE_X86INTRIN}) -# message(FATAL_ERROR "No SSE/AVX compiler intrinsics") -# endif() -# file(READ ${CMAKE_SOURCE_DIR}/list_intrin.txt 
hpcombi_intrinsics) -# string(REPLACE ";" "|" hpcombi_intrinsics "${hpcombi_intrinsics}") -# string(REPLACE "\n" ";" hpcombi_intrinsics "${hpcombi_intrinsics}") -# foreach (intrin ${hpcombi_intrinsics}) -# if ("${intrin}" MATCHES "^#" ) # Comment -# continue() -# endif() -# string(REPLACE "|" ";" intrin "${intrin}") -# list(GET intrin 0 intrin_name) -# list(GET intrin 1 intrin_params) -# set(CMAKE_REQUIRED_FLAGS "-mavx") -# check_cxx_source_compiles(" -# #include -# int main() { -# ${intrin_name}(${intrin_params}); -# return 0; -# } -# " -# "HPCOMBI_HAVE_${intrin_name}" -# ) -# if (NOT "${HPCOMBI_HAVE_${intrin_name}}") -# message(FATAL_ERROR "Intrinsic ${intrin_name} not supported by compiler") -# endif() -# endforeach() - add_compile_options(-funroll-loops -flax-vector-conversions) if (HPCOMBI_HAVE_FLAG_ARCH_NATIVE) add_compile_options(-march=native) @@ -130,10 +84,8 @@ IF (BUILD_TESTING) ENDIF(BUILD_TESTING) ##################### -# Is this needed ? -configure_file(${CMAKE_SOURCE_DIR}/.VERSION.in ${CMAKE_BINARY_DIR}/.VERSION) -set(AUTOGENERATED_WARNING "WARNING: THIS IS A CMAKE AUTO-GENERATED FILE.") +configure_file(${CMAKE_SOURCE_DIR}/.VERSION.in ${CMAKE_BINARY_DIR}/.VERSION) #################### # Installation @@ -156,7 +108,6 @@ install ( # configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hpcombi.pc.in # ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc @ONLY) - ################# # Packing stuff # From fa8088e5154d93a940b2a077dd005f5b42b6ffa8 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 13:10:11 +0000 Subject: [PATCH 045/113] REQUIRE -> CHECK --- tests/test_bmat8.cpp | 190 ++++++------ tests/test_epu.cpp | 628 ++++++++++++++++++++-------------------- tests/test_main.hpp | 12 +- tests/test_perm16.cpp | 386 ++++++++++++------------ tests/test_perm_all.cpp | 224 +++++++------- 5 files changed, 720 insertions(+), 720 deletions(-) diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp index 9e04f06d..4f424d51 100644 --- a/tests/test_bmat8.cpp +++ b/tests/test_bmat8.cpp @@ -80,8 +80,8 @@ struct BMat8Fixture { //****************************************************************************// TEST_CASE_METHOD(BMat8Fixture, "BMat8::one", "[BMat8][000]") { - REQUIRE(BMat8::one(0) == zero); - REQUIRE(BMat8::one(2) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, + CHECK(BMat8::one(0) == zero); + CHECK(BMat8::one(2) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, @@ -89,7 +89,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::one", "[BMat8][000]") { {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}})); - REQUIRE(BMat8::one(5) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, + CHECK(BMat8::one(5) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0}, @@ -97,17 +97,17 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::one", "[BMat8][000]") { {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}})); - REQUIRE(BMat8::one(8) == BMat8::one()); + CHECK(BMat8::one(8) == BMat8::one()); } TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose", "[BMat8][001]") { - REQUIRE(zero.transpose() == zero); - REQUIRE(bm2.transpose() == bm2t); - REQUIRE(bm3.transpose() == bm3t); + CHECK(zero.transpose() == zero); + CHECK(bm2.transpose() == bm2t); + CHECK(bm3.transpose() == bm3t); for (auto m : BMlist) { - REQUIRE(m.transpose().transpose() == m); + CHECK(m.transpose().transpose() == m); } } @@ -122,29 +122,29 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose2", "[BMat8][004]") { for 
(auto b : BMlist) { BMat8 at = a, bt = b; BMat8::transpose2(at, bt); - REQUIRE(at == a.transpose()); - REQUIRE(bt == b.transpose()); + CHECK(at == a.transpose()); + CHECK(bt == b.transpose()); } } } TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator*", "[BMat8][005]") { BMat8 tmp = bm * bm1; - REQUIRE(tmp == bmm1); - REQUIRE(tmp == bm * bm1); + CHECK(tmp == bmm1); + CHECK(tmp == bm * bm1); for (auto b : BMlist) { - REQUIRE(zero * b == zero); - REQUIRE(b * zero == zero); - REQUIRE(b * b.one() == b); - REQUIRE(b.one() * b == b); - REQUIRE((b * b) * (b * b) == b * b * b * b); + CHECK(zero * b == zero); + CHECK(b * zero == zero); + CHECK(b * b.one() == b); + CHECK(b.one() * b == b); + CHECK((b * b) * (b * b) == b * b * b * b); } for (auto b1 : BMlist) { for (auto b2 : BMlist) { for (auto b3 : BMlist) { - REQUIRE((b1 * b2) * b3 == b1 * (b2 * b3)); + CHECK((b1 * b2) * b3 == b1 * (b2 * b3)); } } } @@ -155,8 +155,8 @@ TEST_CASE("BMat8::random", "[BMat8][006]") { BMat8 bm = BMat8::random(d); for (size_t i = d + 1; i < 8; ++i) { for (size_t j = 0; j < 8; ++j) { - REQUIRE(bm(i, j) == 0); - REQUIRE(bm(j, i) == 0); + CHECK(bm(i, j) == 0); + CHECK(bm(j, i) == 0); } } } @@ -170,7 +170,7 @@ TEST_CASE("BMat8::operator()", "[BMat8][007]") { BMat8 bm(mat); for (size_t i = 0; i < 7; ++i) { for (size_t j = 0; j < 7; ++j) { - REQUIRE(static_cast(bm(i, j)) == mat[i][j]); + CHECK(static_cast(bm(i, j)) == mat[i][j]); } } } @@ -178,7 +178,7 @@ TEST_CASE("BMat8::operator()", "[BMat8][007]") { TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator<<", "[BMat8][008]") { std::ostringstream oss; oss << bm3; - REQUIRE(oss.str() == "00010011\n" + CHECK(oss.str() == "00010011\n" "11111101\n" "01111101\n" "11011111\n" @@ -196,24 +196,24 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::set", "[BMat8][009]") { BMat8 bs; bs = bm; bs.set(0, 0, 1); - REQUIRE(bs != bm); + CHECK(bs != bm); bs = bm; bs.set(0, 0, 0); - REQUIRE(bs == bm); + CHECK(bs == bm); bs = bm; bs.set(2, 4, 1); - REQUIRE(bs != bm); - REQUIRE(bs == bm3); + CHECK(bs != bm); + CHECK(bs == bm3); for (size_t i = 0; i < 8; ++i) for (size_t j = 0; j < 8; ++j) bs.set(i, j, true); - REQUIRE(bs == ones); + CHECK(bs == ones); for (size_t i = 0; i < 8; ++i) for (size_t j = 0; j < 8; ++j) bs.set(i, j, false); - REQUIRE(bs == zero); + CHECK(bs == zero); } TEST_CASE("BMat8::row_space_basis", "[BMat8][010]") { @@ -235,7 +235,7 @@ TEST_CASE("BMat8::row_space_basis", "[BMat8][010]") { {0, 0, 1, 0, 0, 1, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 1}}); - REQUIRE(bm.row_space_basis() == bm2.row_space_basis()); + CHECK(bm.row_space_basis() == bm2.row_space_basis()); BMat8 bm3({{1, 1, 1, 1, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 0, 1}, @@ -255,21 +255,21 @@ TEST_CASE("BMat8::row_space_basis", "[BMat8][010]") { {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}}); - REQUIRE(bm3.row_space_basis() == bm4); - REQUIRE(bm4.row_space_basis() == bm4); + CHECK(bm3.row_space_basis() == bm4); + CHECK(bm4.row_space_basis() == bm4); BMat8 bm5(0xff00000000000000); uint64_t data = 0xffffffffffffffff; for (size_t i = 0; i < 7; ++i) { - REQUIRE(BMat8(data).row_space_basis() == bm5); + CHECK(BMat8(data).row_space_basis() == bm5); data = data >> 8; } for (size_t i = 0; i < 1000; ++i) { bm = BMat8::random(); - REQUIRE(bm.row_space_basis().row_space_basis() == bm.row_space_basis()); + CHECK(bm.row_space_basis().row_space_basis() == bm.row_space_basis()); } } @@ -292,7 +292,7 @@ TEST_CASE("BMat8::col_space_basis", "[BMat8][011]") { {1, 0, 1, 0, 0, 0, 0, 1}, {0, 0, 1, 1, 1, 0, 1, 1}}); - REQUIRE(bm.col_space_basis() == bm2); + 
CHECK(bm.col_space_basis() == bm2); BMat8 bm3({{1, 1, 1, 1, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 0, 1}, @@ -312,7 +312,7 @@ TEST_CASE("BMat8::col_space_basis", "[BMat8][011]") { {0, 0, 0, 0, 1, 0, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}}); - REQUIRE(bm3.col_space_basis() == bm4); + CHECK(bm3.col_space_basis() == bm4); uint64_t col = 0x8080808080808080; BMat8 bm5(col); @@ -320,28 +320,28 @@ TEST_CASE("BMat8::col_space_basis", "[BMat8][011]") { uint64_t data = 0xffffffffffffffff; for (size_t i = 0; i < 7; ++i) { - REQUIRE(BMat8(data).col_space_basis() == bm5); + CHECK(BMat8(data).col_space_basis() == bm5); data &= ~(col >> i); } for (size_t i = 0; i < 1000; ++i) { bm = BMat8::random(); - REQUIRE(bm.col_space_basis().col_space_basis() == bm.col_space_basis()); + CHECK(bm.col_space_basis().col_space_basis() == bm.col_space_basis()); } } TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][012]") { - REQUIRE(zero.row_space_size() == 1); - REQUIRE(one1.row_space_size() == 2); - REQUIRE(one2.row_space_size() == 4); - REQUIRE(BMat8::one().row_space_size() == 256); - REQUIRE(bm.row_space_size() == 22); - REQUIRE(bm1.row_space_size() == 31); - REQUIRE(bm2.row_space_size() == 3); - REQUIRE(bm2t.row_space_size() == 3); - REQUIRE(bm3.row_space_size() == 21); - REQUIRE(bm3t.row_space_size() == 21); - REQUIRE(bmm1.row_space_size() == 6); + CHECK(zero.row_space_size() == 1); + CHECK(one1.row_space_size() == 2); + CHECK(one2.row_space_size() == 4); + CHECK(BMat8::one().row_space_size() == 256); + CHECK(bm.row_space_size() == 22); + CHECK(bm1.row_space_size() == 31); + CHECK(bm2.row_space_size() == 3); + CHECK(bm2t.row_space_size() == 3); + CHECK(bm3.row_space_size() == 21); + CHECK(bm3t.row_space_size() == 21); + CHECK(bmm1.row_space_size() == 6); } TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size, BMlist, @@ -354,23 +354,23 @@ TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_bitset, BMlist, "[BMat8][016]"); TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") { - REQUIRE(zero.row_space_included(one1)); - REQUIRE_FALSE(one1.row_space_included(zero)); + CHECK(zero.row_space_included(one1)); + CHECK_FALSE(one1.row_space_included(zero)); BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); - REQUIRE(m1.row_space_included(m2)); - REQUIRE(m2.row_space_included(m1)); + CHECK(m1.row_space_included(m2)); + CHECK(m2.row_space_included(m1)); BMat8 m3({{0, 0, 1}, {1, 0, 1}, {1, 1, 0}}); - REQUIRE(m1.row_space_included(m3)); - REQUIRE(m2.row_space_included(m3)); - REQUIRE_FALSE(m3.row_space_included(m1)); - REQUIRE_FALSE(m3.row_space_included(m1)); - - REQUIRE(m1.row_space_included(BMat8::one())); - REQUIRE(m2.row_space_included(BMat8::one())); - REQUIRE(m3.row_space_included(BMat8::one())); + CHECK(m1.row_space_included(m3)); + CHECK(m2.row_space_included(m3)); + CHECK_FALSE(m3.row_space_included(m1)); + CHECK_FALSE(m3.row_space_included(m1)); + + CHECK(m1.row_space_included(BMat8::one())); + CHECK(m2.row_space_included(BMat8::one())); + CHECK(m3.row_space_included(BMat8::one())); } TEST_AGREES2(BMat8Fixture, BMat8, row_space_included, row_space_included_ref, @@ -385,16 +385,16 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") { BMat8 b1 = BMat8::one(); auto res = BMat8::row_space_included2(a0, b0, a1, b1); - REQUIRE(res.first == a0.row_space_included(b0)); - REQUIRE(res.second == a1.row_space_included(b1)); + CHECK(res.first == a0.row_space_included(b0)); + CHECK(res.second == 
a1.row_space_included(b1)); for (auto a0 : BMlist) { for (auto b0 : BMlist) { for (auto a1 : BMlist) { for (auto b1 : BMlist) { auto res = BMat8::row_space_included2(a0, b0, a1, b1); - REQUIRE(res.first == a0.row_space_included(b0)); - REQUIRE(res.second == a1.row_space_included(b1)); + CHECK(res.first == a0.row_space_included(b0)); + CHECK(res.second == a1.row_space_included(b1)); } } } @@ -402,10 +402,10 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") { } TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") { - REQUIRE(bm2.row_permuted(Perm16({1, 0})) == BMat8({{0, 1}, {1, 1}})); - REQUIRE(bm2.row_permuted(Perm16({2, 1, 0})) == + CHECK(bm2.row_permuted(Perm16({1, 0})) == BMat8({{0, 1}, {1, 1}})); + CHECK(bm2.row_permuted(Perm16({2, 1, 0})) == BMat8({{0, 0, 0}, {0, 1, 0}, {1, 1, 0}})); - REQUIRE(bm.row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == + CHECK(bm.row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == BMat8({{1, 1, 0, 0, 0, 0, 0, 1}, {1, 1, 0, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 1, 1, 0, 1}, @@ -414,7 +414,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") { {0, 0, 0, 1, 0, 0, 1, 1}, {0, 1, 0, 0, 0, 0, 1, 1}, {0, 1, 1, 1, 1, 0, 1, 0}})); - REQUIRE(BMat8::one().row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == + CHECK(BMat8::one().row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0}, @@ -426,10 +426,10 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") { } TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][022]") { - REQUIRE(bm2.col_permuted(Perm16({1, 0})) == BMat8({{1, 1}, {1, 0}})); - REQUIRE(bm2.col_permuted(Perm16({2, 1, 0})) == + CHECK(bm2.col_permuted(Perm16({1, 0})) == BMat8({{1, 1}, {1, 0}})); + CHECK(bm2.col_permuted(Perm16({2, 1, 0})) == BMat8({{0, 1, 1}, {0, 1, 0}, {0, 0, 0}})); - REQUIRE(bm.col_permuted(Perm16({5, 3, 1, 4, 2, 0})) == + CHECK(bm.col_permuted(Perm16({5, 3, 1, 4, 2, 0})) == BMat8({{0, 1, 0, 0, 0, 0, 1, 1}, {1, 1, 1, 1, 1, 1, 0, 1}, {1, 1, 1, 0, 1, 0, 0, 1}, @@ -438,7 +438,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][022]") { {0, 0, 1, 0, 0, 1, 0, 1}, {0, 0, 1, 0, 0, 0, 1, 1}, {0, 1, 1, 1, 1, 0, 1, 0}})); - REQUIRE(BMat8::one().col_permuted(Perm16({4, 1, 3, 0, 2, 6, 5})) == + CHECK(BMat8::one().col_permuted(Perm16({4, 1, 3, 0, 2, 6, 5})) == BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0}, @@ -450,7 +450,7 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][022]") { } TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") { - REQUIRE(BMat8::row_permutation_matrix(Perm16({1, 0})) == + CHECK(BMat8::row_permutation_matrix(Perm16({1, 0})) == BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}, @@ -459,7 +459,7 @@ TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") { {0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0, 1}})); - REQUIRE(BMat8::row_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == + CHECK(BMat8::row_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0}, @@ -468,7 +468,7 @@ TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") { {0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0, 1}})); - REQUIRE(BMat8::row_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == + CHECK(BMat8::row_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 
0, 1, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0}, @@ -480,7 +480,7 @@ TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") { } TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") { - REQUIRE(BMat8::col_permutation_matrix(Perm16({1, 0})) == + CHECK(BMat8::col_permutation_matrix(Perm16({1, 0})) == BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}, @@ -489,7 +489,7 @@ TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") { {0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0, 1}})); - REQUIRE(BMat8::col_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == + CHECK(BMat8::col_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0}, @@ -498,7 +498,7 @@ TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") { {0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 0, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 0, 0, 1}})); - REQUIRE(BMat8::col_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == + CHECK(BMat8::col_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 1, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0}, @@ -510,43 +510,43 @@ TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") { } TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") { - REQUIRE(zero.nr_rows() == 0); - REQUIRE(one1.nr_rows() == 1); - REQUIRE(one2.nr_rows() == 2); - REQUIRE(bm.nr_rows() == 8); - REQUIRE(BMat8({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2); + CHECK(zero.nr_rows() == 0); + CHECK(one1.nr_rows() == 1); + CHECK(one2.nr_rows() == 2); + CHECK(bm.nr_rows() == 8); + CHECK(BMat8({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2); } TEST_CASE("BMat8::right_perm_action_on_basis_ref", "[BMat8][026]") { BMat8 m1({{1, 1, 0}, {1, 0, 1}, {0, 0, 0}}); BMat8 m2({{0, 0, 0}, {1, 0, 1}, {1, 1, 0}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1, 0})); + CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0})); + CHECK(m1.right_perm_action_on_basis(m2) == Perm16({1, 0})); m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); m2 = BMat8({{1, 0, 0, 0}, {0, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); + CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); + CHECK(m1.right_perm_action_on_basis(m2) == Perm16::one()); m1 = BMat8({{1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}, {0, 0, 0, 0}}); m2 = BMat8({{0, 0, 0, 0}, {1, 1, 0, 1}, {1, 0, 1, 0}, {0, 0, 0, 1}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16::one()); + CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16::one()); + CHECK(m1.right_perm_action_on_basis(m2) == Perm16::one()); m1 = BMat8({{0, 1, 0, 0}, {0, 0, 1, 0}, {1, 0, 0, 1}, {0, 0, 0, 0}}); m2 = BMat8({{1, 0, 0, 1}, {0, 0, 1, 0}, {0, 1, 0, 0}, {0, 0, 0, 1}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({1, 0})); + CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16({1, 0})); + CHECK(m1.right_perm_action_on_basis(m2) == Perm16({1, 0})); m1 = BMat8({{0, 0, 0, 1}, {1, 0, 0, 0}, {0, 0, 1, 0}, {0, 1, 0, 0}}); m2 = BMat8({{0, 1, 0, 0}, {0, 0, 1, 0}, {1, 0, 0, 0}, {0, 0, 0, 1}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({0, 2, 3, 1})); - 
REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({0, 2, 3, 1})); + CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16({0, 2, 3, 1})); + CHECK(m1.right_perm_action_on_basis(m2) == Perm16({0, 2, 3, 1})); m1 = BMat8({{0, 0, 0, 1}, {0, 0, 1, 0}, {0, 1, 0, 0}, {1, 0, 0, 0}}); m2 = BMat8({{0, 1, 0, 0}, {0, 0, 0, 1}, {1, 0, 0, 0}, {0, 0, 1, 0}}); - REQUIRE(m1.right_perm_action_on_basis_ref(m2) == Perm16({2, 0, 3, 1})); - REQUIRE(m1.right_perm_action_on_basis(m2) == Perm16({2, 0, 3, 1})); + CHECK(m1.right_perm_action_on_basis_ref(m2) == Perm16({2, 0, 3, 1})); + CHECK(m1.right_perm_action_on_basis(m2) == Perm16({2, 0, 3, 1})); } } // namespace HPCombi diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index d4af52b4..ef32c4a5 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -48,29 +48,29 @@ struct Fix { }; TEST_CASE_METHOD(Fix, "Epu8::first_diff_ref", "[Epu8][000]") { - REQUIRE(first_diff_ref(Pc, Pc) == 16); - REQUIRE(first_diff_ref(zero, P01) == 1); - REQUIRE(first_diff_ref(zero, P10) == 0); - REQUIRE(first_diff_ref(zero, P01, 1) == 16); - REQUIRE(first_diff_ref(zero, P01, 2) == 1); - REQUIRE(first_diff_ref(Pa1, Pa2, 2) == 16); - REQUIRE(first_diff_ref(Pa1, Pa2, 4) == 16); - REQUIRE(first_diff_ref(Pa1, Pa2, 5) == 16); - REQUIRE(first_diff_ref(Pa1, Pa2, 6) == 5); - REQUIRE(first_diff_ref(Pa1, Pa2, 7) == 5); - REQUIRE(first_diff_ref(Pa1, Pa2) == 5); - REQUIRE(first_diff(Pv, Pw) == 3); + CHECK(first_diff_ref(Pc, Pc) == 16); + CHECK(first_diff_ref(zero, P01) == 1); + CHECK(first_diff_ref(zero, P10) == 0); + CHECK(first_diff_ref(zero, P01, 1) == 16); + CHECK(first_diff_ref(zero, P01, 2) == 1); + CHECK(first_diff_ref(Pa1, Pa2, 2) == 16); + CHECK(first_diff_ref(Pa1, Pa2, 4) == 16); + CHECK(first_diff_ref(Pa1, Pa2, 5) == 16); + CHECK(first_diff_ref(Pa1, Pa2, 6) == 5); + CHECK(first_diff_ref(Pa1, Pa2, 7) == 5); + CHECK(first_diff_ref(Pa1, Pa2) == 5); + CHECK(first_diff(Pv, Pw) == 3); for (int i = 0; i < 16; i++) - REQUIRE(first_diff(Pv, Pw, i) == (i <= 3 ? 16 : 3)); + CHECK(first_diff(Pv, Pw, i) == (i <= 3 ? 
16 : 3)); } #ifdef SIMDE_X86_SSE4_2_NATIVE TEST_CASE_METHOD(Fix, "Epu8::first_diff_cmpstr", "[Epu8][001]") { for (auto x : v) { for (auto y : v) { - REQUIRE(first_diff_cmpstr(x, y) == first_diff_ref(x, y)); + CHECK(first_diff_cmpstr(x, y) == first_diff_ref(x, y)); for (int i = 0; i < 17; i++) - REQUIRE(first_diff_cmpstr(x, y, i) == first_diff_ref(x, y, i)); + CHECK(first_diff_cmpstr(x, y, i) == first_diff_ref(x, y, i)); } } } @@ -78,41 +78,41 @@ TEST_CASE_METHOD(Fix, "Epu8::first_diff_cmpstr", "[Epu8][001]") { TEST_CASE_METHOD(Fix, "Epu8::first_diff_mask", "[Epu8][002]") { for (auto x : v) { for (auto y : v) { - REQUIRE(first_diff_mask(x, y) == first_diff_ref(x, y)); + CHECK(first_diff_mask(x, y) == first_diff_ref(x, y)); for (int i = 0; i < 17; i++) - REQUIRE(first_diff_mask(x, y, i) == first_diff_ref(x, y, i)); + CHECK(first_diff_mask(x, y, i) == first_diff_ref(x, y, i)); } } } TEST_CASE_METHOD(Fix, "Epu8::last_diff_ref", "[Epu8][003]") { - REQUIRE(last_diff_ref(Pc, Pc) == 16); - REQUIRE(last_diff_ref(zero, P01) == 1); - REQUIRE(last_diff_ref(zero, P10) == 0); - REQUIRE(last_diff_ref(zero, P01, 1) == 16); - REQUIRE(last_diff_ref(zero, P01, 2) == 1); - REQUIRE(last_diff_ref(P1, Pa1) == 9); - REQUIRE(last_diff_ref(P1, Pa1, 12) == 9); - REQUIRE(last_diff_ref(P1, Pa1, 9) == 8); - REQUIRE(last_diff_ref(Pa1, Pa2, 2) == 16); - REQUIRE(last_diff_ref(Pa1, Pa2, 4) == 16); - REQUIRE(last_diff_ref(Pa1, Pa2, 5) == 16); - REQUIRE(last_diff_ref(Pa1, Pa2, 6) == 5); - REQUIRE(last_diff_ref(Pa1, Pa2, 7) == 5); - REQUIRE(last_diff_ref(Pa1, Pa2) == 5); + CHECK(last_diff_ref(Pc, Pc) == 16); + CHECK(last_diff_ref(zero, P01) == 1); + CHECK(last_diff_ref(zero, P10) == 0); + CHECK(last_diff_ref(zero, P01, 1) == 16); + CHECK(last_diff_ref(zero, P01, 2) == 1); + CHECK(last_diff_ref(P1, Pa1) == 9); + CHECK(last_diff_ref(P1, Pa1, 12) == 9); + CHECK(last_diff_ref(P1, Pa1, 9) == 8); + CHECK(last_diff_ref(Pa1, Pa2, 2) == 16); + CHECK(last_diff_ref(Pa1, Pa2, 4) == 16); + CHECK(last_diff_ref(Pa1, Pa2, 5) == 16); + CHECK(last_diff_ref(Pa1, Pa2, 6) == 5); + CHECK(last_diff_ref(Pa1, Pa2, 7) == 5); + CHECK(last_diff_ref(Pa1, Pa2) == 5); const std::array res{ {16, 16, 16, 16, 3, 3, 3, 3, 3, 3, 9, 10, 11, 11, 11, 11, 11}}; for (int i = 0; i <= 16; i++) { - REQUIRE(last_diff_ref(Pv, Pw, i) == res[i]); + CHECK(last_diff_ref(Pv, Pw, i) == res[i]); } } #ifdef SIMDE_X86_SSE4_2_NATIVE TEST_CASE_METHOD(Fix, "Epu8::last_diff_cmpstr", "[Epu8][004]") { for (auto x : v) { for (auto y : v) { - REQUIRE(last_diff_cmpstr(x, y) == last_diff_ref(x, y)); + CHECK(last_diff_cmpstr(x, y) == last_diff_ref(x, y)); for (int i = 0; i < 17; i++) - REQUIRE(last_diff_cmpstr(x, y, i) == last_diff_ref(x, y, i)); + CHECK(last_diff_cmpstr(x, y, i) == last_diff_ref(x, y, i)); } } } @@ -121,25 +121,25 @@ TEST_CASE_METHOD(Fix, "Epu8::last_diff_cmpstr", "[Epu8][004]") { TEST_CASE_METHOD(Fix, "Epu8::last_diff_mask", "[Epu8][005]") { for (auto x : v) { for (auto y : v) { - REQUIRE(last_diff_mask(x, y) == last_diff_ref(x, y)); + CHECK(last_diff_mask(x, y) == last_diff_ref(x, y)); for (int i = 0; i < 17; i++) - REQUIRE(last_diff_mask(x, y, i) == last_diff_ref(x, y, i)); + CHECK(last_diff_mask(x, y, i) == last_diff_ref(x, y, i)); } } } TEST_CASE_METHOD(Fix, "Epu8::is_all_zero", "[Epu8][006]") { - REQUIRE(is_all_zero(zero)); + CHECK(is_all_zero(zero)); for (size_t i = 1; i < v.size(); i++) { - REQUIRE(!is_all_zero(v[i])); + CHECK(!is_all_zero(v[i])); } } TEST_CASE_METHOD(Fix, "Epu8::is_all_one", "[Epu8][007]") { for (size_t i = 0; i < v.size(); i++) { - 
REQUIRE(!is_all_one(v[i])); + CHECK(!is_all_one(v[i])); } - REQUIRE(is_all_one(Epu8(0xFF))); + CHECK(is_all_one(Epu8(0xFF))); } TEST_CASE_METHOD(Fix, "Epu8::equal", "[Epu8][008]") { @@ -148,15 +148,15 @@ TEST_CASE_METHOD(Fix, "Epu8::equal", "[Epu8][008]") { for (size_t j = 0; j < v.size(); j++) { epu8 b = v[j]; if (i == j) { - REQUIRE(equal(a, b)); - REQUIRE(!not_equal(a, b)); - REQUIRE(std::equal_to()(a, b)); - REQUIRE(!std::not_equal_to()(a, b)); + CHECK(equal(a, b)); + CHECK(!not_equal(a, b)); + CHECK(std::equal_to()(a, b)); + CHECK(!std::not_equal_to()(a, b)); } else { - REQUIRE(!equal(a, b)); - REQUIRE(not_equal(a, b)); - REQUIRE(std::not_equal_to()(a, b)); - REQUIRE(!std::equal_to()(a, b)); + CHECK(!equal(a, b)); + CHECK(not_equal(a, b)); + CHECK(std::not_equal_to()(a, b)); + CHECK(!std::equal_to()(a, b)); } } } @@ -166,9 +166,9 @@ TEST_CASE_METHOD(Fix, "Epu8::not_equal", "[Epu8][009]") { for (size_t i = 0; i < v.size(); i++) { for (size_t j = 0; j < v.size(); j++) { if (i == j) { - REQUIRE(!not_equal(v[i], v[j])); + CHECK(!not_equal(v[i], v[j])); } else { - REQUIRE(not_equal(v[i], v[j])); + CHECK(not_equal(v[i], v[j])); } } } @@ -178,48 +178,48 @@ TEST_CASE_METHOD(Fix, "Epu8::less", "[Epu8][010]") { for (size_t i = 0; i < v.size(); i++) { for (size_t j = 0; j < v.size(); j++) { if (i < j) { - REQUIRE(less(v[i], v[j])); + CHECK(less(v[i], v[j])); } else { - REQUIRE(!less(v[i], v[j])); + CHECK(!less(v[i], v[j])); } } } } TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { - REQUIRE_THAT( + CHECK_THAT( permuted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8{2, 3, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE_THAT( + CHECK_THAT( permuted(epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8{3, 2, 1, 5, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE_THAT( + CHECK_THAT( permuted(epu8{3, 2, 5, 1, 4, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}), Equals(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { - REQUIRE_THAT(shifted_left(P01), Equals(P10)); - REQUIRE_THAT(shifted_left(P112), + CHECK_THAT(shifted_left(P01), Equals(P10)); + CHECK_THAT(shifted_left(P112), Equals(epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); - REQUIRE_THAT(shifted_left(Pv), Equals(epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, + CHECK_THAT(shifted_left(Pv), Equals(epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15, 0})); } TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][013]") { - REQUIRE_THAT(shifted_right(P10), Equals(P01)); - REQUIRE_THAT(shifted_right(P112), Equals(Epu8({0, 1, 1}, 2))); - REQUIRE_THAT(shifted_right(Pv), Equals(epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, + CHECK_THAT(shifted_right(P10), Equals(P01)); + CHECK_THAT(shifted_right(P112), Equals(Epu8({0, 1, 1}, 2))); + CHECK_THAT(shifted_right(Pv), Equals(epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14})); } TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][014]") { - REQUIRE_THAT(reverted(epu8id), Equals(epu8rev)); + CHECK_THAT(reverted(epu8id), Equals(epu8rev)); for (auto x : v) { - REQUIRE_THAT(x, Equals(reverted(reverted(x)))); + CHECK_THAT(x, Equals(reverted(reverted(x)))); } } @@ -227,179 +227,179 @@ TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][015]") { epu8 x = Epu8({4, 2, 5, 1, 2, 7, 7, 3, 4, 2}, 1); auto &refx = 
as_array(x); refx[2] = 42; - REQUIRE_THAT(x, Equals(Epu8({4, 2, 42, 1, 2, 7, 7, 3, 4, 2}, 1))); + CHECK_THAT(x, Equals(Epu8({4, 2, 42, 1, 2, 7, 7, 3, 4, 2}, 1))); std::fill(refx.begin() + 4, refx.end(), 3); - REQUIRE_THAT(x, Equals(Epu8({4, 2, 42, 1}, 3))); - REQUIRE(av == as_array(Pv)); + CHECK_THAT(x, Equals(Epu8({4, 2, 42, 1}, 3))); + CHECK(av == as_array(Pv)); } TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][016]") { for (auto x : v) { - REQUIRE_THAT(x, Equals(from_array(as_array(x)))); + CHECK_THAT(x, Equals(from_array(as_array(x)))); } - REQUIRE_THAT(Pv, Equals(from_array(av))); + CHECK_THAT(Pv, Equals(from_array(av))); } TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { - REQUIRE(is_sorted(epu8id)); - REQUIRE( + CHECK(is_sorted(epu8id)); + CHECK( is_sorted(epu8{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE(is_sorted(Epu8({0, 1}, 2))); - REQUIRE(is_sorted(Epu8({0}, 1))); - REQUIRE(is_sorted(Epu8({}, 5))); - REQUIRE( + CHECK(is_sorted(Epu8({0, 1}, 2))); + CHECK(is_sorted(Epu8({0}, 1))); + CHECK(is_sorted(Epu8({}, 5))); + CHECK( !is_sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE(!is_sorted(Epu8({0, 2}, 1))); - REQUIRE(!is_sorted(Epu8({0, 0, 2}, 1))); - REQUIRE(!is_sorted(Epu8({6}, 5))); + CHECK(!is_sorted(Epu8({0, 2}, 1))); + CHECK(!is_sorted(Epu8({0, 0, 2}, 1))); + CHECK(!is_sorted(Epu8({6}, 5))); epu8 x = epu8id; - REQUIRE(is_sorted(x)); + CHECK(is_sorted(x)); auto &refx = as_array(x); while (std::next_permutation(refx.begin(), refx.begin() + 9)) { - REQUIRE(!is_sorted(x)); + CHECK(!is_sorted(x)); } x = epu8id; while (std::next_permutation(refx.begin() + 8, refx.begin() + 16)) { - REQUIRE(!is_sorted(x)); + CHECK(!is_sorted(x)); } x = sorted(Pa1); - REQUIRE(is_sorted(x)); + CHECK(is_sorted(x)); while (std::next_permutation(refx.begin(), refx.begin() + 14)) { - REQUIRE(!is_sorted(x)); + CHECK(!is_sorted(x)); } } TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { - REQUIRE_THAT( + CHECK_THAT( sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8id)); for (auto &x : v) { - REQUIRE(is_sorted(sorted(x))); + CHECK(is_sorted(sorted(x))); } epu8 x = epu8id; - REQUIRE(is_sorted(x)); + CHECK(is_sorted(x)); auto &refx = as_array(x); do { - REQUIRE(is_sorted(sorted(x))); + CHECK(is_sorted(sorted(x))); } while (std::next_permutation(refx.begin(), refx.begin() + 9)); } TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { - REQUIRE_THAT( + CHECK_THAT( revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8rev)); for (auto &x : v) { - REQUIRE(is_sorted(reverted(revsorted(x)))); + CHECK(is_sorted(reverted(revsorted(x)))); } epu8 x = epu8id; - REQUIRE(is_sorted(x)); + CHECK(is_sorted(x)); auto &refx = as_array(x); do { - REQUIRE(is_sorted(reverted(revsorted(x)))); + CHECK(is_sorted(reverted(revsorted(x)))); } while (std::next_permutation(refx.begin(), refx.begin() + 9)); } TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - REQUIRE_THAT(sort_perm(ve), Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, + CHECK_THAT(sort_perm(ve), Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, 2, 13, 7, 4, 14})); - REQUIRE_THAT(ve, + CHECK_THAT(ve, Equals(epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); - REQUIRE(is_sorted(xsort)); - REQUIRE(is_permutation(psort)); - REQUIRE_THAT(permuted(x, psort), Equals(xsort)); + CHECK(is_sorted(xsort)); + 
CHECK(is_permutation(psort)); + CHECK_THAT(permuted(x, psort), Equals(xsort)); } } TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; - REQUIRE_THAT(sort8_perm(ve), Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, + CHECK_THAT(sort8_perm(ve), Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, 12, 8, 11, 13, 14})); - REQUIRE_THAT(ve, + CHECK_THAT(ve, Equals(epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); - REQUIRE(is_sorted(xsort | Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); - REQUIRE(is_sorted(xsort & Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); - REQUIRE(is_permutation(psort)); - REQUIRE_THAT(permuted(x, psort), Equals(xsort)); + CHECK(is_sorted(xsort | Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); + CHECK(is_sorted(xsort & Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); + CHECK(is_permutation(psort)); + CHECK_THAT(permuted(x, psort), Equals(xsort)); } } TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { - REQUIRE_THAT(permutation_of(epu8id, epu8id), Equals(epu8id)); - REQUIRE_THAT(permutation_of(Pa, Pa), Equals(epu8id)); - REQUIRE_THAT(permutation_of(epu8rev, epu8id), Equals(epu8rev)); - REQUIRE_THAT(permutation_of(epu8id, epu8rev), Equals(epu8rev)); - REQUIRE_THAT(permutation_of(epu8rev, epu8rev), Equals(epu8id)); - REQUIRE_THAT(permutation_of(epu8id, RP), Equals(RP)); + CHECK_THAT(permutation_of(epu8id, epu8id), Equals(epu8id)); + CHECK_THAT(permutation_of(Pa, Pa), Equals(epu8id)); + CHECK_THAT(permutation_of(epu8rev, epu8id), Equals(epu8rev)); + CHECK_THAT(permutation_of(epu8id, epu8rev), Equals(epu8rev)); + CHECK_THAT(permutation_of(epu8rev, epu8rev), Equals(epu8id)); + CHECK_THAT(permutation_of(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; - REQUIRE_THAT((permutation_of(Pv, Pv) | + CHECK_THAT((permutation_of(Pv, Pv) | epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { - REQUIRE_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); - REQUIRE_THAT(permutation_of_ref(Pa, Pa), Equals(epu8id)); - REQUIRE_THAT(permutation_of_ref(epu8rev, epu8id), Equals(epu8rev)); - REQUIRE_THAT(permutation_of_ref(epu8id, epu8rev), Equals(epu8rev)); - REQUIRE_THAT(permutation_of_ref(epu8rev, epu8rev), Equals(epu8id)); - REQUIRE_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); + CHECK_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); + CHECK_THAT(permutation_of_ref(Pa, Pa), Equals(epu8id)); + CHECK_THAT(permutation_of_ref(epu8rev, epu8id), Equals(epu8rev)); + CHECK_THAT(permutation_of_ref(epu8id, epu8rev), Equals(epu8rev)); + CHECK_THAT(permutation_of_ref(epu8rev, epu8rev), Equals(epu8id)); + CHECK_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; - REQUIRE_THAT((permutation_of_ref(Pv, Pv) | + CHECK_THAT((permutation_of_ref(Pv, Pv) | epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { - REQUIRE_THAT(remove_dups(P1), Equals(P10)); - REQUIRE_THAT(remove_dups(P11), Equals(P10)); - REQUIRE_THAT(remove_dups(sorted(P10)), + CHECK_THAT(remove_dups(P1), Equals(P10)); + CHECK_THAT(remove_dups(P11), Equals(P10)); + CHECK_THAT(remove_dups(sorted(P10)), Equals(epu8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); - REQUIRE_THAT( + CHECK_THAT( 
remove_dups(sorted(Pv)), Equals(epu8{0, 1, 2, 0, 3, 4, 5, 0, 0, 6, 11, 12, 0, 13, 14, 15})); - REQUIRE_THAT(remove_dups(P1, 1), Equals(P1)); - REQUIRE_THAT(remove_dups(P11, 1), Equals(Epu8({1, 1, 0}, 1))); - REQUIRE_THAT(remove_dups(P11, 42), Equals(Epu8({1, 42, 0}, 42))); - REQUIRE_THAT(remove_dups(sorted(P10), 1), Equals(P1)); - REQUIRE_THAT( + CHECK_THAT(remove_dups(P1, 1), Equals(P1)); + CHECK_THAT(remove_dups(P11, 1), Equals(Epu8({1, 1, 0}, 1))); + CHECK_THAT(remove_dups(P11, 42), Equals(Epu8({1, 42, 0}, 42))); + CHECK_THAT(remove_dups(sorted(P10), 1), Equals(P1)); + CHECK_THAT( remove_dups(sorted(Pv), 7), Equals(epu8{7, 1, 2, 7, 3, 4, 5, 7, 7, 6, 11, 12, 7, 13, 14, 15})); for (auto x : v) { x = sorted(remove_dups(sorted(x))); - REQUIRE_THAT(x, Equals(sorted(remove_dups(x)))); + CHECK_THAT(x, Equals(sorted(remove_dups(x)))); } for (auto x : v) { x = sorted(remove_dups(sorted(x), 42)); - REQUIRE_THAT(x, Equals(sorted(remove_dups(x, 42)))); + CHECK_THAT(x, Equals(sorted(remove_dups(x, 42)))); } } TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { - REQUIRE(horiz_sum_ref(zero) == 0); - REQUIRE(horiz_sum_ref(P01) == 1); - REQUIRE(horiz_sum_ref(epu8id) == 120); - REQUIRE(horiz_sum_ref(P10) == 1); - REQUIRE(horiz_sum_ref(P11) == 2); - REQUIRE(horiz_sum_ref(P1) == 16); - REQUIRE(horiz_sum_ref(P112) == 30); - REQUIRE(horiz_sum_ref(Pa1) == 43); - REQUIRE(horiz_sum_ref(Pa2) == 45); - REQUIRE(horiz_sum_ref(P51) == 90); - REQUIRE(horiz_sum_ref(Pv) == 110); - REQUIRE(horiz_sum_ref(P5) == 80); - REQUIRE(horiz_sum_ref(epu8rev) == 120); - REQUIRE(horiz_sum_ref(Pc) == 203); + CHECK(horiz_sum_ref(zero) == 0); + CHECK(horiz_sum_ref(P01) == 1); + CHECK(horiz_sum_ref(epu8id) == 120); + CHECK(horiz_sum_ref(P10) == 1); + CHECK(horiz_sum_ref(P11) == 2); + CHECK(horiz_sum_ref(P1) == 16); + CHECK(horiz_sum_ref(P112) == 30); + CHECK(horiz_sum_ref(Pa1) == 43); + CHECK(horiz_sum_ref(Pa2) == 45); + CHECK(horiz_sum_ref(P51) == 90); + CHECK(horiz_sum_ref(Pv) == 110); + CHECK(horiz_sum_ref(P5) == 80); + CHECK(horiz_sum_ref(epu8rev) == 120); + CHECK(horiz_sum_ref(Pc) == 203); } TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") @@ -408,37 +408,37 @@ TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { - REQUIRE_THAT(partial_sums_ref(zero), Equals(zero)); - REQUIRE_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); - REQUIRE_THAT(partial_sums_ref(epu8id), + CHECK_THAT(partial_sums_ref(zero), Equals(zero)); + CHECK_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); + CHECK_THAT(partial_sums_ref(epu8id), Equals(epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120})); - REQUIRE_THAT(partial_sums_ref(P10), Equals(P1)); - REQUIRE_THAT(partial_sums_ref(P11), Equals(Epu8({1}, 2))); - REQUIRE_THAT(partial_sums_ref(P1), Equals(epu8id + Epu8({}, 1))); - REQUIRE_THAT(partial_sums_ref(P112), + CHECK_THAT(partial_sums_ref(P10), Equals(P1)); + CHECK_THAT(partial_sums_ref(P11), Equals(Epu8({1}, 2))); + CHECK_THAT(partial_sums_ref(P1), Equals(epu8id + Epu8({}, 1))); + CHECK_THAT(partial_sums_ref(P112), Equals(epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30})); - REQUIRE_THAT(partial_sums_ref(Pa1), + CHECK_THAT(partial_sums_ref(Pa1), Equals(epu8{4, 6, 11, 12, 14, 21, 28, 31, 35, 37, 38, 39, 40, 41, 42, 43})); - REQUIRE_THAT(partial_sums_ref(Pa2), + CHECK_THAT(partial_sums_ref(Pa2), Equals(epu8{4, 6, 11, 12, 14, 23, 30, 
33, 37, 39, 40, 41, 42, 43, 44, 45})); - REQUIRE_THAT(partial_sums_ref(P51), + CHECK_THAT(partial_sums_ref(P51), Equals(epu8{5, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, 84, 90})); - REQUIRE_THAT(partial_sums_ref(Pv), + CHECK_THAT(partial_sums_ref(Pv), Equals(epu8{5, 10, 12, 17, 18, 24, 36, 40, 40, 43, 45, 56, 68, 81, 95, 110})); - REQUIRE_THAT(partial_sums_ref(P5), + CHECK_THAT(partial_sums_ref(P5), Equals(epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80})); - REQUIRE_THAT(partial_sums_ref(epu8rev), + CHECK_THAT(partial_sums_ref(epu8rev), Equals(epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, 117, 119, 120, 120})); - REQUIRE_THAT(partial_sums_ref(Pc), + CHECK_THAT(partial_sums_ref(Pc), Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } @@ -450,20 +450,20 @@ TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { - REQUIRE(horiz_max_ref(zero) == 0); - REQUIRE(horiz_max_ref(P01) == 1); - REQUIRE(horiz_max_ref(epu8id) == 15); - REQUIRE(horiz_max_ref(P10) == 1); - REQUIRE(horiz_max_ref(P11) == 1); - REQUIRE(horiz_max_ref(P1) == 1); - REQUIRE(horiz_max_ref(P112) == 2); - REQUIRE(horiz_max_ref(Pa1) == 7); - REQUIRE(horiz_max_ref(Pa2) == 9); - REQUIRE(horiz_max_ref(P51) == 6); - REQUIRE(horiz_max_ref(Pv) == 15); - REQUIRE(horiz_max_ref(P5) == 5); - REQUIRE(horiz_max_ref(epu8rev) == 15); - REQUIRE(horiz_max_ref(Pc) == 43); + CHECK(horiz_max_ref(zero) == 0); + CHECK(horiz_max_ref(P01) == 1); + CHECK(horiz_max_ref(epu8id) == 15); + CHECK(horiz_max_ref(P10) == 1); + CHECK(horiz_max_ref(P11) == 1); + CHECK(horiz_max_ref(P1) == 1); + CHECK(horiz_max_ref(P112) == 2); + CHECK(horiz_max_ref(Pa1) == 7); + CHECK(horiz_max_ref(Pa2) == 9); + CHECK(horiz_max_ref(P51) == 6); + CHECK(horiz_max_ref(Pv) == 15); + CHECK(horiz_max_ref(P5) == 5); + CHECK(horiz_max_ref(epu8rev) == 15); + CHECK(horiz_max_ref(Pc) == 43); } TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") @@ -472,21 +472,21 @@ TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max3, v, "[Epu8][036]") TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { - REQUIRE_THAT(partial_max_ref(zero), Equals(zero)); - REQUIRE_THAT(partial_max_ref(P01), Equals(Epu8({0}, 1))); - REQUIRE_THAT(partial_max_ref(epu8id), Equals(epu8id)); - REQUIRE_THAT(partial_max_ref(P10), Equals(P1)); - REQUIRE_THAT(partial_max_ref(P11), Equals(P1)); - REQUIRE_THAT(partial_max_ref(P1), Equals(P1)); - REQUIRE_THAT(partial_max_ref(P112), Equals(P112)); - REQUIRE_THAT(partial_max_ref(Pa1), Equals(Epu8({4, 4, 5, 5, 5}, 7))); - REQUIRE_THAT(partial_max_ref(Pa2), Equals(Epu8({4, 4, 5, 5, 5}, 9))); - REQUIRE_THAT(partial_max_ref(P51), Equals(Epu8({5, 5}, 6))); - REQUIRE_THAT(partial_max_ref(Pv), Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, + CHECK_THAT(partial_max_ref(zero), Equals(zero)); + CHECK_THAT(partial_max_ref(P01), Equals(Epu8({0}, 1))); + CHECK_THAT(partial_max_ref(epu8id), Equals(epu8id)); + CHECK_THAT(partial_max_ref(P10), Equals(P1)); + CHECK_THAT(partial_max_ref(P11), Equals(P1)); + CHECK_THAT(partial_max_ref(P1), Equals(P1)); + CHECK_THAT(partial_max_ref(P112), Equals(P112)); + CHECK_THAT(partial_max_ref(Pa1), Equals(Epu8({4, 4, 5, 5, 5}, 7))); + CHECK_THAT(partial_max_ref(Pa2), Equals(Epu8({4, 4, 5, 5, 5}, 9))); + CHECK_THAT(partial_max_ref(P51), Equals(Epu8({5, 5}, 6))); + CHECK_THAT(partial_max_ref(Pv), 
Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15})); - REQUIRE_THAT(partial_max_ref(P5), Equals(P5)); - REQUIRE_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); - REQUIRE_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); + CHECK_THAT(partial_max_ref(P5), Equals(P5)); + CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); + CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max_gen, v, "[Epu8][030]") @@ -495,20 +495,20 @@ TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max_round, v, TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { - REQUIRE(horiz_min_ref(zero) == 0); - REQUIRE(horiz_min_ref(P01) == 0); - REQUIRE(horiz_min_ref(epu8id) == 0); - REQUIRE(horiz_min_ref(P10) == 0); - REQUIRE(horiz_min_ref(P11) == 0); - REQUIRE(horiz_min_ref(P1) == 1); - REQUIRE(horiz_min_ref(P112) == 1); - REQUIRE(horiz_min_ref(Pa1) == 1); - REQUIRE(horiz_min_ref(Pa2) == 1); - REQUIRE(horiz_min_ref(P51) == 1); - REQUIRE(horiz_min_ref(Pv) == 0); - REQUIRE(horiz_min_ref(P5) == 5); - REQUIRE(horiz_min_ref(epu8rev) == 0); - REQUIRE(horiz_min_ref(Pc) == 5); + CHECK(horiz_min_ref(zero) == 0); + CHECK(horiz_min_ref(P01) == 0); + CHECK(horiz_min_ref(epu8id) == 0); + CHECK(horiz_min_ref(P10) == 0); + CHECK(horiz_min_ref(P11) == 0); + CHECK(horiz_min_ref(P1) == 1); + CHECK(horiz_min_ref(P112) == 1); + CHECK(horiz_min_ref(Pa1) == 1); + CHECK(horiz_min_ref(Pa2) == 1); + CHECK(horiz_min_ref(P51) == 1); + CHECK(horiz_min_ref(Pv) == 0); + CHECK(horiz_min_ref(P5) == 5); + CHECK(horiz_min_ref(epu8rev) == 0); + CHECK(horiz_min_ref(Pc) == 5); } TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min_gen, v, "[Epu8][034]") @@ -517,22 +517,22 @@ TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min3, v, "[Epu8][036]") TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { - REQUIRE_THAT(partial_min_ref(zero), Equals(zero)); - REQUIRE_THAT(partial_min_ref(P01), Equals(zero)); - REQUIRE_THAT(partial_min_ref(epu8id), Equals(zero)); - REQUIRE_THAT(partial_min_ref(P10), Equals(P10)); - REQUIRE_THAT(partial_min_ref(P11), Equals(P11)); - REQUIRE_THAT(partial_min_ref(P1), Equals(P1)); - REQUIRE_THAT(partial_min_ref(P112), Equals(P1)); - REQUIRE_THAT(partial_min_ref(Pa1), Equals(Epu8({4, 2, 2}, 1))); - REQUIRE_THAT(partial_min_ref(Pa2), Equals(Epu8({4, 2, 2}, 1))); - REQUIRE_THAT(partial_min_ref(P51), Equals(Epu8({5}, 1))); - REQUIRE_THAT(partial_min_ref(Pv), // clang-format off + CHECK_THAT(partial_min_ref(zero), Equals(zero)); + CHECK_THAT(partial_min_ref(P01), Equals(zero)); + CHECK_THAT(partial_min_ref(epu8id), Equals(zero)); + CHECK_THAT(partial_min_ref(P10), Equals(P10)); + CHECK_THAT(partial_min_ref(P11), Equals(P11)); + CHECK_THAT(partial_min_ref(P1), Equals(P1)); + CHECK_THAT(partial_min_ref(P112), Equals(P1)); + CHECK_THAT(partial_min_ref(Pa1), Equals(Epu8({4, 2, 2}, 1))); + CHECK_THAT(partial_min_ref(Pa2), Equals(Epu8({4, 2, 2}, 1))); + CHECK_THAT(partial_min_ref(P51), Equals(Epu8({5}, 1))); + CHECK_THAT(partial_min_ref(Pv), // clang-format off Equals(Epu8({5, 5, 2, 2, 1, 1, 1, 1, }, 0))); // clang-format on - REQUIRE_THAT(partial_min_ref(P5), Equals(P5)); - REQUIRE_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); - REQUIRE_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); + CHECK_THAT(partial_min_ref(P5), Equals(P5)); + 
CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); + CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min_gen, v, "[Epu8][030]") @@ -541,22 +541,22 @@ TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min_round, v, TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { - REQUIRE_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); - REQUIRE_THAT(eval16_ref(P01), Equals(Epu8({15, 1}, 0))); - REQUIRE_THAT(eval16_ref(epu8id), Equals(Epu8({}, 1))); - REQUIRE_THAT(eval16_ref(P10), Equals(Epu8({15, 1}, 0))); - REQUIRE_THAT(eval16_ref(P11), Equals(Epu8({14, 2}, 0))); - REQUIRE_THAT(eval16_ref(P1), Equals(Epu8({0, 16}, 0))); - REQUIRE_THAT(eval16_ref(P112), Equals(Epu8({0, 2, 14}, 0))); - REQUIRE_THAT(eval16_ref(Pa1), Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0))); - REQUIRE_THAT(eval16_ref(Pa2), + CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); + CHECK_THAT(eval16_ref(P01), Equals(Epu8({15, 1}, 0))); + CHECK_THAT(eval16_ref(epu8id), Equals(Epu8({}, 1))); + CHECK_THAT(eval16_ref(P10), Equals(Epu8({15, 1}, 0))); + CHECK_THAT(eval16_ref(P11), Equals(Epu8({14, 2}, 0))); + CHECK_THAT(eval16_ref(P1), Equals(Epu8({0, 16}, 0))); + CHECK_THAT(eval16_ref(P112), Equals(Epu8({0, 2, 14}, 0))); + CHECK_THAT(eval16_ref(Pa1), Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0))); + CHECK_THAT(eval16_ref(Pa2), Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0))); - REQUIRE_THAT(eval16_ref(P51), Equals(Epu8({0, 1, 0, 0, 0, 1, 14}, 0))); - REQUIRE_THAT(eval16_ref(Pv), + CHECK_THAT(eval16_ref(P51), Equals(Epu8({0, 1, 0, 0, 0, 1, 14}, 0))); + CHECK_THAT(eval16_ref(Pv), Equals(epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); - REQUIRE_THAT(eval16_ref(P5), Equals(Epu8({0, 0, 0, 0, 0, 16}, 0))); - REQUIRE_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); - REQUIRE_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); + CHECK_THAT(eval16_ref(P5), Equals(Epu8({0, 0, 0, 0, 0, 16}, 0))); + CHECK_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); + CHECK_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_cycle, v, "[Epu8][034]") @@ -566,20 +566,20 @@ TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_gen, v, "[Epu8][034]") TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16, v, "[Epu8][034]") TEST_CASE("Epu8::popcount4", "[Epu8][048]") { - REQUIRE_THAT(popcount4, + CHECK_THAT(popcount4, Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); } TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { - REQUIRE_THAT(popcount16(Pv), + CHECK_THAT(popcount16(Pv), Equals(epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); - REQUIRE_THAT(popcount16(RP), + CHECK_THAT(popcount16(RP), Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - REQUIRE_THAT(popcount16(RP << 1), + CHECK_THAT(popcount16(RP << 1), Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - REQUIRE_THAT(popcount16(RP << 2), + CHECK_THAT(popcount16(RP << 2), Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); - REQUIRE_THAT(popcount16(Epu8({0, 1, 5, 0xff, 0xf0, 0x35}, 0x0f)), + CHECK_THAT(popcount16(Epu8({0, 1, 5, 0xff, 0xf0, 0x35}, 0x0f)), Equals(Epu8({0, 1, 2, 8}, 4))); } @@ -587,113 +587,113 @@ TEST_CASE("random_epu8", "[Epu8][050]") { for (int bnd : {1, 10, 100, 255, 256}) { for (int i = 0; i < 10; i++) { epu8 r = random_epu8(bnd); - REQUIRE_THAT(r, Equals(r)); + CHECK_THAT(r, Equals(r)); for (auto v : 
as_array(r)) - REQUIRE(v < bnd); + CHECK(v < bnd); } } } TEST_CASE_METHOD(Fix, "is_partial_transformation", "[Epu8][051]") { - REQUIRE(is_partial_transformation(zero)); - REQUIRE(is_partial_transformation(P01)); - REQUIRE(is_partial_transformation(P10)); - REQUIRE(!is_partial_transformation(Epu8({16}, 0))); - REQUIRE(is_partial_transformation(Epu8({}, 0xff))); - REQUIRE(is_partial_transformation(Epu8({2, 0xff, 3}, 0))); - - REQUIRE(!is_partial_transformation(zero, 15)); - REQUIRE(is_partial_transformation(Pa)); - REQUIRE(is_partial_transformation(Pa, 6)); - REQUIRE(is_partial_transformation(Pa, 5)); - REQUIRE(!is_partial_transformation(Pa, 4)); - REQUIRE(!is_partial_transformation(Pa, 1)); - REQUIRE(!is_partial_transformation(Pa, 0)); - - REQUIRE(is_partial_transformation(RP)); - REQUIRE(is_partial_transformation(RP, 16)); - REQUIRE(!is_partial_transformation(RP, 15)); - REQUIRE(is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); - REQUIRE( + CHECK(is_partial_transformation(zero)); + CHECK(is_partial_transformation(P01)); + CHECK(is_partial_transformation(P10)); + CHECK(!is_partial_transformation(Epu8({16}, 0))); + CHECK(is_partial_transformation(Epu8({}, 0xff))); + CHECK(is_partial_transformation(Epu8({2, 0xff, 3}, 0))); + + CHECK(!is_partial_transformation(zero, 15)); + CHECK(is_partial_transformation(Pa)); + CHECK(is_partial_transformation(Pa, 6)); + CHECK(is_partial_transformation(Pa, 5)); + CHECK(!is_partial_transformation(Pa, 4)); + CHECK(!is_partial_transformation(Pa, 1)); + CHECK(!is_partial_transformation(Pa, 0)); + + CHECK(is_partial_transformation(RP)); + CHECK(is_partial_transformation(RP, 16)); + CHECK(!is_partial_transformation(RP, 15)); + CHECK(is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); + CHECK( !is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); } TEST_CASE_METHOD(Fix, "is_transformation", "[Epu8][052]") { - REQUIRE(is_transformation(zero)); - REQUIRE(is_transformation(P01)); - REQUIRE(is_transformation(P10)); - REQUIRE(!is_transformation(Epu8({16}, 0))); - REQUIRE(!is_transformation(Epu8({}, 0xff))); - REQUIRE(!is_transformation(Epu8({2, 0xff, 3}, 0))); - - REQUIRE(!is_transformation(zero, 15)); - REQUIRE(is_transformation(Pa)); - REQUIRE(is_transformation(Pa, 6)); - REQUIRE(is_transformation(Pa, 5)); - REQUIRE(!is_transformation(Pa, 4)); - REQUIRE(!is_transformation(Pa, 1)); - REQUIRE(!is_transformation(Pa, 0)); - - REQUIRE(is_transformation(RP)); - REQUIRE(is_transformation(RP, 16)); - REQUIRE(!is_transformation(RP, 15)); + CHECK(is_transformation(zero)); + CHECK(is_transformation(P01)); + CHECK(is_transformation(P10)); + CHECK(!is_transformation(Epu8({16}, 0))); + CHECK(!is_transformation(Epu8({}, 0xff))); + CHECK(!is_transformation(Epu8({2, 0xff, 3}, 0))); + + CHECK(!is_transformation(zero, 15)); + CHECK(is_transformation(Pa)); + CHECK(is_transformation(Pa, 6)); + CHECK(is_transformation(Pa, 5)); + CHECK(!is_transformation(Pa, 4)); + CHECK(!is_transformation(Pa, 1)); + CHECK(!is_transformation(Pa, 0)); + + CHECK(is_transformation(RP)); + CHECK(is_transformation(RP, 16)); + CHECK(!is_transformation(RP, 15)); } TEST_CASE_METHOD(Fix, "is_partial_permutation", "[Epu8][053]") { - REQUIRE(!is_partial_permutation(zero)); - REQUIRE(!is_partial_permutation(P01)); - REQUIRE(!is_partial_permutation(P10)); - REQUIRE(!is_partial_permutation(Epu8({16}, 0))); - REQUIRE(is_partial_permutation(Epu8({}, 0xff))); - REQUIRE(!is_partial_permutation(Epu8({2, 0xff, 3}, 0))); - REQUIRE(is_partial_permutation(Epu8({2, 0xff, 3}, 
0xff))); - - REQUIRE(!is_partial_permutation(zero, 15)); - REQUIRE(is_partial_permutation(Pa)); - REQUIRE(is_partial_permutation(Pa, 6)); - REQUIRE(is_partial_permutation(Pa, 5)); - REQUIRE(!is_partial_permutation(Pa, 4)); - REQUIRE(!is_partial_permutation(Pa, 1)); - REQUIRE(!is_partial_permutation(Pa, 0)); - - REQUIRE(is_partial_permutation(RP)); - REQUIRE(is_partial_permutation(RP, 16)); - REQUIRE(!is_partial_permutation(RP, 15)); - - REQUIRE(is_partial_permutation( + CHECK(!is_partial_permutation(zero)); + CHECK(!is_partial_permutation(P01)); + CHECK(!is_partial_permutation(P10)); + CHECK(!is_partial_permutation(Epu8({16}, 0))); + CHECK(is_partial_permutation(Epu8({}, 0xff))); + CHECK(!is_partial_permutation(Epu8({2, 0xff, 3}, 0))); + CHECK(is_partial_permutation(Epu8({2, 0xff, 3}, 0xff))); + + CHECK(!is_partial_permutation(zero, 15)); + CHECK(is_partial_permutation(Pa)); + CHECK(is_partial_permutation(Pa, 6)); + CHECK(is_partial_permutation(Pa, 5)); + CHECK(!is_partial_permutation(Pa, 4)); + CHECK(!is_partial_permutation(Pa, 1)); + CHECK(!is_partial_permutation(Pa, 0)); + + CHECK(is_partial_permutation(RP)); + CHECK(is_partial_permutation(RP, 16)); + CHECK(!is_partial_permutation(RP, 15)); + + CHECK(is_partial_permutation( epu8{1, 2, 0xFF, 0xFF, 0, 5, 0xFF, 3, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE(!is_partial_permutation( + CHECK(!is_partial_permutation( epu8{1, 2, 1, 0xFF, 0, 5, 0xFF, 2, 8, 9, 10, 11, 12, 13, 14, 15})); - REQUIRE(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); - REQUIRE(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); + CHECK(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); + CHECK(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); } TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { - REQUIRE(!is_permutation(zero)); - REQUIRE(!is_permutation(P01)); - REQUIRE(!is_permutation(P10)); - REQUIRE(!is_permutation(Epu8({16}, 0))); - REQUIRE(!is_permutation(Epu8({}, 0xff))); - REQUIRE(!is_permutation(Epu8({2, 0xff, 3}, 0))); - - REQUIRE(!is_permutation(zero, 15)); - REQUIRE(is_permutation(Pa)); - REQUIRE(is_permutation(Pa, 6)); - REQUIRE(is_permutation(Pa, 5)); - REQUIRE(!is_permutation(Pa, 4)); - REQUIRE(!is_permutation(Pa, 1)); - REQUIRE(!is_permutation(Pa, 0)); - - REQUIRE(is_permutation(RP)); - REQUIRE(is_permutation(RP, 16)); - REQUIRE(!is_permutation(RP, 15)); + CHECK(!is_permutation(zero)); + CHECK(!is_permutation(P01)); + CHECK(!is_permutation(P10)); + CHECK(!is_permutation(Epu8({16}, 0))); + CHECK(!is_permutation(Epu8({}, 0xff))); + CHECK(!is_permutation(Epu8({2, 0xff, 3}, 0))); + + CHECK(!is_permutation(zero, 15)); + CHECK(is_permutation(Pa)); + CHECK(is_permutation(Pa, 6)); + CHECK(is_permutation(Pa, 5)); + CHECK(!is_permutation(Pa, 4)); + CHECK(!is_permutation(Pa, 1)); + CHECK(!is_permutation(Pa, 0)); + + CHECK(is_permutation(RP)); + CHECK(is_permutation(RP, 16)); + CHECK(!is_permutation(RP, 15)); } #ifdef SIMDE_X86_SSE4_2_NATIVE TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { - REQUIRE(is_permutation(x, i) == is_permutation_cmpestri(x, i)); + CHECK(is_permutation(x, i) == is_permutation_cmpestri(x, i)); } } } @@ -702,7 +702,7 @@ TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][080]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { - REQUIRE(is_permutation(x, i) == is_permutation_sort(x, i)); + CHECK(is_permutation(x, i) == 
is_permutation_sort(x, i)); } } } diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 2acf32a7..2258f144 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -26,21 +26,21 @@ #define TEST_AGREES_FUN(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p : vct) { \ - REQUIRE(fun(p) == ref(p)); \ + CHECK(fun(p) == ref(p)); \ } \ } #define TEST_AGREES_FUN_EPU8(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p : vct) { \ - REQUIRE_THAT(fun(p), Equals(ref(p))); \ + CHECK_THAT(fun(p), Equals(ref(p))); \ } \ } #define TEST_AGREES(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p : vct) { \ - REQUIRE(p.fun() == p.ref()); \ + CHECK(p.fun() == p.ref()); \ } \ } @@ -48,7 +48,7 @@ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p1 : vct) { \ for (auto p2 : vct) { \ - REQUIRE(p1.fun(p2) == p1.ref(p2)); \ + CHECK(p1.fun(p2) == p1.ref(p2)); \ } \ } \ } @@ -56,14 +56,14 @@ #define TEST_AGREES_EPU8(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p : vct) { \ - REQUIRE_THAT(p.fun(), Equals(p.ref())); \ + CHECK_THAT(p.fun(), Equals(p.ref())); \ } \ } #define TEST_AGREES_EPU8_FUN(fixture, type, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ for (auto p : vct) { \ - REQUIRE_THAT(fun(p), Equals(ref(p))); \ + CHECK_THAT(fun(p), Equals(ref(p))); \ } \ } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index ed2988f8..188d29de 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -71,78 +71,78 @@ struct Perm16Fixture { TEST_CASE("PTransf16::PTransf16", "[PTransf16][000]") { const uint8_t FF = 0xff; - REQUIRE(PTransf16({}) == PTransf16::one()); - REQUIRE(PTransf16({0, 1, 2, 3}) == PTransf16::one()); - REQUIRE(PTransf16({1, 0}) == PTransf16({1, 0, 2})); - REQUIRE(PTransf16({2}) == PTransf16({2, 1, 2})); - REQUIRE(PTransf16({4, 5, 0}, {9, 0, 1}) == + CHECK(PTransf16({}) == PTransf16::one()); + CHECK(PTransf16({0, 1, 2, 3}) == PTransf16::one()); + CHECK(PTransf16({1, 0}) == PTransf16({1, 0, 2})); + CHECK(PTransf16({2}) == PTransf16({2, 1, 2})); + CHECK(PTransf16({4, 5, 0}, {9, 0, 1}) == PTransf16( {1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); - REQUIRE(PTransf16({4, 5, 0, 8}, {9, 0, 1, 2}) == + CHECK(PTransf16({4, 5, 0, 8}, {9, 0, 1, 2}) == PTransf16( {1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); - REQUIRE(PTransf16({4, 5, 0, 8}, {9, 0, 2, 2}) == + CHECK(PTransf16({4, 5, 0, 8}, {9, 0, 2, 2}) == PTransf16( {2, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); } TEST_CASE("PTransf16::hash", "[PTransf16][001]") { - REQUIRE(std::hash()(PTransf16::one()) != 0); - REQUIRE(std::hash()(PTransf16(Epu8(1))) != 0); - REQUIRE(std::hash()(PTransf16({4, 5, 0}, {9, 0, 1})) != 0); + CHECK(std::hash()(PTransf16::one()) != 0); + CHECK(std::hash()(PTransf16(Epu8(1))) != 0); + CHECK(std::hash()(PTransf16({4, 5, 0}, {9, 0, 1})) != 0); } TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { - REQUIRE_THAT(PTransf16({}).image_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({}).image_mask(false), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({}).image_mask(true), Equals(Epu8(0))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask(), + 
CHECK_THAT(PTransf16({}).image_mask(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({}).image_mask(false), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({}).image_mask(true), Equals(Epu8(0))); + CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask(), Equals(Epu8({0, 0, 0, 0}, FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask(false), + CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask(false), Equals(Epu8({0, 0, 0, 0}, FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask(true), + CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask(true), Equals(Epu8({FF, FF, FF, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(1)).image_mask(), Equals(Epu8({0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(2)).image_mask(), Equals(Epu8({0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT(PTransf16(Epu8(1)).image_mask(), Equals(Epu8({0, FF}, 0))); + CHECK_THAT(PTransf16(Epu8(2)).image_mask(), Equals(Epu8({0, 0, FF}, 0))); + CHECK_THAT( PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask(), Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(false), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask(true), Equals(Epu8( {0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); } TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { - REQUIRE_THAT(PTransf16({}).image_mask_ref(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({}).image_mask_ref(false), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({}).image_mask_ref(true), Equals(Epu8(0))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(), + CHECK_THAT(PTransf16({}).image_mask_ref(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({}).image_mask_ref(false), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({}).image_mask_ref(true), Equals(Epu8(0))); + CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(), Equals(Epu8({0, 0, 0, 0}, FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(false), + CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(false), Equals(Epu8({0, 0, 0, 0}, FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(true), + CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(true), Equals(Epu8({FF, FF, FF, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(1)).image_mask_ref(), Equals(Epu8({0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(2)).image_mask_ref(), + CHECK_THAT(PTransf16(Epu8(1)).image_mask_ref(), Equals(Epu8({0, FF}, 0))); + CHECK_THAT(PTransf16(Epu8(2)).image_mask_ref(), Equals(Epu8({0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask_ref(), Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).image_mask_ref(), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) .image_mask_ref(false), Equals(Epu8({FF, 0, FF, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) .image_mask_ref(true), Equals(Epu8( @@ -150,241 +150,241 @@ TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { } TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { - 
REQUIRE(PTransf16({}).left_one() == PTransf16::one()); - REQUIRE(PTransf16({4, 4, 4, 4}).left_one() == PTransf16({FF, FF, FF, FF})); - REQUIRE(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, FF))); - REQUIRE(PTransf16(Epu8(2)).left_one() == PTransf16(Epu8({FF, FF, 2}, FF))); - REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).left_one() == + CHECK(PTransf16({}).left_one() == PTransf16::one()); + CHECK(PTransf16({4, 4, 4, 4}).left_one() == PTransf16({FF, FF, FF, FF})); + CHECK(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, FF))); + CHECK(PTransf16(Epu8(2)).left_one() == PTransf16(Epu8({FF, FF, 2}, FF))); + CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).left_one() == PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); - REQUIRE(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).left_one() == + CHECK(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).left_one() == PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); - REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).left_one() == + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).left_one() == PTransf16( {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); - REQUIRE(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).left_one() == + CHECK(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).left_one() == PTransf16( {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); } TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { - REQUIRE_THAT(PTransf16({}).domain_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).domain_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).domain_mask(false), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).domain_mask(true), Equals(Epu8(0))); - REQUIRE_THAT(PTransf16(Epu8(1)).domain_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16(Epu8(2)).domain_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).domain_mask(), + CHECK_THAT(PTransf16({}).domain_mask(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({4, 4, 4, 4}).domain_mask(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({4, 4, 4, 4}).domain_mask(false), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({4, 4, 4, 4}).domain_mask(true), Equals(Epu8(0))); + CHECK_THAT(PTransf16(Epu8(1)).domain_mask(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16(Epu8(2)).domain_mask(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).domain_mask(), Equals(Epu8({FF, FF, FF, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).domain_mask(), + CHECK_THAT(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).domain_mask(), Equals(Epu8({0, FF, FF, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(), Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) + CHECK_THAT(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) .domain_mask(false), Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(true), Equals(Epu8({0, 0, FF, 0, 0, FF, 0, FF, 0}, FF))); } TEST_CASE("PTransf16::right_one", "[PTransf16][005]") { - REQUIRE(PTransf16({}).right_one() == PTransf16::one()); - REQUIRE(PTransf16({4, 4, 4, 4}).right_one() == PTransf16::one()); - REQUIRE(PTransf16(Epu8(1)).right_one() == PTransf16::one()); - REQUIRE(PTransf16(Epu8(2)).right_one() == PTransf16::one()); - REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).right_one() == + 
CHECK(PTransf16({}).right_one() == PTransf16::one()); + CHECK(PTransf16({4, 4, 4, 4}).right_one() == PTransf16::one()); + CHECK(PTransf16(Epu8(1)).right_one() == PTransf16::one()); + CHECK(PTransf16(Epu8(2)).right_one() == PTransf16::one()); + CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).right_one() == PTransf16(Epu8({0, 1, 2, 3}, FF))); - REQUIRE(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).right_one() == + CHECK(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).right_one() == PTransf16( {FF, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); - REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).right_one() == + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).right_one() == PTransf16::one()); - REQUIRE( + CHECK( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).right_one() == PTransf16({0, 1, FF, 3, 4, FF, 6, FF, 8, FF, FF, FF, FF, FF, FF, FF})); } TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { - REQUIRE(PTransf16({}).rank_ref() == 16); - REQUIRE(PTransf16({4, 4, 4, 4}).rank_ref() == 12); - REQUIRE(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) + CHECK(PTransf16({}).rank_ref() == 16); + CHECK(PTransf16({4, 4, 4, 4}).rank_ref() == 12); + CHECK(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) .rank_ref() == 1); - REQUIRE(PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) + CHECK(PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) .rank_ref() == 1); - REQUIRE(PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) + CHECK(PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) .rank_ref() == 2); - REQUIRE(PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}) + CHECK(PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}) .rank_ref() == 4); - REQUIRE(PTransf16({1, 1, 1, FF, 1, 1, FF, 1, 1, FF, 1, FF, 1, 1, 1, 1}) + CHECK(PTransf16({1, 1, 1, FF, 1, 1, FF, 1, 1, FF, 1, FF, 1, 1, 1, 1}) .rank_ref() == 1); - REQUIRE(PTransf16({2, 2, 2, 2, 2, FF, 2, 2, 2, FF, 2, 2, 2, FF, 2, 2}) + CHECK(PTransf16({2, 2, 2, 2, 2, FF, 2, 2, 2, FF, 2, 2, 2, FF, 2, 2}) .rank_ref() == 1); - REQUIRE(PTransf16({2, 2, 2, 0xf, 2, FF, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) + CHECK(PTransf16({2, 2, 2, 0xf, 2, FF, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) .rank_ref() == 2); - REQUIRE(PTransf16({0, 2, 2, 0xf, 2, 2, FF, 2, 5, 2, FF, 2, 2, 2, 2, 2}) + CHECK(PTransf16({0, 2, 2, 0xf, 2, 2, FF, 2, 5, 2, FF, 2, 2, 2, 2, 2}) .rank_ref() == 4); } // TODO uncomment TEST_CASE("PTransf16::rank", "[PTransf16][007]") { - REQUIRE(PTransf16({}).rank() == 16); - REQUIRE(PTransf16({4, 4, 4, 4}).rank() == 12); - REQUIRE( + CHECK(PTransf16({}).rank() == 16); + CHECK(PTransf16({4, 4, 4, 4}).rank() == 12); + CHECK( PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == 1); - REQUIRE( + CHECK( PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == 1); - REQUIRE( + CHECK( PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == 2); - REQUIRE( + CHECK( PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}).rank() == 4); } TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { - REQUIRE_THAT(PTransf16({}).fix_points_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({}).fix_points_mask(false), Equals(Epu8(FF))); - REQUIRE_THAT(PTransf16({}).fix_points_mask(true), Equals(Epu8(0))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(), + CHECK_THAT(PTransf16({}).fix_points_mask(), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({}).fix_points_mask(false), Equals(Epu8(FF))); + CHECK_THAT(PTransf16({}).fix_points_mask(true), 
Equals(Epu8(0))); + CHECK_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(), Equals(Epu8({0, 0, 0, 0}, FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(false), + CHECK_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(false), Equals(Epu8({0, 0, 0, 0}, FF))); - REQUIRE_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(true), + CHECK_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(true), Equals(Epu8({FF, FF, FF, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(1)).fix_points_mask(), + CHECK_THAT(PTransf16(Epu8(1)).fix_points_mask(), Equals(Epu8({0, FF}, 0))); - REQUIRE_THAT(PTransf16(Epu8(2)).fix_points_mask(), + CHECK_THAT(PTransf16(Epu8(2)).fix_points_mask(), Equals(Epu8({0, 0, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_mask(), Equals(Epu8({0, 0, FF, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)).fix_points_mask(), Equals(Epu8({FF, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); - REQUIRE_THAT( + CHECK_THAT( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) .fix_points_mask(false), Equals(Epu8({FF, 0, FF, 0, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, FF, 0}, 0))); - REQUIRE_THAT(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + CHECK_THAT(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) .fix_points_mask(true), Equals(Epu8({0, FF, 0}, FF))); } TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { - REQUIRE(PTransf16({}).fix_points_bitset() == 0xFFFF); - REQUIRE(PTransf16({}).fix_points_bitset(false) == 0xFFFF); - REQUIRE(PTransf16({}).fix_points_bitset(true) == 0); - REQUIRE(PTransf16({4, 4, 4, 4}).fix_points_bitset() == 0xFFF0); - REQUIRE(PTransf16({4, 4, 4, 4}).fix_points_bitset(false) == 0xFFF0); - REQUIRE(PTransf16({4, 4, 4, 4}).fix_points_bitset(true) == 0x000F); - REQUIRE(PTransf16(Epu8(1)).fix_points_bitset() == 0x0002); - REQUIRE(PTransf16(Epu8(2)).fix_points_bitset() == 0x0004); - REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_bitset() == 0x0084); - REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)) + CHECK(PTransf16({}).fix_points_bitset() == 0xFFFF); + CHECK(PTransf16({}).fix_points_bitset(false) == 0xFFFF); + CHECK(PTransf16({}).fix_points_bitset(true) == 0); + CHECK(PTransf16({4, 4, 4, 4}).fix_points_bitset() == 0xFFF0); + CHECK(PTransf16({4, 4, 4, 4}).fix_points_bitset(false) == 0xFFF0); + CHECK(PTransf16({4, 4, 4, 4}).fix_points_bitset(true) == 0x000F); + CHECK(PTransf16(Epu8(1)).fix_points_bitset() == 0x0002); + CHECK(PTransf16(Epu8(2)).fix_points_bitset() == 0x0004); + CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_bitset() == 0x0084); + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)) .fix_points_bitset() == 0x5); - REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) .fix_points_bitset(false) == 0x4105); - REQUIRE(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) + CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) .fix_points_bitset(true) == 0xFFFA); } TEST_CASE("PTransf16::nb_fix_points", "[PTransf16][010]") { - REQUIRE(PTransf16({}).nb_fix_points() == 16); - REQUIRE(PTransf16({4, 4, 4, 4}).nb_fix_points() == 12); - REQUIRE(PTransf16(Epu8(1)).nb_fix_points() == 1); - REQUIRE(PTransf16(Epu8(2)).nb_fix_points() == 1); - REQUIRE(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).nb_fix_points() == 2); - REQUIRE( + CHECK(PTransf16({}).nb_fix_points() == 16); + CHECK(PTransf16({4, 4, 4, 4}).nb_fix_points() == 12); + 
CHECK(PTransf16(Epu8(1)).nb_fix_points() == 1); + CHECK(PTransf16(Epu8(2)).nb_fix_points() == 1); + CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).nb_fix_points() == 2); + CHECK( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)).nb_fix_points() == 2); - REQUIRE( + CHECK( PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)).nb_fix_points() == 4); } TEST_CASE_METHOD(Perm16Fixture, "Transf16::operator uint64", "[Transf16][011]") { - REQUIRE(static_cast(Transf16::one()) == 0xf7e6d5c4b3a29180); - REQUIRE(static_cast(zero) == 0x0); - REQUIRE(static_cast(P10) == 0x1); - REQUIRE(static_cast(P01) == 0x100); - REQUIRE(static_cast(P11) == 0x101); - REQUIRE(static_cast(P1) == 0x1111111111111111); - REQUIRE(static_cast(RandT) == 0x9a854d7fce60b123); + CHECK(static_cast(Transf16::one()) == 0xf7e6d5c4b3a29180); + CHECK(static_cast(zero) == 0x0); + CHECK(static_cast(P10) == 0x1); + CHECK(static_cast(P01) == 0x100); + CHECK(static_cast(P11) == 0x101); + CHECK(static_cast(P1) == 0x1111111111111111); + CHECK(static_cast(RandT) == 0x9a854d7fce60b123); } TEST_CASE_METHOD(Perm16Fixture, "Transf16::Transf16(uint64_t)", "[Transf16][012]") { - REQUIRE(static_cast(0x0) == zero); - REQUIRE(static_cast(0x1) == P10); - REQUIRE(static_cast(0x100) == P01); + CHECK(static_cast(0x0) == zero); + CHECK(static_cast(0x1) == P10); + CHECK(static_cast(0x100) == P01); for (auto p : Tlist) { - REQUIRE(static_cast(static_cast(p)) == p); + CHECK(static_cast(static_cast(p)) == p); } } TEST_CASE_METHOD(Perm16Fixture, "Transf16::hash", "[Transf16][013]") { - REQUIRE(std::hash()(Transf16::one()) != 0); - REQUIRE(std::hash()(Transf16(Epu8(1))) != 0); - REQUIRE(std::hash()(RandT) != 0); + CHECK(std::hash()(Transf16::one()) != 0); + CHECK(std::hash()(Transf16(Epu8(1))) != 0); + CHECK(std::hash()(RandT) != 0); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::operator uint64_t", "[Perm16][014]") { - REQUIRE(static_cast(Perm16::one()) == 0xf7e6d5c4b3a29180); - REQUIRE(static_cast(PPa) == 0xf7e6d5c0b4a39281); - REQUIRE(static_cast(PPb) == 0xd7e4f5c0b6a39281); - REQUIRE(static_cast(RandPerm) == 0x9a854d7fce60b123); + CHECK(static_cast(Perm16::one()) == 0xf7e6d5c4b3a29180); + CHECK(static_cast(PPa) == 0xf7e6d5c0b4a39281); + CHECK(static_cast(PPb) == 0xd7e4f5c0b6a39281); + CHECK(static_cast(RandPerm) == 0x9a854d7fce60b123); for (auto p : {Perm16::one(), PPa, PPb, RandPerm}) { - REQUIRE(static_cast(static_cast(p)) == p); + CHECK(static_cast(static_cast(p)) == p); } } TEST_CASE("Perm::operator==", "[Perm16][015]") { - REQUIRE(Perm16::one() * Perm16::one() == Perm16::one()); + CHECK(Perm16::one() * Perm16::one() == Perm16::one()); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::hash", "[Perm16][016]") { - REQUIRE(std::hash()(Transf16::one()) != 0); - REQUIRE(std::hash()(PPa) != 0); - REQUIRE(std::hash()(RandPerm) != 0); + CHECK(std::hash()(Transf16::one()) != 0); + CHECK(std::hash()(PPa) != 0); + CHECK(std::hash()(RandPerm) != 0); } TEST_CASE("PPerm16::PPerm16", "[PPerm16][017]") { const uint8_t FF = 0xff; - REQUIRE( + CHECK( PPerm16({4, 5, 0}, {9, 0, 1}) == PPerm16({1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); - REQUIRE( + CHECK( PPerm16({4, 5, 0, 8}, {9, 0, 1, 2}) == PPerm16({1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); } TEST_CASE("PPerm16::hash", "[PPerm16][018]") { - REQUIRE(std::hash()(PPerm16::one()) != 0); - REQUIRE(std::hash()(PPerm16({4, 5, 0}, {9, 0, 1})) != 0); + CHECK(std::hash()(PPerm16::one()) != 0); + CHECK(std::hash()(PPerm16({4, 5, 0}, {9, 0, 1})) != 0); } TEST_CASE_METHOD(Perm16Fixture, 
"PPerm16::left_one", "[PPerm16][019]") { - REQUIRE(PPerm16({}).left_one() == PPerm16::one()); - REQUIRE(PPerm16({FF, FF, FF, 4}).left_one() == PPerm16({FF, FF, FF, FF})); - REQUIRE(PPerm16({FF, 4, FF, FF}).left_one() == PPerm16({FF, FF, FF, FF})); + CHECK(PPerm16({}).left_one() == PPerm16::one()); + CHECK(PPerm16({FF, FF, FF, 4}).left_one() == PPerm16({FF, FF, FF, FF})); + CHECK(PPerm16({FF, 4, FF, FF}).left_one() == PPerm16({FF, FF, FF, FF})); for (auto pp : PPlist) { - REQUIRE(pp.left_one() * pp == pp); + CHECK(pp.left_one() * pp == pp); } } TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { - REQUIRE(PPerm16({}).right_one() == PPerm16::one()); - REQUIRE(PPerm16({FF, FF, FF, 4}).right_one() == PPerm16({FF, FF, FF})); - REQUIRE(PPerm16({FF, 4, FF, FF}).right_one() == PPerm16({FF, 1, FF, FF})); + CHECK(PPerm16({}).right_one() == PPerm16::one()); + CHECK(PPerm16({FF, FF, FF, 4}).right_one() == PPerm16({FF, FF, FF})); + CHECK(PPerm16({FF, 4, FF, FF}).right_one() == PPerm16({FF, 1, FF, FF})); for (auto pp : PPlist) { - REQUIRE(pp * pp.right_one() == pp); + CHECK(pp * pp.right_one() == pp); } } @@ -394,35 +394,35 @@ TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, #endif TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { - REQUIRE_THAT(PTransf16::one().fix_points_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(Perm16::one().fix_points_mask(), Equals(Epu8(FF))); - REQUIRE_THAT(PPa.fix_points_mask(), Equals(Epu8({0, 0, 0, 0, 0}, FF))); - REQUIRE_THAT( + CHECK_THAT(PTransf16::one().fix_points_mask(), Equals(Epu8(FF))); + CHECK_THAT(Perm16::one().fix_points_mask(), Equals(Epu8(FF))); + CHECK_THAT(PPa.fix_points_mask(), Equals(Epu8({0, 0, 0, 0, 0}, FF))); + CHECK_THAT( PPb.fix_points_mask(), Equals(epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0})); - REQUIRE_THAT(RandPerm.fix_points_mask(), Equals(Epu8({0, FF}, 0))); + CHECK_THAT(RandPerm.fix_points_mask(), Equals(Epu8({0, FF}, 0))); - REQUIRE_THAT(Perm16::one().fix_points_mask(false), Equals(Epu8(FF))); - REQUIRE_THAT(PPa.fix_points_mask(false), Equals(Epu8({0, 0, 0, 0, 0}, FF))); - REQUIRE_THAT( + CHECK_THAT(Perm16::one().fix_points_mask(false), Equals(Epu8(FF))); + CHECK_THAT(PPa.fix_points_mask(false), Equals(Epu8({0, 0, 0, 0, 0}, FF))); + CHECK_THAT( PPb.fix_points_mask(false), Equals(epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0})); - REQUIRE_THAT(RandPerm.fix_points_mask(false), Equals(Epu8({0, FF}, 0))); + CHECK_THAT(RandPerm.fix_points_mask(false), Equals(Epu8({0, FF}, 0))); - REQUIRE_THAT(Perm16::one().fix_points_mask(true), Equals(Epu8(0))); - REQUIRE_THAT(PPa.fix_points_mask(true), + CHECK_THAT(Perm16::one().fix_points_mask(true), Equals(Epu8(0))); + CHECK_THAT(PPa.fix_points_mask(true), Equals(Epu8({FF, FF, FF, FF, FF}, 0))); - REQUIRE_THAT( + CHECK_THAT( PPb.fix_points_mask(true), Equals(epu8{FF, FF, FF, FF, FF, 0, FF, 0, 0, 0, 0, 0, 0, FF, 0, FF})); - REQUIRE_THAT(RandPerm.fix_points_mask(true), Equals(Epu8({FF, 0}, FF))); + CHECK_THAT(RandPerm.fix_points_mask(true), Equals(Epu8({FF, 0}, FF))); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][023]") { - REQUIRE(Perm16::one().smallest_fix_point() == 0); - REQUIRE(PPa.smallest_fix_point() == 5); - REQUIRE(PPb.smallest_fix_point() == 5); - REQUIRE(RandPerm.smallest_fix_point() == 1); + CHECK(Perm16::one().smallest_fix_point() == 0); + CHECK(PPa.smallest_fix_point() == 5); + CHECK(PPb.smallest_fix_point() == 5); + CHECK(RandPerm.smallest_fix_point() == 1); } 
TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_moved_point", @@ -450,24 +450,24 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") { } TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][026]") { - REQUIRE(Perm16::one().nb_fix_points() == 16); - REQUIRE(PPa.nb_fix_points() == 11); - REQUIRE(PPb.nb_fix_points() == 8); - REQUIRE(RandPerm.nb_fix_points() == 1); - REQUIRE(Perm16({0, 1, 3, 2}).nb_fix_points() == 14); + CHECK(Perm16::one().nb_fix_points() == 16); + CHECK(PPa.nb_fix_points() == 11); + CHECK(PPb.nb_fix_points() == 8); + CHECK(RandPerm.nb_fix_points() == 1); + CHECK(Perm16({0, 1, 3, 2}).nb_fix_points() == 14); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { - REQUIRE(PPa * PPa.inverse() == Perm16::one()); - REQUIRE(PPa.inverse() * PPa == Perm16::one()); - REQUIRE(PPb * PPb.inverse() == Perm16::one()); - REQUIRE(PPb.inverse() * PPb == Perm16::one()); - REQUIRE(RandPerm * RandPerm.inverse() == Perm16::one()); - REQUIRE(RandPerm.inverse() * RandPerm == Perm16::one()); + CHECK(PPa * PPa.inverse() == Perm16::one()); + CHECK(PPa.inverse() * PPa == Perm16::one()); + CHECK(PPb * PPb.inverse() == Perm16::one()); + CHECK(PPb.inverse() * PPb == Perm16::one()); + CHECK(RandPerm * RandPerm.inverse() == Perm16::one()); + CHECK(RandPerm.inverse() * RandPerm == Perm16::one()); for (Perm16 p : Plist) { - REQUIRE(p * p.inverse() == Perm16::one()); - REQUIRE(p.inverse() * p == Perm16::one()); + CHECK(p * p.inverse() == Perm16::one()); + CHECK(p.inverse() * p == Perm16::one()); } } @@ -481,10 +481,10 @@ TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse, Plist, "[Perm16][031]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { - REQUIRE_THAT(Perm16::one().lehmer(), Equals(zero)); - REQUIRE_THAT(PPa.lehmer(), + CHECK_THAT(Perm16::one().lehmer(), Equals(zero)); + CHECK_THAT(PPa.lehmer(), Equals(epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); - REQUIRE_THAT(PPb.lehmer(), + CHECK_THAT(PPb.lehmer(), Equals(epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); } @@ -494,9 +494,9 @@ TEST_AGREES_EPU8(Perm16Fixture, Perm16, lehmer_ref, lehmer, Plist, "[Perm16][034]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::length_ref", "[Perm16][035]") { - REQUIRE(Perm16::one().length() == 0); - REQUIRE(PPa.length() == 4); - REQUIRE(PPb.length() == 10); + CHECK(Perm16::one().length() == 0); + CHECK(PPa.length() == 4); + CHECK(PPb.length() == 10); } TEST_AGREES(Perm16Fixture, Perm16, length_ref, length_arr, Plist, @@ -504,37 +504,37 @@ TEST_AGREES(Perm16Fixture, Perm16, length_ref, length_arr, Plist, TEST_AGREES(Perm16Fixture, Perm16, length_ref, length, Plist, "[Perm16][037]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][038]") { - REQUIRE(Perm16::one().nb_descents_ref() == 0); - REQUIRE(PPa.nb_descents_ref() == 1); - REQUIRE(PPb.nb_descents_ref() == 4); - REQUIRE(Perm16::one().nb_descents() == 0); + CHECK(Perm16::one().nb_descents_ref() == 0); + CHECK(PPa.nb_descents_ref() == 1); + CHECK(PPb.nb_descents_ref() == 4); + CHECK(Perm16::one().nb_descents() == 0); } TEST_AGREES(Perm16Fixture, Perm16, nb_descents_ref, nb_descents, Plist, "[Perm16][039]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_cycles_ref", "[Perm16][040]") { - REQUIRE(Perm16::one().nb_cycles_ref() == 16); - REQUIRE(PPa.nb_cycles_ref() == 12); - REQUIRE(PPb.nb_cycles_ref() == 10); + CHECK(Perm16::one().nb_cycles_ref() == 16); + CHECK(PPa.nb_cycles_ref() == 12); + CHECK(PPb.nb_cycles_ref() == 10); } 
TEST_AGREES(Perm16Fixture, Perm16, nb_cycles_ref, nb_cycles, Plist, "[Perm16][041]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq_ref", "[Perm16][042]") { - REQUIRE(Perm16::one().left_weak_leq_ref(Perm16::one())); - REQUIRE(Perm16::one().left_weak_leq_ref(PPa)); - REQUIRE(Perm16::one().left_weak_leq_ref(PPb)); - REQUIRE(PPa.left_weak_leq_ref(PPa)); - REQUIRE(PPb.left_weak_leq_ref(PPb)); + CHECK(Perm16::one().left_weak_leq_ref(Perm16::one())); + CHECK(Perm16::one().left_weak_leq_ref(PPa)); + CHECK(Perm16::one().left_weak_leq_ref(PPb)); + CHECK(PPa.left_weak_leq_ref(PPa)); + CHECK(PPb.left_weak_leq_ref(PPb)); } TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq", "[Perm16][043]") { for (auto u : PlistSmall) { for (auto v : PlistSmall) { - REQUIRE(u.left_weak_leq(v) == u.left_weak_leq_ref(v)); - REQUIRE(u.left_weak_leq_length(v) == u.left_weak_leq_ref(v)); + CHECK(u.left_weak_leq(v) == u.left_weak_leq_ref(v)); + CHECK(u.left_weak_leq_length(v) == u.left_weak_leq_ref(v)); } } } diff --git a/tests/test_perm_all.cpp b/tests/test_perm_all.cpp index 33c210d6..ab8be3ae 100644 --- a/tests/test_perm_all.cpp +++ b/tests/test_perm_all.cpp @@ -60,21 +60,21 @@ template struct Fixture1 { PermGeneric<42>, PermGeneric<49>, (PermGeneric<350, uint32_t>) TEMPLATE_TEST_CASE_METHOD(Fixture1, "sizeof", "[PermAll][000]", PermTypes) { - REQUIRE(sizeof(Fixture1::zero) == + CHECK(sizeof(Fixture1::zero) == Fixture1::VectType::Size() * sizeof(Fixture1::zero[0])); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator==", "[PermAll][001]", PermTypes) { - REQUIRE(Fixture1::zero == Fixture1::zero); - REQUIRE(Fixture1::zero != Fixture1::V01); + CHECK(Fixture1::zero == Fixture1::zero); + CHECK(Fixture1::zero != Fixture1::V01); for (unsigned i = 0; i < Fixture1::Plist.size(); i++) { for (unsigned j = 0; j < Fixture1::Plist.size(); j++) { if (i == j) { - REQUIRE(Fixture1::Plist[i] == + CHECK(Fixture1::Plist[i] == Fixture1::Plist[j]); } else { - REQUIRE(Fixture1::Plist[i] != + CHECK(Fixture1::Plist[i] != Fixture1::Plist[j]); } } @@ -83,56 +83,56 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator==", "[PermAll][001]", PermTypes) { TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[] const", "[PermAll][002]", PermTypes) { - REQUIRE(Fixture1::czero[0] == 0u); - REQUIRE(Fixture1::czero[1] == 0u); + CHECK(Fixture1::czero[0] == 0u); + CHECK(Fixture1::czero[1] == 0u); if (Fixture1::czero.Size() > 12) { - REQUIRE(Fixture1::czero[12] == 0u); + CHECK(Fixture1::czero[12] == 0u); } if (Fixture1::czero.Size() > 15) { - REQUIRE(Fixture1::czero[15] == 0u); + CHECK(Fixture1::czero[15] == 0u); } - REQUIRE(Fixture1::cV01[0] == 0u); - REQUIRE(Fixture1::cV01[1] == 1u); - REQUIRE(Fixture1::cV01[2] == 0u); + CHECK(Fixture1::cV01[0] == 0u); + CHECK(Fixture1::cV01[1] == 1u); + CHECK(Fixture1::cV01[2] == 0u); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[]", "[PermAll][003]", PermTypes) { - REQUIRE(Fixture1::zero[0] == 0u); - REQUIRE(Fixture1::zero[1] == 0u); + CHECK(Fixture1::zero[0] == 0u); + CHECK(Fixture1::zero[1] == 0u); if (Fixture1::czero.Size() > 12) { - REQUIRE(Fixture1::zero[12] == 0u); + CHECK(Fixture1::zero[12] == 0u); } if (Fixture1::czero.Size() > 15) { - REQUIRE(Fixture1::zero[15] == 0u); + CHECK(Fixture1::zero[15] == 0u); } - REQUIRE(Fixture1::V01[0] == 0u); - REQUIRE(Fixture1::V01[1] == 1u); - REQUIRE(Fixture1::V01[2] == 0u); - REQUIRE(Fixture1::PPa[4] == 0u); - REQUIRE(Fixture1::PPa[5] == 5u); + CHECK(Fixture1::V01[0] == 0u); + CHECK(Fixture1::V01[1] == 1u); + CHECK(Fixture1::V01[2] == 0u); + CHECK(Fixture1::PPa[4] == 0u); + 
CHECK(Fixture1::PPa[5] == 5u); Fixture1::zero[0] = 3; - REQUIRE(Fixture1::zero[0] == 3u); - REQUIRE(Fixture1::zero[1] == 0u); + CHECK(Fixture1::zero[0] == 3u); + CHECK(Fixture1::zero[1] == 0u); if (Fixture1::czero.Size() > 12) { - REQUIRE(Fixture1::zero[12] == 0u); + CHECK(Fixture1::zero[12] == 0u); } if (Fixture1::czero.Size() > 15) { - REQUIRE(Fixture1::zero[15] == 0u); + CHECK(Fixture1::zero[15] == 0u); } Fixture1::PPa[2] = 0; - REQUIRE(Fixture1::PPa[1] == 2u); - REQUIRE(Fixture1::PPa[2] == 0u); - REQUIRE(Fixture1::PPa[3] == 4u); + CHECK(Fixture1::PPa[1] == 2u); + CHECK(Fixture1::PPa[2] == 0u); + CHECK(Fixture1::PPa[3] == 4u); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator<", "[AllPerm][004]", PermTypes) { for (unsigned i = 0; i < Fixture1::Plist.size(); i++) { for (unsigned j = 0; j < Fixture1::Plist.size(); j++) { if (i < j) { - REQUIRE(Fixture1::Plist[i] < + CHECK(Fixture1::Plist[i] < Fixture1::Plist[j]); } else { - REQUIRE(!(Fixture1::Plist[i] < + CHECK(!(Fixture1::Plist[i] < Fixture1::Plist[j])); } } @@ -143,116 +143,116 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "less_partial", "[AllPerm][005]", PermTypes) { for (auto p : Fixture1::Plist) { for (unsigned k = 0; k < Fixture1::PermType::size(); k++) { - REQUIRE(p.less_partial(p, k) == 0); + CHECK(p.less_partial(p, k) == 0); } } for (auto p : Fixture1::Plist) { for (auto q : Fixture1::Plist) { - REQUIRE(p.less_partial(q, 0) == 0); + CHECK(p.less_partial(q, 0) == 0); } } - REQUIRE(Fixture1::zero.less_partial(Fixture1::V01, 1) == + CHECK(Fixture1::zero.less_partial(Fixture1::V01, 1) == 0); - REQUIRE(Fixture1::V01.less_partial(Fixture1::zero, 1) == + CHECK(Fixture1::V01.less_partial(Fixture1::zero, 1) == 0); - REQUIRE(Fixture1::zero.less_partial(Fixture1::V01, 2) < + CHECK(Fixture1::zero.less_partial(Fixture1::V01, 2) < 0); - REQUIRE(Fixture1::V01.less_partial(Fixture1::zero, 2) > + CHECK(Fixture1::V01.less_partial(Fixture1::zero, 2) > 0); - REQUIRE(Fixture1::zero.less_partial(Fixture1::V10, 1) < + CHECK(Fixture1::zero.less_partial(Fixture1::V10, 1) < 0); - REQUIRE(Fixture1::zero.less_partial(Fixture1::V10, 2) < + CHECK(Fixture1::zero.less_partial(Fixture1::V10, 2) < 0); - REQUIRE(Fixture1::V10.less_partial(Fixture1::zero, 1) > + CHECK(Fixture1::V10.less_partial(Fixture1::zero, 1) > 0); - REQUIRE(Fixture1::V10.less_partial(Fixture1::zero, 2) > + CHECK(Fixture1::V10.less_partial(Fixture1::zero, 2) > 0); - REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 1) == + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 1) == 0); - REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 2) == + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 2) == 0); - REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 3) == + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 3) == 0); - REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 4) < + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 4) < 0); - REQUIRE(Fixture1::PPa.less_partial(Fixture1::PPb, 5) < + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 5) < 0); - REQUIRE(Fixture1::PPb.less_partial(Fixture1::PPa, 4) > + CHECK(Fixture1::PPb.less_partial(Fixture1::PPa, 4) > 0); - REQUIRE(Fixture1::PPb.less_partial(Fixture1::PPa, 5) > + CHECK(Fixture1::PPb.less_partial(Fixture1::PPa, 5) > 0); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_zero", "[AllPerm][006]", PermTypes) { - REQUIRE(Fixture1::zero.first_zero() == 0u); - REQUIRE(Fixture1::V01.first_zero() == 0u); - REQUIRE(Fixture1::PPa.first_zero() == 4u); - REQUIRE(Fixture1::V10.first_zero() == 1u); - REQUIRE(Fixture1::V1.first_zero() == + CHECK(Fixture1::zero.first_zero() == 
0u); + CHECK(Fixture1::V01.first_zero() == 0u); + CHECK(Fixture1::PPa.first_zero() == 4u); + CHECK(Fixture1::V10.first_zero() == 1u); + CHECK(Fixture1::V1.first_zero() == Fixture1::VectType::Size()); - REQUIRE(Fixture1::V10.first_zero(1) == + CHECK(Fixture1::V10.first_zero(1) == Fixture1::VectType::Size()); - REQUIRE(Fixture1::PPa.first_zero(5) == 4u); - REQUIRE(Fixture1::PPa.first_zero(3) == + CHECK(Fixture1::PPa.first_zero(5) == 4u); + CHECK(Fixture1::PPa.first_zero(3) == Fixture1::VectType::Size()); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "last_zero", "[AllPerm][007]", PermTypes) { - REQUIRE(Fixture1::zero.last_zero() == + CHECK(Fixture1::zero.last_zero() == Fixture1::VectType::Size() - 1); - REQUIRE(Fixture1::V01.last_zero() == + CHECK(Fixture1::V01.last_zero() == Fixture1::VectType::Size() - 1); - REQUIRE(Fixture1::PPa.last_zero() == 4u); - REQUIRE(Fixture1::V1.last_zero() == + CHECK(Fixture1::PPa.last_zero() == 4u); + CHECK(Fixture1::V1.last_zero() == Fixture1::VectType::Size()); - REQUIRE(Fixture1::V01.last_zero(1) == 0u); - REQUIRE(Fixture1::V10.last_zero(1) == + CHECK(Fixture1::V01.last_zero(1) == 0u); + CHECK(Fixture1::V10.last_zero(1) == Fixture1::VectType::Size()); - REQUIRE(Fixture1::PPa.last_zero(5) == 4u); - REQUIRE(Fixture1::PPa.last_zero(3) == + CHECK(Fixture1::PPa.last_zero(5) == 4u); + CHECK(Fixture1::PPa.last_zero(3) == Fixture1::VectType::Size()); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_non_zero", "[AllPerm][008]", PermTypes) { - REQUIRE(Fixture1::zero.first_non_zero() == + CHECK(Fixture1::zero.first_non_zero() == Fixture1::VectType::Size()); - REQUIRE(Fixture1::V01.first_non_zero() == 1u); - REQUIRE(Fixture1::PPa.first_non_zero() == 0u); - REQUIRE(Fixture1::V01.first_non_zero() == 1u); - REQUIRE(Fixture1::V01.first_non_zero(1) == + CHECK(Fixture1::V01.first_non_zero() == 1u); + CHECK(Fixture1::PPa.first_non_zero() == 0u); + CHECK(Fixture1::V01.first_non_zero() == 1u); + CHECK(Fixture1::V01.first_non_zero(1) == Fixture1::VectType::Size()); - REQUIRE(Fixture1::PPa.first_non_zero(5) == 0u); - REQUIRE(Fixture1::PPa.first_non_zero(3) == 0u); + CHECK(Fixture1::PPa.first_non_zero(5) == 0u); + CHECK(Fixture1::PPa.first_non_zero(3) == 0u); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "last_non_zero", "[AllPerm][009]", PermTypes) { - REQUIRE(Fixture1::zero.last_non_zero() == + CHECK(Fixture1::zero.last_non_zero() == Fixture1::VectType::Size()); - REQUIRE(Fixture1::V01.last_non_zero() == 1u); - REQUIRE(Fixture1::PPa.last_non_zero() == + CHECK(Fixture1::V01.last_non_zero() == 1u); + CHECK(Fixture1::PPa.last_non_zero() == Fixture1::VectType::Size() - 1); - REQUIRE(Fixture1::V01.last_non_zero() == 1u); - REQUIRE(Fixture1::V01.last_non_zero(1) == + CHECK(Fixture1::V01.last_non_zero() == 1u); + CHECK(Fixture1::V01.last_non_zero(1) == Fixture1::VectType::Size()); - REQUIRE(Fixture1::PPa.last_non_zero(5) == 3u); - REQUIRE(Fixture1::PPa.last_non_zero(3) == 2u); + CHECK(Fixture1::PPa.last_non_zero(5) == 3u); + CHECK(Fixture1::PPa.last_non_zero(3) == 2u); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "permuted", "[AllPerm][010]", PermTypes) { - REQUIRE(Fixture1::zero.permuted(Fixture1::zero) == + CHECK(Fixture1::zero.permuted(Fixture1::zero) == Fixture1::zero); - REQUIRE(Fixture1::V01.permuted(Fixture1::V01) == + CHECK(Fixture1::V01.permuted(Fixture1::V01) == Fixture1::V01); - REQUIRE(Fixture1::V10.permuted(Fixture1::V10) == + CHECK(Fixture1::V10.permuted(Fixture1::V10) == typename Fixture1::VectType({0, 1}, 1)); - REQUIRE(Fixture1::V10.permuted(Fixture1::V01) == + 
CHECK(Fixture1::V10.permuted(Fixture1::V01) == typename Fixture1::VectType({1, 0}, 1)); - REQUIRE(Fixture1::V01.permuted(Fixture1::V10) == + CHECK(Fixture1::V01.permuted(Fixture1::V10) == Fixture1::V10); } @@ -264,7 +264,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", for (size_t i = 1; i < Fixture1::VectType::Size(); i++) out2 << ", 0"; out2 << "}"; - REQUIRE(out.str() == out2.str()); + CHECK(out.str() == out2.str()); out.str(""); out2.str(""); @@ -273,7 +273,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", for (size_t i = 2; i < Fixture1::VectType::Size(); i++) out2 << ", 0"; out2 << "}"; - REQUIRE(out.str() == out2.str()); + CHECK(out.str() == out2.str()); out.str(""); out2.str(""); @@ -282,22 +282,22 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", for (size_t i = 5; i < Fixture1::VectType::Size(); i++) out2 << "," << std::setw(2) << i; out2 << "}"; - REQUIRE(out.str() == out2.str()); + CHECK(out.str() == out2.str()); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "is_permutation", "[AllPerm][012]", PermTypes) { - REQUIRE(!Fixture1::zero.is_permutation()); - REQUIRE(Fixture1::PPa.is_permutation()); - REQUIRE(!Fixture1::PPb.is_permutation()); - REQUIRE(Fixture1::RandPerm.is_permutation()); - REQUIRE(!typename Fixture1::VectType( + CHECK(!Fixture1::zero.is_permutation()); + CHECK(Fixture1::PPa.is_permutation()); + CHECK(!Fixture1::PPb.is_permutation()); + CHECK(Fixture1::RandPerm.is_permutation()); + CHECK(!typename Fixture1::VectType( {3, 1, 0, 9, 3, 10, 2, 11, 6, 7, 4, 8}) .is_permutation()); - REQUIRE(Fixture1::PPa.is_permutation(16)); - REQUIRE(!Fixture1::RandPerm.is_permutation(4)); - REQUIRE(Fixture1::PPa.is_permutation(5)); - REQUIRE(!Fixture1::PPa.is_permutation(4)); + CHECK(Fixture1::PPa.is_permutation(16)); + CHECK(!Fixture1::RandPerm.is_permutation(4)); + CHECK(Fixture1::PPa.is_permutation(5)); + CHECK(!Fixture1::PPa.is_permutation(4)); } template struct Fixture2 { @@ -330,17 +330,17 @@ TEMPLATE_TEST_CASE_METHOD(Fixture2, "[AllPerm][013]", PermTypes) { using Perm = typename Fixture2::PermType; for (auto x : Fixture2::Plist) { - REQUIRE(Perm(x).is_permutation()); + CHECK(Perm(x).is_permutation()); } - REQUIRE(Perm({}).is_permutation()); - REQUIRE(Perm({1, 0}).is_permutation()); - REQUIRE(Perm({1, 2, 0}).is_permutation()); - REQUIRE(!Perm({1, 2}).is_permutation()); + CHECK(Perm({}).is_permutation()); + CHECK(Perm({1, 0}).is_permutation()); + CHECK(Perm({1, 2, 0}).is_permutation()); + CHECK(!Perm({1, 2}).is_permutation()); } TEMPLATE_TEST_CASE_METHOD(Fixture2, "std::hash", "[AllPerm][014]", PermTypes) { for (auto x : Fixture2::Plist) { - REQUIRE(std::hash::PermType>()(x) != 0); + CHECK(std::hash::PermType>()(x) != 0); } } @@ -350,31 +350,31 @@ TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult_coxeter", "[AllPerm][015]", for (uint64_t i = 0; i < n; i++) { auto si = TestType::elementary_transposition(i); - REQUIRE(si != Fixture2::id); - REQUIRE(si * si == Fixture2::id); + CHECK(si != Fixture2::id); + CHECK(si * si == Fixture2::id); if (i + 1 < n) { auto si1 = TestType::elementary_transposition(i + 1); - REQUIRE(si * si1 * si == si1 * si * si1); + CHECK(si * si1 * si == si1 * si * si1); } for (uint64_t j = i + 2; j < n; j++) { auto sj = TestType::elementary_transposition(j); - REQUIRE(sj * si == si * sj); + CHECK(sj * si == si * sj); } } } TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult", "[AllPerm][016]", PermTypes) { for (auto x : Fixture2::Plist) { - REQUIRE(Fixture2::id * x == x); - REQUIRE(x * Fixture2::id == x); + 
CHECK(Fixture2::id * x == x); + CHECK(x * Fixture2::id == x); } - REQUIRE(Fixture2::RandPerm * Fixture2::RandPerm == + CHECK(Fixture2::RandPerm * Fixture2::RandPerm == TestType({5, 1, 3, 2, 8, 0, 9, 11, 6, 10, 7, 4})); for (auto x : Fixture2::Plist) { for (auto y : Fixture2::Plist) { for (auto z : Fixture2::Plist) { - REQUIRE((x * y) * z == x * (y * z)); + CHECK((x * y) * z == x * (y * z)); } } } @@ -382,15 +382,15 @@ TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult", "[AllPerm][016]", PermTypes) { TEMPLATE_TEST_CASE_METHOD(Fixture2, "inverse", "[AllPerm][017]", PermTypes) { for (auto x : Fixture2::Plist) { - REQUIRE(x.inverse() * x == Fixture2::id); - REQUIRE(x * x.inverse() == Fixture2::id); - REQUIRE(x.inverse().inverse() == x); + CHECK(x.inverse() * x == Fixture2::id); + CHECK(x * x.inverse() == Fixture2::id); + CHECK(x.inverse().inverse() == x); } } TEMPLATE_TEST_CASE_METHOD(Fixture2, "random", "[AllPerm][018]", PermTypes) { for (int i = 0; i < 100; i++) { - REQUIRE(Fixture2::PermType::random().is_permutation()); + CHECK(Fixture2::PermType::random().is_permutation()); } } From 9a488f7d499e9919a98a1f000c4c4ece9db11ca0 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 13:26:23 +0000 Subject: [PATCH 046/113] Removed unused type argument to TEST_AGREES macros --- tests/test_bmat8.cpp | 19 ++++++-------- tests/test_epu.cpp | 58 +++++++++++++++++++------------------------ tests/test_main.hpp | 37 ++++++++++++++------------- tests/test_perm16.cpp | 31 ++++++++--------------- 4 files changed, 63 insertions(+), 82 deletions(-) diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp index 4f424d51..7f486089 100644 --- a/tests/test_bmat8.cpp +++ b/tests/test_bmat8.cpp @@ -111,11 +111,8 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose", "[BMat8][001]") { } } -TEST_AGREES(BMat8Fixture, BMat8, transpose, transpose_mask, BMlist, - "[BMat8][002]"); - -TEST_AGREES(BMat8Fixture, BMat8, transpose, transpose_maskd, BMlist, - "[BMat8][003]"); +TEST_AGREES(BMat8Fixture, transpose, transpose_mask, BMlist, "[BMat8][002]"); +TEST_AGREES(BMat8Fixture, transpose, transpose_maskd, BMlist, "[BMat8][003]"); TEST_CASE_METHOD(BMat8Fixture, "BMat8::transpose2", "[BMat8][004]") { for (auto a : BMlist) { @@ -344,13 +341,13 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][012]") { CHECK(bmm1.row_space_size() == 6); } -TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size, BMlist, +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size, BMlist, "[BMat8][013]"); -TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_incl, +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl, BMlist, "[BMat8][014]"); -TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_incl1, +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl1, BMlist, "[BMat8][015]"); -TEST_AGREES(BMat8Fixture, BMat8, row_space_size_ref, row_space_size_bitset, +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_bitset, BMlist, "[BMat8][016]"); TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") { @@ -373,9 +370,9 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") { CHECK(m3.row_space_included(BMat8::one())); } -TEST_AGREES2(BMat8Fixture, BMat8, row_space_included, row_space_included_ref, +TEST_AGREES2(BMat8Fixture, row_space_included, row_space_included_ref, BMlist, "[BMat8][018]"); -TEST_AGREES2(BMat8Fixture, BMat8, row_space_included, row_space_included_bitset, +TEST_AGREES2(BMat8Fixture, 
row_space_included, row_space_included_bitset, BMlist, "[BMat8][019]"); TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") { diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index ef32c4a5..7d6941c3 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -402,10 +402,10 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { CHECK(horiz_sum_ref(Pc) == 203); } -TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") -TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum4, v, "[Epu8][026]") -TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") -TEST_AGREES_FUN(Fix, Epu8, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum4, v, "[Epu8][026]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { CHECK_THAT(partial_sums_ref(zero), Equals(zero)); @@ -442,12 +442,10 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums_gen, v, - "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums_round, v, - "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_sums_ref, partial_sums, v, +TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_round, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { CHECK(horiz_max_ref(zero) == 0); @@ -466,10 +464,10 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { CHECK(horiz_max_ref(Pc) == 43); } -TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") -TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max4, v, "[Epu8][035]") -TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max3, v, "[Epu8][036]") -TEST_AGREES_FUN(Fix, Epu8, horiz_max_ref, horiz_max, v, "[Epu8][037]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max4, v, "[Epu8][035]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max3, v, "[Epu8][036]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { CHECK_THAT(partial_max_ref(zero), Equals(zero)); @@ -488,11 +486,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max_gen, v, - "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max_round, v, - "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_max_ref, partial_max, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { CHECK(horiz_min_ref(zero) == 0); @@ -511,10 +507,10 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { CHECK(horiz_min_ref(Pc) 
== 5); } -TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min_gen, v, "[Epu8][034]") -TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min4, v, "[Epu8][035]") -TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min3, v, "[Epu8][036]") -TEST_AGREES_FUN(Fix, Epu8, horiz_min_ref, horiz_min, v, "[Epu8][037]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min_gen, v, "[Epu8][034]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min4, v, "[Epu8][035]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min3, v, "[Epu8][036]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min, v, "[Epu8][037]") TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { CHECK_THAT(partial_min_ref(zero), Equals(zero)); @@ -534,11 +530,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min_gen, v, - "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min_round, v, - "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, Epu8, partial_min_ref, partial_min, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]") +TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); @@ -559,11 +553,11 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { CHECK_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } -TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_cycle, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_popcount, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_arr, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16_gen, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, Epu8, eval16_ref, eval16, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_cycle, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_popcount, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_arr, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_gen, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16, v, "[Epu8][034]") TEST_CASE("Epu8::popcount4", "[Epu8][048]") { CHECK_THAT(popcount4, diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 2258f144..03bd811f 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -22,48 +22,47 @@ #include #include -// In the followings "type" is only used for documentation -#define TEST_AGREES_FUN(fixture, type, ref, fun, vct, tags) \ - TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ +#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK(fun(p) == ref(p)); \ + CHECK(fun(p) == ref(p)); \ } \ } -#define TEST_AGREES_FUN_EPU8(fixture, type, ref, fun, vct, tags) \ - TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ +#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK_THAT(fun(p), Equals(ref(p))); \ + CHECK_THAT(fun(p), Equals(ref(p))); \ } \ } -#define TEST_AGREES(fixture, type, ref, fun, vct, tags) \ - TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ +#define TEST_AGREES(fixture, 
ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK(p.fun() == p.ref()); \ + CHECK(p.fun() == p.ref()); \ } \ } -#define TEST_AGREES2(fixture, type, ref, fun, vct, tags) \ - TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ +#define TEST_AGREES2(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p1 : vct) { \ for (auto p2 : vct) { \ - CHECK(p1.fun(p2) == p1.ref(p2)); \ + CHECK(p1.fun(p2) == p1.ref(p2)); \ } \ } \ } -#define TEST_AGREES_EPU8(fixture, type, ref, fun, vct, tags) \ - TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ +#define TEST_AGREES_EPU8(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK_THAT(p.fun(), Equals(p.ref())); \ + CHECK_THAT(p.fun(), Equals(p.ref())); \ } \ } -#define TEST_AGREES_EPU8_FUN(fixture, type, ref, fun, vct, tags) \ - TEST_CASE_METHOD(fixture, #type "::" #ref " == " #type "::" #fun, tags) { \ +#define TEST_AGREES_EPU8_FUN(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK_THAT(fun(p), Equals(ref(p))); \ + CHECK_THAT(fun(p), Equals(ref(p))); \ } \ } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 188d29de..856b6344 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -389,8 +389,7 @@ TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { } #ifdef SIMDE_X86_SSE4_2_NATIVE -TEST_AGREES(Perm16Fixture, PPerm16, inverse_ref, inverse_find, PPlist, - "[PPerm16][021]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_find, PPlist, "[PPerm16][021]"); #endif TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { @@ -471,14 +470,10 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { } } -TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_find, Plist, - "[Perm16][028]"); -TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_pow, Plist, - "[Perm16][029]"); -TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse_cycl, Plist, - "[Perm16][030]"); -TEST_AGREES(Perm16Fixture, Perm16, inverse_ref, inverse, Plist, - "[Perm16][031]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_find, Plist, "[Perm16][028]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_pow, Plist, "[Perm16][029]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_cycl, Plist, "[Perm16][030]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse, Plist, "[Perm16][031]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { CHECK_THAT(Perm16::one().lehmer(), Equals(zero)); @@ -488,10 +483,8 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { Equals(epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); } -TEST_AGREES_EPU8(Perm16Fixture, Perm16, lehmer_ref, lehmer_arr, Plist, - "[Perm16][033]"); -TEST_AGREES_EPU8(Perm16Fixture, Perm16, lehmer_ref, lehmer, Plist, - "[Perm16][034]"); +TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer_arr, Plist, "[Perm16][033]"); +TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer, Plist, "[Perm16][034]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::length_ref", "[Perm16][035]") { CHECK(Perm16::one().length() == 0); @@ -499,9 +492,8 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::length_ref", "[Perm16][035]") { CHECK(PPb.length() == 10); } -TEST_AGREES(Perm16Fixture, Perm16, length_ref, length_arr, Plist, - "[Perm16][036]"); 
-TEST_AGREES(Perm16Fixture, Perm16, length_ref, length, Plist, "[Perm16][037]"); +TEST_AGREES(Perm16Fixture, length_ref, length_arr, Plist, "[Perm16][036]"); +TEST_AGREES(Perm16Fixture, length_ref, length, Plist, "[Perm16][037]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][038]") { CHECK(Perm16::one().nb_descents_ref() == 0); @@ -510,7 +502,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][038]") { CHECK(Perm16::one().nb_descents() == 0); } -TEST_AGREES(Perm16Fixture, Perm16, nb_descents_ref, nb_descents, Plist, +TEST_AGREES(Perm16Fixture, nb_descents_ref, nb_descents, Plist, "[Perm16][039]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_cycles_ref", "[Perm16][040]") { @@ -519,8 +511,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_cycles_ref", "[Perm16][040]") { CHECK(PPb.nb_cycles_ref() == 10); } -TEST_AGREES(Perm16Fixture, Perm16, nb_cycles_ref, nb_cycles, Plist, - "[Perm16][041]"); +TEST_AGREES(Perm16Fixture, nb_cycles_ref, nb_cycles, Plist, "[Perm16][041]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq_ref", "[Perm16][042]") { CHECK(Perm16::one().left_weak_leq_ref(Perm16::one())); From 97777b8505e3565305062f25437d7be844b2c04e Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 13:23:35 +0000 Subject: [PATCH 047/113] Change Size -> size mem fn --- include/vect16.hpp | 14 ++--- include/vect_generic.hpp | 2 +- tests/test_perm_all.cpp | 110 +++++++++++++++++++-------------------- 3 files changed, 61 insertions(+), 65 deletions(-) diff --git a/include/vect16.hpp b/include/vect16.hpp index cfc54f26..ff72a4c9 100644 --- a/include/vect16.hpp +++ b/include/vect16.hpp @@ -24,7 +24,7 @@ namespace HPCombi { struct alignas(16) Vect16 { - static constexpr size_t Size() { return 16; } + static constexpr size_t size() { return 16; } using array = decltype(Epu8)::array; epu8 v; @@ -46,23 +46,23 @@ struct alignas(16) Vect16 { const uint8_t &operator[](uint64_t i) const { return as_array()[i]; } uint8_t &operator[](uint64_t i) { return as_array()[i]; } - size_t first_diff(const Vect16 &u, size_t bound = Size()) const { + size_t first_diff(const Vect16 &u, size_t bound = size()) const { return HPCombi::first_diff(v, u.v, bound); } - size_t last_diff(const Vect16 &u, size_t bound = Size()) const { + size_t last_diff(const Vect16 &u, size_t bound = size()) const { return HPCombi::last_diff(v, u.v, bound); } - size_t first_zero(size_t bound = Size()) const { + size_t first_zero(size_t bound = size()) const { return HPCombi::first_zero(v, bound); } - size_t last_zero(size_t bound = Size()) const { + size_t last_zero(size_t bound = size()) const { return HPCombi::last_zero(v, bound); } - size_t first_non_zero(size_t bound = Size()) const { + size_t first_non_zero(size_t bound = size()) const { return HPCombi::first_non_zero(v, bound); } - size_t last_non_zero(size_t bound = Size()) const { + size_t last_non_zero(size_t bound = size()) const { return HPCombi::last_non_zero(v, bound); } diff --git a/include/vect_generic.hpp b/include/vect_generic.hpp index e7806ad6..40d6309a 100644 --- a/include/vect_generic.hpp +++ b/include/vect_generic.hpp @@ -35,7 +35,7 @@ std::array sorted_vect(std::array v) { /** A generic class for combinatorial integer vectors. 
*/ template struct VectGeneric { - static constexpr size_t Size() { return _Size; } + static constexpr size_t size() { return _Size; } using array = std::array; array v; diff --git a/tests/test_perm_all.cpp b/tests/test_perm_all.cpp index ab8be3ae..3264ed4b 100644 --- a/tests/test_perm_all.cpp +++ b/tests/test_perm_all.cpp @@ -48,7 +48,7 @@ template struct Fixture1 { const std::vector Vlist; // some tests assume that the size is at least 6 - static_assert(VectType::Size() >= 6, "Minimum size for tests"); + static_assert(VectType::size() >= 6, "Minimum size for tests"); }; } // namespace @@ -61,8 +61,8 @@ template struct Fixture1 { TEMPLATE_TEST_CASE_METHOD(Fixture1, "sizeof", "[PermAll][000]", PermTypes) { CHECK(sizeof(Fixture1::zero) == - Fixture1::VectType::Size() * - sizeof(Fixture1::zero[0])); + Fixture1::VectType::size() * + sizeof(Fixture1::zero[0])); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator==", "[PermAll][001]", PermTypes) { @@ -72,10 +72,10 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator==", "[PermAll][001]", PermTypes) { for (unsigned j = 0; j < Fixture1::Plist.size(); j++) { if (i == j) { CHECK(Fixture1::Plist[i] == - Fixture1::Plist[j]); + Fixture1::Plist[j]); } else { CHECK(Fixture1::Plist[i] != - Fixture1::Plist[j]); + Fixture1::Plist[j]); } } } @@ -85,10 +85,10 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[] const", "[PermAll][002]", PermTypes) { CHECK(Fixture1::czero[0] == 0u); CHECK(Fixture1::czero[1] == 0u); - if (Fixture1::czero.Size() > 12) { + if (Fixture1::czero.size() > 12) { CHECK(Fixture1::czero[12] == 0u); } - if (Fixture1::czero.Size() > 15) { + if (Fixture1::czero.size() > 15) { CHECK(Fixture1::czero[15] == 0u); } CHECK(Fixture1::cV01[0] == 0u); @@ -99,10 +99,10 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[] const", "[PermAll][002]", TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[]", "[PermAll][003]", PermTypes) { CHECK(Fixture1::zero[0] == 0u); CHECK(Fixture1::zero[1] == 0u); - if (Fixture1::czero.Size() > 12) { + if (Fixture1::czero.size() > 12) { CHECK(Fixture1::zero[12] == 0u); } - if (Fixture1::czero.Size() > 15) { + if (Fixture1::czero.size() > 15) { CHECK(Fixture1::zero[15] == 0u); } CHECK(Fixture1::V01[0] == 0u); @@ -113,10 +113,10 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator[]", "[PermAll][003]", PermTypes) { Fixture1::zero[0] = 3; CHECK(Fixture1::zero[0] == 3u); CHECK(Fixture1::zero[1] == 0u); - if (Fixture1::czero.Size() > 12) { + if (Fixture1::czero.size() > 12) { CHECK(Fixture1::zero[12] == 0u); } - if (Fixture1::czero.Size() > 15) { + if (Fixture1::czero.size() > 15) { CHECK(Fixture1::zero[15] == 0u); } Fixture1::PPa[2] = 0; @@ -130,10 +130,10 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator<", "[AllPerm][004]", PermTypes) { for (unsigned j = 0; j < Fixture1::Plist.size(); j++) { if (i < j) { CHECK(Fixture1::Plist[i] < - Fixture1::Plist[j]); + Fixture1::Plist[j]); } else { CHECK(!(Fixture1::Plist[i] < - Fixture1::Plist[j])); + Fixture1::Plist[j])); } } } @@ -153,37 +153,33 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "less_partial", "[AllPerm][005]", } CHECK(Fixture1::zero.less_partial(Fixture1::V01, 1) == - 0); + 0); CHECK(Fixture1::V01.less_partial(Fixture1::zero, 1) == - 0); + 0); CHECK(Fixture1::zero.less_partial(Fixture1::V01, 2) < - 0); + 0); CHECK(Fixture1::V01.less_partial(Fixture1::zero, 2) > - 0); + 0); CHECK(Fixture1::zero.less_partial(Fixture1::V10, 1) < - 0); + 0); CHECK(Fixture1::zero.less_partial(Fixture1::V10, 2) < - 0); + 0); CHECK(Fixture1::V10.less_partial(Fixture1::zero, 1) > - 0); + 0); 
CHECK(Fixture1::V10.less_partial(Fixture1::zero, 2) > - 0); + 0); CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 1) == - 0); + 0); CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 2) == - 0); + 0); CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 3) == - 0); - CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 4) < - 0); - CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 5) < - 0); - CHECK(Fixture1::PPb.less_partial(Fixture1::PPa, 4) > - 0); - CHECK(Fixture1::PPb.less_partial(Fixture1::PPa, 5) > - 0); + 0); + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 4) < 0); + CHECK(Fixture1::PPa.less_partial(Fixture1::PPb, 5) < 0); + CHECK(Fixture1::PPb.less_partial(Fixture1::PPa, 4) > 0); + CHECK(Fixture1::PPb.less_partial(Fixture1::PPa, 5) > 0); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_zero", "[AllPerm][006]", PermTypes) { @@ -192,39 +188,39 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_zero", "[AllPerm][006]", PermTypes) { CHECK(Fixture1::PPa.first_zero() == 4u); CHECK(Fixture1::V10.first_zero() == 1u); CHECK(Fixture1::V1.first_zero() == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::V10.first_zero(1) == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::PPa.first_zero(5) == 4u); CHECK(Fixture1::PPa.first_zero(3) == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "last_zero", "[AllPerm][007]", PermTypes) { CHECK(Fixture1::zero.last_zero() == - Fixture1::VectType::Size() - 1); + Fixture1::VectType::size() - 1); CHECK(Fixture1::V01.last_zero() == - Fixture1::VectType::Size() - 1); + Fixture1::VectType::size() - 1); CHECK(Fixture1::PPa.last_zero() == 4u); CHECK(Fixture1::V1.last_zero() == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::V01.last_zero(1) == 0u); CHECK(Fixture1::V10.last_zero(1) == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::PPa.last_zero(5) == 4u); CHECK(Fixture1::PPa.last_zero(3) == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_non_zero", "[AllPerm][008]", PermTypes) { CHECK(Fixture1::zero.first_non_zero() == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::V01.first_non_zero() == 1u); CHECK(Fixture1::PPa.first_non_zero() == 0u); CHECK(Fixture1::V01.first_non_zero() == 1u); CHECK(Fixture1::V01.first_non_zero(1) == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::PPa.first_non_zero(5) == 0u); CHECK(Fixture1::PPa.first_non_zero(3) == 0u); } @@ -232,28 +228,28 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "first_non_zero", "[AllPerm][008]", TEMPLATE_TEST_CASE_METHOD(Fixture1, "last_non_zero", "[AllPerm][009]", PermTypes) { CHECK(Fixture1::zero.last_non_zero() == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::V01.last_non_zero() == 1u); CHECK(Fixture1::PPa.last_non_zero() == - Fixture1::VectType::Size() - 1); + Fixture1::VectType::size() - 1); CHECK(Fixture1::V01.last_non_zero() == 1u); CHECK(Fixture1::V01.last_non_zero(1) == - Fixture1::VectType::Size()); + Fixture1::VectType::size()); CHECK(Fixture1::PPa.last_non_zero(5) == 3u); CHECK(Fixture1::PPa.last_non_zero(3) == 2u); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "permuted", "[AllPerm][010]", PermTypes) { CHECK(Fixture1::zero.permuted(Fixture1::zero) == - Fixture1::zero); + Fixture1::zero); CHECK(Fixture1::V01.permuted(Fixture1::V01) == - Fixture1::V01); + Fixture1::V01); CHECK(Fixture1::V10.permuted(Fixture1::V10) == - 
typename Fixture1::VectType({0, 1}, 1)); + typename Fixture1::VectType({0, 1}, 1)); CHECK(Fixture1::V10.permuted(Fixture1::V01) == - typename Fixture1::VectType({1, 0}, 1)); + typename Fixture1::VectType({1, 0}, 1)); CHECK(Fixture1::V01.permuted(Fixture1::V10) == - Fixture1::V10); + Fixture1::V10); } TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", @@ -261,7 +257,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", std::ostringstream out, out2; out << Fixture1::zero; out2 << "{ 0"; - for (size_t i = 1; i < Fixture1::VectType::Size(); i++) + for (size_t i = 1; i < Fixture1::VectType::size(); i++) out2 << ", 0"; out2 << "}"; CHECK(out.str() == out2.str()); @@ -270,7 +266,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", out2.str(""); out << Fixture1::V01; out2 << "{ 0, 1"; - for (size_t i = 2; i < Fixture1::VectType::Size(); i++) + for (size_t i = 2; i < Fixture1::VectType::size(); i++) out2 << ", 0"; out2 << "}"; CHECK(out.str() == out2.str()); @@ -279,7 +275,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "operator_insert", "[AllPerm][011]", out2.str(""); out << Fixture1::PPa; out2 << "{ 1, 2, 3, 4, 0"; - for (size_t i = 5; i < Fixture1::VectType::Size(); i++) + for (size_t i = 5; i < Fixture1::VectType::size(); i++) out2 << "," << std::setw(2) << i; out2 << "}"; CHECK(out.str() == out2.str()); @@ -292,8 +288,8 @@ TEMPLATE_TEST_CASE_METHOD(Fixture1, "is_permutation", "[AllPerm][012]", CHECK(!Fixture1::PPb.is_permutation()); CHECK(Fixture1::RandPerm.is_permutation()); CHECK(!typename Fixture1::VectType( - {3, 1, 0, 9, 3, 10, 2, 11, 6, 7, 4, 8}) - .is_permutation()); + {3, 1, 0, 9, 3, 10, 2, 11, 6, 7, 4, 8}) + .is_permutation()); CHECK(Fixture1::PPa.is_permutation(16)); CHECK(!Fixture1::RandPerm.is_permutation(4)); CHECK(Fixture1::PPa.is_permutation(5)); @@ -346,7 +342,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture2, "std::hash", "[AllPerm][014]", PermTypes) { TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult_coxeter", "[AllPerm][015]", PermTypes) { - const size_t n = TestType::Size() - 1; + const size_t n = TestType::size() - 1; for (uint64_t i = 0; i < n; i++) { auto si = TestType::elementary_transposition(i); @@ -369,7 +365,7 @@ TEMPLATE_TEST_CASE_METHOD(Fixture2, "mult", "[AllPerm][016]", PermTypes) { CHECK(x * Fixture2::id == x); } CHECK(Fixture2::RandPerm * Fixture2::RandPerm == - TestType({5, 1, 3, 2, 8, 0, 9, 11, 6, 10, 7, 4})); + TestType({5, 1, 3, 2, 8, 0, 9, 11, 6, 10, 7, 4})); for (auto x : Fixture2::Plist) { for (auto y : Fixture2::Plist) { From b9375fdc80a9b73d3f2f4e0d867d8a5c15e1a200 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Sun, 29 Oct 2023 13:26:40 +0000 Subject: [PATCH 048/113] Remove names starting with underscore --- include/perm_generic.hpp | 8 ++-- include/perm_generic_impl.hpp | 73 +++++++++++++++-------------- include/vect_generic.hpp | 86 +++++++++++++++++------------------ 3 files changed, 83 insertions(+), 84 deletions(-) diff --git a/include/perm_generic.hpp b/include/perm_generic.hpp index d9c0b94f..3420daa7 100644 --- a/include/perm_generic.hpp +++ b/include/perm_generic.hpp @@ -24,11 +24,11 @@ namespace HPCombi { -template -struct PermGeneric : public VectGeneric<_Size, Expo> { - using vect = VectGeneric<_Size, Expo>; +template +struct PermGeneric : public VectGeneric { + using vect = VectGeneric; - static constexpr size_t size() { return _Size; } + static constexpr size_t size() { return Size; } PermGeneric() = default; PermGeneric(const vect v) : vect(v) {} // NOLINT diff --git a/include/perm_generic_impl.hpp b/include/perm_generic_impl.hpp index 19f3ee49..ba145747 100644 --- a/include/perm_generic_impl.hpp +++ b/include/perm_generic_impl.hpp @@ -15,34 +15,34 @@ namespace HPCombi { -template -PermGeneric<_Size, Expo>::PermGeneric(std::initializer_list il) { - assert(il.size() <= _Size); +template +PermGeneric::PermGeneric(std::initializer_list il) { + assert(il.size() <= Size); std::copy(il.begin(), il.end(), this->v.begin()); - for (Expo i = il.size(); i < _Size; i++) + for (Expo i = il.size(); i < Size; i++) this->v[i] = i; } -template -PermGeneric<_Size, Expo> -PermGeneric<_Size, Expo>::elementary_transposition(uint64_t i) { - assert(i < _Size); +template +PermGeneric +PermGeneric::elementary_transposition(uint64_t i) { + assert(i < Size); PermGeneric res{{}}; res[i] = i + 1; res[i + 1] = i; return res; } -template -PermGeneric<_Size, Expo> PermGeneric<_Size, Expo>::inverse() const { +template +PermGeneric PermGeneric::inverse() const { PermGeneric res; - for (uint64_t i = 0; i < _Size; i++) + for (uint64_t i = 0; i < Size; i++) res[this->v[i]] = i; return res; } -template -PermGeneric<_Size, Expo> PermGeneric<_Size, Expo>::random() { +template +PermGeneric PermGeneric::random() { static std::random_device rd; static std::mt19937 g(rd()); @@ -51,41 +51,40 @@ PermGeneric<_Size, Expo> PermGeneric<_Size, Expo>::random() { return res; } -template -typename PermGeneric<_Size, Expo>::vect -PermGeneric<_Size, Expo>::lehmer() const { +template +typename PermGeneric::vect PermGeneric::lehmer() const { vect res{}; - for (size_t i = 0; i < _Size; i++) - for (size_t j = i + 1; j < _Size; j++) + for (size_t i = 0; i < Size; i++) + for (size_t j = i + 1; j < Size; j++) if (this->v[i] > this->v[j]) res[i]++; return res; } -template -uint64_t PermGeneric<_Size, Expo>::length() const { +template +uint64_t PermGeneric::length() const { uint64_t res = 0; - for (size_t i = 0; i < _Size; i++) - for (size_t j = i + 1; j < _Size; j++) + for (size_t i = 0; i < Size; i++) + for (size_t j = i + 1; j < Size; j++) if (this->v[i] > this->v[j]) res++; return res; } -template -uint64_t PermGeneric<_Size, Expo>::nb_descents() const { +template +uint64_t PermGeneric::nb_descents() const { uint64_t res = 0; - for (size_t i = 0; i < _Size - 1; i++) + for (size_t i = 0; i < Size - 1; i++) if (this->v[i] > this->v[i + 1]) res++; return res; } -template -uint64_t PermGeneric<_Size, Expo>::nb_cycles() const { - std::array b{}; +template +uint64_t PermGeneric::nb_cycles() const { + std::array b{}; uint64_t c = 0; - for (size_t i = 0; i < _Size; i++) { + for (size_t i = 0; i < Size; i++) { if (!b[i]) { for (size_t 
j = i; !b[j]; j = this->v[j]) b[j] = true; @@ -95,10 +94,10 @@ uint64_t PermGeneric<_Size, Expo>::nb_cycles() const { return c; } -template -bool PermGeneric<_Size, Expo>::left_weak_leq(PermGeneric other) const { - for (size_t i = 0; i < _Size; i++) { - for (size_t j = i + 1; j < _Size; j++) { +template +bool PermGeneric::left_weak_leq(PermGeneric other) const { + for (size_t i = 0; i < Size; i++) { + for (size_t j = i + 1; j < Size; j++) { if ((this->v[i] > this->v[j]) && (other[i] < other[j])) return false; } @@ -110,10 +109,10 @@ bool PermGeneric<_Size, Expo>::left_weak_leq(PermGeneric other) const { namespace std { -template -struct hash> { - size_t operator()(const HPCombi::PermGeneric<_Size, Expo> &ar) const { - return hash>()(ar); +template +struct hash> { + size_t operator()(const HPCombi::PermGeneric &ar) const { + return hash>()(ar); } }; diff --git a/include/vect_generic.hpp b/include/vect_generic.hpp index 40d6309a..d549615e 100644 --- a/include/vect_generic.hpp +++ b/include/vect_generic.hpp @@ -34,15 +34,15 @@ std::array sorted_vect(std::array v) { /** A generic class for combinatorial integer vectors. */ -template struct VectGeneric { - static constexpr size_t size() { return _Size; } - using array = std::array; +template struct VectGeneric { + static constexpr size_t size() { return Size; } + using array = std::array; array v; VectGeneric() = default; - VectGeneric(const std::array &_v) : v(_v) {} // NOLINT + VectGeneric(const std::array &_v) : v(_v) {} // NOLINT VectGeneric(std::initializer_list il, Expo def = 0) { - assert(il.size() <= _Size); + assert(il.size() <= Size); std::copy(il.begin(), il.end(), v.begin()); std::fill(v.begin() + il.size(), v.end(), def); } @@ -56,20 +56,20 @@ template struct VectGeneric { Expo operator[](uint64_t i) const { return v[i]; } Expo &operator[](uint64_t i) { return v[i]; } - size_t first_diff(const VectGeneric &u, size_t bound = _Size) const { + size_t first_diff(const VectGeneric &u, size_t bound = Size) const { for (size_t i = 0; i < bound; i++) if (v[i] != u[i]) return i; - return _Size; + return Size; } - size_t last_diff(const VectGeneric &u, size_t bound = _Size) const { + size_t last_diff(const VectGeneric &u, size_t bound = Size) const { while (bound != 0) { --bound; if (u[bound] != v[bound]) return bound; } - return _Size; + return Size; } using value_type = Expo; @@ -81,25 +81,25 @@ template struct VectGeneric { const_iterator end() const { return v.end(); } bool operator==(const VectGeneric &u) const { - return first_diff(u) == _Size; + return first_diff(u) == Size; } bool operator!=(const VectGeneric &u) const { - return first_diff(u) != _Size; + return first_diff(u) != Size; } bool operator<(const VectGeneric &u) const { uint64_t diff = first_diff(u); - return (diff != _Size) && v[diff] < u[diff]; + return (diff != Size) && v[diff] < u[diff]; } int8_t less_partial(const VectGeneric &u, int k) const { uint64_t diff = first_diff(u, k); - return (diff == _Size) ? 0 : int8_t(v[diff]) - int8_t(u[diff]); + return (diff == Size) ? 
0 : int8_t(v[diff]) - int8_t(u[diff]); } VectGeneric permuted(const VectGeneric &u) const { VectGeneric res; - for (uint64_t i = 0; i < _Size; i++) + for (uint64_t i = 0; i < Size; i++) res[i] = v[u[i]]; return res; } @@ -107,7 +107,7 @@ template struct VectGeneric { void sort() { std::sort(v.begin(), v.end()); } bool is_sorted() const { - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) if (v[i - 1] < v[i]) return false; return true; @@ -117,43 +117,43 @@ template struct VectGeneric { static std::random_device rd; static std::mt19937 g(rd()); - VectGeneric<_Size, Expo> res = VectGeneric<_Size, Expo>(0, 0); + VectGeneric res = VectGeneric(0, 0); std::shuffle(res.begin(), res.end(), g); return res; } - uint64_t first_non_zero(size_t bound = _Size) const { + uint64_t first_non_zero(size_t bound = Size) const { for (uint64_t i = 0; i < bound; i++) if (v[i] != 0) return i; - return _Size; + return Size; } - uint64_t first_zero(size_t bound = _Size) const { + uint64_t first_zero(size_t bound = Size) const { for (uint64_t i = 0; i < bound; i++) if (v[i] == 0) return i; - return _Size; + return Size; } - uint64_t last_non_zero(size_t bound = _Size) const { + uint64_t last_non_zero(size_t bound = Size) const { for (int64_t i = bound - 1; i >= 0; i--) if (v[i] != 0) return i; - return _Size; + return Size; } - uint64_t last_zero(size_t bound = _Size) const { + uint64_t last_zero(size_t bound = Size) const { for (int64_t i = bound - 1; i >= 0; i--) if (v[i] == 0) return i; - return _Size; + return Size; } - bool is_permutation(const size_t k = _Size) const { + bool is_permutation(const size_t k = Size) const { auto temp = v; std::sort(temp.begin(), temp.end()); - for (uint64_t i = 0; i < _Size; i++) + for (uint64_t i = 0; i < Size; i++) if (temp[i] != i) return false; - for (uint64_t i = k; i < _Size; i++) + for (uint64_t i = k; i < Size; i++) if (v[i] != i) return false; return true; @@ -161,51 +161,51 @@ template struct VectGeneric { uint64_t horiz_sum() const { Expo res = 0; - for (uint64_t i = 0; i < _Size; i++) + for (uint64_t i = 0; i < Size; i++) res += v[i]; return res; } VectGeneric partial_sums() const { auto res = *this; - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) res[i] += res[i - 1]; return res; } void partial_sums_inplace() { - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) v[i] += v[i - 1]; } Expo horiz_max() const { Expo res = v[0]; - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) res = std::max(res, v[i]); return res; } void partial_max_inplace() { - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) v[i] = std::max(v[i], v[i - 1]); } Expo horiz_min() const { Expo res = v[0]; - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) res = std::min(res, v[i]); return res; } void partial_min_inplace() { - for (uint64_t i = 1; i < _Size; i++) + for (uint64_t i = 1; i < Size; i++) v[i] = std::min(v[i], v[i - 1]); } VectGeneric eval() const { VectGeneric res{}; - for (size_t i = 0; i < _Size; i++) - if (v[i] < _Size) + for (size_t i = 0; i < Size; i++) + if (v[i] < Size) res[v[i]]++; return res; } @@ -218,21 +218,21 @@ static_assert(std::is_trivial>(), namespace std { -template +template std::ostream &operator<<(std::ostream &stream, - const HPCombi::VectGeneric<_Size, Expo> &v) { + const HPCombi::VectGeneric &v) { stream << "{" << std::setw(2) << unsigned(v[0]); - for (unsigned i = 1; i < _Size; ++i) + for (unsigned i = 1; i < 
Size; ++i) stream << "," << std::setw(2) << unsigned(v[i]); stream << "}"; return stream; } -template -struct hash> { - size_t operator()(const HPCombi::VectGeneric<_Size, Expo> &ar) const { +template +struct hash> { + size_t operator()(const HPCombi::VectGeneric &ar) const { size_t h = 0; - for (size_t i = 0; i < _Size; i++) + for (size_t i = 0; i < Size; i++) h = hash()(ar[i]) + (h << 6) + (h << 16) - h; return h; } From 50abc5fef54fe65327879da329cc3cd4f5989d4a Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 14:08:39 +0000 Subject: [PATCH 049/113] Fix spelling --- .codespellrc | 3 +++ .github/workflows/codespell.yml | 9 +++++++ include/bmat8.hpp | 8 +++--- include/epu.hpp | 44 ++++++++++++++++----------------- include/epu_impl.hpp | 2 +- include/power.hpp | 2 +- 6 files changed, 40 insertions(+), 28 deletions(-) create mode 100644 .codespellrc create mode 100644 .github/workflows/codespell.yml diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 00000000..59e07a79 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,3 @@ +[codespell] +skip = ./include/simde,./.git,./benchmark/python,./experiments +ignore-words-list=shft diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 00000000..150c94cc --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,9 @@ +name: codespell +on: [pull_request, workflow_dispatch] + +jobs: + codespell: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: codespell-project/actions-codespell@v1.0 diff --git a/include/bmat8.hpp b/include/bmat8.hpp index 95ebce2f..b3b107cd 100644 --- a/include/bmat8.hpp +++ b/include/bmat8.hpp @@ -80,7 +80,7 @@ class BMat8 { //! A constructor. //! - //! This is the copy assignement constructor. + //! This is the copy assignment constructor. BMat8 &operator=(BMat8 const &) = default; //! A constructor. @@ -161,7 +161,7 @@ class BMat8 { //! Returns the matrix product of \c this and the transpose of \p that //! //! This method returns the standard matrix product (over the - //! boolean semiring) of two BMat8 objects. This is faster than tranposing + //! boolean semiring) of two BMat8 objects. This is faster than transposing //! that and calling the product of \c this with it. Implementation uses //! vector instructions. BMat8 mult_transpose(BMat8 const &that) const; @@ -176,14 +176,14 @@ class BMat8 { //! Returns a canonical basis of the row space of \c this //! - //! Any two matrix with the same row space are garanteed to have the same + //! Any two matrix with the same row space are guaranteed to have the same //! row space basis. This is a fast implementation using vector //! instructions to compute in parallel the union of the other rows //! included in a given one. BMat8 row_space_basis() const; //! Returns a canonical basis of the col space of \c this //! - //! Any two matrix with the same column row space are garanteed to have + //! Any two matrix with the same column row space are guaranteed to have //! the same column space basis. Uses #row_space_basis and #transpose. BMat8 col_space_basis() const { return transpose().row_space_basis().transpose(); diff --git a/include/epu.hpp b/include/epu.hpp index 8e9fb289..a9f5433d 100644 --- a/include/epu.hpp +++ b/include/epu.hpp @@ -118,14 +118,14 @@ constexpr epu8 popcount4 = Epu8([](uint8_t i) { /** Cast a #HPCombi::epu8 to a c++ \c std::array * - * This is usually faster for algorithm using a lot of indexed acces. 
+ * This is usually faster for algorithm using a lot of indexed access. */ inline decltype(Epu8)::array &as_array(epu8 &v) { return reinterpret_cast(v); } /** Cast a constant #HPCombi::epu8 to a C++ \c std::array * - * This is usually faster for algorithm using a lot of indexed acces. + * This is usually faster for algorithm using a lot of indexed access. */ inline const decltype(Epu8)::array &as_array(const epu8 &v) { return reinterpret_cast(v); @@ -140,7 +140,7 @@ inline epu8 from_array(decltype(Epu8)::array a) { /** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric * - * This is usually faster for algorithm using a lot of indexed acces. + * This is usually faster for algorithm using a lot of indexed access. */ inline VectGeneric<16> &as_VectGeneric(epu8 &v) { return reinterpret_cast &>(as_array(v)); @@ -148,7 +148,7 @@ inline VectGeneric<16> &as_VectGeneric(epu8 &v) { /** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric * - * This is usually faster for algorithm using a lot of indexed acces. + * This is usually faster for algorithm using a lot of indexed access. */ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) { return reinterpret_cast &>(as_array(v)); @@ -239,7 +239,7 @@ inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b); */ inline epu8 permutation_of_ref(epu8 a, epu8 b); /** @copydoc common_permutation_of - @par Algorithm: architecture dependant + @par Algorithm: architecture dependent */ inline epu8 permutation_of(epu8 a, epu8 b); @@ -259,7 +259,7 @@ inline epu8 random_epu8(uint16_t bnd); * @details * @param a: supposed to be sorted * @param repl: the value replacing the duplicate entries (default to 0) - * @return the vector \c a where repeated occurences of entries are replaced + * @return the vector \c a where repeated occurrences of entries are replaced * by \c repl */ inline epu8 remove_dups(epu8 a, uint8_t repl = 0); @@ -288,12 +288,12 @@ inline uint8_t horiz_sum_ref(epu8); inline uint8_t horiz_sum_gen(epu8); /** @copydoc common_horiz_sum * @par Algorithm: - * 4-stages paralell algorithm + * 4-stages parallel algorithm */ inline uint8_t horiz_sum4(epu8); /** @copydoc common_horiz_sum * @par Algorithm: - * 3-stages paralell algorithm + indexed access + * 3-stages parallel algorithm + indexed access */ inline uint8_t horiz_sum3(epu8); /** @copydoc common_horiz_sum */ @@ -322,7 +322,7 @@ inline epu8 partial_sums_ref(epu8); inline epu8 partial_sums_gen(epu8); /** @copydoc common_partial_sums * @par Algorithm: - * 4-stages paralell algorithm + * 4-stages parallel algorithm */ inline epu8 partial_sums_round(epu8); /** @copydoc common_partial_sums */ @@ -351,12 +351,12 @@ inline uint8_t horiz_max_ref(epu8); inline uint8_t horiz_max_gen(epu8); /** @copydoc common_horiz_max * @par Algorithm: - * 4-stages paralell algorithm + * 4-stages parallel algorithm */ inline uint8_t horiz_max4(epu8); /** @copydoc common_horiz_max * @par Algorithm: - * 3-stages paralell algorithm + indexed access + * 3-stages parallel algorithm + indexed access */ inline uint8_t horiz_max3(epu8); /** @copydoc common_horiz_max */ @@ -385,7 +385,7 @@ inline epu8 partial_max_ref(epu8); inline epu8 partial_max_gen(epu8); /** @copydoc common_partial_max * @par Algorithm: - * 4-stages paralell algorithm + * 4-stages parallel algorithm */ inline epu8 partial_max_round(epu8); /** @copydoc common_partial_max */ @@ -414,12 +414,12 @@ inline uint8_t horiz_min_ref(epu8); inline uint8_t horiz_min_gen(epu8); /** @copydoc common_horiz_min * @par Algorithm: - * 4-stages paralell algorithm + * 4-stages 
parallel algorithm */ inline uint8_t horiz_min4(epu8); /** @copydoc common_horiz_min * @par Algorithm: - * 3-stages paralell algorithm + indexed access + * 3-stages parallel algorithm + indexed access */ inline uint8_t horiz_min3(epu8); /** @copydoc common_horiz_min */ @@ -448,7 +448,7 @@ inline epu8 partial_min_ref(epu8); inline epu8 partial_min_gen(epu8); /** @copydoc common_partial_min * @par Algorithm: - * 4-stages paralell algorithm + * 4-stages parallel algorithm */ inline epu8 partial_min_round(epu8); /** @copydoc common_partial_min */ @@ -607,11 +607,11 @@ inline epu8 popcount16(epu8 v); * @param k the size of \c *this (default 16) * * Points where the function is undefined are mapped to \c 0xff. If \c *this - * is a tranformation of @f$0\dots n-1@f$ for @f$n<16@f$, it should be completed - * to a transformation of @f$0\dots 15@f$ by adding fixed points. That is the - * values @f$i\geq n@f$ should be mapped to themself. + * is a transformation of @f$0\dots n-1@f$ for @f$n<16@f$, it should be + * completed to a transformation of @f$0\dots 15@f$ by adding fixed points. That + * is the values @f$i\geq n@f$ should be mapped to themself. * @par Example: - * The partial tranformation + * The partial transformation * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xff,0xff,4,6,7,8,9,10,11,12,13,14,15} */ @@ -623,12 +623,12 @@ inline bool is_partial_transformation(epu8 v, const size_t k = 16); * @param v the vector to test * @param k the size of \c *this (default 16) * - * If \c *this is a tranformation of @f$0\dots n-1@f$ for @f$n<16@f$, + * If \c *this is a transformation of @f$0\dots n-1@f$ for @f$n<16@f$, * it should be completed to a transformation of @f$0\dots 15@f$ * by adding fixed points. That is the values @f$i\geq n@f$ should be * mapped to themself. * @par Example: - * The tranformation + * The transformation * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 2 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,2,1,4,6,7,8,9,10,11,12,13,14,15} */ @@ -678,7 +678,7 @@ inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16); */ inline bool is_permutation_sort(epu8 v, const size_t k = 16); /** @copydoc common_is_permutation - @par Algorithm: architecture dependant + @par Algorithm: architecture dependent */ inline bool is_permutation(epu8 v, const size_t k = 16); diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 136915c6..55996c0f 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -522,7 +522,7 @@ template <> struct hash { }; template <> struct less { - // WARNING: due to endianess this is not lexicographic comparison, + // WARNING: due to endianness this is not lexicographic comparison, // but we don't care when using in std::set. // 10% faster than calling the lexicographic comparison operator ! inline size_t operator()(const HPCombi::epu8 &v1, diff --git a/include/power.hpp b/include/power.hpp index 71edd425..28309e0f 100644 --- a/include/power.hpp +++ b/include/power.hpp @@ -91,7 +91,7 @@ namespace power_helper { * - T #one() : the unit of the monoid * - T #prod(T, T) : the product of two elements in the monoid * - * By default for any type \c T, #one is constructed from the litteral 1 and + * By default for any type \c T, #one is constructed from the literal 1 and * #prod calls the operator *. One can change these default by specializing * the template for some specific type \c T. */ From 69ec3d63f30d2782d3834543e52323e880621a09 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Sun, 29 Oct 2023 14:24:29 +0000 Subject: [PATCH 050/113] Reorganise header directories --- CMakeLists.txt | 6 +- include/{ => hpcombi}/arch.hpp | 0 include/{ => hpcombi}/bmat8.hpp | 0 include/{ => hpcombi}/bmat8_impl.hpp | 0 include/{ => hpcombi}/epu.hpp | 0 include/{ => hpcombi}/epu_impl.hpp | 0 include/{ => hpcombi}/hpcombi.hpp | 0 include/{ => hpcombi}/perm16.hpp | 0 include/{ => hpcombi}/perm16_impl.hpp | 0 include/{ => hpcombi}/perm_generic.hpp | 0 include/{ => hpcombi}/perm_generic_impl.hpp | 0 include/{ => hpcombi}/power.hpp | 0 include/{ => hpcombi}/vect16.hpp | 0 include/{ => hpcombi}/vect_generic.hpp | 0 tests/test_bmat8.cpp | 226 +++++++++--------- tests/test_epu.cpp | 98 ++++---- tests/test_main.hpp | 3 +- tests/test_perm16.cpp | 118 +++++---- tests/test_perm_all.cpp | 4 +- {include => third_party}/simde/arm/neon.h | 0 {include => third_party}/simde/arm/sve.h | 0 {include => third_party}/simde/mips/msa.h | 0 .../simde/wasm/relaxed-simd.h | 0 {include => third_party}/simde/wasm/simd128.h | 0 {include => third_party}/simde/x86/avx.h | 0 {include => third_party}/simde/x86/avx2.h | 0 {include => third_party}/simde/x86/avx512.h | 0 {include => third_party}/simde/x86/clmul.h | 0 {include => third_party}/simde/x86/f16c.h | 0 {include => third_party}/simde/x86/fma.h | 0 {include => third_party}/simde/x86/gfni.h | 0 {include => third_party}/simde/x86/mmx.h | 0 {include => third_party}/simde/x86/sse.h | 0 {include => third_party}/simde/x86/sse2.h | 0 {include => third_party}/simde/x86/sse3.h | 0 {include => third_party}/simde/x86/sse4.1.h | 0 {include => third_party}/simde/x86/sse4.2.h | 0 {include => third_party}/simde/x86/ssse3.h | 0 {include => third_party}/simde/x86/svml.h | 0 {include => third_party}/simde/x86/xop.h | 0 40 files changed, 227 insertions(+), 228 deletions(-) rename include/{ => hpcombi}/arch.hpp (100%) rename include/{ => hpcombi}/bmat8.hpp (100%) rename include/{ => hpcombi}/bmat8_impl.hpp (100%) rename include/{ => hpcombi}/epu.hpp (100%) rename include/{ => hpcombi}/epu_impl.hpp (100%) rename include/{ => hpcombi}/hpcombi.hpp (100%) rename include/{ => hpcombi}/perm16.hpp (100%) rename include/{ => hpcombi}/perm16_impl.hpp (100%) rename include/{ => hpcombi}/perm_generic.hpp (100%) rename include/{ => hpcombi}/perm_generic_impl.hpp (100%) rename include/{ => hpcombi}/power.hpp (100%) rename include/{ => hpcombi}/vect16.hpp (100%) rename include/{ => hpcombi}/vect_generic.hpp (100%) rename {include => third_party}/simde/arm/neon.h (100%) rename {include => third_party}/simde/arm/sve.h (100%) rename {include => third_party}/simde/mips/msa.h (100%) rename {include => third_party}/simde/wasm/relaxed-simd.h (100%) rename {include => third_party}/simde/wasm/simd128.h (100%) rename {include => third_party}/simde/x86/avx.h (100%) rename {include => third_party}/simde/x86/avx2.h (100%) rename {include => third_party}/simde/x86/avx512.h (100%) rename {include => third_party}/simde/x86/clmul.h (100%) rename {include => third_party}/simde/x86/f16c.h (100%) rename {include => third_party}/simde/x86/fma.h (100%) rename {include => third_party}/simde/x86/gfni.h (100%) rename {include => third_party}/simde/x86/mmx.h (100%) rename {include => third_party}/simde/x86/sse.h (100%) rename {include => third_party}/simde/x86/sse2.h (100%) rename {include => third_party}/simde/x86/sse3.h (100%) rename {include => third_party}/simde/x86/sse4.1.h (100%) rename {include => third_party}/simde/x86/sse4.2.h (100%) rename {include => third_party}/simde/x86/ssse3.h (100%) rename 
{include => third_party}/simde/x86/svml.h (100%) rename {include => third_party}/simde/x86/xop.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d3f535e..5af5edae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,9 +71,9 @@ add_subdirectory(doc) include_directories( ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/third_party ${PROJECT_BINARY_DIR}) - ######### # Testing @@ -102,6 +102,10 @@ install ( DIRECTORY ${CMAKE_SOURCE_DIR}/include/ DESTINATION include/${CMAKE_PROJECT_NAME} FILES_MATCHING PATTERN "*.hpp") +install ( + DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/ + DESTINATION third_party/${CMAKE_PROJECT_NAME} + FILES_MATCHING PATTERN "*.hpp") ################### # pkgconfig stuff diff --git a/include/arch.hpp b/include/hpcombi/arch.hpp similarity index 100% rename from include/arch.hpp rename to include/hpcombi/arch.hpp diff --git a/include/bmat8.hpp b/include/hpcombi/bmat8.hpp similarity index 100% rename from include/bmat8.hpp rename to include/hpcombi/bmat8.hpp diff --git a/include/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp similarity index 100% rename from include/bmat8_impl.hpp rename to include/hpcombi/bmat8_impl.hpp diff --git a/include/epu.hpp b/include/hpcombi/epu.hpp similarity index 100% rename from include/epu.hpp rename to include/hpcombi/epu.hpp diff --git a/include/epu_impl.hpp b/include/hpcombi/epu_impl.hpp similarity index 100% rename from include/epu_impl.hpp rename to include/hpcombi/epu_impl.hpp diff --git a/include/hpcombi.hpp b/include/hpcombi/hpcombi.hpp similarity index 100% rename from include/hpcombi.hpp rename to include/hpcombi/hpcombi.hpp diff --git a/include/perm16.hpp b/include/hpcombi/perm16.hpp similarity index 100% rename from include/perm16.hpp rename to include/hpcombi/perm16.hpp diff --git a/include/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp similarity index 100% rename from include/perm16_impl.hpp rename to include/hpcombi/perm16_impl.hpp diff --git a/include/perm_generic.hpp b/include/hpcombi/perm_generic.hpp similarity index 100% rename from include/perm_generic.hpp rename to include/hpcombi/perm_generic.hpp diff --git a/include/perm_generic_impl.hpp b/include/hpcombi/perm_generic_impl.hpp similarity index 100% rename from include/perm_generic_impl.hpp rename to include/hpcombi/perm_generic_impl.hpp diff --git a/include/power.hpp b/include/hpcombi/power.hpp similarity index 100% rename from include/power.hpp rename to include/hpcombi/power.hpp diff --git a/include/vect16.hpp b/include/hpcombi/vect16.hpp similarity index 100% rename from include/vect16.hpp rename to include/hpcombi/vect16.hpp diff --git a/include/vect_generic.hpp b/include/hpcombi/vect_generic.hpp similarity index 100% rename from include/vect_generic.hpp rename to include/hpcombi/vect_generic.hpp diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp index 7f486089..1ca173cf 100644 --- a/tests/test_bmat8.cpp +++ b/tests/test_bmat8.cpp @@ -20,8 +20,8 @@ #include "test_main.hpp" #include -#include "bmat8.hpp" -#include "epu.hpp" +#include "hpcombi/bmat8.hpp" +#include "hpcombi/epu.hpp" namespace HPCombi { namespace { @@ -82,21 +82,21 @@ struct BMat8Fixture { TEST_CASE_METHOD(BMat8Fixture, "BMat8::one", "[BMat8][000]") { CHECK(BMat8::one(0) == zero); CHECK(BMat8::one(2) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}})); + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 
0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}})); CHECK(BMat8::one(5) == BMat8({{1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0}})); + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}})); CHECK(BMat8::one(8) == BMat8::one()); } @@ -176,13 +176,13 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::operator<<", "[BMat8][008]") { std::ostringstream oss; oss << bm3; CHECK(oss.str() == "00010011\n" - "11111101\n" - "01111101\n" - "11011111\n" - "00100111\n" - "11000001\n" - "01000011\n" - "01111010\n"); + "11111101\n" + "01111101\n" + "11011111\n" + "00100111\n" + "11000001\n" + "01000011\n" + "01111010\n"); std::stringbuf buff; std::ostream os(&buff); @@ -343,12 +343,12 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_size", "[BMat8][012]") { TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size, BMlist, "[BMat8][013]"); -TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl, - BMlist, "[BMat8][014]"); -TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl1, - BMlist, "[BMat8][015]"); -TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_bitset, - BMlist, "[BMat8][016]"); +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl, BMlist, + "[BMat8][014]"); +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_incl1, BMlist, + "[BMat8][015]"); +TEST_AGREES(BMat8Fixture, row_space_size_ref, row_space_size_bitset, BMlist, + "[BMat8][016]"); TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") { CHECK(zero.row_space_included(one1)); @@ -370,8 +370,8 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included", "[BMat8][017]") { CHECK(m3.row_space_included(BMat8::one())); } -TEST_AGREES2(BMat8Fixture, row_space_included, row_space_included_ref, - BMlist, "[BMat8][018]"); +TEST_AGREES2(BMat8Fixture, row_space_included, row_space_included_ref, BMlist, + "[BMat8][018]"); TEST_AGREES2(BMat8Fixture, row_space_included, row_space_included_bitset, BMlist, "[BMat8][019]"); @@ -401,109 +401,109 @@ TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_space_included2", "[BMat8][020]") { TEST_CASE_METHOD(BMat8Fixture, "BMat8::row_permuted", "[BMat8][021]") { CHECK(bm2.row_permuted(Perm16({1, 0})) == BMat8({{0, 1}, {1, 1}})); CHECK(bm2.row_permuted(Perm16({2, 1, 0})) == - BMat8({{0, 0, 0}, {0, 1, 0}, {1, 1, 0}})); + BMat8({{0, 0, 0}, {0, 1, 0}, {1, 1, 0}})); CHECK(bm.row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({{1, 1, 0, 0, 0, 0, 0, 1}, - {1, 1, 0, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 1, 1, 0, 1}, - {0, 0, 1, 0, 0, 1, 1, 1}, - {0, 1, 1, 1, 0, 1, 0, 1}, - {0, 0, 0, 1, 0, 0, 1, 1}, - {0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 1, 1, 1, 0, 1, 0}})); + BMat8({{1, 1, 0, 0, 0, 0, 0, 1}, + {1, 1, 0, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1}, + {0, 0, 1, 0, 0, 1, 1, 1}, + {0, 1, 1, 1, 0, 1, 0, 1}, + {0, 0, 0, 1, 0, 0, 1, 1}, + {0, 1, 0, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}})); CHECK(BMat8::one().row_permuted(Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 
0, 0, 0, 0, 1}})); + BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } TEST_CASE_METHOD(BMat8Fixture, "BMat8::col_permuted", "[BMat8][022]") { CHECK(bm2.col_permuted(Perm16({1, 0})) == BMat8({{1, 1}, {1, 0}})); CHECK(bm2.col_permuted(Perm16({2, 1, 0})) == - BMat8({{0, 1, 1}, {0, 1, 0}, {0, 0, 0}})); + BMat8({{0, 1, 1}, {0, 1, 0}, {0, 0, 0}})); CHECK(bm.col_permuted(Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({{0, 1, 0, 0, 0, 0, 1, 1}, - {1, 1, 1, 1, 1, 1, 0, 1}, - {1, 1, 1, 0, 1, 0, 0, 1}, - {1, 1, 1, 1, 0, 1, 1, 1}, - {1, 0, 0, 0, 1, 0, 1, 1}, - {0, 0, 1, 0, 0, 1, 0, 1}, - {0, 0, 1, 0, 0, 0, 1, 1}, - {0, 1, 1, 1, 1, 0, 1, 0}})); + BMat8({{0, 1, 0, 0, 0, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1}, + {1, 1, 1, 0, 1, 0, 0, 1}, + {1, 1, 1, 1, 0, 1, 1, 1}, + {1, 0, 0, 0, 1, 0, 1, 1}, + {0, 0, 1, 0, 0, 1, 0, 1}, + {0, 0, 1, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}})); CHECK(BMat8::one().col_permuted(Perm16({4, 1, 3, 0, 2, 6, 5})) == - BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } TEST_CASE("BMat8::row_permutation_matrix", "[BMat8][023]") { CHECK(BMat8::row_permutation_matrix(Perm16({1, 0})) == - BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); CHECK(BMat8::row_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == - BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); CHECK(BMat8::row_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } TEST_CASE("BMat8::col_permutation_matrix", "[BMat8][024]") { CHECK(BMat8::col_permutation_matrix(Perm16({1, 0})) == - BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 
0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 1, 0, 0, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); CHECK(BMat8::col_permutation_matrix(Perm16({1, 3, 4, 0, 2})) == - BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 0, 0, 1, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); CHECK(BMat8::col_permutation_matrix(Perm16({5, 3, 1, 4, 2, 0})) == - BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, - {0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0}, - {0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0}, - {1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 1}})); + BMat8({{0, 0, 0, 0, 0, 1, 0, 0}, + {0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 0, 0, 0}, + {0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 1}})); } TEST_CASE_METHOD(BMat8Fixture, "BMat8::nr_rows", "[BMat8][025]") { diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 7d6941c3..bd07da37 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -16,11 +16,11 @@ #include #include -#include "epu.hpp" - #include "test_main.hpp" #include +#include "hpcombi/epu.hpp" + namespace HPCombi { struct Fix { @@ -204,16 +204,16 @@ TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { CHECK_THAT(shifted_left(P01), Equals(P10)); CHECK_THAT(shifted_left(P112), - Equals(epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); - CHECK_THAT(shifted_left(Pv), Equals(epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, - 11, 12, 13, 14, 15, 0})); + Equals(epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); + CHECK_THAT(shifted_left(Pv), Equals(epu8{5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, + 12, 13, 14, 15, 0})); } TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][013]") { CHECK_THAT(shifted_right(P10), Equals(P01)); CHECK_THAT(shifted_right(P112), Equals(Epu8({0, 1, 1}, 2))); - CHECK_THAT(shifted_right(Pv), Equals(epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, - 3, 2, 11, 12, 13, 14})); + CHECK_THAT(shifted_right(Pv), Equals(epu8{0, 5, 5, 2, 5, 1, 6, 12, 4, 0, 3, + 2, 11, 12, 13, 14})); } TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][014]") { @@ -303,9 +303,9 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; CHECK_THAT(sort_perm(ve), Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, - 2, 13, 7, 4, 14})); + 2, 13, 7, 4, 14})); CHECK_THAT(ve, - Equals(epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); + Equals(epu8{0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4, 4})); for (auto x : v) { epu8 xsort = x; @@ -319,9 +319,9 @@ TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; CHECK_THAT(sort8_perm(ve), Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, - 12, 8, 11, 13, 14})); + 12, 8, 11, 13, 14})); CHECK_THAT(ve, - Equals(epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 
2, 3, 4})); + Equals(epu8{1, 1, 1, 2, 2, 3, 4, 4, 0, 0, 1, 1, 2, 2, 3, 4})); for (auto x : v) { epu8 xsort = x; @@ -342,9 +342,9 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { CHECK_THAT(permutation_of(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; CHECK_THAT((permutation_of(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, - 14, 15})); + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, + 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { CHECK_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); @@ -355,19 +355,18 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { CHECK_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); const uint8_t FF = 0xff; CHECK_THAT((permutation_of_ref(Pv, Pv) | - epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), - Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, - 14, 15})); + epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), + Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, + 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { CHECK_THAT(remove_dups(P1), Equals(P10)); CHECK_THAT(remove_dups(P11), Equals(P10)); CHECK_THAT(remove_dups(sorted(P10)), - Equals(epu8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); - CHECK_THAT( - remove_dups(sorted(Pv)), - Equals(epu8{0, 1, 2, 0, 3, 4, 5, 0, 0, 6, 11, 12, 0, 13, 14, 15})); + Equals(epu8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1})); + CHECK_THAT(remove_dups(sorted(Pv)), Equals(epu8{0, 1, 2, 0, 3, 4, 5, 0, 0, + 6, 11, 12, 0, 13, 14, 15})); CHECK_THAT(remove_dups(P1, 1), Equals(P1)); CHECK_THAT(remove_dups(P11, 1), Equals(Epu8({1, 1, 0}, 1))); CHECK_THAT(remove_dups(P11, 42), Equals(Epu8({1, 42, 0}, 42))); @@ -411,36 +410,36 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { CHECK_THAT(partial_sums_ref(zero), Equals(zero)); CHECK_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); CHECK_THAT(partial_sums_ref(epu8id), - Equals(epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, - 105, 120})); + Equals(epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, + 105, 120})); CHECK_THAT(partial_sums_ref(P10), Equals(P1)); CHECK_THAT(partial_sums_ref(P11), Equals(Epu8({1}, 2))); CHECK_THAT(partial_sums_ref(P1), Equals(epu8id + Epu8({}, 1))); CHECK_THAT(partial_sums_ref(P112), - Equals(epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, - 28, 30})); + Equals(epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, + 28, 30})); CHECK_THAT(partial_sums_ref(Pa1), - Equals(epu8{4, 6, 11, 12, 14, 21, 28, 31, 35, 37, 38, 39, 40, - 41, 42, 43})); + Equals(epu8{4, 6, 11, 12, 14, 21, 28, 31, 35, 37, 38, 39, 40, 41, + 42, 43})); CHECK_THAT(partial_sums_ref(Pa2), - Equals(epu8{4, 6, 11, 12, 14, 23, 30, 33, 37, 39, 40, 41, 42, - 43, 44, 45})); + Equals(epu8{4, 6, 11, 12, 14, 23, 30, 33, 37, 39, 40, 41, 42, 43, + 44, 45})); CHECK_THAT(partial_sums_ref(P51), - Equals(epu8{5, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, - 78, 84, 90})); + Equals(epu8{5, 6, 12, 18, 24, 30, 36, 42, 48, 54, 60, 66, 72, 78, + 84, 90})); CHECK_THAT(partial_sums_ref(Pv), - Equals(epu8{5, 10, 12, 17, 18, 24, 36, 40, 40, 43, 45, 56, 68, - 81, 95, 110})); + Equals(epu8{5, 10, 12, 17, 18, 24, 36, 40, 40, 43, 45, 56, 68, + 81, 95, 110})); CHECK_THAT(partial_sums_ref(P5), - Equals(epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 
60, 65, - 70, 75, 80})); + Equals(epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, + 70, 75, 80})); CHECK_THAT(partial_sums_ref(epu8rev), - Equals(epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, - 117, 119, 120, 120})); + Equals(epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, + 117, 119, 120, 120})); CHECK_THAT(partial_sums_ref(Pc), - Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, - 175, 182, 189, 196, 203})); + Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, + 175, 182, 189, 196, 203})); } TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_round, v, @@ -481,7 +480,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { CHECK_THAT(partial_max_ref(Pa2), Equals(Epu8({4, 4, 5, 5, 5}, 9))); CHECK_THAT(partial_max_ref(P51), Equals(Epu8({5, 5}, 6))); CHECK_THAT(partial_max_ref(Pv), Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, - 12, 12, 12, 12, 13, 14, 15})); + 12, 12, 12, 12, 13, 14, 15})); CHECK_THAT(partial_max_ref(P5), Equals(P5)); CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); @@ -544,10 +543,10 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { CHECK_THAT(eval16_ref(P112), Equals(Epu8({0, 2, 14}, 0))); CHECK_THAT(eval16_ref(Pa1), Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 2}, 0))); CHECK_THAT(eval16_ref(Pa2), - Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0))); + Equals(Epu8({0, 7, 3, 1, 2, 1, 0, 1, 0, 1}, 0))); CHECK_THAT(eval16_ref(P51), Equals(Epu8({0, 1, 0, 0, 0, 1, 14}, 0))); CHECK_THAT(eval16_ref(Pv), - Equals(epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); + Equals(epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); CHECK_THAT(eval16_ref(P5), Equals(Epu8({0, 0, 0, 0, 0, 16}, 0))); CHECK_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); CHECK_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); @@ -561,20 +560,20 @@ TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16, v, "[Epu8][034]") TEST_CASE("Epu8::popcount4", "[Epu8][048]") { CHECK_THAT(popcount4, - Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); + Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); } TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { CHECK_THAT(popcount16(Pv), - Equals(epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); + Equals(epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); CHECK_THAT(popcount16(RP), - Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); CHECK_THAT(popcount16(RP << 1), - Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); CHECK_THAT(popcount16(RP << 2), - Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); + Equals(epu8{2, 1, 0, 3, 4, 3, 2, 2, 1, 3, 2, 2, 3, 1, 1, 2})); CHECK_THAT(popcount16(Epu8({0, 1, 5, 0xff, 0xf0, 0x35}, 0x0f)), - Equals(Epu8({0, 1, 2, 8}, 4))); + Equals(Epu8({0, 1, 2, 8}, 4))); } TEST_CASE("random_epu8", "[Epu8][050]") { @@ -608,8 +607,7 @@ TEST_CASE_METHOD(Fix, "is_partial_transformation", "[Epu8][051]") { CHECK(is_partial_transformation(RP, 16)); CHECK(!is_partial_transformation(RP, 15)); CHECK(is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 5, 0xFF, 2}, 0))); - CHECK( - !is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); + CHECK(!is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); } 
TEST_CASE_METHOD(Fix, "is_transformation", "[Epu8][052]") { diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 03bd811f..d2757e34 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -18,7 +18,8 @@ #include -#include "epu.hpp" +#include "hpcombi/epu.hpp" + #include #include diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 856b6344..0735eca2 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -13,7 +13,7 @@ /* http://www.gnu.org/licenses/ */ /******************************************************************************/ -#include "perm16.hpp" +#include "hpcombi/perm16.hpp" #include "test_main.hpp" #include @@ -76,14 +76,14 @@ TEST_CASE("PTransf16::PTransf16", "[PTransf16][000]") { CHECK(PTransf16({1, 0}) == PTransf16({1, 0, 2})); CHECK(PTransf16({2}) == PTransf16({2, 1, 2})); CHECK(PTransf16({4, 5, 0}, {9, 0, 1}) == - PTransf16( - {1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); + PTransf16( + {1, FF, FF, FF, 9, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); CHECK(PTransf16({4, 5, 0, 8}, {9, 0, 1, 2}) == - PTransf16( - {1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); + PTransf16( + {1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); CHECK(PTransf16({4, 5, 0, 8}, {9, 0, 2, 2}) == - PTransf16( - {2, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); + PTransf16( + {2, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); } TEST_CASE("PTransf16::hash", "[PTransf16][001]") { @@ -97,11 +97,11 @@ TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { CHECK_THAT(PTransf16({}).image_mask(false), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).image_mask(true), Equals(Epu8(0))); CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask(), - Equals(Epu8({0, 0, 0, 0}, FF))); + Equals(Epu8({0, 0, 0, 0}, FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask(false), - Equals(Epu8({0, 0, 0, 0}, FF))); + Equals(Epu8({0, 0, 0, 0}, FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask(true), - Equals(Epu8({FF, FF, FF, FF}, 0))); + Equals(Epu8({FF, FF, FF, FF}, 0))); CHECK_THAT(PTransf16(Epu8(1)).image_mask(), Equals(Epu8({0, FF}, 0))); CHECK_THAT(PTransf16(Epu8(2)).image_mask(), Equals(Epu8({0, 0, FF}, 0))); CHECK_THAT( @@ -124,14 +124,14 @@ TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { CHECK_THAT(PTransf16({}).image_mask_ref(false), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).image_mask_ref(true), Equals(Epu8(0))); CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(), - Equals(Epu8({0, 0, 0, 0}, FF))); + Equals(Epu8({0, 0, 0, 0}, FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(false), - Equals(Epu8({0, 0, 0, 0}, FF))); + Equals(Epu8({0, 0, 0, 0}, FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).image_mask_ref(true), - Equals(Epu8({FF, FF, FF, FF}, 0))); + Equals(Epu8({FF, FF, FF, FF}, 0))); CHECK_THAT(PTransf16(Epu8(1)).image_mask_ref(), Equals(Epu8({0, FF}, 0))); CHECK_THAT(PTransf16(Epu8(2)).image_mask_ref(), - Equals(Epu8({0, 0, FF}, 0))); + Equals(Epu8({0, 0, FF}, 0))); CHECK_THAT( PTransf16(Epu8({2, 2, 2, 0xf}, 2)).image_mask_ref(), Equals(Epu8({0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, FF}, 0))); @@ -155,17 +155,17 @@ TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { CHECK(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, FF))); CHECK(PTransf16(Epu8(2)).left_one() == PTransf16(Epu8({FF, FF, 2}, FF))); CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, 2)).left_one() == - PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, - FF, 15})); + PTransf16( + {FF, FF, 2, FF, FF, FF, FF, 
FF, FF, FF, FF, FF, FF, FF, FF, 15})); CHECK(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).left_one() == - PTransf16({FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, - FF, 15})); + PTransf16( + {FF, FF, 2, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).left_one() == - PTransf16( - {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); + PTransf16( + {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); CHECK(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).left_one() == - PTransf16( - {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); + PTransf16( + {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); } TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { @@ -176,15 +176,15 @@ TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { CHECK_THAT(PTransf16(Epu8(1)).domain_mask(), Equals(Epu8(FF))); CHECK_THAT(PTransf16(Epu8(2)).domain_mask(), Equals(Epu8(FF))); CHECK_THAT(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).domain_mask(), - Equals(Epu8({FF, FF, FF, FF}, 0))); + Equals(Epu8({FF, FF, FF, FF}, 0))); CHECK_THAT(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).domain_mask(), - Equals(Epu8({0, FF, FF, FF}, 0))); + Equals(Epu8({0, FF, FF, FF}, 0))); CHECK_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(), Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); CHECK_THAT(PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)) - .domain_mask(false), - Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); + .domain_mask(false), + Equals(Epu8({FF, FF, 0, FF, FF, 0, FF, 0, FF}, 0))); CHECK_THAT( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).domain_mask(true), Equals(Epu8({0, 0, FF, 0, 0, FF, 0, FF, 0}, FF))); @@ -196,12 +196,12 @@ TEST_CASE("PTransf16::right_one", "[PTransf16][005]") { CHECK(PTransf16(Epu8(1)).right_one() == PTransf16::one()); CHECK(PTransf16(Epu8(2)).right_one() == PTransf16::one()); CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, FF)).right_one() == - PTransf16(Epu8({0, 1, 2, 3}, FF))); + PTransf16(Epu8({0, 1, 2, 3}, FF))); CHECK(PTransf16(Epu8({FF, 2, 2, 0xf}, FF)).right_one() == - PTransf16( - {FF, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); + PTransf16( + {FF, 1, 2, 3, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF})); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)).right_one() == - PTransf16::one()); + PTransf16::one()); CHECK( PTransf16(Epu8({0, 2, FF, 0xf, 2, FF, 2, FF, 5}, FF)).right_one() == PTransf16({0, 1, FF, 3, 4, FF, 6, FF, 8, FF, FF, FF, FF, FF, FF, FF})); @@ -211,33 +211,31 @@ TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { CHECK(PTransf16({}).rank_ref() == 16); CHECK(PTransf16({4, 4, 4, 4}).rank_ref() == 12); CHECK(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) - .rank_ref() == 1); + .rank_ref() == 1); CHECK(PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) - .rank_ref() == 1); + .rank_ref() == 1); CHECK(PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) - .rank_ref() == 2); + .rank_ref() == 2); CHECK(PTransf16({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2}) - .rank_ref() == 4); + .rank_ref() == 4); CHECK(PTransf16({1, 1, 1, FF, 1, 1, FF, 1, 1, FF, 1, FF, 1, 1, 1, 1}) - .rank_ref() == 1); + .rank_ref() == 1); CHECK(PTransf16({2, 2, 2, 2, 2, FF, 2, 2, 2, FF, 2, 2, 2, FF, 2, 2}) - .rank_ref() == 1); + .rank_ref() == 1); CHECK(PTransf16({2, 2, 2, 0xf, 2, FF, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) - .rank_ref() == 2); + .rank_ref() == 2); CHECK(PTransf16({0, 2, 2, 0xf, 2, 2, FF, 2, 5, 
2, FF, 2, 2, 2, 2, 2}) - .rank_ref() == 4); + .rank_ref() == 4); } // TODO uncomment TEST_CASE("PTransf16::rank", "[PTransf16][007]") { CHECK(PTransf16({}).rank() == 16); CHECK(PTransf16({4, 4, 4, 4}).rank() == 12); - CHECK( - PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == - 1); - CHECK( - PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == - 1); + CHECK(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == + 1); + CHECK(PTransf16({2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == + 1); CHECK( PTransf16({2, 2, 2, 0xf, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}).rank() == 2); @@ -251,15 +249,14 @@ TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { CHECK_THAT(PTransf16({}).fix_points_mask(false), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).fix_points_mask(true), Equals(Epu8(0))); CHECK_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(), - Equals(Epu8({0, 0, 0, 0}, FF))); + Equals(Epu8({0, 0, 0, 0}, FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(false), - Equals(Epu8({0, 0, 0, 0}, FF))); + Equals(Epu8({0, 0, 0, 0}, FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).fix_points_mask(true), - Equals(Epu8({FF, FF, FF, FF}, 0))); - CHECK_THAT(PTransf16(Epu8(1)).fix_points_mask(), - Equals(Epu8({0, FF}, 0))); + Equals(Epu8({FF, FF, FF, FF}, 0))); + CHECK_THAT(PTransf16(Epu8(1)).fix_points_mask(), Equals(Epu8({0, FF}, 0))); CHECK_THAT(PTransf16(Epu8(2)).fix_points_mask(), - Equals(Epu8({0, 0, FF}, 0))); + Equals(Epu8({0, 0, FF}, 0))); CHECK_THAT( PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_mask(), Equals(Epu8({0, 0, FF, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, 0, 0, 0}, 0))); @@ -271,8 +268,8 @@ TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { .fix_points_mask(false), Equals(Epu8({FF, 0, FF, 0, 0, 0, 0, 0, FF, 0, 0, 0, 0, 0, FF, 0}, 0))); CHECK_THAT(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) - .fix_points_mask(true), - Equals(Epu8({0, FF, 0}, FF))); + .fix_points_mask(true), + Equals(Epu8({0, FF, 0}, FF))); } TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { @@ -286,11 +283,11 @@ TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { CHECK(PTransf16(Epu8(2)).fix_points_bitset() == 0x0004); CHECK(PTransf16(Epu8({2, 2, 2, 0xf}, 7)).fix_points_bitset() == 0x0084); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 14, 5, 2}, 2)) - .fix_points_bitset() == 0x5); + .fix_points_bitset() == 0x5); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 8, 2}, 14)) - .fix_points_bitset(false) == 0x4105); + .fix_points_bitset(false) == 0x4105); CHECK(PTransf16(Epu8({0, 2, 2, 0xf, 2, 2, 2, 2, 5, 2}, 2)) - .fix_points_bitset(true) == 0xFFFA); + .fix_points_bitset(true) == 0xFFFA); } TEST_CASE("PTransf16::nb_fix_points", "[PTransf16][010]") { @@ -396,9 +393,8 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { CHECK_THAT(PTransf16::one().fix_points_mask(), Equals(Epu8(FF))); CHECK_THAT(Perm16::one().fix_points_mask(), Equals(Epu8(FF))); CHECK_THAT(PPa.fix_points_mask(), Equals(Epu8({0, 0, 0, 0, 0}, FF))); - CHECK_THAT( - PPb.fix_points_mask(), - Equals(epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, FF, FF, FF, FF, 0, FF, 0})); + CHECK_THAT(PPb.fix_points_mask(), Equals(epu8{0, 0, 0, 0, 0, FF, 0, FF, FF, + FF, FF, FF, FF, 0, FF, 0})); CHECK_THAT(RandPerm.fix_points_mask(), Equals(Epu8({0, FF}, 0))); CHECK_THAT(Perm16::one().fix_points_mask(false), Equals(Epu8(FF))); @@ -410,7 +406,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { 
CHECK_THAT(Perm16::one().fix_points_mask(true), Equals(Epu8(0))); CHECK_THAT(PPa.fix_points_mask(true), - Equals(Epu8({FF, FF, FF, FF, FF}, 0))); + Equals(Epu8({FF, FF, FF, FF, FF}, 0))); CHECK_THAT( PPb.fix_points_mask(true), Equals(epu8{FF, FF, FF, FF, FF, 0, FF, 0, 0, 0, 0, 0, 0, FF, 0, FF})); @@ -478,9 +474,9 @@ TEST_AGREES(Perm16Fixture, inverse_ref, inverse, Plist, "[Perm16][031]"); TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { CHECK_THAT(Perm16::one().lehmer(), Equals(zero)); CHECK_THAT(PPa.lehmer(), - Equals(epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); + Equals(epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); CHECK_THAT(PPb.lehmer(), - Equals(epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); + Equals(epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); } TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer_arr, Plist, "[Perm16][033]"); diff --git a/tests/test_perm_all.cpp b/tests/test_perm_all.cpp index 3264ed4b..eb87ec05 100644 --- a/tests/test_perm_all.cpp +++ b/tests/test_perm_all.cpp @@ -18,8 +18,8 @@ #include #include -#include "perm16.hpp" -#include "perm_generic.hpp" +#include "hpcombi/perm16.hpp" +#include "hpcombi/perm_generic.hpp" #include "test_main.hpp" #include diff --git a/include/simde/arm/neon.h b/third_party/simde/arm/neon.h similarity index 100% rename from include/simde/arm/neon.h rename to third_party/simde/arm/neon.h diff --git a/include/simde/arm/sve.h b/third_party/simde/arm/sve.h similarity index 100% rename from include/simde/arm/sve.h rename to third_party/simde/arm/sve.h diff --git a/include/simde/mips/msa.h b/third_party/simde/mips/msa.h similarity index 100% rename from include/simde/mips/msa.h rename to third_party/simde/mips/msa.h diff --git a/include/simde/wasm/relaxed-simd.h b/third_party/simde/wasm/relaxed-simd.h similarity index 100% rename from include/simde/wasm/relaxed-simd.h rename to third_party/simde/wasm/relaxed-simd.h diff --git a/include/simde/wasm/simd128.h b/third_party/simde/wasm/simd128.h similarity index 100% rename from include/simde/wasm/simd128.h rename to third_party/simde/wasm/simd128.h diff --git a/include/simde/x86/avx.h b/third_party/simde/x86/avx.h similarity index 100% rename from include/simde/x86/avx.h rename to third_party/simde/x86/avx.h diff --git a/include/simde/x86/avx2.h b/third_party/simde/x86/avx2.h similarity index 100% rename from include/simde/x86/avx2.h rename to third_party/simde/x86/avx2.h diff --git a/include/simde/x86/avx512.h b/third_party/simde/x86/avx512.h similarity index 100% rename from include/simde/x86/avx512.h rename to third_party/simde/x86/avx512.h diff --git a/include/simde/x86/clmul.h b/third_party/simde/x86/clmul.h similarity index 100% rename from include/simde/x86/clmul.h rename to third_party/simde/x86/clmul.h diff --git a/include/simde/x86/f16c.h b/third_party/simde/x86/f16c.h similarity index 100% rename from include/simde/x86/f16c.h rename to third_party/simde/x86/f16c.h diff --git a/include/simde/x86/fma.h b/third_party/simde/x86/fma.h similarity index 100% rename from include/simde/x86/fma.h rename to third_party/simde/x86/fma.h diff --git a/include/simde/x86/gfni.h b/third_party/simde/x86/gfni.h similarity index 100% rename from include/simde/x86/gfni.h rename to third_party/simde/x86/gfni.h diff --git a/include/simde/x86/mmx.h b/third_party/simde/x86/mmx.h similarity index 100% rename from include/simde/x86/mmx.h rename to third_party/simde/x86/mmx.h diff --git a/include/simde/x86/sse.h b/third_party/simde/x86/sse.h similarity 
index 100% rename from include/simde/x86/sse.h rename to third_party/simde/x86/sse.h diff --git a/include/simde/x86/sse2.h b/third_party/simde/x86/sse2.h similarity index 100% rename from include/simde/x86/sse2.h rename to third_party/simde/x86/sse2.h diff --git a/include/simde/x86/sse3.h b/third_party/simde/x86/sse3.h similarity index 100% rename from include/simde/x86/sse3.h rename to third_party/simde/x86/sse3.h diff --git a/include/simde/x86/sse4.1.h b/third_party/simde/x86/sse4.1.h similarity index 100% rename from include/simde/x86/sse4.1.h rename to third_party/simde/x86/sse4.1.h diff --git a/include/simde/x86/sse4.2.h b/third_party/simde/x86/sse4.2.h similarity index 100% rename from include/simde/x86/sse4.2.h rename to third_party/simde/x86/sse4.2.h diff --git a/include/simde/x86/ssse3.h b/third_party/simde/x86/ssse3.h similarity index 100% rename from include/simde/x86/ssse3.h rename to third_party/simde/x86/ssse3.h diff --git a/include/simde/x86/svml.h b/third_party/simde/x86/svml.h similarity index 100% rename from include/simde/x86/svml.h rename to third_party/simde/x86/svml.h diff --git a/include/simde/x86/xop.h b/third_party/simde/x86/xop.h similarity index 100% rename from include/simde/x86/xop.h rename to third_party/simde/x86/xop.h From 5c06113c86ff1ae2ee5071309c8e3257831b6ee5 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 14:25:39 +0000 Subject: [PATCH 051/113] Remove extraneous files --- .travis.yml | 107 -------------------------------- ci/travis-benchmark-install.sh | 42 ------------- ci/travis-boost-test-install.sh | 14 ----- ci/travis-libsemigroups.sh | 19 ------ ci/travis-standard.sh | 22 ------- etc/check-list-intrin.sh | 33 ---------- list_intrin.txt | 24 ------- 7 files changed, 261 deletions(-) delete mode 100644 .travis.yml delete mode 100755 ci/travis-benchmark-install.sh delete mode 100755 ci/travis-boost-test-install.sh delete mode 100755 ci/travis-libsemigroups.sh delete mode 100755 ci/travis-standard.sh delete mode 100755 etc/check-list-intrin.sh delete mode 100644 list_intrin.txt diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 952abe46..00000000 --- a/.travis.yml +++ /dev/null @@ -1,107 +0,0 @@ -language: cpp -dist: trusty -sudo: required -compiler: - - gcc -matrix: - include: - - env: TEST_SUITE=standard - addons: - apt: - packages: - - texlive-font-utils - - doxygen - sources: - - ubuntu-toolchain-r-test - - env: TEST_SUITE=standard MATRIX_EVAL="CC=gcc-5 && CXX=g++-5" - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-5 - - g++-5 - - texlive-font-utils - - doxygen - - env: TEST_SUITE=standard MATRIX_EVAL="CC=gcc-6 && CXX=g++-6" - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-6 - - g++-6 - - texlive-font-utils - - doxygen - - env: TEST_SUITE=standard MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-7 - - g++-7 - - texlive-font-utils - - doxygen - - env: TEST_SUITE=standard MATRIX_EVAL="CC=gcc-8 && CXX=g++-8" - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - gcc-8 - - g++-8 - - texlive-font-utils - - doxygen - - compiler: clang - env: TEST_SUITE=standard MATRIX_EVAL="CC=clang-4.0 && CXX=clang++-4.0" - addons: - apt: - sources: - - llvm-toolchain-trusty-4.0 - - ubuntu-toolchain-r-test - packages: - - clang-4.0 - - texlive-font-utils - - doxygen - - compiler: clang - env: TEST_SUITE=standard MATRIX_EVAL="CC=clang-5.0 && CXX=clang++-5.0" - addons: - apt: - sources: - - 
llvm-toolchain-trusty-5.0 - - ubuntu-toolchain-r-test - packages: - - clang-5.0 - - texlive-font-utils - - doxygen - - compiler: clang - env: TEST_SUITE=standard MATRIX_EVAL="CC=clang-6.0 && CXX=clang++-6.0" - addons: - apt: - sources: - - llvm-toolchain-trusty-6.0 - - ubuntu-toolchain-r-test - packages: - - clang-6.0 - - texlive-font-utils - - doxygen - # - env: TEST_SUITE=libsemigroups - # addons: - # apt: - # sources: - # - ubuntu-toolchain-r-test - # packages: - # - doxygen - # - gcc-5 - # - g++-5 -before_install: - - eval "${MATRIX_EVAL}" - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo add-apt-repository ppa:dns/gnu -y; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo apt-get update -q; fi -install: - - ci/travis-benchmark-install.sh - - ci/travis-boost-test-install.sh - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install boost; fi -script: - - ci/travis-$TEST_SUITE.sh diff --git a/ci/travis-benchmark-install.sh b/ci/travis-benchmark-install.sh deleted file mode 100755 index 49f17d07..00000000 --- a/ci/travis-benchmark-install.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash -set -e - -echo "Git version:" -git --version - -# curl -L -O https://github.com/google/googletest/archive/release-1.10.0.tar.gz -# tar xvf release-1.10.0.tar.gz -# cd googletest-release-1.10.0 -# mkdir build -# cd build -# cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -# make -# sudo make install -# cd ../../ -# rm -rf googletest-release-1.10.0 - - -echo "installing google test from sources" -curl -L -O https://github.com/google/googletest/archive/release-1.10.0.tar.gz -tar xvf release-1.10.0.tar.gz -cd googletest-release-1.10.0 -mkdir build -cd build -cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -make -sudo make install -cd ../../ -rm -rf googletest-release-1.10.0 - -echo "installing benchmark from sources" -curl -L -O https://github.com/google/benchmark/archive/v1.5.0.tar.gz -tar xvf v1.5.0.tar.gz -cd benchmark-1.5.0 -mkdir build -cd build -#cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON -cmake .. -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_GTEST_TESTS=OFF -DBENCHMARK_ENABLE_TESTING=OFF -make -sudo make install -cd ../../ -rm -rf benchmark-1.5.0 diff --git a/ci/travis-boost-test-install.sh b/ci/travis-boost-test-install.sh deleted file mode 100755 index b80b24c6..00000000 --- a/ci/travis-boost-test-install.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -echo "Git version:" -git --version - -echo "installing boost tests from sources" -wget --no-verbose --output-document=boost-trunk.tar.bz2 http://sourceforge.net/projects/boost/files/boost/1.60.0/boost_1_60_0.tar.bz2/download -export BOOST_ROOT="$TRAVIS_BUILD_DIR/../boost-trunk" -export CMAKE_MODULE_PATH="$BOOST_ROOT" -mkdir -p $BOOST_ROOT -tar jxf boost-trunk.tar.bz2 --strip-components=1 -C $BOOST_ROOT -(cd $BOOST_ROOT; ./bootstrap.sh --with-libraries=test) -(cd $BOOST_ROOT; ./b2 threading=multi --prefix=$BOOST_ROOT -d0 install) diff --git a/ci/travis-libsemigroups.sh b/ci/travis-libsemigroups.sh deleted file mode 100755 index e82ef778..00000000 --- a/ci/travis-libsemigroups.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -set -e - -echo "CMake version:" -cmake --version -echo "g++ version:" -$CXX --version -echo "gcc version:" -$CC --version - -cd .. 
-git clone -b argcheck --depth=1 https://github.com/james-d-mitchell/libsemigroups.git -cd libsemigroups -mv ../HPCombi extern -echo "0.0.2" > extern/HPCombi/VERSION -./autogen.sh -./configure -make check -j2 - diff --git a/ci/travis-standard.sh b/ci/travis-standard.sh deleted file mode 100755 index bb8c2678..00000000 --- a/ci/travis-standard.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -e - -echo "CMake version:" -cmake --version -echo "g++ version:" -$CXX --version -echo "gcc version:" -$CC --version - -if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then - export BOOST_ROOT="$TRAVIS_BUILD_DIR/../boost-trunk" -fi - -mkdir build -cd build -cmake -DBUILD_TESTING=1 -DCMAKE_BUILD_TYPE=Release .. -make -make test -cmake -DBUILD_TESTING=1 .. -make -make test diff --git a/etc/check-list-intrin.sh b/etc/check-list-intrin.sh deleted file mode 100755 index 8ff7d6cd..00000000 --- a/etc/check-list-intrin.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -set -e - -bold() { - printf "\033[1m%s\033[0m\n" "$*" -} - -if [[ $# -ne 0 ]]; then - bold "error expected no arguments, got $#!" - exit 1 -fi - -echo "Checking intrinsics used in code base versus declared in 'list_intrin.txt' . . ." - -grep -oh -e "_mm\w*\b" include/*.* | sort | uniq > intrin_actuals.txt - -python3 - <; -# line starting with # are comments -_mm_blendv_epi8;__m128i(),__m128i(),__m128i() -_mm_bslli_si128;__m128i(),1 -_mm_bsrli_si128;__m128i(),1 -_mm_cmpestri;__m128i(),1,__m128i(),1,1 -_mm_cmpestrm;__m128i(),1,__m128i(),1,1 -_mm_extract_epi64;__m128i(),1 -_mm_max_epu8;__m128i(),__m128i() -_mm_min_epi8;__m128i(),__m128i() -_mm_min_epu8;__m128i(),__m128i() -_mm_movemask_epi8;__m128i() -__builtin_popcountl;1 -__builtin_popcountll;1 -_mm_set_epi64x;1,1 -_mm_shuffle_epi8;__m128i(),__m128i() -_mm_slli_epi32;__m128i(),1 -_mm_testc_si128;__m128i(),__m128i() -_mm_testz_si128;__m128i(),__m128i() -_mm_xor_si128;__m128i(),__m128i() -# _mm_cmpeq_epi8;__m128i(),__m128i() -# _mm_cmplt_epi8;__m128i(),__m128i() -# _mm_max_epi8 From d2f6ef7b49d978f6c0da4b15f395c0f34f5c5ffb Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 14:41:39 +0000 Subject: [PATCH 052/113] implicit constructor --- include/hpcombi/perm16.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index 6a792619..a844467a 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -121,8 +121,8 @@ struct alignas(16) PTransf16 : public Vect16 { struct Transf16 : public PTransf16 { Transf16() = default; constexpr Transf16(const Transf16 &v) = default; - constexpr Transf16(const vect v) : PTransf16(v) {} - constexpr Transf16(const epu8 x) : PTransf16(x) {} + /* implicit */ constexpr Transf16(const vect v) : PTransf16(v) {} // NOLINT + /* implicit */ constexpr Transf16(const epu8 x) : PTransf16(x) {} // NOLINT Transf16(std::initializer_list il) : PTransf16(il) {} Transf16 &operator=(const Transf16 &) = default; @@ -150,8 +150,8 @@ struct Transf16 : public PTransf16 { struct PPerm16 : public PTransf16 { PPerm16() = default; constexpr PPerm16(const PPerm16 &v) = default; - constexpr PPerm16(const vect v) : PTransf16(v) {} - constexpr PPerm16(const epu8 x) : PTransf16(x) {} + /* implicit */ constexpr PPerm16(const vect v) : PTransf16(v) {} // NOLINT + /* implicit */ constexpr PPerm16(const epu8 x) : PTransf16(x) {} // NOLINT PPerm16(std::vector dom, std::vector rng, size_t = 0 /* unused */) : PTransf16(dom, rng) {} @@ -209,8 +209,8 @@ struct PPerm16 : public PTransf16 { struct 
Perm16 : public Transf16 /* public PPerm : diamond problem */ { Perm16() = default; constexpr Perm16(const Perm16 &) = default; - constexpr Perm16(const vect v) : Transf16(v) {} - constexpr Perm16(const epu8 x) : Transf16(x) {} + /* implicit */ constexpr Perm16(const vect v) : Transf16(v) {} // NOLINT + /* implicit */ constexpr Perm16(const epu8 x) : Transf16(x) {} // NOLINT Perm16 &operator=(const Perm16 &) = default; Perm16(std::initializer_list il) : Transf16(il) {} From 2e2903dc7ff7f71e3b6f3407c21b15eee781785f Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 15:25:04 +0000 Subject: [PATCH 053/113] Updated benchmark with directory reorg --- CMakeLists.txt | 6 +++--- benchmark/CMakeLists.txt | 6 +----- benchmark/bench_bmat8.cpp | 19 +++++++++++++------ benchmark/bench_fixture.hpp | 2 +- tests/CMakeLists.txt | 6 +----- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5af5edae..043cc4a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,6 @@ endif() ################### # Project Structure add_subdirectory(examples) -add_subdirectory(benchmark) add_subdirectory(doc) include_directories( @@ -77,11 +76,12 @@ include_directories( ######### # Testing -IF (BUILD_TESTING) +if (BUILD_TESTING) include(CTest) enable_testing () add_subdirectory(tests) -ENDIF(BUILD_TESTING) + add_subdirectory(benchmark) +endif(BUILD_TESTING) ##################### diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 61615ee8..66227de2 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -30,11 +30,7 @@ else() message(STATUS "Using system Catch2") endif() -message(STATUS "Building tests") - -include_directories( - ${CMAKE_SOURCE_DIR}/include - ${PROJECT_BINARY_DIR}) +message(STATUS "Building benchmark") set(benchmark_src bench_epu8.cpp bench_perm16.cpp bench_bmat8.cpp sort.cpp inverse.cpp) diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index 0e0e5dc7..ae0ecd14 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -24,7 +24,7 @@ // #include "compilerinfo.hpp" // #include "cpu_x86_impl.hpp" -#include "bmat8.hpp" +#include "hpcombi/bmat8.hpp" // using namespace FeatureDetector; // using namespace std; @@ -52,8 +52,15 @@ std::vector> make_pair_sample(size_t n) { return res; } -std::vector sample = make_sample(1000); -std::vector> pair_sample = make_pair_sample(1000); +class Fix_BMat8 { + public: + Fix_BMat8() + : sample(make_sample(1000)), pair_sample(make_pair_sample(1000)) {} + ~Fix_BMat8() {} + const std::vector sample; + std::vector> + pair_sample; // not const, transpose2 is in place +}; // template // void myBench(const std::string &name, TF pfunc, Sample &sample) { @@ -86,7 +93,7 @@ std::vector> pair_sample = make_pair_sample(1000); return true; \ }; -TEST_CASE("Row space size benchmarks 1000 BMat8", "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size_ref, sample); BENCHMARK_MEM_FN(row_space_size_bitset, sample); BENCHMARK_MEM_FN(row_space_size_incl1, sample); @@ -94,13 +101,13 @@ TEST_CASE("Row space size benchmarks 1000 BMat8", "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size, sample); } -TEST_CASE("Transpose benchmarks 1000 BMat8", "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose benchmarks 1000 BMat8", "[BMat8][000]") { BENCHMARK_MEM_FN(transpose, sample); BENCHMARK_MEM_FN(transpose_mask, sample); BENCHMARK_MEM_FN(transpose_maskd, sample); } 
-TEST_CASE("Transpose pairs benchmarks 1000 BMat8", "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR(transpose, pair_sample); BENCHMARK_MEM_FN_PAIR(transpose_mask, pair_sample); BENCHMARK_MEM_FN_PAIR(transpose_maskd, pair_sample); diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index c0dd8378..0efe2f92 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -15,7 +15,7 @@ #ifndef BENCH_FIXTURE #define BENCH_FIXTURE -#include "epu.hpp" +#include "hpcombi/epu.hpp" using HPCombi::epu8; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 38c9f8a4..9c852358 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -32,13 +32,9 @@ endif() message(STATUS "Building tests") -include_directories( - ${CMAKE_SOURCE_DIR}/include - ${CMAKE_SOURCE_DIR}/include/fallback - ${PROJECT_BINARY_DIR}) - set(test_src test_epu.cpp test_perm16.cpp test_perm_all.cpp test_bmat8.cpp) + foreach(f ${test_src}) get_filename_component(testName ${f} NAME_WE) add_executable (${testName} ${f} test_main.cpp) From 4d14b5c0d0c99c7291f1b3bd0f7a2f8a124059f0 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 15:07:45 +0000 Subject: [PATCH 054/113] Use HPCOMBI_ASSERT everywhere --- include/hpcombi/debug.hpp | 26 ++++++++++++++++++++++++++ include/hpcombi/perm16_impl.hpp | 10 +++++----- include/hpcombi/perm_generic_impl.hpp | 4 ++-- include/hpcombi/vect_generic.hpp | 2 +- 4 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 include/hpcombi/debug.hpp diff --git a/include/hpcombi/debug.hpp b/include/hpcombi/debug.hpp new file mode 100644 index 00000000..41bb7cc5 --- /dev/null +++ b/include/hpcombi/debug.hpp @@ -0,0 +1,26 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 James D. Mitchell // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. 
// +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HPCOMBI_DEBUG_HPP_ +#define HPCOMBI_DEBUG_HPP_ + +#ifdef HPCOMBI_DEBUG +#include +#define HPCOMBI_ASSERT(x) assert(x) +#else +#define HPCOMBI_ASSERT(x) +#endif + +#endif // HPCOMBI_DEBUG_HPP_ diff --git a/include/hpcombi/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp index f157a32e..dca72076 100644 --- a/include/hpcombi/perm16_impl.hpp +++ b/include/hpcombi/perm16_impl.hpp @@ -28,17 +28,17 @@ namespace HPCombi { inline PTransf16::PTransf16(std::initializer_list il) : Vect16(epu8id) { - assert(il.size() <= 16); + HPCOMBI_ASSERT(il.size() <= 16); std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin()); } inline PTransf16::PTransf16(std::vector dom, std::vector rng, size_t /*unused */) : Vect16(Epu8(0xFF)) { - assert(dom.size() == rng.size()); - assert(dom.size() <= 16); + HPCOMBI_ASSERT(dom.size() == rng.size()); + HPCOMBI_ASSERT(dom.size() <= 16); for (size_t i = 0; i < dom.size(); ++i) { - assert(dom[i] < 16); + HPCOMBI_ASSERT(dom[i] < 16); v[dom[i]] = rng[i]; } } @@ -191,7 +191,7 @@ inline Perm16 Perm16::unrankSJT(int n, int r) { } inline Perm16 Perm16::elementary_transposition(uint64_t i) { - assert(i < 16); + HPCOMBI_ASSERT(i < 16); epu8 res = one(); res[i] = i + 1; res[i + 1] = i; diff --git a/include/hpcombi/perm_generic_impl.hpp b/include/hpcombi/perm_generic_impl.hpp index ba145747..2fb78636 100644 --- a/include/hpcombi/perm_generic_impl.hpp +++ b/include/hpcombi/perm_generic_impl.hpp @@ -17,7 +17,7 @@ namespace HPCombi { template PermGeneric::PermGeneric(std::initializer_list il) { - assert(il.size() <= Size); + HPCOMBI_ASSERT(il.size() <= Size); std::copy(il.begin(), il.end(), this->v.begin()); for (Expo i = il.size(); i < Size; i++) this->v[i] = i; @@ -26,7 +26,7 @@ PermGeneric::PermGeneric(std::initializer_list il) { template PermGeneric PermGeneric::elementary_transposition(uint64_t i) { - assert(i < Size); + HPCOMBI_ASSERT(i < Size); PermGeneric res{{}}; res[i] = i + 1; res[i + 1] = i; diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp index d549615e..402b7dd7 100644 --- a/include/hpcombi/vect_generic.hpp +++ b/include/hpcombi/vect_generic.hpp @@ -42,7 +42,7 @@ template struct VectGeneric { VectGeneric() = default; VectGeneric(const std::array &_v) : v(_v) {} // NOLINT VectGeneric(std::initializer_list il, Expo def = 0) { - assert(il.size() <= Size); + HPCOMBI_ASSERT(il.size() <= Size); std::copy(il.begin(), il.end(), v.begin()); std::fill(v.begin() + il.size(), v.end(), def); } From 2383a0be727634ff5eb0834fe026fcbdfe239916 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Sun, 29 Oct 2023 15:24:36 +0000 Subject: [PATCH 055/113] Remove * + iwyu --- include/hpcombi/bmat8.hpp | 30 +++++++------- include/hpcombi/epu.hpp | 29 +++++++------- include/hpcombi/epu_impl.hpp | 10 ++--- include/hpcombi/hpcombi.hpp | 9 ++++- include/hpcombi/perm16.hpp | 29 +++++++------- include/hpcombi/perm16_impl.hpp | 18 +++------ include/hpcombi/perm_generic.hpp | 27 ++++++++----- include/hpcombi/perm_generic_impl.hpp | 4 +- include/hpcombi/power.hpp | 5 ++- include/hpcombi/vect16.hpp | 14 ++++--- include/hpcombi/vect_generic.hpp | 25 +++++++----- tests/test_bmat8.cpp | 56 ++++++++++++++------------- tests/test_epu.cpp | 28 +++++++------- tests/test_main.cpp | 4 +- tests/test_main.hpp | 4 +- tests/test_perm16.cpp | 29 +++++++------- tests/test_perm_all.cpp | 28 +++++++------- 17 files changed, 183 insertions(+), 166 deletions(-) diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp index b3b107cd..73888cdb 100644 --- a/include/hpcombi/bmat8.hpp +++ b/include/hpcombi/bmat8.hpp @@ -20,23 +20,19 @@ #ifndef HPCOMBI_BMAT8_HPP_INCLUDED #define HPCOMBI_BMAT8_HPP_INCLUDED -#include // for uniform_int_distribution, swap -#include // for array -#include // for bitset -#include // for CHAR_BIT -#include // for size_t -#include // for uint64_t -#include // for operator<<, ostringstream -#include // for mt19937, random_device -#include // for hash -#include // for vector - -#include "epu.hpp" -#include "perm16.hpp" - -#ifndef HPCOMBI_ASSERT -#define HPCOMBI_ASSERT(x) assert(x) -#endif +#include // for array +#include // for bitset +#include // for size_t +#include // for uint64_t, uint8_t +#include // for hash, __scalar_hash +#include // for ostream +#include // for hash +#include // for pair, swap +#include // for vector + +#include "debug.hpp" // for HPCOMBI_ASSERT +#include "epu.hpp" // for epu8 +#include "perm16.hpp" // for Perm16 namespace HPCombi { diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index a9f5433d..f424ca4d 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016-2018 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,25 +11,24 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_EPU_HPP_INCLUDED #define HPCOMBI_EPU_HPP_INCLUDED -#include -#include -#include -#include // less<>, equal_to<> -#include -#include -#include -#include -#include // integer_sequences +#include // for array +#include // for size_t +#include // for uint8_t, uint64_t, int8_t +#include // for initializer_list +#include // for ostream +#include // for string +#include // for remove_reference_t +#include // for make_index_sequence, ind... -#include "vect_generic.hpp" +#include "debug.hpp" // for HPCOMBI_ASSERT +#include "vect_generic.hpp" // for VectGeneric -#include "simde/x86/sse4.1.h" -#include "simde/x86/sse4.2.h" +#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde... 
namespace HPCombi { @@ -67,7 +66,7 @@ template struct TPUBuild { inline TPU operator()(std::initializer_list il, type_elem def) const { - assert(il.size() <= size); + HPCOMBI_ASSERT(il.size() <= size); array res; std::copy(il.begin(), il.end(), res.begin()); std::fill(res.begin() + il.size(), res.end(), def); diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index 55996c0f..73f91e89 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,7 +11,7 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // This is the implementation part of epu.hpp this should be seen as // implementation details and should not be included directly. @@ -43,9 +43,9 @@ namespace HPCombi { -/*****************************************************************************/ -/** Implementation part for inline functions *********************************/ -/*****************************************************************************/ +/////////////////////////////////////////////////////////////////////////////// +/// Implementation part for inline functions ////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// // Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { diff --git a/include/hpcombi/hpcombi.hpp b/include/hpcombi/hpcombi.hpp index 89b4743f..e1795c31 100644 --- a/include/hpcombi/hpcombi.hpp +++ b/include/hpcombi/hpcombi.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,13 +11,18 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_HPCOMBI_HPP_INCLUDED #define HPCOMBI_HPCOMBI_HPP_INCLUDED #include "bmat8.hpp" +#include "debug.hpp" #include "epu.hpp" #include "perm16.hpp" +#include "perm_generic.hpp" +#include "power.hpp" +#include "vect16.hpp" +#include "vect_generic.hpp" #endif // HPCOMBI_HPCOMBI_HPP_INCLUDED diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index a844467a..d12df181 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,20 +11,21 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// 
+//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_PERM16_HPP_INCLUDED #define HPCOMBI_PERM16_HPP_INCLUDED -#include -#include -#include -#include // less<> -#include -#include +#include // for size_t +#include // for uint8_t, uint64_t, uint32_t +#include // for initializer_list +#include // for hash +#include // for is_trivial +#include // for vector -#include "epu.hpp" -#include "vect16.hpp" +#include "epu.hpp" // for epu8, permuted, etc +#include "power.hpp" // for pow +#include "vect16.hpp" // for hash, is_partial_permutation #include "simde/x86/sse4.1.h" #include "simde/x86/sse4.2.h" @@ -43,7 +44,7 @@ struct alignas(16) PTransf16 : public Vect16 { static constexpr size_t size() { return 16; } using vect = HPCombi::Vect16; - using array = decltype(Epu8)::array; + using array = typename decltype(Epu8)::array; PTransf16() = default; constexpr PTransf16(const PTransf16 &v) = default; @@ -438,9 +439,9 @@ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { bool left_weak_leq(Perm16 other) const; }; -/*****************************************************************************/ -/** Memory layout concepts check ********************************************/ -/*****************************************************************************/ +/////////////////////////////////////////////////////////////////////////////// +/// Memory layout concepts check ///////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// static_assert(sizeof(epu8) == sizeof(Perm16), "epu8 and Perm16 have a different memory layout !"); diff --git a/include/hpcombi/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp index dca72076..d276e661 100644 --- a/include/hpcombi/perm16_impl.hpp +++ b/include/hpcombi/perm16_impl.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,20 +11,13 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// - -#include -#include -#include -#include - -#include "power.hpp" +//////////////////////////////////////////////////////////////////////////////// namespace HPCombi { -/*****************************************************************************/ -/** Implementation part for inline functions *********************************/ -/*****************************************************************************/ +/////////////////////////////////////////////////////////////////////////////// +// Implementation part for inline functions ////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// inline PTransf16::PTransf16(std::initializer_list il) : Vect16(epu8id) { @@ -228,6 +221,7 @@ inline Perm16 Perm16::inverse_sort() const { // We declare PERM16 as a correct Monoid namespace power_helper { +// TODO required? 
using Perm16 = Perm16; template <> struct Monoid { diff --git a/include/hpcombi/perm_generic.hpp b/include/hpcombi/perm_generic.hpp index 3420daa7..0e0b2cfa 100644 --- a/include/hpcombi/perm_generic.hpp +++ b/include/hpcombi/perm_generic.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,16 +11,23 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_PERM_GENERIC_HPP #define HPCOMBI_PERM_GENERIC_HPP -#include -#include -#include -#include -#include +#include // for shuffle +#include // for array +#include // for size_t +#include // for uint64_t, uint8_t +#include // for hash +#include // for initializer_list +#include // for hash +#include // for mt19937 +#include // for is_trivial + +#include "debug.hpp" // for HPCOMBI_ASSERT +#include "vect_generic.hpp" // for VectGeneric namespace HPCombi { @@ -53,9 +60,9 @@ struct PermGeneric : public VectGeneric { bool left_weak_leq(PermGeneric other) const; }; -/*****************************************************************************/ -/** Memory layout concepts check ********************************************/ -/*****************************************************************************/ +/////////////////////////////////////////////////////////////////////////////// +// Memory layout concepts check ////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// static_assert(sizeof(VectGeneric<12>) == sizeof(PermGeneric<12>), "VectGeneric and PermGeneric have a different memory layout !"); diff --git a/include/hpcombi/perm_generic_impl.hpp b/include/hpcombi/perm_generic_impl.hpp index 2fb78636..bbf77e98 100644 --- a/include/hpcombi/perm_generic_impl.hpp +++ b/include/hpcombi/perm_generic_impl.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,7 +11,7 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// namespace HPCombi { diff --git a/include/hpcombi/power.hpp b/include/hpcombi/power.hpp index 28309e0f..2d17d6e4 100644 --- a/include/hpcombi/power.hpp +++ b/include/hpcombi/power.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,7 +11,8 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// + /** 
@file * @brief Generic compile time power * diff --git a/include/hpcombi/vect16.hpp b/include/hpcombi/vect16.hpp index ff72a4c9..d0e5930b 100644 --- a/include/hpcombi/vect16.hpp +++ b/include/hpcombi/vect16.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016-2018 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,21 +11,25 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_VECT16_HPP_INCLUDED #define HPCOMBI_VECT16_HPP_INCLUDED -#include +#include // for size_t +#include // for uint8_t, uint64_t, int8_t +#include // for initializer_list +#include // for ostream +#include // for hash +#include // for is_trivial #include "epu.hpp" -#include "perm16.hpp" // for is_permutation namespace HPCombi { struct alignas(16) Vect16 { static constexpr size_t size() { return 16; } - using array = decltype(Epu8)::array; + using array = typename decltype(Epu8)::array; epu8 v; Vect16() = default; diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp index 402b7dd7..6260186a 100644 --- a/include/hpcombi/vect_generic.hpp +++ b/include/hpcombi/vect_generic.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2016 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,18 +11,25 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_VECT_GENERIC_HPP #define HPCOMBI_VECT_GENERIC_HPP -#include -#include -#include -#include -#include -#include -#include +#include // for max, min, shuffle, sort +#include // for array +#include // for assert +#include // for size_t +#include // for uint64_t, int8_t, int64_t +#include // for hash +#include // for initializer_list +#include // for operator<<, setw +#include // for hash +#include // for operator<<, basic_ostream +#include // for mt19937, random_devide +#include // for is_trivial + +#include "debug.hpp" // for HPCOMBI_ASSERT namespace HPCombi { diff --git a/tests/test_bmat8.cpp b/tests/test_bmat8.cpp index 1ca173cf..24a93504 100644 --- a/tests/test_bmat8.cpp +++ b/tests/test_bmat8.cpp @@ -1,27 +1,31 @@ -/******************************************************************************/ -/* Copyright (C) 2016-2018 Florent Hivert , */ -/* */ -/* Distributed under the terms of the GNU General Public License (GPL) */ -/* */ -/* This code is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* General Public License for more details. 
*/ -/* */ -/* The full text of the GPL is available at: */ -/* */ -/* http://www.gnu.org/licenses/ */ -/******************************************************************************/ - -#include -#include -#include - -#include "test_main.hpp" -#include - -#include "hpcombi/bmat8.hpp" -#include "hpcombi/epu.hpp" +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016-2018 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// + +#include // for size_t +#include // for uint64_t +#include // for char_traits, ostream, ostrin... +#include // for operator== +#include // for pair +#include // for vector, allocator + +#include "test_main.hpp" // for TEST_AGREES, TEST_AGREES2 +#include // for operator""_catch_sr, operator== + +#include "hpcombi/bmat8.hpp" // for BMat8, operator<< +#include "hpcombi/perm16.hpp" // for Perm16 +#include "hpcombi/vect16.hpp" // for Vect16 namespace HPCombi { namespace { @@ -76,8 +80,8 @@ struct BMat8Fixture { }; } // namespace -//****************************************************************************// -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// TEST_CASE_METHOD(BMat8Fixture, "BMat8::one", "[BMat8][000]") { CHECK(BMat8::one(0) == zero); diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index bd07da37..6b31f0ef 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -1,17 +1,17 @@ -/******************************************************************************/ -/* Copyright (C) 2016-2018 Florent Hivert , */ -/* */ -/* Distributed under the terms of the GNU General Public License (GPL) */ -/* */ -/* This code is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* General Public License for more details. */ -/* */ -/* The full text of the GPL is available at: */ -/* */ -/* http://www.gnu.org/licenses/ */ -/******************************************************************************/ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2016-2018 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. 
// +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// #include #include diff --git a/tests/test_main.cpp b/tests/test_main.cpp index b1673b8f..0cab2cfa 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2023 James D. Mitchell // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,7 +11,7 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #include diff --git a/tests/test_main.hpp b/tests/test_main.hpp index d2757e34..475e3fba 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -1,4 +1,4 @@ -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// // Copyright (C) 2023 James D. Mitchell // // // // Distributed under the terms of the GNU General Public License (GPL) // @@ -11,7 +11,7 @@ // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // -//****************************************************************************// +//////////////////////////////////////////////////////////////////////////////// #ifndef HPCOMBI_TESTS_TEST_MAIN_HPP_ #define HPCOMBI_TESTS_TEST_MAIN_HPP_ diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 0735eca2..fbb8b16d 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -1,17 +1,17 @@ -/******************************************************************************/ -/* Copyright (C) 2017 Florent Hivert , */ -/* */ -/* Distributed under the terms of the GNU General Public License (GPL) */ -/* */ -/* This code is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* General Public License for more details. */ -/* */ -/* The full text of the GPL is available at: */ -/* */ -/* http://www.gnu.org/licenses/ */ -/******************************************************************************/ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2017 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. 
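test_main.hpp, whose banner is restyled above, is where the TEST_AGREES and TEST_AGREES2 helpers referenced by the new include comments are defined. Their exact signatures live in that header; the hand-written Catch2 test below only illustrates the idea they capture, namely checking an optimised member function against its *_ref reference implementation over a sample. The sample values come from the tests in this patch, while the Catch2 header path and the test name are assumptions of the sketch:

    #include <vector>
    #include <catch2/catch_test_macros.hpp>  // assuming the Catch2 v3 headers

    #include "hpcombi/perm16.hpp"

    using HPCombi::PTransf16;

    TEST_CASE("rank agrees with rank_ref (sketch)", "[sketch]") {
        std::vector<PTransf16> sample{PTransf16({}), PTransf16({4, 4, 4, 4}),
                                      PTransf16({1, 2, 3, 4, 0})};
        for (const auto &p : sample) {
            CHECK(p.rank() == p.rank_ref());
        }
    }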
// +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// #include "hpcombi/perm16.hpp" @@ -228,7 +228,6 @@ TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { .rank_ref() == 4); } -// TODO uncomment TEST_CASE("PTransf16::rank", "[PTransf16][007]") { CHECK(PTransf16({}).rank() == 16); CHECK(PTransf16({4, 4, 4, 4}).rank() == 12); diff --git a/tests/test_perm_all.cpp b/tests/test_perm_all.cpp index eb87ec05..77c72f96 100644 --- a/tests/test_perm_all.cpp +++ b/tests/test_perm_all.cpp @@ -1,17 +1,17 @@ -/******************************************************************************/ -/* Copyright (C) 2014 Florent Hivert , */ -/* */ -/* Distributed under the terms of the GNU General Public License (GPL) */ -/* */ -/* This code is distributed in the hope that it will be useful, */ -/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU */ -/* General Public License for more details. */ -/* */ -/* The full text of the GPL is available at: */ -/* */ -/* http://www.gnu.org/licenses/ */ -/******************************************************************************/ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2014 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// #include #include From a44eedaa9be7a234590570abb0c2251b5640b8a4 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 15:45:32 +0000 Subject: [PATCH 056/113] BMat8 benchmark done --- benchmark/bench_bmat8.cpp | 91 +++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index ae0ecd14..7c326ba3 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -83,7 +83,7 @@ class Fix_BMat8 { return true; \ }; -#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ +#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ auto val = \ @@ -93,7 +93,17 @@ class Fix_BMat8 { return true; \ }; -TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", "[BMat8][000]") { +#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &pair : sample) { \ + volatile auto val = pair.first.mem_fn(pair.second); \ + } \ + return true; \ + }; + + +TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", + "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size_ref, sample); BENCHMARK_MEM_FN(row_space_size_bitset, sample); BENCHMARK_MEM_FN(row_space_size_incl1, sample); @@ -107,10 +117,11 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose benchmarks 1000 BMat8", "[BMat8][000]") { BENCHMARK_MEM_FN(transpose_maskd, sample); } -TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", "[BMat8][002]") { - BENCHMARK_MEM_FN_PAIR(transpose, pair_sample); - BENCHMARK_MEM_FN_PAIR(transpose_mask, pair_sample); - 
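The rewritten BENCHMARK_MEM_FN_PAIR above deliberately stores the result of the benchmarked call in a volatile local. For a pair-consuming member function such as row_space_included, the macro expands to roughly the following Catch2 benchmark (a sketch of the expansion, living inside the same Fix_BMat8 test cases used below):

    BENCHMARK("row_space_included") {
        for (auto &pair : pair_sample) {
            // `volatile` forces the compiler to materialise every result;
            // without it the value is unused and the optimiser may delete
            // the call, leaving an empty loop that benchmarks nothing.
            volatile auto val = pair.first.row_space_included(pair.second);
        }
        return true;
    };

The original body is kept under the new name BENCHMARK_MEM_FN_PAIR_EQ for the transpose benchmarks below, where the result also feeds a comparison against pair.second.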
BENCHMARK_MEM_FN_PAIR(transpose_maskd, pair_sample); +TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", + "[BMat8][002]") { + BENCHMARK_MEM_FN_PAIR_EQ(transpose, pair_sample); + BENCHMARK_MEM_FN_PAIR_EQ(transpose_mask, pair_sample); + BENCHMARK_MEM_FN_PAIR_EQ(transpose_maskd, pair_sample); BENCHMARK("transpose2") { for (auto &pair : pair_sample) { BMat8::transpose2(pair.first, pair.second); @@ -119,49 +130,35 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", "[BMat8][00 return true; }; } -/* - - - -int Bench_row_space_included() { - myBench( - "row_space_incl_ref", - [](std::pair p) { - return p.first.row_space_included_ref(p.second); - }, - pair_sample); - myBench( - "row_space_incl_bitset", - [](std::pair p) { - return p.first.row_space_included_bitset(p.second); - }, - pair_sample); - myBench( - "row_space_incl_rotate", - [](std::pair p) { - return p.first.row_space_included(p.second); - }, - pair_sample); - return 0; + +TEST_CASE_METHOD(Fix_BMat8, + "Inclusion of row spaces benchmarks 1000 BMat8", + "[BMat8][002]") { + BENCHMARK_MEM_FN_PAIR(row_space_included_ref, pair_sample); + BENCHMARK_MEM_FN_PAIR(row_space_included_bitset, pair_sample); + BENCHMARK_MEM_FN_PAIR(row_space_included, pair_sample); } -int Bench_row_space_included2() { - myBench( - "row_space_incl2_rotate", - [](std::pair p) { - return p.first.row_space_included(p.second) == - p.second.row_space_included(p.first); - }, - pair_sample); - myBench( - "row_space_incl2", - [](std::pair p) { - auto res = BMat8::row_space_included2(p.first, p.second, -p.second, p.first); return res.first == res.second; - }, - pair_sample); - return 0; +TEST_CASE_METHOD(Fix_BMat8, + "Inclusion of row spaces benchmarks 1000 BMat8 by pairs", + "[BMat8][002]") { + BENCHMARK("rotating pairs implementation") { + for (auto &pair : pair_sample) { + auto res = BMat8::row_space_included2(pair.first, pair.second, + pair.second, pair.first); + volatile auto val = (res.first == res.second); + } + return true; + }; + BENCHMARK("Calling twice implementation") { + for (auto &pair : pair_sample) { + volatile auto val = ( + pair.first.row_space_included(pair.second) == + pair.second.row_space_included(pair.first)); + + } + return true; + }; } -*/ } // namespace HPCombi From cb7f270d1f6ba0b1b04b369971aff76bfaba7a15 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 15:55:25 +0000 Subject: [PATCH 057/113] Added missing include to simde/x86/sse4.2.h --- include/hpcombi/epu.hpp | 1 + include/hpcombi/epu_impl.hpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index f424ca4d..8d24905f 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -29,6 +29,7 @@ #include "vect_generic.hpp" // for VectGeneric #include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde... +#include "simde/x86/sse4.2.h" // for ??? 
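The "// for ???" comment above is a placeholder: what simde/x86/sse4.2.h appears to provide here is the string-comparison intrinsic simde_mm_cmpestri together with the SIMDE_SIDD_* mode constants that epu_impl.hpp combines into FIRST_DIFF, LAST_DIFF and friends, which the next hunk additionally fences behind SIMDE_X86_SSE4_2_NATIVE. A self-contained sketch of that pattern; first_diff_sketch is a hypothetical helper for illustration, not HPCombi's first_diff:

    #include <cstdint>

    #include "simde/x86/sse4.2.h"

    // Index of the first byte where two 16-byte blocks differ, or 16 if equal.
    // The mode below is the same combination epu_impl.hpp names FIRST_DIFF.
    inline int first_diff_sketch(const std::uint8_t a[16],
                                 const std::uint8_t b[16]) {
    #ifdef SIMDE_X86_SSE4_2_NATIVE
        simde__m128i va =
            simde_mm_loadu_si128(reinterpret_cast<const simde__m128i *>(a));
        simde__m128i vb =
            simde_mm_loadu_si128(reinterpret_cast<const simde__m128i *>(b));
        return simde_mm_cmpestri(va, 16, vb, 16,
                                 SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH |
                                     SIMDE_SIDD_NEGATIVE_POLARITY);
    #else
        for (int i = 0; i < 16; ++i)
            if (a[i] != b[i])
                return i;
        return 16;
    #endif
    }

Keeping a scalar branch next to the guarded intrinsic is one way such a header stays usable on targets where SIMDE does not expose a native SSE4.2 implementation; presumably that portability concern is why the guard was added.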
namespace HPCombi { diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index 73f91e89..75501df3 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -23,6 +23,7 @@ #include "vect_generic.hpp" +#ifdef SIMDE_X86_SSE4_2_NATIVE // Comparison mode for _mm_cmpestri #define FIRST_DIFF \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \ @@ -40,6 +41,7 @@ #define LAST_NON_ZERO \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \ SIMDE_SIDD_MASKED_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT) +#endif namespace HPCombi { From f4f8dc2a3ebafb78dd311702a086e0712a27753e Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 16:00:03 +0000 Subject: [PATCH 058/113] Updated README.md --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8829eb79..538192c8 100644 --- a/README.md +++ b/README.md @@ -8,13 +8,15 @@ and benchmark them on various compiler and architecture. ## Authors -Florent Hivert +- Florent Hivert +- James Mitchell ## Contributors -- James Mitchell : discussions + test cases + Travis CI + BMat8 reference code - Finn Smith : discussions + BMat8 reference code - Viviane Pons : algorithms discussions +- Reinis Cirpons : CI + ## Acknowledgments From 758a862ce8964bc1ebe2ee18f2d2eb9db6a8f13f Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 16:11:27 +0000 Subject: [PATCH 059/113] Update Doxyfile.in --- doc/Doxyfile.in | 625 ++++++++++++++++++++++++++++-------------------- 1 file changed, 368 insertions(+), 257 deletions(-) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 44a5ec78..51a7ff29 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -1,4 +1,4 @@ -# Doxyfile 1.9.1 +# Doxyfile 1.9.7 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -12,6 +12,16 @@ # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options @@ -60,16 +70,28 @@ PROJECT_LOGO = OUTPUT_DIRECTORY = ${CMAKE_BINARY_DIR}/doc/ -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. 
Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,26 +103,18 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -248,16 +262,16 @@ TAB_SIZE = 4 # the documentation. An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. 
-# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -302,8 +316,8 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files @@ -339,6 +353,17 @@ MARKDOWN_SUPPORT = YES TOC_INCLUDE_HEADINGS = 5 +# The MARKDOWN_ID_STYLE tag can be used to specify the algorithm used to +# generate identifiers for the Markdown headings. Note: Every identifier is +# unique. +# Possible values are: DOXYGEN Use a fixed 'autotoc_md' string followed by a +# sequence number starting at 0. and GITHUB Use the lower case version of title +# with any whitespace replaced by '-' and punctations characters removed.. +# The default value is: DOXYGEN. +# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. + +MARKDOWN_ID_STYLE = DOXYGEN + # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or @@ -450,19 +475,27 @@ TYPEDEF_HIDES_STRUCT = NO LOOKUP_CACHE_SIZE = 0 -# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use # during processing. When set to 0 doxygen will based this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. 
NUM_PROC_THREADS = 1 +# If the TIMESTAMP tag is set different from NO then each generated page will +# contain the date or date and time when the page was generated. Setting this to +# NO can help when comparing the output of multiple runs. +# Possible values are: YES, NO, DATETIME and DATE. +# The default value is: NO. + +TIMESTAMP = NO + #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- @@ -544,7 +577,8 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO @@ -575,14 +609,15 @@ INTERNAL_DOCS = NO # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. For filesystems that -# are not case sensitive the option should be be set to NO to properly deal with +# are not case sensitive the option should be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. -# The default value is: system dependent. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. CASE_SENSE_NAMES = YES @@ -600,6 +635,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -757,7 +798,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -803,27 +845,50 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. 
+# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. + +WARN_IF_UNDOC_ENUM_VAL = NO + # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but # at the end of the doxygen process doxygen will return with a non-zero status. -# Possible values are: NO, YES and FAIL_ON_WARNINGS. +# If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS_PRINT then doxygen behaves +# like FAIL_ON_WARNINGS but in case no WARN_LOGFILE is defined doxygen will not +# write the warning messages in between other messages but write them at the end +# of a run, in case a WARN_LOGFILE is defined the warning messages will be +# besides being in the defined file also be shown at the end of a run, unless +# the WARN_LOGFILE is defined as - i.e. standard output (stdout) in that case +# the behavior will remain as with the setting FAIL_ON_WARNINGS. +# Possible values are: NO, YES, FAIL_ON_WARNINGS and FAIL_ON_WARNINGS_PRINT. # The default value is: NO. WARN_AS_ERROR = NO @@ -834,13 +899,27 @@ WARN_AS_ERROR = NO # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). 
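With WARN_IF_DOC_ERROR = YES and WARN_IF_INCOMPLETE_DOC = YES as above, a comment that documents a parameter twice, names a parameter that does not exist, or describes only some of a function's parameters is reported during the documentation build. A small example of a comment that passes those checks; the member function itself is hypothetical and only shown for the Doxygen markup:

    /** Sorts the first @p bound entries of the vector.
     *
     * @param bound  number of leading entries to sort (at most 16)
     * @param stable whether equal entries must keep their relative order
     * @return a copy of the vector with its first @p bound entries sorted
     */
    Vect16 sorted_prefix(size_t bound, bool stable) const;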
In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -854,8 +933,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = ${CMAKE_SOURCE_DIR}/include \ - ${CMAKE_SOURCE_DIR}/src \ +INPUT = ${CMAKE_SOURCE_DIR}/include/hpcombi \ ${CMAKE_SOURCE_DIR}/examples \ ${CMAKE_SOURCE_DIR}/README.md @@ -864,10 +942,21 @@ INPUT = ${CMAKE_SOURCE_DIR}/include \ # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 +# This tag can be used to specify the character encoding of the source files +# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. Doxygen will compare the file +# name with each pattern and apply the encoding instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding +# "INPUT_ENCODING" for further information on supported encodings. + +INPUT_FILE_ENCODING = + # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -881,10 +970,10 @@ INPUT_ENCODING = UTF-8 # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, -# *.ucf, *.qsf and *.ice. +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = @@ -923,12 +1012,9 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* +# ANamespace::AClass, ANamespace::*Test -EXCLUDE_SYMBOLS = common_* +EXCLUDE_SYMBOLS = detail::* # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -971,6 +1057,11 @@ IMAGE_PATH = # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. 
+# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. @@ -1012,6 +1103,15 @@ FILTER_SOURCE_PATTERNS = USE_MDFILE_AS_MAINPAGE = ${CMAKE_SOURCE_DIR}/README.md +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. + +FORTRAN_COMMENT_AFTER = 72 + #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- @@ -1098,44 +1198,6 @@ USE_HTAGS = NO VERBATIM_HEADERS = YES -# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the -# clang parser (see: -# http://clang.llvm.org/) for more accurate parsing at the cost of reduced -# performance. This can be particularly helpful with template rich C++ code for -# which doxygen's built-in parser lacks the necessary type information. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse_libclang=ON option for CMake. -# The default value is: NO. - -CLANG_ASSISTED_PARSING = NO - -# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to -# YES then doxygen will add the directory of each input to the include path. -# The default value is: YES. - -CLANG_ADD_INC_PATHS = YES - -# If clang assisted parsing is enabled you can provide the compiler with command -# line options that you would normally use when invoking the compiler. Note that -# the include paths will already be set by doxygen for the files and directories -# specified with INPUT and INCLUDE_PATH. -# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. - -CLANG_OPTIONS = - -# If clang assisted parsing is enabled you can provide the clang parser with the -# path to the directory containing a file called compile_commands.json. This -# file is the compilation database (see: -# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) containing the -# options used when the source files were built. This is equivalent to -# specifying the -p option to a clang tool, such as clang-check. These options -# will then be passed to the parser. Any options specified with CLANG_OPTIONS -# will be added as well. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse_libclang=ON option for CMake. - -CLANG_DATABASE_PATH = - #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- @@ -1147,10 +1209,11 @@ CLANG_DATABASE_PATH = ALPHABETICAL_INDEX = YES -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. 
+# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = @@ -1229,7 +1292,12 @@ HTML_STYLESHEET = # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = @@ -1244,9 +1312,22 @@ HTML_EXTRA_STYLESHEET = HTML_EXTRA_FILES = +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allow to user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. +# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = AUTO_LIGHT + # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1256,7 +1337,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1274,15 +1355,6 @@ HTML_COLORSTYLE_SAT = 100 HTML_COLORSTYLE_GAMMA = 80 -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. 
If disabled, the navigation index will @@ -1338,6 +1410,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1363,8 +1442,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: -# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1421,6 +1504,16 @@ BINARY_TOC = NO TOC_EXPAND = NO +# The SITEMAP_URL tag is used to specify the full URL of the place where the +# generated documentation will be placed on the server by the user during the +# deployment of the documentation. The generated sitemap is called sitemap.xml +# and placed on the directory specified by HTML_OUTPUT. In case no SITEMAP_URL +# is specified no sitemap is generated. For information about the sitemap +# protocol see https://www.sitemaps.org +# This tag requires that the tag GENERATE_HTML is set to YES. + +SITEMAP_URL = + # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help @@ -1523,16 +1616,28 @@ DISABLE_INDEX = NO # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. 
GENERATE_TREEVIEW = NO +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1557,6 +1662,13 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for @@ -1577,17 +1689,6 @@ HTML_FORMULA_FORMAT = png FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. @@ -1605,11 +1706,29 @@ FORMULA_MACROFILE = USE_MATHJAX = YES +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for Mathjax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for NathJax 2. 
For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for Mathjax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1622,15 +1741,21 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = @@ -1810,29 +1935,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. 
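Since USE_MATHJAX = YES and MATHJAX_RELPATH points at a CDN, LaTeX written inside Doxygen comments is typeset directly in the HTML output. A minimal example of the \f$ ... \f$ inline-formula syntax; the documented function is hypothetical:

    /** Number of inversions of a permutation.
     *
     * Rendered by MathJax in the HTML output:
     * \f$ \operatorname{inv}(\sigma) = \#\{(i,j) : i < j,\ \sigma(i) > \sigma(j)\} \f$
     */
    uint8_t nb_inversions() const;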
If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1875,10 +2002,16 @@ PDF_HYPERLINKS = YES USE_PDFLATEX = YES -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode -# command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# The LATEX_BATCHMODE tag ignals the behavior of LaTeX in case of an error. +# Possible values are: NO same as ERROR_STOP, YES same as BATCH, BATCH In batch +# mode nothing is printed on the terminal, errors are scrolled as if is +# hit at every error; missing files that TeX tries to input or request from +# keyboard input (\read on a not open input stream) cause the job to abort, +# NON_STOP In nonstop mode the diagnostic message will appear on the terminal, +# but there is no possibility of user interaction just like in batch mode, +# SCROLL In scroll mode, TeX will stop only for missing files to input or if +# keyboard input is necessary and ERROR_STOP In errorstop mode, TeX will stop at +# each error, asking for user intervention. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1891,16 +2024,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -1909,14 +2032,6 @@ LATEX_SOURCE_CODE = NO LATEX_BIB_STYLE = plain -# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated -# page will contain the date and time when the page was generated. Setting this -# to NO can help when comparing the output of multiple runs. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_TIMESTAMP = NO - # The LATEX_EMOJI_DIRECTORY tag is used to specify the (relative or absolute) # path from which the emoji images will be read. If a relative path is entered, # it will be relative to the LATEX_OUTPUT directory. If left blank the @@ -1981,16 +2096,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. 
-# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2087,21 +2192,12 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an -# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures +# AutoGen Definitions (see https://autogen.sourceforge.net/) file that captures # the structure of the code including all documentation. Note that this feature # is still experimental and incomplete at the moment. # The default value is: NO. @@ -2182,7 +2278,8 @@ SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by the -# preprocessor. +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. # This tag requires that the tag SEARCH_INCLUDES is set to YES. INCLUDE_PATH = @@ -2271,25 +2368,9 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES #--------------------------------------------------------------------------- -# Configuration options related to the dot tool +# Configuration options related to diagram generator tools #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = YES - -# You can include diagrams made with dia in doxygen documentation. Doxygen will -# then run dia to produce the diagram and insert it in the documentation. The -# DIA_PATH tag allows you to specify the directory where the dia binary resides. -# If left empty dia is assumed to be found in the default search path. - -DIA_PATH = - # If set to YES the inheritance and collaboration graphs will hide inheritance # and usage relations if the target is undocumented or is not a class. # The default value is: YES. @@ -2298,10 +2379,10 @@ HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz (see: -# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent +# https://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO -# The default value is: YES. 
+# The default value is: NO. HAVE_DOT = NO @@ -2315,35 +2396,52 @@ HAVE_DOT = NO DOT_NUM_THREADS = 0 -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. +# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of +# subgraphs. When you want a differently looking font in the dot files that +# doxygen generates you can specify fontname, fontcolor and fontsize attributes. +# For details please see Node, +# Edge and Graph Attributes specification You need to make sure dot is able +# to find the font, which can be done by putting it in a standard location or by +# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. Default graphviz fontsize is 14. +# The default value is: fontname=Helvetica,fontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTNAME = Helvetica +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. +# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can +# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. Complete documentation about +# arrows shapes. +# The default value is: labelfontname=Helvetica,labelfontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTSIZE = 10 +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. +# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes +# around nodes set 'shape=plain' or 'shape=plaintext' Shapes specification +# The default value is: shape=box,height=0.2,width=0.4. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" + +# You can set the path where dot can find font specified with fontname in +# DOT_COMMON_ATTR and others dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. -# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES or GRAPH or BUILTIN then doxygen will +# generate a graph for each documented class showing the direct and indirect +# inheritance relations. In case the CLASS_GRAPH tag is set to YES or GRAPH and +# HAVE_DOT is enabled as well, then dot will be used to draw the graph. In case +# the CLASS_GRAPH tag is set to YES and HAVE_DOT is disabled or if the +# CLASS_GRAPH tag is set to BUILTIN, then the built-in generator will be used. +# If the CLASS_GRAPH tag is set to TEXT the direct and indirect inheritance +# relations will be shown as texts / links. +# Possible values are: NO, YES, TEXT, GRAPH and BUILTIN. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. 
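For HPCombi these graph options act on the inheritance relations that doxygen extracts from the headers, chiefly the hierarchy rooted at PTransf16 in perm16.hpp. A minimal sketch of those relations, with all members elided (illustrative only, not the real declarations):

struct PTransf16 {};                    // partial transformations of {0, ..., 15}
struct Transf16 : public PTransf16 {};  // (total) transformations
struct PPerm16 : public PTransf16 {};   // partial permutations
struct Perm16 : public Transf16 {};     // permutations

With CLASS_GRAPH = YES (or GRAPH) and HAVE_DOT = YES, declarations like these are rendered as dot inheritance diagrams; the TEXT and BUILTIN values fall back to textual links or to doxygen's built-in generator, as described above.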
CLASS_GRAPH = YES @@ -2357,7 +2455,8 @@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. See also the chapter Grouping +# in the manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2472,16 +2571,21 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: -# http://www.graphviz.org/)). +# https://www.graphviz.org/)). # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). -# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, -# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, -# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo, # png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and # png:gdiplus:gdiplus. # The default value is: png. @@ -2514,11 +2618,12 @@ DOT_PATH = DOTFILE_DIRS = -# The MSCFILE_DIRS tag can be used to specify one or more directories that -# contain msc files that are included in the documentation (see the \mscfile -# command). +# You can include diagrams made with dia in doxygen documentation. Doxygen will +# then run dia to produce the diagram and insert it in the documentation. The +# DIA_PATH tag allows you to specify the directory where the dia binary resides. +# If left empty dia is assumed to be found in the default search path. -MSCFILE_DIRS = +DIA_PATH = # The DIAFILE_DIRS tag can be used to specify one or more directories that # contain dia files that are included in the documentation (see the \diafile @@ -2527,10 +2632,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2568,18 +2673,6 @@ DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). 
-# The default value is: NO. -# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT = NO - # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) support @@ -2592,6 +2685,8 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2600,8 +2695,24 @@ GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. # -# Note: This setting is not only used for dot files but also for msc and -# plantuml temporary files. +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. DOT_CLEANUP = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. If the MSCGEN_TOOL tag is left empty (the default), then doxygen will +# use a built-in version of mscgen tool to produce the charts. Alternatively, +# the MSCGEN_TOOL tag can also specify the name an external tool. For instance, +# specifying prog as the value, doxygen will call the tool as prog -T +# -o . The external tool should support +# output file formats "png", "eps", "svg", and "ismap". + +MSCGEN_TOOL = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the \mscfile +# command). + +MSCFILE_DIRS = From 81d6cd963ac4e459af30c6c992d154d656b4c40c Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 16:11:51 +0000 Subject: [PATCH 060/113] Remove some libsemigroups remnants --- include/hpcombi/bmat8.hpp | 2 +- include/hpcombi/bmat8_impl.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp index 73888cdb..069361ca 100644 --- a/include/hpcombi/bmat8.hpp +++ b/include/hpcombi/bmat8.hpp @@ -312,7 +312,7 @@ class BMat8 { //! Write \c this on \c os std::ostream &write(std::ostream &os) const; -#ifdef LIBSEMIGROUPS_DENSEHASHMAP +#ifdef HPCOMBI_HAVE_DENSEHASHMAP // FIXME do this another way BMat8 empty_key() const { return BMat8(0xFF7FBFDFEFF7FBFE); } #endif diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index ea4c0f77..de772e02 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -426,12 +426,12 @@ inline BMat8 BMat8::col_permutation_matrix(Perm16 p) { } inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { - // LIBSEMIGROUPS_ASSERT(bm.row_space_basis() == bm); + HPCOMBI_ASSERT(bm.row_space_basis() == bm); std::vector rows = this->rows(); BMat8 product = *this * bm; std::vector prod_rows = product.rows(); - // LIBSEMIGROUPS_ASSERT(product.row_space_basis() == bm); + HPCOMBI_ASSERT(product.row_space_basis() == bm); std::vector perm(8); for (size_t i = 0; i < nr_rows(); ++i) { From 4ba3cf63d81071bea10b44c117ffb54995dfd553 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Sun, 29 Oct 2023 16:12:03 +0000 Subject: [PATCH 061/113] Remove travis badge from README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 538192c8..e431e19c 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# HPCombi ![build status](https://api.travis-ci.org/hivert/HPCombi.svg?branch=master) +# HPCombi High Performance Combinatorics in C++ using vector instructions v0.0.8 SSE and AVX instruction sets allows for very fast manipulation of From 232796bee0320ba2cb099b045fd41495670ecd34 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 16:12:10 +0000 Subject: [PATCH 062/113] Fix typo in the doc --- include/hpcombi/perm16.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index d12df181..e1f5a56b 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -145,9 +145,7 @@ struct Transf16 : public PTransf16 { explicit operator uint64_t() const; }; -/** Partial permutationof @f$\{0\dots 15\}@f$ - * - */ +//! Partial permutation of @f$\{0, \dots, 15\}@f$ struct PPerm16 : public PTransf16 { PPerm16() = default; constexpr PPerm16(const PPerm16 &v) = default; From 8f085ec37cefb7a50e47edb443dce8f2f328e995 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 16:58:23 +0000 Subject: [PATCH 063/113] noexcept in epu --- include/hpcombi/epu.hpp | 154 +++++++++++++++---------------- include/hpcombi/epu_impl.hpp | 122 ++++++++++++------------ include/hpcombi/vect_generic.hpp | 4 +- 3 files changed, 141 insertions(+), 139 deletions(-) diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 8d24905f..3f99db02 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -120,21 +120,21 @@ constexpr epu8 popcount4 = Epu8([](uint8_t i) { * * This is usually faster for algorithm using a lot of indexed access. */ -inline decltype(Epu8)::array &as_array(epu8 &v) { +inline decltype(Epu8)::array &as_array(epu8 &v) noexcept { return reinterpret_cast(v); } /** Cast a constant #HPCombi::epu8 to a C++ \c std::array * * This is usually faster for algorithm using a lot of indexed access. */ -inline const decltype(Epu8)::array &as_array(const epu8 &v) { +inline const decltype(Epu8)::array &as_array(const epu8 &v) noexcept { return reinterpret_cast(v); } /** Cast a C++ \c std::array to a #HPCombi::epu8 */ // Passing the argument by reference triggers a segfault in gcc // Since vector types doesn't belongs to the standard, I didn't manage // to know if I'm using undefined behavior here. 
-inline epu8 from_array(decltype(Epu8)::array a) { +inline epu8 from_array(decltype(Epu8)::array a) noexcept { return reinterpret_cast(a); } @@ -155,70 +155,70 @@ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) { } /** Test whether all the entries of a #HPCombi::epu8 are zero */ -inline bool is_all_zero(epu8 a) { return simde_mm_testz_si128(a, a); } +inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); } /** Test whether all the entries of a #HPCombi::epu8 are one */ -inline bool is_all_one(epu8 a) { return simde_mm_testc_si128(a, Epu8(0xFF)); } +inline bool is_all_one(epu8 a) noexcept { return simde_mm_testc_si128(a, Epu8(0xFF)); } /** Equality of #HPCombi::epu8 */ -inline bool equal(epu8 a, epu8 b) { +inline bool equal(epu8 a, epu8 b) noexcept { return is_all_zero(simde_mm_xor_si128(a, b)); } /** Non equality of #HPCombi::epu8 */ -inline bool not_equal(epu8 a, epu8 b) { return !equal(a, b); } +inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); } /** Permuting a #HPCombi::epu8 */ -inline epu8 permuted(epu8 a, epu8 b) { return simde_mm_shuffle_epi8(a, b); } +inline epu8 permuted(epu8 a, epu8 b) noexcept { return simde_mm_shuffle_epi8(a, b); } /** Left shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left ! */ -inline epu8 shifted_right(epu8 a) { return simde_mm_bslli_si128(a, 1); } +inline epu8 shifted_right(epu8 a) noexcept { return simde_mm_bslli_si128(a, 1); } /** Right shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left ! */ -inline epu8 shifted_left(epu8 a) { return simde_mm_bsrli_si128(a, 1); } +inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ -inline epu8 reverted(epu8 a) { return permuted(a, epu8rev); } +inline epu8 reverted(epu8 a) noexcept { return permuted(a, epu8rev); } /** Vector min between two #HPCombi::epu8 0 */ -inline epu8 min(epu8 a, epu8 b) { return simde_mm_min_epu8(a, b); } +inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); } /** Vector max between two #HPCombi::epu8 0 */ -inline epu8 max(epu8 a, epu8 b) { return simde_mm_max_epu8(a, b); } +inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); } /** Testing if a #HPCombi::epu8 is sorted */ -inline bool is_sorted(epu8 a); +inline bool is_sorted(epu8 a) noexcept; /** Return a sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds */ -inline epu8 sorted(epu8 a); +inline epu8 sorted(epu8 a) noexcept; /** Return a #HPCombi::epu8 with the two half sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ -inline epu8 sorted8(epu8 a); +inline epu8 sorted8(epu8 a) noexcept; /** Return a reverse sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds */ -inline epu8 revsorted(epu8 a); +inline epu8 revsorted(epu8 a) noexcept; /** Return a #HPCombi::epu8 with the two half reverse sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ -inline epu8 revsorted8(epu8 a); +inline epu8 revsorted8(epu8 a) noexcept; /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort_perm(epu8 &a); +inline epu8 sort_perm(epu8 &a) noexcept; /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 
stages sorting network #sorting_rounds8 */ -inline epu8 sort8_perm(epu8 &a); +inline epu8 sort8_perm(epu8 &a) noexcept; /** @class common_permutation_of * @brief Find if a vector is a permutation of one other @@ -232,16 +232,16 @@ inline epu8 sort8_perm(epu8 &a); /** @copydoc common_permutation_of @par Algorithm: uses string matching cpmestrm intrisics */ -inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b); +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept; #endif /** @copydoc common_permutation_of @par Algorithm: reference implementation */ -inline epu8 permutation_of_ref(epu8 a, epu8 b); +inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept; /** @copydoc common_permutation_of @par Algorithm: architecture dependent */ -inline epu8 permutation_of(epu8 a, epu8 b); +inline epu8 permutation_of(epu8 a, epu8 b) noexcept; /** A prime number good for hashing */ constexpr uint64_t prime = 0x9e3779b97f4a7bb9; @@ -262,7 +262,7 @@ inline epu8 random_epu8(uint16_t bnd); * @return the vector \c a where repeated occurrences of entries are replaced * by \c repl */ -inline epu8 remove_dups(epu8 a, uint8_t repl = 0); +inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept; /** @class common_horiz_sum * @brief Horizontal sum of a #HPCombi::epu8 @@ -279,25 +279,25 @@ inline epu8 remove_dups(epu8 a, uint8_t repl = 0); * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_sum_ref(epu8); +inline uint8_t horiz_sum_ref(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_sum_gen(epu8); +inline uint8_t horiz_sum_gen(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_sum4(epu8); +inline uint8_t horiz_sum4(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_sum3(epu8); +inline uint8_t horiz_sum3(epu8) noexcept; /** @copydoc common_horiz_sum */ -inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); } +inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); } /** @class common_partial_sums * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -313,20 +313,20 @@ inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_sums_ref(epu8); +inline epu8 partial_sums_ref(epu8) noexcept; /** @copydoc common_partial_sums * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_sums_gen(epu8); +inline epu8 partial_sums_gen(epu8) noexcept; /** @copydoc common_partial_sums * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_sums_round(epu8); +inline epu8 partial_sums_round(epu8) noexcept; /** @copydoc common_partial_sums */ -inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } +inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); } /** @class common_horiz_max * @brief Horizontal sum of a #HPCombi::epu8 @@ -342,25 +342,25 @@ inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_max_ref(epu8); +inline uint8_t horiz_max_ref(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * Reference @f$O(n)@f$ algorithm 
using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_max_gen(epu8); +inline uint8_t horiz_max_gen(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_max4(epu8); +inline uint8_t horiz_max4(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_max3(epu8); +inline uint8_t horiz_max3(epu8) noexcept; /** @copydoc common_horiz_max */ -inline uint8_t horiz_max(epu8 v) { return horiz_max4(v); } +inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); } /** @class common_partial_max * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -376,20 +376,20 @@ inline uint8_t horiz_max(epu8 v) { return horiz_max4(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_max_ref(epu8); +inline epu8 partial_max_ref(epu8) noexcept; /** @copydoc common_partial_max * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_max_gen(epu8); +inline epu8 partial_max_gen(epu8) noexcept; /** @copydoc common_partial_max * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_max_round(epu8); +inline epu8 partial_max_round(epu8) noexcept; /** @copydoc common_partial_max */ -inline epu8 partial_max(epu8 v) { return partial_max_round(v); } +inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); } /** @class common_horiz_min * @brief Horizontal sum of a #HPCombi::epu8 @@ -405,25 +405,25 @@ inline epu8 partial_max(epu8 v) { return partial_max_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_min_ref(epu8); +inline uint8_t horiz_min_ref(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_min_gen(epu8); +inline uint8_t horiz_min_gen(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_min4(epu8); +inline uint8_t horiz_min4(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_min3(epu8); +inline uint8_t horiz_min3(epu8) noexcept; /** @copydoc common_horiz_min */ -inline uint8_t horiz_min(epu8 v) { return horiz_min4(v); } +inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); } /** @class common_partial_min * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -439,20 +439,20 @@ inline uint8_t horiz_min(epu8 v) { return horiz_min4(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_min_ref(epu8); +inline epu8 partial_min_ref(epu8) noexcept; /** @copydoc common_partial_min * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_min_gen(epu8); +inline epu8 partial_min_gen(epu8) noexcept; /** @copydoc common_partial_min * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_min_round(epu8); +inline epu8 partial_min_round(epu8) noexcept; /** @copydoc common_partial_min */ -inline epu8 partial_min(epu8 v) { return partial_min_round(v); } +inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); } /** @class common_eval16 * @brief Evaluation of a 
#HPCombi::epu8 @@ -471,24 +471,24 @@ inline epu8 partial_min(epu8 v) { return partial_min_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 eval16_ref(epu8 v); +inline epu8 eval16_ref(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and cast to array */ -inline epu8 eval16_arr(epu8 v); +inline epu8 eval16_arr(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using cyclic shifting */ -inline epu8 eval16_cycle(epu8 v); +inline epu8 eval16_cycle(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using popcount */ -inline epu8 eval16_popcount(epu8 v); +inline epu8 eval16_popcount(epu8 v) noexcept; /** @copydoc common_eval16 */ -inline epu8 eval16(epu8 v) { return eval16_cycle(v); } +inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); } /** @class common_first_diff * @brief The first difference between two #HPCombi::epu8 @@ -512,21 +512,21 @@ inline epu8 eval16(epu8 v) { return eval16_cycle(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept; #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_first_diff * @par Algorithm: * Using \c cmpestri instruction */ -inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept; #endif /** @copydoc common_first_diff * @par Algorithm: * Using vector comparison and mask */ -inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept; /** @copydoc common_first_diff */ -inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { +inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept { return first_diff_mask(a, b, bound); } @@ -552,53 +552,53 @@ inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept; #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_last_diff * @par Algorithm: * Using \c cmpestri instruction */ -inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept; #endif /** @copydoc common_last_diff * @par Algorithm: * Using vector comparison and mask */ -inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept; /** @copydoc common_last_diff */ -inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) { +inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept { return last_diff_mask(a, b, bound); } /** Lexicographic comparison between two #HPCombi::epu8 */ -inline bool less(epu8 a, epu8 b); +inline bool less(epu8 a, epu8 b) noexcept; /** Partial lexicographic comparison between two #HPCombi::epu8 * @param a, b : the vectors to compare * @param k : the bound for the lexicographic comparison * @return a positive, negative or zero int8_t depending on the result */ -inline int8_t less_partial(epu8 a, epu8 b, int k); +inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept; /** 
return the index of the first zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t first_zero(epu8 v, int bnd); +inline uint64_t first_zero(epu8 v, int bnd) noexcept; /** return the index of the last zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t last_zero(epu8 v, int bnd); +inline uint64_t last_zero(epu8 v, int bnd) noexcept; /** return the index of the first non zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t first_non_zero(epu8 v, int bnd); +inline uint64_t first_non_zero(epu8 v, int bnd) noexcept; /** return the index of the last non zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t last_non_zero(epu8 v, int bnd); +inline uint64_t last_non_zero(epu8 v, int bnd) noexcept; /** a vector popcount function */ -inline epu8 popcount16(epu8 v); +inline epu8 popcount16(epu8 v) noexcept; /** Test for partial transformation * @details @@ -615,7 +615,7 @@ inline epu8 popcount16(epu8 v); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xff,0xff,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_partial_transformation(epu8 v, const size_t k = 16); +inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept; /** Test for transformation * @details @@ -632,7 +632,7 @@ inline bool is_partial_transformation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 2 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,2,1,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_transformation(epu8 v, const size_t k = 16); +inline bool is_transformation(epu8 v, const size_t k = 16) noexcept; /** Test for partial permutations * @details @@ -650,7 +650,7 @@ inline bool is_transformation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xFF,0xFF,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_partial_permutation(epu8 v, const size_t k = 16); +inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept; /** @class common_is_permutation * @details @@ -671,16 +671,16 @@ inline bool is_partial_permutation(epu8 v, const size_t k = 16); /** @copydoc common_is_permutation @par Algorithm: uses string matching cpmestri intrisics */ -inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16); +inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept; #endif /** @copydoc common_is_permutation @par Algorithm: sort the vector and compare to identity */ -inline bool is_permutation_sort(epu8 v, const size_t k = 16); +inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept; /** @copydoc common_is_permutation @par Algorithm: architecture dependent */ -inline bool is_permutation(epu8 v, const size_t k = 16); +inline bool is_permutation(epu8 v, const size_t k = 16) noexcept; } // namespace HPCombi diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index 75501df3..d89ef989 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -59,22 +59,22 @@ inline uint64_t last_mask(epu8 msk, size_t bound) { return res == 0 ? 
16 : (63 - __builtin_clzll(res)); } -inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) { +inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) noexcept { for (size_t i = 0; i < bound; i++) if (a[i] != b[i]) return i; return 16; } #ifdef SIMDE_X86_SSE4_2_NATIVE -inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) { +inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept { return unsigned(_mm_cmpestri(a, bound, b, bound, FIRST_DIFF)); } #endif -inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) { +inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) noexcept { return first_mask(a != b, bound); } -inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) { +inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) noexcept { while (bound != 0) { --bound; if (a[bound] != b[bound]) @@ -83,35 +83,35 @@ inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) { return 16; } #ifdef SIMDE_X86_SSE4_2_NATIVE -inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) { +inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept { return unsigned(_mm_cmpestri(a, bound, b, bound, LAST_DIFF)); } #endif -inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) { +inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) noexcept { return last_mask(a != b, bound); } -inline bool less(epu8 a, epu8 b) { +inline bool less(epu8 a, epu8 b) noexcept { uint64_t diff = first_diff(a, b); return (diff < 16) && (a[diff] < b[diff]); } -inline int8_t less_partial(epu8 a, epu8 b, int k) { +inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept { uint64_t diff = first_diff(a, b, k); return (diff == 16) ? 0 : static_cast(a[diff]) - static_cast(b[diff]); } -inline uint64_t first_zero(epu8 v, int bnd) { +inline uint64_t first_zero(epu8 v, int bnd) noexcept { return first_mask(v == epu8{}, bnd); } -inline uint64_t last_zero(epu8 v, int bnd) { +inline uint64_t last_zero(epu8 v, int bnd) noexcept { return last_mask(v == epu8{}, bnd); } -inline uint64_t first_non_zero(epu8 v, int bnd) { +inline uint64_t first_non_zero(epu8 v, int bnd) noexcept { return first_mask(v != epu8{}, bnd); } -inline uint64_t last_non_zero(epu8 v, int bnd) { +inline uint64_t last_non_zero(epu8 v, int bnd) noexcept { return last_mask(v != epu8{}, bnd); } @@ -182,20 +182,22 @@ constexpr std::array sorting_rounds8 }}; // clang-format on -inline bool is_sorted(epu8 a) { +inline bool is_sorted(epu8 a) noexcept { return simde_mm_movemask_epi8(shifted_right(a) > a) == 0; } -inline epu8 sorted(epu8 a) { return network_sort(a, sorting_rounds); } -inline epu8 sorted8(epu8 a) { return network_sort(a, sorting_rounds8); } -inline epu8 revsorted(epu8 a) { return network_sort(a, sorting_rounds); } -inline epu8 revsorted8(epu8 a) { +inline epu8 sorted(epu8 a) noexcept { + return network_sort(a, sorting_rounds); +} +inline epu8 sorted8(epu8 a) noexcept { return network_sort(a, sorting_rounds8); } +inline epu8 revsorted(epu8 a) noexcept { return network_sort(a, sorting_rounds); } +inline epu8 revsorted8(epu8 a) noexcept { return network_sort(a, sorting_rounds8); } -inline epu8 sort_perm(epu8 &a) { +inline epu8 sort_perm(epu8 &a) noexcept{ return network_sort_perm(a, sorting_rounds); } -inline epu8 sort8_perm(epu8 &a) { +inline epu8 sort8_perm(epu8 &a) noexcept{ return network_sort_perm(a, sorting_rounds8); } @@ -210,7 +212,7 @@ inline epu8 random_epu8(uint16_t bnd) { return res; } -inline epu8 remove_dups(epu8 v, uint8_t repl) { +inline epu8 remove_dups(epu8 v, 
uint8_t repl) noexcept { // Vector ternary operator is not supported by clang. // return (v != shifted_right(v) ? v : Epu8(repl); return simde_mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v)); @@ -233,7 +235,7 @@ constexpr std::array inverting_rounds{{ #define FIND_IN_VECT_COMPL \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) -inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept { epu8 res = -static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); for (epu8 round : inverting_rounds) { a = permuted(a, round); @@ -244,7 +246,7 @@ inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { } #endif -inline epu8 permutation_of_ref(epu8 a, epu8 b) { +inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept { auto ar = as_array(a); epu8 res{}; for (size_t i = 0; i < 16; i++) { @@ -253,7 +255,7 @@ inline epu8 permutation_of_ref(epu8 a, epu8 b) { } return res; } -inline epu8 permutation_of(epu8 a, epu8 b) { +inline epu8 permutation_of(epu8 a, epu8 b) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE return permutation_of_cmpestrm(a, b); #else @@ -289,15 +291,15 @@ constexpr std::array mining_rounds{{ #undef FF -inline uint8_t horiz_sum_ref(epu8 v) { +inline uint8_t horiz_sum_ref(epu8 v) noexcept { uint8_t res = 0; for (size_t i = 0; i < 16; i++) res += v[i]; return res; } -inline uint8_t horiz_sum_gen(epu8 v) { return as_VectGeneric(v).horiz_sum(); } -inline uint8_t horiz_sum4(epu8 v) { return partial_sums_round(v)[15]; } -inline uint8_t horiz_sum3(epu8 v) { +inline uint8_t horiz_sum_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_sum(); } +inline uint8_t horiz_sum4(epu8 v) noexcept { return partial_sums_round(v)[15]; } +inline uint8_t horiz_sum3(epu8 v) noexcept { auto sr = summing_rounds; v += permuted(v, sr[0]); v += permuted(v, sr[1]); @@ -305,32 +307,32 @@ inline uint8_t horiz_sum3(epu8 v) { return v[7] + v[15]; } -inline epu8 partial_sums_ref(epu8 v) { +inline epu8 partial_sums_ref(epu8 v) noexcept { epu8 res{}; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = res[i - 1] + v[i]; return res; } -inline epu8 partial_sums_gen(epu8 v) { +inline epu8 partial_sums_gen(epu8 v) noexcept { as_VectGeneric(v).partial_sums_inplace(); return v; } -inline epu8 partial_sums_round(epu8 v) { +inline epu8 partial_sums_round(epu8 v) noexcept { for (epu8 round : summing_rounds) v += permuted(v, round); return v; } -inline uint8_t horiz_max_ref(epu8 v) { +inline uint8_t horiz_max_ref(epu8 v) noexcept { uint8_t res = 0; for (size_t i = 0; i < 16; i++) res = std::max(res, v[i]); return res; } -inline uint8_t horiz_max_gen(epu8 v) { return as_VectGeneric(v).horiz_max(); } -inline uint8_t horiz_max4(epu8 v) { return partial_max_round(v)[15]; } -inline uint8_t horiz_max3(epu8 v) { +inline uint8_t horiz_max_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_max(); } +inline uint8_t horiz_max4(epu8 v) noexcept { return partial_max_round(v)[15]; } +inline uint8_t horiz_max3(epu8 v) noexcept { auto sr = summing_rounds; v = max(v, permuted(v, sr[0])); v = max(v, permuted(v, sr[1])); @@ -338,32 +340,32 @@ inline uint8_t horiz_max3(epu8 v) { return std::max(v[7], v[15]); } -inline epu8 partial_max_ref(epu8 v) { +inline epu8 partial_max_ref(epu8 v) noexcept { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = std::max(res[i - 1], v[i]); return res; } -inline epu8 partial_max_gen(epu8 v) { +inline epu8 partial_max_gen(epu8 v) noexcept { as_VectGeneric(v).partial_max_inplace(); return v; } -inline epu8 
partial_max_round(epu8 v) { +inline epu8 partial_max_round(epu8 v) noexcept { for (epu8 round : summing_rounds) v = max(v, permuted(v, round)); return v; } -inline uint8_t horiz_min_ref(epu8 v) { +inline uint8_t horiz_min_ref(epu8 v) noexcept { uint8_t res = 255; for (size_t i = 0; i < 16; i++) res = std::min(res, v[i]); return res; } -inline uint8_t horiz_min_gen(epu8 v) { return as_VectGeneric(v).horiz_min(); } -inline uint8_t horiz_min4(epu8 v) { return partial_min_round(v)[15]; } -inline uint8_t horiz_min3(epu8 v) { +inline uint8_t horiz_min_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_min(); } +inline uint8_t horiz_min4(epu8 v) noexcept { return partial_min_round(v)[15]; } +inline uint8_t horiz_min3(epu8 v) noexcept { auto sr = mining_rounds; v = min(v, permuted(v, sr[0])); v = min(v, permuted(v, sr[1])); @@ -371,24 +373,24 @@ inline uint8_t horiz_min3(epu8 v) { return std::min(v[7], v[15]); } -inline epu8 partial_min_ref(epu8 v) { +inline epu8 partial_min_ref(epu8 v) noexcept { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = std::min(res[i - 1], v[i]); return res; } -inline epu8 partial_min_gen(epu8 v) { +inline epu8 partial_min_gen(epu8 v) noexcept { as_VectGeneric(v).partial_min_inplace(); return v; } -inline epu8 partial_min_round(epu8 v) { +inline epu8 partial_min_round(epu8 v) noexcept { for (epu8 round : mining_rounds) v = min(v, permuted(v, round)); return v; } -inline epu8 eval16_ref(epu8 v) { +inline epu8 eval16_ref(epu8 v) noexcept { epu8 res{}; for (size_t i = 0; i < 16; i++) if (v[i] < 16) @@ -396,7 +398,7 @@ inline epu8 eval16_ref(epu8 v) { return res; } -inline epu8 eval16_arr(epu8 v8) { +inline epu8 eval16_arr(epu8 v8) noexcept { decltype(Epu8)::array res{}; auto v = as_array(v8); for (size_t i = 0; i < 16; i++) @@ -404,10 +406,10 @@ inline epu8 eval16_arr(epu8 v8) { res[v[i]]++; return from_array(res); } -inline epu8 eval16_gen(epu8 v) { +inline epu8 eval16_gen(epu8 v) noexcept { return from_array(as_VectGeneric(v).eval().v); } -inline epu8 eval16_cycle(epu8 v) { +inline epu8 eval16_cycle(epu8 v) noexcept { epu8 res = -(epu8id == v); for (int i = 1; i < 16; i++) { v = permuted(v, left_cycle); @@ -415,7 +417,7 @@ inline epu8 eval16_cycle(epu8 v) { } return res; } -inline epu8 eval16_popcount(epu8 v) { +inline epu8 eval16_popcount(epu8 v) noexcept { epu8 res{}; for (size_t i = 0; i < 16; i++) { res[i] = @@ -424,11 +426,11 @@ inline epu8 eval16_popcount(epu8 v) { return res; } -inline epu8 popcount16(epu8 v) { +inline epu8 popcount16(epu8 v) noexcept { return permuted(popcount4, (v & Epu8(0x0f))) + permuted(popcount4, v >> 4); } -inline bool is_partial_transformation(epu8 v, const size_t k) { +inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) @@ -436,13 +438,13 @@ inline bool is_partial_transformation(epu8 v, const size_t k) { (diff == 16 || diff < k); } -inline bool is_transformation(epu8 v, const size_t k) { +inline bool is_transformation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } -inline bool is_partial_permutation(epu8 v, const size_t k) { +inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 @@ -453,7 +455,7 @@ inline bool 
is_partial_permutation(epu8 v, const size_t k) { } #ifdef SIMDE_X86_SSE4_2_NATIVE -inline bool is_permutation_cmpestri(epu8 v, const size_t k) { +inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x in Perm16::one()) and // (forall x in Perm16::one(), x in v) and @@ -464,12 +466,12 @@ inline bool is_permutation_cmpestri(epu8 v, const size_t k) { } #endif -inline bool is_permutation_sort(epu8 v, const size_t k) { +inline bool is_permutation_sort(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); return equal(sorted(v), epu8id) && (diff == 16 || diff < k); } -inline bool is_permutation(epu8 v, const size_t k) { +inline bool is_permutation(epu8 v, const size_t k) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE return is_permutation_cmpestri(v, k); #else @@ -496,19 +498,19 @@ inline std::string to_string(HPCombi::epu8 const &a) { } template <> struct equal_to { - bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const { + bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept { return HPCombi::equal(lhs, rhs); } }; template <> struct not_equal_to { - bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const { + bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept { return HPCombi::not_equal(lhs, rhs); } }; template <> struct hash { - inline size_t operator()(HPCombi::epu8 a) const { + inline size_t operator()(HPCombi::epu8 a) const noexcept { unsigned __int128 v0 = simde_mm_extract_epi64(a, 0); unsigned __int128 v1 = simde_mm_extract_epi64(a, 1); return ((v1 * HPCombi::prime + v0) * HPCombi::prime) >> 64; @@ -528,7 +530,7 @@ template <> struct less { // but we don't care when using in std::set. // 10% faster than calling the lexicographic comparison operator ! inline size_t operator()(const HPCombi::epu8 &v1, - const HPCombi::epu8 &v2) const { + const HPCombi::epu8 &v2) const noexcept { simde__m128 v1v = simde__m128(v1), v2v = simde__m128(v2); return v1v[0] == v2v[0] ? v1v[1] < v2v[1] : v1v[0] < v2v[0]; } diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp index 6260186a..36f309d0 100644 --- a/include/hpcombi/vect_generic.hpp +++ b/include/hpcombi/vect_generic.hpp @@ -166,14 +166,14 @@ template struct VectGeneric { return true; } - uint64_t horiz_sum() const { + uint64_t horiz_sum() const noexcept { Expo res = 0; for (uint64_t i = 0; i < Size; i++) res += v[i]; return res; } - VectGeneric partial_sums() const { + VectGeneric partial_sums() const noexcept { auto res = *this; for (uint64_t i = 1; i < Size; i++) res[i] += res[i - 1]; From 74bf69a0a12be0d8679d49ab09d9eed561e0e254 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 17:00:31 +0000 Subject: [PATCH 064/113] Noexcept bmat8 --- include/hpcombi/bmat8.hpp | 112 +++++++++++++++++++++------------ include/hpcombi/bmat8_impl.hpp | 73 ++++++++++++--------- 2 files changed, 113 insertions(+), 72 deletions(-) diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp index 069361ca..d3b509bb 100644 --- a/include/hpcombi/bmat8.hpp +++ b/include/hpcombi/bmat8.hpp @@ -50,39 +50,40 @@ class BMat8 { //! A default constructor. //! //! This constructor gives no guarantees on what the matrix will contain. - BMat8() = default; + BMat8() noexcept = default; //! A constructor. //! //! This constructor initializes a BMat8 to have rows equal to the //! 
8 chunks, of 8 bits each, of the binary representation of \p mat. - explicit BMat8(uint64_t mat) : _data(mat) {} + explicit BMat8(uint64_t mat) noexcept : _data(mat) {} //! A constructor. //! //! This constructor initializes a matrix where the rows of the matrix //! are the vectors in \p mat. + // Not sure if this is noexcept or not explicit BMat8(std::vector> const &mat); //! A constructor. //! //! This is the copy constructor. - BMat8(BMat8 const &) = default; + BMat8(BMat8 const &) noexcept = default; //! A constructor. //! //! This is the move constructor. - BMat8(BMat8 &&) = default; + BMat8(BMat8 &&) noexcept = default; //! A constructor. //! //! This is the copy assignment constructor. - BMat8 &operator=(BMat8 const &) = default; + BMat8 &operator=(BMat8 const &) noexcept = default; //! A constructor. //! //! This is the move assignment constructor. - BMat8 &operator=(BMat8 &&) = default; + BMat8 &operator=(BMat8 &&) noexcept = default; //! A default destructor. ~BMat8() = default; @@ -90,69 +91,77 @@ class BMat8 { //! Returns \c true if \c this equals \p that. //! //! This method checks the mathematical equality of two BMat8 objects. - bool operator==(BMat8 const &that) const { return _data == that._data; } + bool operator==(BMat8 const &that) const noexcept { + return _data == that._data; + } //! Returns \c true if \c this does not equal \p that //! //! This method checks the mathematical inequality of two BMat8 objects. - bool operator!=(BMat8 const &that) const { return _data != that._data; } + bool operator!=(BMat8 const &that) const noexcept { + return _data != that._data; + } //! Returns \c true if \c this is less than \p that. //! //! This method checks whether a BMat8 objects is less than another. //! We order by the results of to_int() for each matrix. - bool operator<(BMat8 const &that) const { return _data < that._data; } + bool operator<(BMat8 const &that) const noexcept { + return _data < that._data; + } //! Returns \c true if \c this is greater than \p that. //! //! This method checks whether a BMat8 objects is greater than another. //! We order by the results of to_int() for each matrix. - bool operator>(BMat8 const &that) const { return _data > that._data; } + bool operator>(BMat8 const &that) const noexcept { + return _data > that._data; + } //! Returns the entry in the (\p i, \p j)th position. //! //! This method returns the entry in the (\p i, \p j)th position. //! Note that since all matrices are internally represented as 8 x 8, it //! is possible to access entries that you might not believe exist. - bool operator()(size_t i, size_t j) const; + bool operator()(size_t i, size_t j) const noexcept; //! Sets the (\p i, \p j)th position to \p val. //! //! This method sets the (\p i, \p j)th entry of \c this to \p val. //! Uses the bit twiddle for setting bits found //! here. - void set(size_t i, size_t j, bool val); + void set(size_t i, size_t j, bool val) noexcept; //! Returns the integer representation of \c this. //! //! Returns an unsigned integer obtained by interpreting an 8 x 8 //! BMat8 as a sequence of 64 bits (reading rows left to right, //! from top to bottom) and then this sequence as an unsigned int. - uint64_t to_int() const { return _data; } + uint64_t to_int() const noexcept { return _data; } //! Returns the transpose of \c this //! //! Returns the standard matrix transpose of a BMat8. //! Uses the technique found in Knuth AoCP Vol. 4 Fasc. 1a, p. 15. - BMat8 transpose() const; + BMat8 transpose() const noexcept; //! 
Returns the transpose of \c this //! //! Returns the standard matrix transpose of a BMat8. //! Uses \c movemask instruction. - BMat8 transpose_mask() const; + BMat8 transpose_mask() const noexcept; //! Returns the transpose of \c this //! //! Returns the standard matrix transpose of a BMat8. //! Uses \c movemask instruction. - BMat8 transpose_maskd() const; + BMat8 transpose_maskd() const noexcept; //! Transpose two matrices at once. //! //! Compute in parallel the standard matrix transpose of two BMat8. //! Uses the technique found in Knuth AoCP Vol. 4 Fasc. 1a, p. 15. - static void transpose2(BMat8 &, BMat8 &); + static void transpose2(BMat8 &, BMat8 &) noexcept; //! Returns the matrix product of \c this and the transpose of \p that //! @@ -160,13 +169,14 @@ class BMat8 { //! boolean semiring) of two BMat8 objects. This is faster than transposing //! that and calling the product of \c this with it. Implementation uses //! vector instructions. - BMat8 mult_transpose(BMat8 const &that) const; + BMat8 mult_transpose(BMat8 const &that) const noexcept; + //! Returns the matrix product of \c this and \p that //! //! This method returns the standard matrix product (over the //! boolean semiring) of two BMat8 objects. This is a fast implementation //! using transposition and vector instructions. - BMat8 operator*(BMat8 const &that) const { + BMat8 operator*(BMat8 const &that) const noexcept { return mult_transpose(that.transpose()); } @@ -176,75 +186,89 @@ class BMat8 { //! row space basis. This is a fast implementation using vector //! instructions to compute in parallel the union of the other rows //! included in a given one. - BMat8 row_space_basis() const; + BMat8 row_space_basis() const noexcept; + //! Returns a canonical basis of the col space of \c this //! //! Any two matrix with the same column row space are guaranteed to have //! the same column space basis. Uses #row_space_basis and #transpose. - BMat8 col_space_basis() const { + BMat8 col_space_basis() const noexcept { return transpose().row_space_basis().transpose(); } + //! Returns the number of non-zero rows of \c this - size_t nr_rows() const; + size_t nr_rows() const noexcept; + //! Returns a \c std::vector for rows of \c this + // Not noexcept because it constructs a vector std::vector rows() const; //! Returns the cardinality of the row space of \c this //! //! Reference implementation computing all products + // Not noexcept because row_space_bitset_ref isn't uint64_t row_space_size_ref() const; //! Returns the the row space of \c this //! //! The result is stored in a c++ bitset + // Not noexcept because it creates a vector std::bitset<256> row_space_bitset_ref() const; + //! Returns the the row space of \c this as 256 bits. //! //! The result is stored in two 128 bits registers. - void row_space_bitset(epu8 &res1, epu8 &res2) const; + void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept; + //! Returns the cardinality of the row space of \c this //! //! It compute all the product using two 128 bits registers to store //! the set of elements of the row space. - uint64_t row_space_size_bitset() const; + uint64_t row_space_size_bitset() const noexcept; + //! Returns the cardinality of the row space of \c this //! //! Uses vector computation of the product of included rows in each 256 //! possible vectors. Fastest implementation saving a few instructions //! compared to #row_space_size_incl1 - uint64_t row_space_size_incl() const; + uint64_t row_space_size_incl() const noexcept; + //! 
Returns the cardinality of the row space of \c this //! //! Uses vector computation of the product included row in each 256 //! possible vectors. More optimized in #row_space_size_incl - uint64_t row_space_size_incl1() const; + uint64_t row_space_size_incl1() const noexcept; + //! Returns the cardinality of the row space of \c this //! //! Alias to #row_space_size_incl - uint64_t row_space_size() const { return row_space_size_incl(); } + uint64_t row_space_size() const noexcept { return row_space_size_incl(); } //! Returns whether the row space of \c this is included in other's //! //! Uses a 256 bitset internally - bool row_space_included_ref(BMat8 other) const; + bool row_space_included_ref(BMat8 other) const noexcept; + //! Returns whether the row space of \c this is included in other's //! //! Uses a 256 bitset internally - bool row_space_included_bitset(BMat8 other) const; + bool row_space_included_bitset(BMat8 other) const noexcept; //! Returns a mask for which vectors of a 16 rows \c epu8 are in //! the row space of \c this //! //! Uses vector computation of the product of included rows - epu8 row_space_mask(epu8 vects) const; + epu8 row_space_mask(epu8 vects) const noexcept; + //! Returns whether the row space of \c this is included in other's //! //! Uses vector computation of the product of included rows - bool row_space_included(BMat8 other) const; + bool row_space_included(BMat8 other) const noexcept; //! Returns inclusion of row spaces //! //! Compute at once if a1 is included in b1 and a2 is included in b2 + // Not noexcept because std::make_pair is not static std::pair row_space_included2(BMat8 a1, BMat8 b1, BMat8 a2, BMat8 b2); @@ -252,41 +276,45 @@ class BMat8 { //! //! @param p : a permutation fixing the entries 8..15 //! Note: no verification is performed on p - BMat8 row_permuted(Perm16 p) const; + BMat8 row_permuted(Perm16 p) const noexcept; + //! Returns the matrix whose columns have been permuted according to \c p //! //! @param p : a permutation fixing the entries 8..15 //! Note: no verification is performed on p - BMat8 col_permuted(Perm16 p) const; + BMat8 col_permuted(Perm16 p) const noexcept; //! Returns the matrix associated to the permutation \c p by rows //! //! @param p : a permutation fixing the entries 8..15 //! Note: no verification is performed on p - static BMat8 row_permutation_matrix(Perm16 p); + static BMat8 row_permutation_matrix(Perm16 p) noexcept; + //! Returns the matrix associated to the permutation \c p by columns //! //! @param p : a permutation fixing the entries 8..15 //! Note: no verification is performed on p - static BMat8 col_permutation_matrix(Perm16 p); + static BMat8 col_permutation_matrix(Perm16 p) noexcept; //! Give the permutation whose right multiplication change \c *this //! to \c other //! //! \c *this is suppose to be a row_space matrix (ie. sorted decreasingly) //! Fast implementation doing a vector binary search. - Perm16 right_perm_action_on_basis(BMat8) const; + Perm16 right_perm_action_on_basis(BMat8) const noexcept; + //! Give the permutation whose right multiplication change \c *this //! to \c other //! //! \c *this is suppose to be a row_space matrix (ie. sorted decreasingly) //! Reference implementation. + // Not noexcept because vectors are allocated Perm16 right_perm_action_on_basis_ref(BMat8) const; //! Returns the identity BMat8 //! //! This method returns the 8 x 8 BMat8 with 1s on the main diagonal. 
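// A short usage sketch of the interface declared above (the "hpcombi/bmat8.hpp"
// include path is an assumption; the 64-bit literal is the row-by-row encoding
// that to_int() documents):
#include <cassert>
#include "hpcombi/bmat8.hpp"

int main() {
    using HPCombi::BMat8;
    BMat8 id = BMat8::one();                     // 8 x 8 identity over the boolean semiring
    assert(id == BMat8(0x8040201008040201ULL));  // same matrix, built from its row encoding
    assert(id(0, 0) && !id(0, 1));               // (i, j) entry access
    assert(id.transpose() == id);
    assert((id * id) == id);                     // product, computed via mult_transpose
    return 0;
}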
- static BMat8 one(size_t dim = 8) { + static BMat8 one(size_t dim = 8) noexcept { HPCOMBI_ASSERT(dim <= 8); static std::array const ones = { 0x0000000000000000, 0x8000000000000000, 0x8040000000000000, @@ -298,29 +326,31 @@ class BMat8 { //! Returns a random BMat8 //! //! This method returns a BMat8 chosen at random. + // Not noexcept because random things aren't static BMat8 random(); //! Returns a random square BMat8 up to dimension \p dim. //! //! This method returns a BMat8 chosen at random, where only the //! top-left \p dim x \p dim entries may be non-zero. + // Not noexcept because BMat8::random above is not static BMat8 random(size_t dim); - //! Swap the matrix \c this with \c that - void swap(BMat8 &that) { std::swap(this->_data, that._data); } + void swap(BMat8 &that) noexcept { std::swap(this->_data, that._data); } //! Write \c this on \c os + // Not noexcept std::ostream &write(std::ostream &os) const; #ifdef HPCOMBI_HAVE_DENSEHASHMAP // FIXME do this another way - BMat8 empty_key() const { return BMat8(0xFF7FBFDFEFF7FBFE); } + BMat8 empty_key() const noexcept { return BMat8(0xFF7FBFDFEFF7FBFE); } #endif private: uint64_t _data; - epu8 row_space_basis_internal() const; + epu8 row_space_basis_internal() const noexcept; }; } // namespace HPCombi diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index de772e02..394651b3 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -95,26 +95,25 @@ static const constexpr std::array BIT_MASK = {{0x8000000000000000, 0x2, 0x1}}; -inline bool BMat8::operator()(size_t i, size_t j) const { +inline bool BMat8::operator()(size_t i, size_t j) const noexcept { HPCOMBI_ASSERT(i < 8); HPCOMBI_ASSERT(j < 8); return (_data << (8 * i + j)) >> 63; } -inline void BMat8::set(size_t i, size_t j, bool val) { +inline void BMat8::set(size_t i, size_t j, bool val) noexcept { HPCOMBI_ASSERT(i < 8); HPCOMBI_ASSERT(j < 8); _data ^= (-val ^ _data) & BIT_MASK[8 * i + j]; } inline BMat8::BMat8(std::vector> const &mat) { - // FIXME exceptions HPCOMBI_ASSERT(mat.size() <= 8); HPCOMBI_ASSERT(0 < mat.size()); _data = 0; uint64_t pow = 1; pow = pow << 63; - for (auto row : mat) { + for (auto const &row : mat) { HPCOMBI_ASSERT(row.size() == mat.size()); for (auto entry : row) { if (entry) { @@ -126,11 +125,13 @@ inline BMat8::BMat8(std::vector> const &mat) { } } -static std::random_device _rd; -static std::mt19937 _gen(_rd()); -static std::uniform_int_distribution _dist(0, 0xffffffffffffffff); +inline BMat8 BMat8::random() { + static std::random_device _rd; + static std::mt19937 _gen(_rd()); + static std::uniform_int_distribution _dist(0, 0xffffffffffffffff); -inline BMat8 BMat8::random() { return BMat8(_dist(_gen)); } + return BMat8(_dist(_gen)); +} inline BMat8 BMat8::random(size_t const dim) { HPCOMBI_ASSERT(0 < dim && dim <= 8); @@ -142,7 +143,7 @@ inline BMat8 BMat8::random(size_t const dim) { return bm; } -inline BMat8 BMat8::transpose() const { +inline BMat8 BMat8::transpose() const noexcept { uint64_t x = _data; uint64_t y = (x ^ (x >> 7)) & 0xAA00AA00AA00AA; x = x ^ y ^ (y << 7); @@ -153,7 +154,7 @@ inline BMat8 BMat8::transpose() const { return BMat8(x); } -inline BMat8 BMat8::transpose_mask() const { +inline BMat8 BMat8::transpose_mask() const noexcept { epu8 x = simde_mm_set_epi64x(_data, _data << 1); uint64_t res = simde_mm_movemask_epi8(x); x = x << Epu8(2); @@ -165,7 +166,7 @@ inline BMat8 BMat8::transpose_mask() const { return BMat8(res); } -inline BMat8 BMat8::transpose_maskd() const { +inline BMat8 
BMat8::transpose_maskd() const noexcept { uint64_t res = simde_mm_movemask_epi8(simde_mm_set_epi64x(_data, _data << 1)); res = res << 16 | @@ -179,7 +180,7 @@ inline BMat8 BMat8::transpose_maskd() const { using epu64 = uint64_t __attribute__((__vector_size__(16), __may_alias__)); -inline void BMat8::transpose2(BMat8 &a, BMat8 &b) { +inline void BMat8::transpose2(BMat8 &a, BMat8 &b) noexcept { epu64 x = simde_mm_set_epi64x(a._data, b._data); epu64 y = (x ^ (x >> 7)) & (epu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA}); x = x ^ y ^ (y << 7); @@ -199,7 +200,7 @@ static constexpr epu8 rotboth{7, 0, 1, 2, 3, 4, 5, 6, static constexpr epu8 rot2{6, 7, 0, 1, 2, 3, 4, 5, 14, 15, 8, 9, 10, 11, 12, 13}; -inline BMat8 BMat8::mult_transpose(BMat8 const &that) const { +inline BMat8 BMat8::mult_transpose(BMat8 const &that) const noexcept { epu8 x = simde_mm_set_epi64x(_data, _data); epu8 y = simde_mm_shuffle_epi8(simde_mm_set_epi64x(that._data, that._data), rothigh); @@ -215,7 +216,7 @@ inline BMat8 BMat8::mult_transpose(BMat8 const &that) const { simde_mm_extract_epi64(data, 1)); } -inline epu8 BMat8::row_space_basis_internal() const { +inline epu8 BMat8::row_space_basis_internal() const noexcept { epu8 res = remove_dups(revsorted8(simde_mm_set_epi64x(0, _data))); epu8 rescy = res; // We now compute the union of all the included different rows @@ -228,7 +229,7 @@ inline epu8 BMat8::row_space_basis_internal() const { return res; } -inline BMat8 BMat8::row_space_basis() const { +inline BMat8 BMat8::row_space_basis() const noexcept { return BMat8( simde_mm_extract_epi64(sorted8(row_space_basis_internal()), 0)); } @@ -249,7 +250,9 @@ constexpr std::array masks{ static const epu8 shiftres{1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80}; -inline void update_bitset(epu8 block, epu8 &set0, epu8 &set1) { +namespace detail { + +inline void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept { static const epu8 bound08 = simde_mm_slli_epi32( static_cast(epu8id), 3); // shift for *8 static const epu8 bound18 = bound08 + Epu8(0x80); @@ -261,8 +264,9 @@ inline void update_bitset(epu8 block, epu8 &set0, epu8 &set1) { block = simde_mm_shuffle_epi8(block, right_cycle); } } +} -inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { +inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const noexcept { epu8 in = simde_mm_set_epi64x(0, _data); epu8 block0{}, block1{}; for (epu8 m : masks) { @@ -272,12 +276,12 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { res0 = epu8{}; res1 = epu8{}; for (size_t r = 0; r < 16; r++) { - update_bitset(block0 | block1, res0, res1); + detail::row_space_update_bitset(block0 | block1, res0, res1); block1 = simde_mm_shuffle_epi8(block1, right_cycle); } } -inline uint64_t BMat8::row_space_size_bitset() const { +inline uint64_t BMat8::row_space_size_bitset() const noexcept { epu8 res0{}, res1{}; row_space_bitset(res0, res1); return (__builtin_popcountll(simde_mm_extract_epi64(res0, 0)) + @@ -286,7 +290,7 @@ inline uint64_t BMat8::row_space_size_bitset() const { __builtin_popcountll(simde_mm_extract_epi64(res1, 1))); } -inline uint64_t BMat8::row_space_size_incl1() const { +inline uint64_t BMat8::row_space_size_incl1() const noexcept { epu8 in = simde_mm_set_epi64x(_data, _data); epu8 block = epu8id; uint64_t res = 0; @@ -302,7 +306,7 @@ inline uint64_t BMat8::row_space_size_incl1() const { return res; } -inline uint64_t BMat8::row_space_size_incl() const { +inline uint64_t BMat8::row_space_size_incl() const noexcept { epu8 in = simde_mm_set_epi64x(_data, 
_data); epu8 block = epu8id; uint64_t res = 0; @@ -318,14 +322,15 @@ inline uint64_t BMat8::row_space_size_incl() const { return res; } -inline bool BMat8::row_space_included_bitset(BMat8 other) const { +inline bool BMat8::row_space_included_bitset(BMat8 other) const noexcept { epu8 this0, this1, other0, other1; this->row_space_bitset(this0, this1); other.row_space_bitset(other0, other1); + // Double inclusion of bitsets return equal(this0 | other0, other0) && equal(this1 | other1, other1); } -inline bool BMat8::row_space_included(BMat8 other) const { +inline bool BMat8::row_space_included(BMat8 other) const noexcept { epu8 in = simde_mm_set_epi64x(0, other._data); epu8 block = simde_mm_set_epi64x(0, _data); epu8 orincl = ((in | block) == block) & in; @@ -336,7 +341,7 @@ inline bool BMat8::row_space_included(BMat8 other) const { return equal(block, orincl); } -inline epu8 BMat8::row_space_mask(epu8 block) const { +inline epu8 BMat8::row_space_mask(epu8 block) const noexcept { epu8 in = simde_mm_set_epi64x(_data, _data); epu8 orincl = ((in | block) == block) & in; for (int i = 0; i < 7; i++) { // Only rotating @@ -382,7 +387,7 @@ inline std::bitset<256> BMat8::row_space_bitset_ref() const { return lookup; } -inline bool BMat8::row_space_included_ref(BMat8 other) const { +inline bool BMat8::row_space_included_ref(BMat8 other) const noexcept { std::bitset<256> thisspace = row_space_bitset_ref(); std::bitset<256> otherspace = other.row_space_bitset_ref(); return (thisspace | otherspace) == otherspace; @@ -401,27 +406,31 @@ inline std::vector BMat8::rows() const { return rows; } -inline size_t BMat8::nr_rows() const { +inline size_t BMat8::nr_rows() const noexcept { epu8 x = simde_mm_set_epi64x(_data, 0); return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8{})); } static constexpr epu8 rev8{7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15}; -inline BMat8 BMat8::row_permuted(Perm16 p) const { + +inline BMat8 BMat8::row_permuted(Perm16 p) const noexcept { epu8 x = simde_mm_set_epi64x(0, _data); x = permuted(x, rev8); x = permuted(x, p); x = permuted(x, rev8); return BMat8(simde_mm_extract_epi64(x, 0)); } -inline BMat8 BMat8::col_permuted(Perm16 p) const { + +inline BMat8 BMat8::col_permuted(Perm16 p) const noexcept { return transpose().row_permuted(p).transpose(); } -inline BMat8 BMat8::row_permutation_matrix(Perm16 p) { + +inline BMat8 BMat8::row_permutation_matrix(Perm16 p) noexcept { return one().row_permuted(p); } -inline BMat8 BMat8::col_permutation_matrix(Perm16 p) { + +inline BMat8 BMat8::col_permutation_matrix(Perm16 p) noexcept { return one().row_permuted(p).transpose(); } @@ -448,7 +457,7 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { return res; } -inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const { +inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const noexcept { epu8 x = permuted(simde_mm_set_epi64x(_data, 0), epu8rev); epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), epu8rev); // Vector ternary operator is not supported by clang. 
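// Illustrative sketch (an assumption for exposition, not code from this patch):
// a minimal use of the row-space API implemented above, relying only on members
// that appear in this series (BMat8::random, row_space_basis, row_space_size,
// row_space_included); the function name itself is hypothetical.
#include <cstdint>

#include "hpcombi/bmat8.hpp"

inline void row_space_sketch() {
    HPCombi::BMat8 a = HPCombi::BMat8::random(6);  // random, non-zero only on the top-left 6 x 6 block
    HPCombi::BMat8 basis = a.row_space_basis();    // matrix whose rows span the same row space
    // Since the basis spans the same row space, mutual inclusion and equal
    // cardinalities are expected here.
    bool same_space =
        a.row_space_included(basis) && basis.row_space_included(a);
    uint64_t card = a.row_space_size();
    (void)same_space;
    (void)card;
}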
@@ -456,6 +465,7 @@ inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const { return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8{}); } +// Not noexcept because std::ostream::operator<< isn't inline std::ostream &BMat8::write(std::ostream &os) const { uint64_t x = _data; uint64_t pow = 1; @@ -478,6 +488,7 @@ inline std::ostream &BMat8::write(std::ostream &os) const { namespace std { +// Not noexcept because BMat8::write isn't inline std::ostream &operator<<(std::ostream &os, HPCombi::BMat8 const &bm) { return bm.write(os); } From 4ab6377c5f8cb9d56fbaad9d91e11f30c83cc550 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 17:01:39 +0000 Subject: [PATCH 065/113] Formatting --- include/hpcombi/epu.hpp | 12 +++++++++--- include/hpcombi/epu_impl.hpp | 30 +++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 3f99db02..0276dc62 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -157,7 +157,9 @@ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) { /** Test whether all the entries of a #HPCombi::epu8 are zero */ inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); } /** Test whether all the entries of a #HPCombi::epu8 are one */ -inline bool is_all_one(epu8 a) noexcept { return simde_mm_testc_si128(a, Epu8(0xFF)); } +inline bool is_all_one(epu8 a) noexcept { + return simde_mm_testc_si128(a, Epu8(0xFF)); +} /** Equality of #HPCombi::epu8 */ inline bool equal(epu8 a, epu8 b) noexcept { @@ -167,11 +169,15 @@ inline bool equal(epu8 a, epu8 b) noexcept { inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); } /** Permuting a #HPCombi::epu8 */ -inline epu8 permuted(epu8 a, epu8 b) noexcept { return simde_mm_shuffle_epi8(a, b); } +inline epu8 permuted(epu8 a, epu8 b) noexcept { + return simde_mm_shuffle_epi8(a, b); +} /** Left shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left ! */ -inline epu8 shifted_right(epu8 a) noexcept { return simde_mm_bslli_si128(a, 1); } +inline epu8 shifted_right(epu8 a) noexcept { + return simde_mm_bslli_si128(a, 1); +} /** Right shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left ! 
*/ diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index d89ef989..c7369458 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -188,16 +188,20 @@ inline bool is_sorted(epu8 a) noexcept { inline epu8 sorted(epu8 a) noexcept { return network_sort(a, sorting_rounds); } -inline epu8 sorted8(epu8 a) noexcept { return network_sort(a, sorting_rounds8); } -inline epu8 revsorted(epu8 a) noexcept { return network_sort(a, sorting_rounds); } +inline epu8 sorted8(epu8 a) noexcept { + return network_sort(a, sorting_rounds8); +} +inline epu8 revsorted(epu8 a) noexcept { + return network_sort(a, sorting_rounds); +} inline epu8 revsorted8(epu8 a) noexcept { return network_sort(a, sorting_rounds8); } -inline epu8 sort_perm(epu8 &a) noexcept{ +inline epu8 sort_perm(epu8 &a) noexcept { return network_sort_perm(a, sorting_rounds); } -inline epu8 sort8_perm(epu8 &a) noexcept{ +inline epu8 sort8_perm(epu8 &a) noexcept { return network_sort_perm(a, sorting_rounds8); } @@ -297,7 +301,9 @@ inline uint8_t horiz_sum_ref(epu8 v) noexcept { res += v[i]; return res; } -inline uint8_t horiz_sum_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_sum(); } +inline uint8_t horiz_sum_gen(epu8 v) noexcept { + return as_VectGeneric(v).horiz_sum(); +} inline uint8_t horiz_sum4(epu8 v) noexcept { return partial_sums_round(v)[15]; } inline uint8_t horiz_sum3(epu8 v) noexcept { auto sr = summing_rounds; @@ -330,7 +336,9 @@ inline uint8_t horiz_max_ref(epu8 v) noexcept { res = std::max(res, v[i]); return res; } -inline uint8_t horiz_max_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_max(); } +inline uint8_t horiz_max_gen(epu8 v) noexcept { + return as_VectGeneric(v).horiz_max(); +} inline uint8_t horiz_max4(epu8 v) noexcept { return partial_max_round(v)[15]; } inline uint8_t horiz_max3(epu8 v) noexcept { auto sr = summing_rounds; @@ -363,7 +371,9 @@ inline uint8_t horiz_min_ref(epu8 v) noexcept { res = std::min(res, v[i]); return res; } -inline uint8_t horiz_min_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_min(); } +inline uint8_t horiz_min_gen(epu8 v) noexcept { + return as_VectGeneric(v).horiz_min(); +} inline uint8_t horiz_min4(epu8 v) noexcept { return partial_min_round(v)[15]; } inline uint8_t horiz_min3(epu8 v) noexcept { auto sr = mining_rounds; @@ -498,13 +508,15 @@ inline std::string to_string(HPCombi::epu8 const &a) { } template <> struct equal_to { - bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept { + bool operator()(const HPCombi::epu8 &lhs, + const HPCombi::epu8 &rhs) const noexcept { return HPCombi::equal(lhs, rhs); } }; template <> struct not_equal_to { - bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept { + bool operator()(const HPCombi::epu8 &lhs, + const HPCombi::epu8 &rhs) const noexcept { return HPCombi::not_equal(lhs, rhs); } }; From aead6d472de7fe3d12e1d433ebf5c8a3bb2e1f9c Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Sun, 29 Oct 2023 17:07:21 +0000 Subject: [PATCH 066/113] Remove random_shuffle --- benchmark/bench_fixture.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 0efe2f92..517c32bd 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -15,6 +15,9 @@ #ifndef BENCH_FIXTURE #define BENCH_FIXTURE +#include +#include + #include "hpcombi/epu.hpp" using HPCombi::epu8; @@ -30,9 +33,11 @@ std::vector rand_epu8(size_t sz) { } inline epu8 rand_perm() { + static std::random_device rd; + static std::mt19937 g(rd()); epu8 res = HPCombi::epu8id; auto &ar = HPCombi::as_array(res); - std::random_shuffle(ar.begin(), ar.end()); + std::shuffle(ar.begin(), ar.end(), g); return res; } From 02dbb8c599cffb31d070f7099c0e612f6e7ba227 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Sun, 29 Oct 2023 17:49:52 +0000 Subject: [PATCH 067/113] Epu8 benchmark started --- benchmark/bench_bmat8.cpp | 56 +------------- benchmark/bench_epu8.cpp | 159 +++++++++++++++++--------------------- 2 files changed, 74 insertions(+), 141 deletions(-) diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index 7c326ba3..ebd4960a 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -21,15 +21,10 @@ #include #include "bench_fixture.hpp" -// #include "compilerinfo.hpp" -// #include "cpu_x86_impl.hpp" +#include "bench_main.hpp" #include "hpcombi/bmat8.hpp" -// using namespace FeatureDetector; -// using namespace std; -// using HPCombi::epu8; - namespace HPCombi { // const Fix_perm16 sample; @@ -62,46 +57,6 @@ class Fix_BMat8 { pair_sample; // not const, transpose2 is in place }; -// template -// void myBench(const std::string &name, TF pfunc, Sample &sample) { -// std::string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; -// benchmark::RegisterBenchmark( -// fullname.c_str(), [pfunc, sample](benchmark::State &st) { -// for (auto _ : st) { -// for (auto elem : sample) { -// benchmark::DoNotOptimize(pfunc(elem)); -// } -// } -// }); -// } - -#define BENCHMARK_MEM_FN(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ - for (auto &elem : sample) { \ - volatile auto dummy = elem.mem_fn(); \ - } \ - return true; \ - }; - -#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ - for (auto &pair : sample) { \ - auto val = \ - std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \ - REQUIRE(val.first == val.second); \ - } \ - return true; \ - }; - -#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ - for (auto &pair : sample) { \ - volatile auto val = pair.first.mem_fn(pair.second); \ - } \ - return true; \ - }; - - TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size_ref, sample); @@ -131,8 +86,7 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", }; } -TEST_CASE_METHOD(Fix_BMat8, - "Inclusion of row spaces benchmarks 1000 BMat8", +TEST_CASE_METHOD(Fix_BMat8, "Inclusion of row spaces benchmarks 1000 BMat8", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR(row_space_included_ref, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included_bitset, pair_sample); @@ -152,10 +106,8 @@ TEST_CASE_METHOD(Fix_BMat8, }; BENCHMARK("Calling twice implementation") { for (auto &pair : pair_sample) { - volatile auto val = ( - pair.first.row_space_included(pair.second) == - pair.second.row_space_included(pair.first)); - + volatile auto val = (pair.first.row_space_included(pair.second) 
== + pair.second.row_space_included(pair.first)); } return true; }; diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 6db5c472..16560e0f 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -13,26 +13,24 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include +#include #include -#include -#include +#include + +#include +#include #include "bench_fixture.hpp" -#include "compilerinfo.hpp" -#include "cpu_x86_impl.hpp" +#include "bench_main.hpp" + +#include "hpcombi/epu.hpp" -using namespace FeatureDetector; -using namespace std; -using HPCombi::epu8; +namespace HPCombi { -const Fix_epu8 sample; -const std::string SIMDSET = cpu_x86::get_highest_SIMD(); -const std::string PROCID = cpu_x86::get_proc_string(); +namespace { struct RoundsMask { - // commented out due to a bug in gcc - /* constexpr */ RoundsMask() : arr() { + constexpr RoundsMask() : arr() { for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) arr[i] = HPCombi::sorting_rounds[i] < HPCombi::epu8id; } @@ -44,9 +42,9 @@ const auto rounds_mask = RoundsMask(); inline epu8 sort_pair(epu8 a) { for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) { epu8 minab, maxab, b = HPCombi::permuted(a, HPCombi::sorting_rounds[i]); - minab = _mm_min_epi8(a, b); - maxab = _mm_max_epi8(a, b); - a = _mm_blendv_epi8(minab, maxab, rounds_mask.arr[i]); + minab = simde_mm_min_epi8(a, b); + maxab = simde_mm_max_epi8(a, b); + a = simde_mm_blendv_epi8(minab, maxab, rounds_mask.arr[i]); } return a; } @@ -62,13 +60,13 @@ inline epu8 sort_odd_even(epu8 a) { epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { b = HPCombi::permuted(a, even); - minab = _mm_min_epi8(a, b); - maxab = _mm_max_epi8(a, b); - a = _mm_blendv_epi8(minab, maxab, mask); + minab = simde_mm_min_epi8(a, b); + maxab = simde_mm_max_epi8(a, b); + a = simde_mm_blendv_epi8(minab, maxab, mask); b = HPCombi::permuted(a, odd); - minab = _mm_min_epi8(a, b); - maxab = _mm_max_epi8(a, b); - a = _mm_blendv_epi8(maxab, minab, mask); + minab = simde_mm_min_epi8(a, b); + maxab = simde_mm_max_epi8(a, b); + a = simde_mm_blendv_epi8(maxab, minab, mask); } return a; } @@ -109,64 +107,44 @@ inline epu8 gen_sort(epu8 p) { return p; } -template -void myBench(const string &name, TF pfunc, Sample &sample) { - string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; - benchmark::RegisterBenchmark( - fullname.c_str(), [pfunc, sample](benchmark::State &st) { - for (auto _ : st) { - for (auto elem : sample) { - benchmark::DoNotOptimize(pfunc(elem)); - } - } - }); -} - static const epu8 bla = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; -#define MYBENCH(nm, fun, smp) \ - myBench( \ - nm, [](epu8 p) { return fun(p); }, smp) -#define MYBENCH2(nm, fun, smp) \ - myBench( \ - nm, [](epu8 p) { return fun(p, bla); }, smp) - -// ################################################################################## -int Bench_sort() { - myBench("sort_std1_nolmbd", std_sort, sample.perms); - myBench("sort_std2_nolmbd", std_sort, sample.perms); - myBench("sort_std3_nolmbd", std_sort, sample.perms); - - myBench("sort_std_nolmbd", std_sort, sample.perms); - myBench("sort_arr_nolmbd", arr_sort, sample.perms); - myBench("sort_gen_nolmbd", gen_sort, sample.perms); - myBench("sort_insert_nolmbd", insertion_sort, sample.perms); - myBench("sort_oddEven_nolmbd", sort_odd_even, sample.perms); - myBench("sort_radix_nolmbd", radix_sort, sample.perms); - myBench("sort_pair_nolmbd", sort_pair, sample.perms); 
- myBench("sort_netw_nolmbd", HPCombi::sorted, sample.perms); +} // namespace + +TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { + BENCHMARK_FREE_FN("| no lambda | perms | 1", std_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms | 2", std_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms | 3", std_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms | 4", std_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", arr_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", gen_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", insertion_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", sort_odd_even, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", radix_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", sort_pair, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", HPCombi::sorted, Fix_epu8::perms); // lambda function is needed for inlining - MYBENCH("sort_std_lmbd", std_sort, sample.perms); - MYBENCH("sort_arr_lmbd", arr_sort, sample.perms); - MYBENCH("sort_gen_lmbd", gen_sort, sample.perms); - MYBENCH("sort_insert_lmbd", insertion_sort, sample.perms); - MYBENCH("sort_oddEven_lmbd", sort_odd_even, sample.perms); - MYBENCH("sort_radix_lmbd", radix_sort, sample.perms); - MYBENCH("sort_pair_lmbd", sort_pair, sample.perms); - MYBENCH("sort_netw_lmbd", HPCombi::sorted, sample.perms); - - MYBENCH("sort8_std_lmbd", std_sort, sample.vects); - MYBENCH("sort8_arr_lmbd", arr_sort, sample.vects); - MYBENCH("sort8_gen_lmbd", gen_sort, sample.vects); - MYBENCH("sort8_insert_lmbd", insertion_sort, sample.vects); - MYBENCH("sort8_oddEven_lmbd", sort_odd_even, sample.vects); - MYBENCH("sort8_pair_lmbd", sort_pair, sample.vects); - MYBENCH("sort8_netw_lmbd", HPCombi::sorted, sample.vects); - return 0; + BENCHMARK_LAMBDA("| lambda | perms", std_sort, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", arr_sort, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda | perms", gen_sort, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", insertion_sort, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", sort_odd_even, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", radix_sort, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", sort_pair, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", HPCombi::sorted, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda | vects", std_sort, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", arr_sort, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", gen_sort, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", insertion_sort, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", sort_odd_even, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", sort_pair, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); } -// ################################################################################## +/* int Bench_hsum() { myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms); myBench("hsum_ref2_nolmbd", HPCombi::horiz_sum_ref, sample.perms); @@ -183,7 +161,8 @@ int Bench_hsum() { MYBENCH("hsum_sum3_lmbd", HPCombi::horiz_sum3, sample.perms); return 0; } -// ################################################################################## +// +################################################################################## int Bench_psum() { myBench("psum_ref1_nolmbd", HPCombi::partial_sums_ref, sample.perms); 
myBench("psum_ref2_nolmbd", HPCombi::partial_sums_ref, sample.perms); @@ -199,7 +178,8 @@ int Bench_psum() { return 0; } -// ################################################################################## +// +################################################################################## int Bench_hmax() { myBench("hmax_ref1_nolmbd", HPCombi::horiz_max_ref, sample.perms); myBench("hmax_ref2_nolmbd", HPCombi::horiz_max_ref, sample.perms); @@ -216,7 +196,8 @@ int Bench_hmax() { MYBENCH("hmax_max3_lmbd", HPCombi::horiz_max3, sample.perms); return 0; } -// ################################################################################## +// +################################################################################## int Bench_pmax() { myBench("pmax_ref1_nolmbd", HPCombi::partial_max_ref, sample.perms); myBench("pmax_ref2_nolmbd", HPCombi::partial_max_ref, sample.perms); @@ -232,7 +213,8 @@ int Bench_pmax() { return 0; } -// ################################################################################## +// +################################################################################## int Bench_hmin() { myBench("hmin_ref1_nolmbd", HPCombi::horiz_min_ref, sample.perms); myBench("hmin_ref2_nolmbd", HPCombi::horiz_min_ref, sample.perms); @@ -249,7 +231,8 @@ int Bench_hmin() { MYBENCH("hmin_min3_lmbd", HPCombi::horiz_min3, sample.perms); return 0; } -// ################################################################################## +// +################################################################################## int Bench_pmin() { myBench("pmin_ref1_nolmbd", HPCombi::partial_min_ref, sample.perms); myBench("pmin_ref2_nolmbd", HPCombi::partial_min_ref, sample.perms); @@ -265,7 +248,8 @@ int Bench_pmin() { return 0; } -// ################################################################################## +// +################################################################################## int Bench_eval() { myBench("eval_ref1_nolmbd", HPCombi::eval16_ref, sample.perms); myBench("eval_ref2_nolmbd", HPCombi::eval16_ref, sample.perms); @@ -285,7 +269,8 @@ int Bench_eval() { return 0; } -// ################################################################################## +// +################################################################################## int Bench_first_diff() { MYBENCH2("firstDiff_ref_lmbd", HPCombi::first_diff_ref, sample.perms); MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, sample.perms); @@ -293,16 +278,12 @@ int Bench_first_diff() { return 0; } -// ################################################################################## +// +################################################################################## int Bench_last_diff() { MYBENCH2("lastDiff_ref_lmbd", HPCombi::last_diff_ref, sample.perms); MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, sample.perms); MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, sample.perms); return 0; -} - -auto dummy = {Bench_sort(), Bench_hsum(), Bench_psum(), Bench_hmax(), - Bench_pmax(), Bench_hmin(), Bench_pmin(), Bench_eval(), - Bench_first_diff(), Bench_last_diff()}; - -BENCHMARK_MAIN(); +} */ +} // namespace HPCombi From fca0c0bfbeec145d54afa05b599c231e874b569f Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 29 Oct 2023 20:10:04 +0000 Subject: [PATCH 068/113] Fixed examples/* wrt new directories --- examples/CF.cpp | 4 +- examples/CMakeLists.txt | 8 +++- examples/RD.cpp | 12 ++--- examples/Renner.cpp | 14 +++--- examples/Trans.cpp | 4 +- 
examples/image.cpp | 6 +-- examples/pattern.cpp | 99 +++++++++++++++++++++++++++++++++++++++ examples/stringmonoid.cpp | 3 +- 8 files changed, 129 insertions(+), 21 deletions(-) create mode 100644 examples/pattern.cpp diff --git a/examples/CF.cpp b/examples/CF.cpp index ab964c84..4c3fef0b 100644 --- a/examples/CF.cpp +++ b/examples/CF.cpp @@ -13,12 +13,12 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "perm16.hpp" #include #include #include #include -#include + +#include "hpcombi/perm16.hpp" using namespace std; using namespace HPCombi; diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index e8e073ee..bb485f22 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -27,9 +27,13 @@ else() add_compile_definitions(HPCOMBI_HAVE_DENSEHASHSET) endif() -include_directories(${CMAKE_SOURCE_DIR}/include ${PROJECT_BINARY_DIR}) +include_directories( + ${CMAKE_SOURCE_DIR}/include + ${CMAKE_SOURCE_DIR}/third_party + ${PROJECT_BINARY_DIR}) -set(usage_examples RD.cpp Renner.cpp stringmonoid.cpp Trans.cpp CF.cpp image.cpp) +set(usage_examples RD.cpp Renner.cpp stringmonoid.cpp Trans.cpp CF.cpp + image.cpp pattern.cpp) foreach(f ${usage_examples}) get_filename_component(exampleName ${f} NAME_WE) diff --git a/examples/RD.cpp b/examples/RD.cpp index 78ed1486..52165130 100644 --- a/examples/RD.cpp +++ b/examples/RD.cpp @@ -13,7 +13,6 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "perm16.hpp" #include #include #include @@ -25,7 +24,8 @@ #else #include #endif -#include + +#include "hpcombi/perm16.hpp" using HPCombi::epu8; using HPCombi::PTransf16; @@ -56,10 +56,10 @@ inline PTransf16 act1(PTransf16 x, PTransf16 y) { return x * y; } inline PTransf16 act0(PTransf16 x, PTransf16 y) { PTransf16 minab, maxab, mask, b = x * y; - mask = _mm_cmplt_epi8(y, PTransf16::one()); - minab = _mm_min_epi8(x, b); - maxab = _mm_max_epi8(x, b); - return static_cast(_mm_blendv_epi8(maxab, minab, mask)) | + mask = simde_mm_cmplt_epi8(y, PTransf16::one()); + minab = simde_mm_min_epi8(x, b); + maxab = simde_mm_max_epi8(x, b); + return static_cast(simde_mm_blendv_epi8(maxab, minab, mask)) | (y.v == HPCombi::Epu8(0xFF)); } diff --git a/examples/Renner.cpp b/examples/Renner.cpp index bb6007b6..8227ef6b 100644 --- a/examples/Renner.cpp +++ b/examples/Renner.cpp @@ -13,7 +13,6 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "perm16.hpp" #include #include #include @@ -27,7 +26,10 @@ #else #include #endif -#include + +#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde... 
+#include "hpcombi/perm16.hpp" + template std::ostream &operator<<(std::ostream &out, const std::vector &v) { @@ -84,10 +86,10 @@ inline PTransf16 mult1(PTransf16 x, PTransf16 y) { return x * y; } inline PTransf16 act0(PTransf16 x, PTransf16 y) { PTransf16 minab, maxab, mask, b = x * y; - mask = _mm_cmplt_epi8(y, Perm16::one()); - minab = _mm_min_epi8(x, b); - maxab = _mm_max_epi8(x, b); - return static_cast(_mm_blendv_epi8(maxab, minab, mask)) | + mask = simde_mm_cmplt_epi8(y, Perm16::one()); + minab = simde_mm_min_epi8(x, b); + maxab = simde_mm_max_epi8(x, b); + return static_cast(simde_mm_blendv_epi8(maxab, minab, mask)) | (y.v == Epu8(0xFF)); } diff --git a/examples/Trans.cpp b/examples/Trans.cpp index f199001e..7cd56f47 100644 --- a/examples/Trans.cpp +++ b/examples/Trans.cpp @@ -13,19 +13,21 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "perm16.hpp" #include #include #include #include // less<> #include #include + #ifdef HPCOMBI_HAVE_DENSEHASHSET #include #else #include #endif +#include "hpcombi/perm16.hpp" + using HPCombi::Transf16; // Full transformation semigroup on 7 points diff --git a/examples/image.cpp b/examples/image.cpp index 03e8895a..3d359a8d 100644 --- a/examples/image.cpp +++ b/examples/image.cpp @@ -13,8 +13,6 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "perm16.hpp" -#include "timer.h" #include #include #include @@ -25,7 +23,9 @@ #include #include // pair #include -#include + +#include "hpcombi/perm16.hpp" +#include "timer.h" template std::ostream &operator<<(std::ostream &out, const std::vector &v) { diff --git a/examples/pattern.cpp b/examples/pattern.cpp new file mode 100644 index 00000000..2241b23b --- /dev/null +++ b/examples/pattern.cpp @@ -0,0 +1,99 @@ +#include +#include +#include +#include // less<> +#include +#include +#include +#include +#ifdef HPCOMBI_HAVE_DENSEHASHSET +#include +#else +#include +#endif +#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde... 
+ +#include "hpcombi/perm16.hpp" + +template +std::ostream &operator<<(std::ostream &out, const std::vector &v) { + out << '['; + if (!v.empty()) { + auto i = v.begin(); + for (; i != --v.end(); ++i) + out << std::setw(2) << *i << ","; + out << std::setw(2) << *i; + } + out << "]"; + return out; +} + +using namespace std; +using namespace HPCombi; + +std::vector subsets; +std::vector subperm; + +epu8 tosubset(uint16_t n) { + epu8 res {}; + for (int i = 0; i < 16; i++) { + if (((n >> i) & 1) != 0) res[i] = 0xff; + } + if (simde_mm_movemask_epi8(res) != n) cout << n << "BUG" << res << endl; + return res; +} + +epu8 subset_to_perm(epu8 s) { + epu8 res = Epu8({},0xff); + int c = 0; + for (int i = 0; i < 16; i++) { + if (s[i] != 0) { + res[c] = i; + c++; + } + } + return res; +} + +void make_subsets_of_size(int n, int k) { + int n2 = 1 << n; + for (uint16_t i=0; i < n2; i++) { + if (__builtin_popcountl(i) == k) { + subsets.push_back(tosubset(i)); + subperm.push_back(subset_to_perm(tosubset(i))); + } + } +} + +template +epu8 extract_pattern(epu8 perm, epu8 permset) { + epu8 cst = Epu8({}, Size); + epu8 res = permuted(perm, permset) | (epu8id >= cst); + res = sort_perm(res) & (epu8id < cst); + return res; +} + +template +bool has_pattern(epu8 perm, epu8 patt) { + for (size_t i = 0; i < subperm.size(); i++) { + epu8 extr = extract_pattern(perm, subperm[i]); + if (equal(extr, patt)) return true; + } + return false; +} + +int main() { + cout << hex; + int n = 8, k = 4, n2 = 1 << n; + make_subsets_of_size(n, k); + cout << subsets.size() << endl; + epu8 perm = {1,4,2,0,3,5,6,7}; + int i = 42; + cout << Perm16::one() << endl; + cout << perm << endl; + cout << subsets[i] << endl; + cout << simde_mm_movemask_epi8(subsets[i]) << endl; + cout << extract_pattern<4>(perm, subperm[i]) << endl; + cout << int(has_pattern<4>(perm, epu8 {2,1,0,3})) << endl; + cout << int(has_pattern<4>(perm, epu8 {3,2,1,0})) << endl; +} diff --git a/examples/stringmonoid.cpp b/examples/stringmonoid.cpp index f1333ddc..d6ad7a1e 100644 --- a/examples/stringmonoid.cpp +++ b/examples/stringmonoid.cpp @@ -17,10 +17,11 @@ * #HPCombi::power_helper::Monoid */ -#include "power.hpp" #include #include +#include "hpcombi/power.hpp" + namespace HPCombi { namespace power_helper { From 15519138ae866814a6863ff79202b3abf4ff8ff2 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Mon, 30 Oct 2023 09:18:40 +0000 Subject: [PATCH 069/113] Add missing file --- benchmark/bench_main.hpp | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 benchmark/bench_main.hpp diff --git a/benchmark/bench_main.hpp b/benchmark/bench_main.hpp new file mode 100644 index 00000000..fcb49f35 --- /dev/null +++ b/benchmark/bench_main.hpp @@ -0,0 +1,57 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 James D. Mitchell // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. 
// +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// + +#define BENCHMARK_MEM_FN(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &elem : sample) { \ + volatile auto dummy = elem.mem_fn(); \ + } \ + return true; \ + }; + +#define BENCHMARK_FREE_FN(msg, free_fn, sample) \ + BENCHMARK(#free_fn " " msg) { \ + for (auto elem : sample) { \ + volatile auto dummy = free_fn(elem); \ + } \ + return true; \ + }; + +#define BENCHMARK_LAMBDA(msg, free_fn, sample) \ + BENCHMARK(#free_fn " " msg) { \ + auto lambda__xxx = [](auto val) { return free_fn(val); }; \ + for (auto elem : sample) { \ + volatile auto dummy = lambda__xxx(elem); \ + } \ + return true; \ + }; + +#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &pair : sample) { \ + auto val = \ + std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \ + REQUIRE(val.first == val.second); \ + } \ + return true; \ + }; + +#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &pair : sample) { \ + volatile auto val = pair.first.mem_fn(pair.second); \ + } \ + return true; \ + }; From 4d4c72cbcc8dddc1cf1db6e7b36da227288bf90c Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Mon, 30 Oct 2023 16:29:31 +0000 Subject: [PATCH 070/113] Updated benchmark for Perm16 --- benchmark/bench_fixture.hpp | 13 ++- benchmark/bench_perm16.cpp | 188 ++++++++++++++++-------------------- benchmark/cycle.cpp | 2 +- benchmark/inverse.cpp | 2 +- benchmark/sort.cpp | 2 +- benchmark/sum.cpp | 2 +- benchmark/testtools.hpp | 2 +- 7 files changed, 100 insertions(+), 111 deletions(-) diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 517c32bd..5141cce6 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -49,11 +49,22 @@ std::vector rand_perms(int sz) { return res; } +std::vector rand_transf(int sz) { + std::vector res(sz); + std::srand(std::time(0)); + for (int i = 0; i < sz; i++) + res[i] = HPCombi::random_epu8(15); + return res; +} + class Fix_epu8 { public: - Fix_epu8() : vects(rand_epu8(size)), perms(rand_perms(size)) {} + Fix_epu8() : vects(rand_epu8(size)), + transf(rand_transf(size)), + perms(rand_perms(size)) {} ~Fix_epu8() {} const std::vector vects; + const std::vector transf; const std::vector perms; }; diff --git a/benchmark/bench_perm16.cpp b/benchmark/bench_perm16.cpp index 851f792e..8b729af5 100644 --- a/benchmark/bench_perm16.cpp +++ b/benchmark/bench_perm16.cpp @@ -13,137 +13,115 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include +#include #include -#include -#include +#include + +#include +#include #include "bench_fixture.hpp" -#include "compilerinfo.hpp" -#include "cpu_x86_impl.hpp" +#include "bench_main.hpp" -#include "perm16.hpp" -#include "perm_generic.hpp" +#include "hpcombi/perm16.hpp" +#include "hpcombi/perm_generic.hpp" -using namespace FeatureDetector; -using namespace std; -using HPCombi::epu8; - -// const Fix_perm16 sample; -const Fix_epu8 sample; -const std::string SIMDSET = cpu_x86::get_highest_SIMD(); -const std::string PROCID = cpu_x86::get_proc_string(); +using namespace std; using HPCombi::epu8; using HPCombi::Perm16; using HPCombi::PTransf16; using HPCombi::Transf16; using HPCombi::Vect16; -// ################################################################################## -template -void myBench(const string 
&name, TF pfunc, Sample &sample) { - string fullname = name + "_" + CXX_VER + "_proc-" + PROCID; - benchmark::RegisterBenchmark( - fullname.c_str(), [pfunc, sample](benchmark::State &st) { - for (auto _ : st) { - for (auto elem : sample) { - benchmark::DoNotOptimize(pfunc(elem)); - } - } - }); +// using namespace FeatureDetector; +const std::string PROCID = "TODO"; +// const std::string SIMDSET = cpu_x86::get_highest_SIMD(); + +std::vector make_Perm16(size_t n) { + std::vector gens = rand_perms(n); + std::vector res{}; + std::transform(gens.cbegin(), gens.cend(), std::back_inserter(res), + [](epu8 x) -> Perm16 { return x; }); + return res; +} + +std::vector> make_Pair_Perm16(size_t n) { + std::vector gens = rand_perms(n); + std::vector> res{}; + for (auto g1 : gens) + for (auto g2 : gens) { + res.push_back({g1, g2}); + } + return res; } -#define myBenchLoop(descr, methname, smp) \ - myBench( \ - descr, \ - [](Perm16 p) { \ - for (int i = 0; i < 100; i++) \ - p = p.methname(); \ - return p; \ - }, \ - smp) -#define myBenchMeth(descr, methname, smp) \ - myBench( \ - descr, \ - [](Perm16 p) { \ - for (int i = 0; i < 100; i++) \ - benchmark::DoNotOptimize(p.methname()); \ - return p.methname(); \ - }, \ - smp) - -#define myBenchMeth2(descr, methname, smp) \ - myBench( \ - descr, \ - [](Perm16 p) { \ - for (Perm16 p1 : smp) \ - benchmark::DoNotOptimize(p.methname(p1)); \ - return 1; \ - }, \ - smp); - -// ################################################################################## -int Bench_inverse() { - myBenchMeth("inverse_ref1", inverse_ref, sample.perms); - myBenchMeth("inverse_ref2", inverse_ref, sample.perms); - myBenchMeth("inverse_arr", inverse_arr, sample.perms); - myBenchMeth("inverse_sort", inverse_sort, sample.perms); - myBenchMeth("inverse_find", inverse_find, sample.perms); - myBenchMeth("inverse_pow", inverse_pow, sample.perms); - myBenchMeth("inverse_cycl", inverse_cycl, sample.perms); - return 0; +std::vector make_Transf16(size_t n) { + std::vector gens = rand_transf(n); + std::vector res{}; + std::transform(gens.cbegin(), gens.cend(), std::back_inserter(res), + [](epu8 x) -> Transf16 { return x; }); + return res; } -int Bench_lehmer() { - myBenchMeth("lehmer_ref1", lehmer_ref, sample.perms); - myBenchMeth("lehmer_ref2", lehmer_ref, sample.perms); - myBenchMeth("lehmer_arr", lehmer_arr, sample.perms); - myBenchMeth("lehmer_opt", lehmer, sample.perms); - return 0; +class Fix_Perm16 { + public: + Fix_Perm16() : + sample_Perm16(make_Perm16(1000)), + sample_Transf16(make_Transf16(1000)), + sample_pair_Perm16(make_Pair_Perm16(40)) + {} + ~Fix_Perm16() {} + const std::vector sample_Perm16; + const std::vector sample_Transf16; + const std::vector> sample_pair_Perm16; +}; + + +TEST_CASE_METHOD(Fix_Perm16, "Inverse of 1000 Perm16", "[Perm16][000]") { + BENCHMARK_MEM_FN(inverse_ref, sample_Perm16); + BENCHMARK_MEM_FN(inverse_arr, sample_Perm16); + BENCHMARK_MEM_FN(inverse_sort, sample_Perm16); + BENCHMARK_MEM_FN(inverse_find, sample_Perm16); + BENCHMARK_MEM_FN(inverse_pow, sample_Perm16); + BENCHMARK_MEM_FN(inverse_cycl, sample_Perm16); + BENCHMARK_MEM_FN(inverse, sample_Perm16); } -int Bench_length() { - myBenchMeth("length_ref1", length_ref, sample.perms); - myBenchMeth("length_ref2", length_ref, sample.perms); - myBenchMeth("length_arr", length_arr, sample.perms); - myBenchMeth("length_opt", length, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_Perm16, "Lehmer code of 1000 Perm16", "[Perm16][000]") { + BENCHMARK_MEM_FN(lehmer_ref, sample_Perm16); + 
BENCHMARK_MEM_FN(lehmer_arr, sample_Perm16); + BENCHMARK_MEM_FN(lehmer, sample_Perm16); } -int Bench_nb_descents() { - myBenchMeth("nb_descents_ref1", nb_descents_ref, sample.perms); - myBenchMeth("nb_descents_ref2", nb_descents_ref, sample.perms); - myBenchMeth("nb_descents_opt", nb_descents, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_Perm16, "Coxeter Length of 1000 Perm16", "[Perm16][000]") { + BENCHMARK_MEM_FN(length_ref, sample_Perm16); + BENCHMARK_MEM_FN(length_arr, sample_Perm16); + BENCHMARK_MEM_FN(length, sample_Perm16); } -int Bench_nb_cycles() { - myBenchMeth("nb_cycles_ref1", nb_cycles_ref, sample.perms); - myBenchMeth("nb_cycles_ref2", nb_cycles_ref, sample.perms); - myBenchMeth("nb_cycles_opt", nb_cycles, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_Perm16, "Number of descents of 1000 Perm16", + "[Perm16][000]") { + BENCHMARK_MEM_FN(nb_descents_ref, sample_Perm16); + BENCHMARK_MEM_FN(nb_descents, sample_Perm16); } -int Bench_left_weak_leq() { - myBenchMeth2("leqweak_ref1", left_weak_leq_ref, sample.perms); - myBenchMeth2("leqweak_ref2", left_weak_leq_ref, sample.perms); - myBenchMeth2("leqweak_ref3", left_weak_leq_ref, sample.perms); - myBenchMeth2("leqweak_length", left_weak_leq_length, sample.perms); - myBenchMeth2("leqweak_opt", left_weak_leq, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_Perm16, "Number of cycles of 1000 Perm16", + "[Perm16][000]") { + BENCHMARK_MEM_FN(nb_cycles_ref, sample_Perm16); + BENCHMARK_MEM_FN(nb_cycles, sample_Perm16); } -int Bench_rank() { - myBenchMeth("rank_ref1", rank_ref, sample.perms); - myBenchMeth("rank_ref2", rank_ref, sample.perms); - myBenchMeth("rank_ref3", rank_ref, sample.perms); - myBenchMeth("rank_opt", rank, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_Perm16, "Weak order comparison of 1600 pairs of Perm16", + "[Perm16][000]") { + BENCHMARK_MEM_FN_PAIR(left_weak_leq_ref, sample_pair_Perm16); + BENCHMARK_MEM_FN_PAIR(left_weak_leq_length, sample_pair_Perm16); + BENCHMARK_MEM_FN_PAIR(left_weak_leq, sample_pair_Perm16); } -auto dummy = {Bench_inverse(), Bench_lehmer(), Bench_length(), - Bench_nb_descents(), Bench_nb_cycles(), Bench_left_weak_leq(), - Bench_rank()}; +TEST_CASE_METHOD(Fix_Perm16, "Rank of 1000 PTransf16", + "[PTransf16][000]") { + BENCHMARK_MEM_FN(rank_ref, sample_Transf16); + BENCHMARK_MEM_FN(rank, sample_Transf16); +} -BENCHMARK_MAIN(); diff --git a/benchmark/cycle.cpp b/benchmark/cycle.cpp index e4d5cf2c..20a6dc53 100644 --- a/benchmark/cycle.cpp +++ b/benchmark/cycle.cpp @@ -22,7 +22,7 @@ #include #include -#include "perm16.hpp" +#include "hpcombi/perm16.hpp" #include "testtools.hpp" using namespace std; diff --git a/benchmark/inverse.cpp b/benchmark/inverse.cpp index 2ca6547d..28316fd8 100644 --- a/benchmark/inverse.cpp +++ b/benchmark/inverse.cpp @@ -23,7 +23,7 @@ #include #include -#include "perm16.hpp" +#include "hpcombi/perm16.hpp" #include "testtools.hpp" using namespace std; diff --git a/benchmark/sort.cpp b/benchmark/sort.cpp index 56778f47..52245864 100644 --- a/benchmark/sort.cpp +++ b/benchmark/sort.cpp @@ -23,7 +23,7 @@ #include #include -#include "epu.hpp" +#include "hpcombi/epu.hpp" using namespace std; using namespace std::chrono; diff --git a/benchmark/sum.cpp b/benchmark/sum.cpp index bdf8e7f3..604f724f 100644 --- a/benchmark/sum.cpp +++ b/benchmark/sum.cpp @@ -13,7 +13,7 @@ // http://www.gnu.org/licenses/ // //****************************************************************************// -#include "perm16.hpp" +#include "hpcombi/perm16.hpp" #include "testtools.hpp" #include 
#include diff --git a/benchmark/testtools.hpp b/benchmark/testtools.hpp index 5491fa4d..bacb3476 100644 --- a/benchmark/testtools.hpp +++ b/benchmark/testtools.hpp @@ -21,7 +21,7 @@ #include #include -#include "perm16.hpp" +#include "hpcombi/perm16.hpp" namespace HPCombi { constexpr unsigned int factorial(unsigned int n) { From 8d401cd3774a63e1393f621d12f1c010155a2a99 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Tue, 31 Oct 2023 09:13:06 +0000 Subject: [PATCH 071/113] Fixed .codespellrc wrt simde --- .codespellrc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.codespellrc b/.codespellrc index 59e07a79..4e78859a 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,3 +1,3 @@ [codespell] -skip = ./include/simde,./.git,./benchmark/python,./experiments +skip = ./third_party/simde,./.git,./benchmark/python,./experiments ignore-words-list=shft From 31d797ddea816a09d4d438a6b4ec839975d70b08 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Tue, 31 Oct 2023 09:23:56 +0000 Subject: [PATCH 072/113] Rearrange bench_epu8 a little --- benchmark/bench_epu8.cpp | 29 ++++++++++++++++------------- include/hpcombi/epu.hpp | 4 ++++ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 16560e0f..db17a42b 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -112,29 +112,32 @@ static const epu8 bla = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; } // namespace TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { - BENCHMARK_FREE_FN("| no lambda | perms | 1", std_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms | 2", std_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms | 3", std_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms | 4", std_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", arr_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", gen_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", insertion_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", sort_odd_even, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", radix_sort, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", sort_pair, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", HPCombi::sorted, Fix_epu8::perms); - - // lambda function is needed for inlining + BENCHMARK_FREE_FN("| no lambda | perms", std_sort, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", std_sort, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda | perms", arr_sort, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", arr_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", gen_sort, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", gen_sort, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda | perms", insertion_sort, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", insertion_sort, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda | perms", sort_odd_even, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", sort_odd_even, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", radix_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", radix_sort, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda | perms", sort_pair, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", sort_pair, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda | perms", HPCombi::sorted, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", HPCombi::sorted, Fix_epu8::perms); + // 
lambda function is needed for inlining + BENCHMARK_LAMBDA("| lambda | vects", std_sort, Fix_epu8::vects); BENCHMARK_LAMBDA("| lambda | vects", arr_sort, Fix_epu8::vects); BENCHMARK_LAMBDA("| lambda | vects", gen_sort, Fix_epu8::vects); diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 0276dc62..69988adf 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -53,6 +53,7 @@ namespace detail { // Implementation detail code /// Factory object for various SIMD constants in particular constexpr template struct TPUBuild { + // Type for Packed Unsigned integer (TPU) using type_elem = typename std::remove_reference_t; static constexpr size_t size_elem = sizeof(type_elem); static constexpr size_t size = sizeof(TPU) / size_elem; @@ -154,6 +155,9 @@ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) { return reinterpret_cast &>(as_array(v)); } +// TODO up to this point in this file, everything could be generic to support +// larger perms, such as Perm32 in the experiments dir. + /** Test whether all the entries of a #HPCombi::epu8 are zero */ inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); } /** Test whether all the entries of a #HPCombi::epu8 are one */ From 4309f365dca989deed8257fe724cb8103382786a Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Tue, 31 Oct 2023 09:36:23 +0000 Subject: [PATCH 073/113] Use different rank default on arm --- benchmark/bench_perm16.cpp | 26 +++++++++++--------------- include/hpcombi/perm16.hpp | 2 ++ include/hpcombi/perm16_impl.hpp | 11 ++++++++++- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/benchmark/bench_perm16.cpp b/benchmark/bench_perm16.cpp index 8b729af5..81b63586 100644 --- a/benchmark/bench_perm16.cpp +++ b/benchmark/bench_perm16.cpp @@ -26,7 +26,6 @@ #include "hpcombi/perm16.hpp" #include "hpcombi/perm_generic.hpp" - using namespace std; using HPCombi::epu8; using HPCombi::Perm16; @@ -66,18 +65,16 @@ std::vector make_Transf16(size_t n) { class Fix_Perm16 { public: - Fix_Perm16() : - sample_Perm16(make_Perm16(1000)), - sample_Transf16(make_Transf16(1000)), - sample_pair_Perm16(make_Pair_Perm16(40)) - {} + Fix_Perm16() + : sample_Perm16(make_Perm16(1000)), + sample_Transf16(make_Transf16(1000)), + sample_pair_Perm16(make_Pair_Perm16(40)) {} ~Fix_Perm16() {} const std::vector sample_Perm16; const std::vector sample_Transf16; const std::vector> sample_pair_Perm16; }; - TEST_CASE_METHOD(Fix_Perm16, "Inverse of 1000 Perm16", "[Perm16][000]") { BENCHMARK_MEM_FN(inverse_ref, sample_Perm16); BENCHMARK_MEM_FN(inverse_arr, sample_Perm16); @@ -88,40 +85,39 @@ TEST_CASE_METHOD(Fix_Perm16, "Inverse of 1000 Perm16", "[Perm16][000]") { BENCHMARK_MEM_FN(inverse, sample_Perm16); } -TEST_CASE_METHOD(Fix_Perm16, "Lehmer code of 1000 Perm16", "[Perm16][000]") { +TEST_CASE_METHOD(Fix_Perm16, "Lehmer code of 1000 Perm16", "[Perm16][001]") { BENCHMARK_MEM_FN(lehmer_ref, sample_Perm16); BENCHMARK_MEM_FN(lehmer_arr, sample_Perm16); BENCHMARK_MEM_FN(lehmer, sample_Perm16); } -TEST_CASE_METHOD(Fix_Perm16, "Coxeter Length of 1000 Perm16", "[Perm16][000]") { +TEST_CASE_METHOD(Fix_Perm16, "Coxeter Length of 1000 Perm16", "[Perm16][002]") { BENCHMARK_MEM_FN(length_ref, sample_Perm16); BENCHMARK_MEM_FN(length_arr, sample_Perm16); BENCHMARK_MEM_FN(length, sample_Perm16); } TEST_CASE_METHOD(Fix_Perm16, "Number of descents of 1000 Perm16", - "[Perm16][000]") { + "[Perm16][003]") { BENCHMARK_MEM_FN(nb_descents_ref, sample_Perm16); BENCHMARK_MEM_FN(nb_descents, sample_Perm16); } 
TEST_CASE_METHOD(Fix_Perm16, "Number of cycles of 1000 Perm16", - "[Perm16][000]") { + "[Perm16][004]") { BENCHMARK_MEM_FN(nb_cycles_ref, sample_Perm16); BENCHMARK_MEM_FN(nb_cycles, sample_Perm16); } TEST_CASE_METHOD(Fix_Perm16, "Weak order comparison of 1600 pairs of Perm16", - "[Perm16][000]") { + "[Perm16][005]") { BENCHMARK_MEM_FN_PAIR(left_weak_leq_ref, sample_pair_Perm16); BENCHMARK_MEM_FN_PAIR(left_weak_leq_length, sample_pair_Perm16); BENCHMARK_MEM_FN_PAIR(left_weak_leq, sample_pair_Perm16); } -TEST_CASE_METHOD(Fix_Perm16, "Rank of 1000 PTransf16", - "[PTransf16][000]") { +TEST_CASE_METHOD(Fix_Perm16, "Rank of 1000 PTransf16", "[PTransf16][006]") { BENCHMARK_MEM_FN(rank_ref, sample_Transf16); + BENCHMARK_MEM_FN(rank_cmpestrm, sample_Transf16); BENCHMARK_MEM_FN(rank, sample_Transf16); } - diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index e1f5a56b..5b74b239 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -99,6 +99,8 @@ struct alignas(16) PTransf16 : public Vect16 { uint32_t rank_ref() const; /** Returns the size of the image of \c *this */ uint32_t rank() const; + //! Returns the size of the image of \c *this + uint32_t rank_cmpestrm() const; /** Returns a mask for the fix point of \c *this */ epu8 fix_points_mask(bool complement = false) const; diff --git a/include/hpcombi/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp index d276e661..3e3f96ee 100644 --- a/include/hpcombi/perm16_impl.hpp +++ b/include/hpcombi/perm16_impl.hpp @@ -74,10 +74,19 @@ inline uint32_t PTransf16::rank_ref() const { tmp[x] = 1; return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0)); } -inline uint32_t PTransf16::rank() const { + +inline uint32_t PTransf16::rank_cmpestrm() const { return __builtin_popcountl(image_bitset()); } +inline uint32_t PTransf16::rank() const { +#ifdef SIMDE_X86_SSE4_2_NATIVE + return rank_cmpestrm(); +#else + return rank_ref(); +#endif +} + inline epu8 PTransf16::fix_points_mask(bool complement) const { return complement ? 
v != one().v : v == one().v; } From e9c12e84bfb461ff79a07027b601de6f635910dc Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 1 Nov 2023 15:11:26 +0000 Subject: [PATCH 074/113] Add missing copy constructors + improved benchmark --- benchmark/bench_epu8.cpp | 12 +++++++--- benchmark/bench_fixture.hpp | 16 +++++++++++-- benchmark/bench_main.hpp | 6 ++--- include/hpcombi/epu.hpp | 2 ++ include/hpcombi/epu_impl.hpp | 8 +++++++ include/hpcombi/vect16.hpp | 2 ++ include/hpcombi/vect_generic.hpp | 8 +++++-- tests/test_epu.cpp | 19 ++++++++------- tests/test_main.hpp | 40 ++++++++++++++++++++++++-------- 9 files changed, 84 insertions(+), 29 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index db17a42b..b60124fc 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -51,11 +51,11 @@ inline epu8 sort_pair(epu8 a) { inline epu8 sort_odd_even(epu8 a) { const uint8_t FF = 0xff; - static const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, + static constexpr const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; - static const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, + static constexpr const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; - static const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, + static constexpr const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { @@ -147,6 +147,12 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); } + +TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { + BENCHMARK_FREE_FN_PAIR(HPCombi::permuted_ref, pairs); + BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); +} + /* int Bench_hsum() { myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms); diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 5141cce6..8d2db5d0 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -22,7 +22,7 @@ using HPCombi::epu8; -constexpr uint_fast64_t size = 1000; +constexpr uint_fast64_t size = 10; // constexpr uint_fast64_t repeat = 100; std::vector rand_epu8(size_t sz) { @@ -57,15 +57,27 @@ std::vector rand_transf(int sz) { return res; } +std::vector> make_pair_sample(size_t sz) { + std::vector> res{}; + for (size_t i = 0; i < sz; i++) { + res.push_back(std::make_pair(HPCombi::random_epu8(15), + HPCombi::random_epu8(15))); + } + return res; +} + class Fix_epu8 { public: Fix_epu8() : vects(rand_epu8(size)), transf(rand_transf(size)), - perms(rand_perms(size)) {} + perms(rand_perms(size)), + pairs(make_pair_sample(size)) + {} ~Fix_epu8() {} const std::vector vects; const std::vector transf; const std::vector perms; + const std::vector> pairs; }; #endif // BENCH_FIXTURE diff --git a/benchmark/bench_main.hpp b/benchmark/bench_main.hpp index fcb49f35..3d8cb90b 100644 --- a/benchmark/bench_main.hpp +++ b/benchmark/bench_main.hpp @@ -48,10 +48,10 @@ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ +#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ + BENCHMARK(#free_fn) { \ for (auto &pair : sample) { \ - volatile auto val = pair.first.mem_fn(pair.second); \ + volatile auto val = free_fn(pair.first, pair.second); \ } \ return true; \ }; diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 69988adf..5f1fbe88 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -172,6 +172,8 @@ inline bool equal(epu8 a, epu8 b) noexcept { /** Non 
equality of #HPCombi::epu8 */ inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); } +/** Permuting a #HPCombi::epu8 */ +inline epu8 permuted_ref(epu8 a, epu8 b) noexcept; /** Permuting a #HPCombi::epu8 */ inline epu8 permuted(epu8 a, epu8 b) noexcept { return simde_mm_shuffle_epi8(a, b); diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index c7369458..be83c3b4 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -49,6 +49,14 @@ namespace HPCombi { /// Implementation part for inline functions ////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// +/** Permuting a #HPCombi::epu8 */ +inline epu8 permuted_ref(epu8 a, epu8 b) noexcept { + epu8 res; + for (uint64_t i = 0; i < 16; i++) + res[i] = a[b[i] & 0xF]; + return res; +} + // Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); diff --git a/include/hpcombi/vect16.hpp b/include/hpcombi/vect16.hpp index d0e5930b..49c3e760 100644 --- a/include/hpcombi/vect16.hpp +++ b/include/hpcombi/vect16.hpp @@ -33,6 +33,8 @@ struct alignas(16) Vect16 { epu8 v; Vect16() = default; + constexpr Vect16(const Vect16 &v) = default; + constexpr Vect16(epu8 x) : v(x) {} Vect16(std::initializer_list il, uint8_t def = 0) : v(Epu8(il, def)) {} diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp index 36f309d0..5e1e1558 100644 --- a/include/hpcombi/vect_generic.hpp +++ b/include/hpcombi/vect_generic.hpp @@ -47,6 +47,8 @@ template struct VectGeneric { array v; VectGeneric() = default; + constexpr VectGeneric(const VectGeneric &v) = default; + VectGeneric(const std::array &_v) : v(_v) {} // NOLINT VectGeneric(std::initializer_list il, Expo def = 0) { HPCOMBI_ASSERT(il.size() <= Size); @@ -106,8 +108,10 @@ template struct VectGeneric { VectGeneric permuted(const VectGeneric &u) const { VectGeneric res; - for (uint64_t i = 0; i < Size; i++) - res[i] = v[u[i]]; + for (uint64_t i = 0; i < Size; i++) { + if (u[i] < Size) + res[i] = v[u[i]]; + } return res; } diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 6b31f0ef..2079a60f 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -200,6 +200,7 @@ TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}), Equals(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); } +TEST_AGREES2_FUN_EPU8(Fix, permuted, permuted_ref, v, "[Epu8][011]") TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { CHECK_THAT(shifted_left(P01), Equals(P10)); @@ -441,10 +442,10 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } -TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_round, v, +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { CHECK(horiz_max_ref(zero) == 0); @@ -485,9 +486,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { 
CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } -TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { CHECK(horiz_min_ref(zero) == 0); @@ -529,9 +530,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } -TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 475e3fba..3c93e6a2 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -23,32 +23,34 @@ #include #include -#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK(fun(p) == ref(p)); \ + CHECK(p.fun() == p.ref()); \ } \ } -#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK_THAT(fun(p), Equals(ref(p))); \ + CHECK(fun(p) == ref(p)); \ } \ } -#define TEST_AGREES(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES2(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ - for (auto p : vct) { \ - CHECK(p.fun() == p.ref()); \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ + CHECK(p1.fun(p2) == p1.ref(p2)); \ + } \ } \ } -#define TEST_AGREES2(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES2_FUN(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p1 : vct) { \ for (auto p2 : vct) { \ - CHECK(p1.fun(p2) == p1.ref(p2)); \ + CHECK(fun(p1, p2) == ref(p1, p2)); \ } \ } \ } @@ -60,13 +62,31 @@ } \ } -#define TEST_AGREES_EPU8_FUN(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ CHECK_THAT(fun(p), Equals(ref(p))); \ } \ } +#define TEST_AGREES2_EPU8(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ + CHECK_THAT(p1.fun(p2), Equals(p1.ref(p2))); \ + } \ + } \ + } + +#define TEST_AGREES2_FUN_EPU8(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ + CHECK_THAT(fun(p1, p2), Equals(ref(p1, p2))); \ + } \ + } \ + } + struct Equals : 
Catch::Matchers::MatcherGenericBase { Equals(HPCombi::epu8 v) : v(v) {} From 32ce1be4406161c6e95524531f107cccc3f280e0 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Wed, 1 Nov 2023 15:06:18 +0000 Subject: [PATCH 075/113] Add benchmark plot script --- benchmark/bench_bmat8.cpp | 15 ++-- benchmark/bench_epu8.cpp | 58 ++++++++-------- etc/bench_plot.py | 139 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 40 deletions(-) create mode 100755 etc/bench_plot.py diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index ebd4960a..dd7efccc 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -57,8 +57,7 @@ class Fix_BMat8 { pair_sample; // not const, transpose2 is in place }; -TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", - "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Row space size", "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size_ref, sample); BENCHMARK_MEM_FN(row_space_size_bitset, sample); BENCHMARK_MEM_FN(row_space_size_incl1, sample); @@ -66,14 +65,13 @@ TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", BENCHMARK_MEM_FN(row_space_size, sample); } -TEST_CASE_METHOD(Fix_BMat8, "Transpose benchmarks 1000 BMat8", "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose", "[BMat8][000]") { BENCHMARK_MEM_FN(transpose, sample); BENCHMARK_MEM_FN(transpose_mask, sample); BENCHMARK_MEM_FN(transpose_maskd, sample); } -TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", - "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR_EQ(transpose, pair_sample); BENCHMARK_MEM_FN_PAIR_EQ(transpose_mask, pair_sample); BENCHMARK_MEM_FN_PAIR_EQ(transpose_maskd, pair_sample); @@ -86,16 +84,13 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", }; } -TEST_CASE_METHOD(Fix_BMat8, "Inclusion of row spaces benchmarks 1000 BMat8", - "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Row spaces inclusion", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR(row_space_included_ref, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included_bitset, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included, pair_sample); } -TEST_CASE_METHOD(Fix_BMat8, - "Inclusion of row spaces benchmarks 1000 BMat8 by pairs", - "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Pair row space inclusion", "[BMat8][002]") { BENCHMARK("rotating pairs implementation") { for (auto &pair : pair_sample) { auto res = BMat8::row_space_included2(pair.first, pair.second, diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index b60124fc..91c426a4 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -52,11 +52,11 @@ inline epu8 sort_pair(epu8 a) { inline epu8 sort_odd_even(epu8 a) { const uint8_t FF = 0xff; static constexpr const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, - 9, 8, 11, 10, 13, 12, 15, 14}; + 9, 8, 11, 10, 13, 12, 15, 14}; static constexpr const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, - 7, 10, 9, 12, 11, 14, 13, 15}; + 7, 10, 9, 12, 11, 14, 13, 15}; static constexpr const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, - 0, FF, 0, FF, 0, FF, 0, FF}; + 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { b = HPCombi::permuted(a, even); @@ -79,7 +79,7 @@ inline epu8 insertion_sort(epu8 p) { return p; } -inline epu8 radix_sort(epu8 p) { +__attribute__((always_inline)) inline epu8 radix_sort(epu8 p) { auto &a = HPCombi::as_array(p); std::array stat{}; for (int i = 0; i < 16; i++) @@ -111,7 +111,7 @@ static const epu8 bla 
= {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; } // namespace -TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { +TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda | perms", std_sort, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", std_sort, Fix_epu8::perms); @@ -147,7 +147,6 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); } - TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { BENCHMARK_FREE_FN_PAIR(HPCombi::permuted_ref, pairs); BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); @@ -155,21 +154,18 @@ TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { /* int Bench_hsum() { - myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - myBench("hsum_ref2_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - myBench("hsum_ref3_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - - myBench("hsum_ref_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - myBench("hsum_gen_nolmbd", HPCombi::horiz_sum_gen, sample.perms); - myBench("hsum_sum4_nolmbd", HPCombi::horiz_sum4, sample.perms); - myBench("hsum_sum3_nolmbd", HPCombi::horiz_sum3, sample.perms); - - MYBENCH("hsum_ref_lmbd", HPCombi::horiz_sum_ref, sample.perms); - MYBENCH("hsum_gen_lmbd", HPCombi::horiz_sum_gen, sample.perms); - MYBENCH("hsum_sum4_lmbd", HPCombi::horiz_sum4, sample.perms); - MYBENCH("hsum_sum3_lmbd", HPCombi::horiz_sum3, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { + BENCHMARK_FREE_FN("| no lambda", horiz_sum_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_sum_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_sum4, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_sum3, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda ", horiz_sum_ref, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda ", horiz_sum_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda ", horiz_sum4, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda ", horiz_sum3, Fix_epu8::perms); } +/* // ################################################################################## int Bench_psum() { @@ -213,8 +209,9 @@ int Bench_pmax() { myBench("pmax_ref3_nolmbd", HPCombi::partial_max_ref, sample.perms); myBench("pmax_ref_nolmbd", HPCombi::partial_max_ref, sample.perms); - // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, sample.perms); - myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, sample.perms); + // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, +sample.perms); myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, +sample.perms); MYBENCH("pmax_ref_lmbd", HPCombi::partial_max_ref, sample.perms); // MYBENCH("pmax_gen_lmbd", HPCombi::partial_max_gen, sample.perms); @@ -248,8 +245,9 @@ int Bench_pmin() { myBench("pmin_ref3_nolmbd", HPCombi::partial_min_ref, sample.perms); myBench("pmin_ref_nolmbd", HPCombi::partial_min_ref, sample.perms); - // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, sample.perms); - myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, sample.perms); + // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, +sample.perms); myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, +sample.perms); MYBENCH("pmin_ref_lmbd", HPCombi::partial_min_ref, sample.perms); // MYBENCH("pmin_gen_lmbd", HPCombi::partial_min_gen, sample.perms); @@ -282,17 +280,17 @@ int Bench_eval() { ################################################################################## int Bench_first_diff() { MYBENCH2("firstDiff_ref_lmbd", 
HPCombi::first_diff_ref, sample.perms); - MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, sample.perms); - MYBENCH2("firstDiff_mask_lmbd", HPCombi::first_diff_mask, sample.perms); - return 0; + MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, +sample.perms); MYBENCH2("firstDiff_mask_lmbd", HPCombi::first_diff_mask, +sample.perms); return 0; } // ################################################################################## int Bench_last_diff() { MYBENCH2("lastDiff_ref_lmbd", HPCombi::last_diff_ref, sample.perms); - MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, sample.perms); - MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, sample.perms); - return 0; + MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, +sample.perms); MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, +sample.perms); return 0; } */ } // namespace HPCombi diff --git a/etc/bench_plot.py b/etc/bench_plot.py new file mode 100755 index 00000000..f619bf42 --- /dev/null +++ b/etc/bench_plot.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 + +import os +import re +import statistics as stats +import sys + +import matplotlib +import numpy as np +from bs4 import BeautifulSoup +from matplotlib import pyplot as plt + +# This file should be from libsemigroups/etc + +matplotlib.rcParams["mathtext.fontset"] = "stix" +matplotlib.rcParams["font.family"] = "STIXGeneral" + +color = [ + (238 / 255, 20 / 255, 135 / 255), + (0 / 255, 221 / 255, 164 / 255), + (86 / 255, 151 / 255, 209 / 255), + (249 / 255, 185 / 255, 131 / 255), + (150 / 255, 114 / 255, 196 / 255), +] + +# Filenames should be: name.something.xml -> name.png + + +def normalize_xml(xml_fnam): + with open(xml_fnam, "r") as f: + xml = f.read() + xml = re.sub("<", "<", xml) + with open(xml_fnam, "w") as f: + f.write(xml) + + +def xml_stdout_get(xml, name): + try: + return xml.find("StdOut").find(name)["value"] + except (KeyError, TypeError, AttributeError): + return None + + +def time_unit(Y): + time_units = ("microseconds", "milliseconds", "seconds") + index = 0 + + while all(y > 1000 for y in Y) and index < len(time_units): + index += 1 + Y = [y / 1000 for y in Y] + return time_units[index], Y + +def add_plot(xml_fnam, num_bars=4): + global color; + current_bar = 0 + Y = [] + Y_for_comparison = None + labels = [] + + xml = BeautifulSoup(open(xml_fnam, "r"), "xml") + total_cols = 0 + xticks_label = [] + xticks_pos = [] + for x, test_case in enumerate(xml.find_all("TestCase")): + results = test_case.find_all("BenchmarkResults") + Y = ( + np.array([float(x.find("mean")["value"]) for x in results]) / 1 + ) # times in nanoseconds + X = np.arange(total_cols + 1, total_cols + len(Y) + 1, 1) + xticks_label.append(("\n" * (x % 2)) + test_case["name"]) + xticks_pos.append(total_cols + 1 + (len(Y) / 2) - 0.5) + bars = plt.bar( + X, + Y, + 1, + align="center", + color=color[:len(Y)], + ) + total_cols += len(Y) + 1 + plt.yscale("log", nonpositive="clip") + plt.ylabel("Time in ns") + plt.xticks(xticks_pos, xticks_label) + # plt.legend(loc="upper right") + + # print(Y) + # width = 1 + + + # plt.axhline( + # stats.mean(Y), + # color=color[current_bar], + # linestyle="--", + # lw=1, + # xmin=0.01, + # xmax=0.99, + # ) + + # current_bar += 1 + # if current_bar == num_bars - 1: + # Ys = zip(*sorted(zip(*Ys))) + # for i, Y in enumerate(Ys): + # X = np.arange(i, num_bars * len(Y), num_bars) + # bars = plt.bar( + # X, + # Y, + # width, + # align="center", + # color=color[i], + # label=labels[i], + # ) + # plt.xticks( + # np.arange(1, 
num_bars * (len(X) + 1), num_bars * 20), + # np.arange(0, len(X) + num_bars - 1, 20), + # ) + # plt.xlabel("Test case") + # plt.ylabel("Time (relative)") + # plt.legend(loc="upper left") + +def check_filename(xml_fnam): + if len(xml_fnam.split(".")) < 2: + raise ValueError( + f"expected filename of form x.xml found {xml_fnam}" + ) + + +from sys import argv + +args = sys.argv[1:] + +for x in args: + check_filename(x) + # TODO more arg checks +for x in args: + add_plot(x) +xml_fnam = args[0] +png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" +print("Writing {} . . .".format(png_fnam)) +plt.savefig(png_fnam, format="png", dpi=300) +sys.exit(0) From 949e41c31fa82efa4cf23b1c95822600d2b81271 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Wed, 1 Nov 2023 15:12:23 +0000 Subject: [PATCH 076/113] Finalize bench_epu8 --- benchmark/bench_epu8.cpp | 210 +++++++++++++++++++-------------------- 1 file changed, 104 insertions(+), 106 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 91c426a4..a577ac2b 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -31,17 +31,17 @@ namespace { struct RoundsMask { constexpr RoundsMask() : arr() { - for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) - arr[i] = HPCombi::sorting_rounds[i] < HPCombi::epu8id; + for (unsigned i = 0; i < sorting_rounds.size(); ++i) + arr[i] = sorting_rounds[i] < epu8id; } - epu8 arr[HPCombi::sorting_rounds.size()]; + epu8 arr[sorting_rounds.size()]; }; const auto rounds_mask = RoundsMask(); inline epu8 sort_pair(epu8 a) { - for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) { - epu8 minab, maxab, b = HPCombi::permuted(a, HPCombi::sorting_rounds[i]); + for (unsigned i = 0; i < sorting_rounds.size(); ++i) { + epu8 minab, maxab, b = permuted(a, sorting_rounds[i]); minab = simde_mm_min_epi8(a, b); maxab = simde_mm_max_epi8(a, b); a = simde_mm_blendv_epi8(minab, maxab, rounds_mask.arr[i]); @@ -59,11 +59,11 @@ inline epu8 sort_odd_even(epu8 a) { 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { - b = HPCombi::permuted(a, even); + b = permuted(a, even); minab = simde_mm_min_epi8(a, b); maxab = simde_mm_max_epi8(a, b); a = simde_mm_blendv_epi8(minab, maxab, mask); - b = HPCombi::permuted(a, odd); + b = permuted(a, odd); minab = simde_mm_min_epi8(a, b); maxab = simde_mm_max_epi8(a, b); a = simde_mm_blendv_epi8(maxab, minab, mask); @@ -72,7 +72,7 @@ inline epu8 sort_odd_even(epu8 a) { } inline epu8 insertion_sort(epu8 p) { - auto &a = HPCombi::as_array(p); + auto &a = as_array(p); for (int i = 0; i < 16; i++) for (int j = i; j > 0 && a[j] < a[j - 1]; j--) std::swap(a[j], a[j - 1]); @@ -80,7 +80,7 @@ inline epu8 insertion_sort(epu8 p) { } __attribute__((always_inline)) inline epu8 radix_sort(epu8 p) { - auto &a = HPCombi::as_array(p); + auto &a = as_array(p); std::array stat{}; for (int i = 0; i < 16; i++) stat[a[i]]++; @@ -92,18 +92,18 @@ __attribute__((always_inline)) inline epu8 radix_sort(epu8 p) { } inline epu8 std_sort(epu8 &p) { - auto &ar = HPCombi::as_array(p); + auto &ar = as_array(p); std::sort(ar.begin(), ar.end()); return p; } inline epu8 arr_sort(epu8 &p) { - auto &ar = HPCombi::as_array(p); - return HPCombi::from_array(HPCombi::sorted_vect(ar)); + auto &ar = as_array(p); + return from_array(sorted_vect(ar)); } inline epu8 gen_sort(epu8 p) { - HPCombi::as_VectGeneric(p).sort(); + as_VectGeneric(p).sort(); return p; } @@ -133,8 +133,8 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Epu8][000]") { BENCHMARK_FREE_FN("| no 
lambda | perms", sort_pair, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", sort_pair, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", HPCombi::sorted, Fix_epu8::perms); - BENCHMARK_LAMBDA("| lambda | perms", HPCombi::sorted, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", sorted, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", sorted, Fix_epu8::perms); // lambda function is needed for inlining @@ -144,7 +144,7 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda | vects", insertion_sort, Fix_epu8::vects); BENCHMARK_LAMBDA("| lambda | vects", sort_odd_even, Fix_epu8::vects); BENCHMARK_LAMBDA("| lambda | vects", sort_pair, Fix_epu8::vects); - BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", sorted, Fix_epu8::vects); } TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { @@ -165,132 +165,130 @@ TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda ", horiz_sum4, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda ", horiz_sum3, Fix_epu8::perms); } + +TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") { + + BENCHMARK_FREE_FN("| no lambda", partial_sums_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_sums_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_sums_round, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", partial_sums_ref, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", partial_sums_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", partial_sums_round, Fix_epu8::perms); +} /* // -################################################################################## -int Bench_psum() { - myBench("psum_ref1_nolmbd", HPCombi::partial_sums_ref, sample.perms); - myBench("psum_ref2_nolmbd", HPCombi::partial_sums_ref, sample.perms); - myBench("psum_ref3_nolmbd", HPCombi::partial_sums_ref, sample.perms); - - myBench("psum_ref_nolmbd", HPCombi::partial_sums_ref, sample.perms); - myBench("psum_gen_nolmbd", HPCombi::partial_sums_gen, sample.perms); - myBench("psum_rnd_nolmbd", HPCombi::partial_sums_round, sample.perms); - - MYBENCH("psum_ref_lmbd", HPCombi::partial_sums_ref, sample.perms); - MYBENCH("psum_gen_lmbd", HPCombi::partial_sums_gen, sample.perms); - MYBENCH("psum_rnd_lmbd", HPCombi::partial_sums_round, sample.perms); - return 0; -} // ################################################################################## int Bench_hmax() { - myBench("hmax_ref1_nolmbd", HPCombi::horiz_max_ref, sample.perms); - myBench("hmax_ref2_nolmbd", HPCombi::horiz_max_ref, sample.perms); - myBench("hmax_ref3_nolmbd", HPCombi::horiz_max_ref, sample.perms); - - myBench("hmax_ref_nolmbd", HPCombi::horiz_max_ref, sample.perms); - // myBench("hmax_gen_nolmbd", HPCombi::horiz_max_gen, sample.perms); - myBench("hmax_max4_nolmbd", HPCombi::horiz_max4, sample.perms); - myBench("hmax_max3_nolmbd", HPCombi::horiz_max3, sample.perms); - - MYBENCH("hmax_ref_lmbd", HPCombi::horiz_max_ref, sample.perms); - // MYBENCH("hmax_gen_lmbd", HPCombi::horiz_max_gen, sample.perms); - MYBENCH("hmax_max4_lmbd", HPCombi::horiz_max4, sample.perms); - MYBENCH("hmax_max3_lmbd", HPCombi::horiz_max3, sample.perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, 
+Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max4, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max3, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", horiz_max_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", horiz_max_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_max4, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_max3, Fix_epu8::perms); return 0; } // ################################################################################## int Bench_pmax() { - myBench("pmax_ref1_nolmbd", HPCombi::partial_max_ref, sample.perms); - myBench("pmax_ref2_nolmbd", HPCombi::partial_max_ref, sample.perms); - myBench("pmax_ref3_nolmbd", HPCombi::partial_max_ref, sample.perms); - - myBench("pmax_ref_nolmbd", HPCombi::partial_max_ref, sample.perms); - // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, -sample.perms); myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, -sample.perms); - - MYBENCH("pmax_ref_lmbd", HPCombi::partial_max_ref, sample.perms); - // MYBENCH("pmax_gen_lmbd", HPCombi::partial_max_gen, sample.perms); - MYBENCH("pmax_rnd_lmbd", HPCombi::partial_max_round, sample.perms); - return 0; + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", partial_max_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_round, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", partial_max_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", partial_max_gen, +Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_round, +Fix_epu8::perms); return 0; } // ################################################################################## int Bench_hmin() { - myBench("hmin_ref1_nolmbd", HPCombi::horiz_min_ref, sample.perms); - myBench("hmin_ref2_nolmbd", HPCombi::horiz_min_ref, sample.perms); - myBench("hmin_ref3_nolmbd", HPCombi::horiz_min_ref, sample.perms); - - myBench("hmin_ref_nolmbd", HPCombi::horiz_min_ref, sample.perms); - // myBench("hmin_gen_nolmbd", HPCombi::horiz_min_gen, sample.perms); - myBench("hmin_min4_nolmbd", HPCombi::horiz_min4, sample.perms); - myBench("hmin_min3_nolmbd", HPCombi::horiz_min3, sample.perms); - - MYBENCH("hmin_ref_lmbd", HPCombi::horiz_min_ref, sample.perms); - // MYBENCH("hmin_gen_lmbd", HPCombi::horiz_min_gen, sample.perms); - MYBENCH("hmin_min4_lmbd", HPCombi::horiz_min4, sample.perms); - MYBENCH("hmin_min3_lmbd", HPCombi::horiz_min3, sample.perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min4, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min3, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", horiz_min_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", horiz_min_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_min4, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_min3, Fix_epu8::perms); return 0; } // ################################################################################## int Bench_pmin() { - myBench("pmin_ref1_nolmbd", 
HPCombi::partial_min_ref, sample.perms); - myBench("pmin_ref2_nolmbd", HPCombi::partial_min_ref, sample.perms); - myBench("pmin_ref3_nolmbd", HPCombi::partial_min_ref, sample.perms); - - myBench("pmin_ref_nolmbd", HPCombi::partial_min_ref, sample.perms); - // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, -sample.perms); myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, -sample.perms); - - MYBENCH("pmin_ref_lmbd", HPCombi::partial_min_ref, sample.perms); - // MYBENCH("pmin_gen_lmbd", HPCombi::partial_min_gen, sample.perms); - MYBENCH("pmin_rnd_lmbd", HPCombi::partial_min_round, sample.perms); - return 0; + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_min_round, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", partial_min_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", partial_min_gen, +Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_round, +Fix_epu8::perms); return 0; } // ################################################################################## int Bench_eval() { - myBench("eval_ref1_nolmbd", HPCombi::eval16_ref, sample.perms); - myBench("eval_ref2_nolmbd", HPCombi::eval16_ref, sample.perms); - myBench("eval_ref3_nolmbd", HPCombi::eval16_ref, sample.perms); - - myBench("eval_ref_nolmbd", HPCombi::eval16_ref, sample.perms); - myBench("eval_gen_nolmbd", HPCombi::eval16_gen, sample.perms); - myBench("eval_popcnt_nolmbd", HPCombi::eval16_popcount, sample.perms); - myBench("eval_arr_nolmbd", HPCombi::eval16_arr, sample.perms); - myBench("eval_cycle_nolmbd", HPCombi::eval16_cycle, sample.perms); - - MYBENCH("eval_ref_lmbd", HPCombi::eval16_ref, sample.perms); - MYBENCH("eval_gen_lmbd", HPCombi::eval16_gen, sample.perms); - MYBENCH("eval_popcnt_lmbd", HPCombi::eval16_popcount, sample.perms); - MYBENCH("eval_arr_lmbd", HPCombi::eval16_arr, sample.perms); - MYBENCH("eval_cycle_lmbd", HPCombi::eval16_cycle, sample.perms); + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_popcount, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_arr, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_cycle, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_popcount, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_arr, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_cycle, Fix_epu8::perms); return 0; } // ################################################################################## int Bench_first_diff() { - MYBENCH2("firstDiff_ref_lmbd", HPCombi::first_diff_ref, sample.perms); - MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, -sample.perms); MYBENCH2("firstDiff_mask_lmbd", HPCombi::first_diff_mask, -sample.perms); return 0; + MYBENCH2("firstDiff_ref_lmbd", first_diff_ref, Fix_epu8::perms); + MYBENCH2("firstDiff_cmpstr_lmbd", 
first_diff_cmpstr, +Fix_epu8::perms); MYBENCH2("firstDiff_mask_lmbd", first_diff_mask, +Fix_epu8::perms); return 0; } // ################################################################################## int Bench_last_diff() { - MYBENCH2("lastDiff_ref_lmbd", HPCombi::last_diff_ref, sample.perms); - MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, -sample.perms); MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, -sample.perms); return 0; + MYBENCH2("lastDiff_ref_lmbd", last_diff_ref, Fix_epu8::perms); + MYBENCH2("lastDiff_cmpstr_lmbd", last_diff_cmpstr, +Fix_epu8::perms); MYBENCH2("lastDiff_mask_lmbd", last_diff_mask, +Fix_epu8::perms); return 0; } */ } // namespace HPCombi From 2d04dc47cc91bf67c65a937f2b971e93d9deb1ab Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 1 Nov 2023 15:32:25 +0000 Subject: [PATCH 077/113] Handling combinatorial explosion in BENCHMARK_... --- benchmark/bench_main.hpp | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/benchmark/bench_main.hpp b/benchmark/bench_main.hpp index 3d8cb90b..db4841cf 100644 --- a/benchmark/bench_main.hpp +++ b/benchmark/bench_main.hpp @@ -38,20 +38,34 @@ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ +#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ + for (auto &pair : sample) { \ + volatile auto val = pair.first.mem_fn(pair.second); \ + } \ + return true; \ + }; + +#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ + BENCHMARK(#free_fn) { \ + for (auto &pair : sample) { \ + volatile auto val = free_fn(pair.first, pair.second); \ + } \ + return true; \ + }; + +#define BENCHMARK_FREE_FN_PAIR_EQ(mem_fn, sample) \ BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - auto val = \ - std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \ - REQUIRE(val.first == val.second); \ + REQUIRE(free_fn(pair.first) == free_fn(pair.second)); \ } \ return true; \ }; -#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ - BENCHMARK(#free_fn) { \ +#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - volatile auto val = free_fn(pair.first, pair.second); \ + REQUIRE(pair.first.mem_fn() == pair.second.mem_fn()); \ } \ return true; \ }; From f14a337b0d33bfb53191e011fd2b33c24ae1b902 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Wed, 1 Nov 2023 15:12:23 +0000 Subject: [PATCH 078/113] Finalize bench_epu8 --- benchmark/bench_epu8.cpp | 102 ++++++++++++----------------------- benchmark/bench_fixture.hpp | 11 ++-- benchmark/bench_main.hpp | 29 +++++----- include/hpcombi/epu_impl.hpp | 5 +- 4 files changed, 57 insertions(+), 90 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index a577ac2b..91ae319c 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -152,8 +152,6 @@ TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); } -/* -int Bench_hsum() { TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", horiz_sum_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_sum_gen, Fix_epu8::perms); @@ -176,90 +174,59 @@ TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", partial_sums_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_sums_round, Fix_epu8::perms); } -/* -// -// -################################################################################## -int Bench_hmax() { +TEST_CASE_METHOD(Fix_epu8, "horiz max", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max4, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max3, -Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max4, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max3, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_max_ref, Fix_epu8::perms); // BENCHMARK_LAMBDA("| lambda", horiz_max_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_max4, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_max3, Fix_epu8::perms); - return 0; } -// -################################################################################## -int Bench_pmax() { + +TEST_CASE_METHOD(Fix_epu8, "partial max", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", partial_max_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_round, -Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", partial_max_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_max_round, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_ref, Fix_epu8::perms); - // BENCHMARK_LAMBDA("| lambda", partial_max_gen, -Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_round, -Fix_epu8::perms); return 0; + // BENCHMARK_LAMBDA("| lambda", partial_max_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", partial_max_round, Fix_epu8::perms); } -// -################################################################################## -int Bench_hmin() { - BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); +TEST_CASE_METHOD(Fix_epu8, "horiz min", "[Epu8][000]") { BENCHMARK_FREE_FN("| no 
lambda", horiz_min_ref, Fix_epu8::perms); - - BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min4, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min3, -Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min4, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min3, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_min_ref, Fix_epu8::perms); // BENCHMARK_LAMBDA("| lambda", horiz_min_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_min4, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_min3, Fix_epu8::perms); - return 0; } -// -################################################################################## -int Bench_pmin() { - BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); +TEST_CASE_METHOD(Fix_epu8, "partial min", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_min_round, -Fix_epu8::perms); - + // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_min_round, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", partial_min_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_ref, Fix_epu8::perms); - // BENCHMARK_LAMBDA("| lambda", partial_min_gen, -Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_round, -Fix_epu8::perms); return 0; + BENCHMARK_LAMBDA("| lambda", partial_min_round, Fix_epu8::perms); } -// -################################################################################## -int Bench_eval() { - BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); - +TEST_CASE_METHOD(Fix_epu8, "eval16", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", eval16_gen, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", eval16_popcount, Fix_epu8::perms); @@ -271,24 +238,21 @@ int Bench_eval() { BENCHMARK_LAMBDA("| lambda", eval16_popcount, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", eval16_arr, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", eval16_cycle, Fix_epu8::perms); - return 0; } -// -################################################################################## -int Bench_first_diff() { - MYBENCH2("firstDiff_ref_lmbd", first_diff_ref, Fix_epu8::perms); - MYBENCH2("firstDiff_cmpstr_lmbd", first_diff_cmpstr, -Fix_epu8::perms); MYBENCH2("firstDiff_mask_lmbd", first_diff_mask, -Fix_epu8::perms); return 0; +TEST_CASE_METHOD(Fix_epu8, "first diff", "[Epu8][000]") { + BENCHMARK_LAMBDA2("| lambda", first_diff_ref, Fix_epu8::pairs); +#ifdef SIMDE_X86_SSE4_2_NATIVE + BENCHMARK_LAMBDA2("| lambda", first_diff_cmpstr, Fix_epu8::pairs); +#endif + BENCHMARK_LAMBDA2("| lambda", first_diff_mask, Fix_epu8::pairs); } -// -################################################################################## -int Bench_last_diff() { - MYBENCH2("lastDiff_ref_lmbd", last_diff_ref, Fix_epu8::perms); - MYBENCH2("lastDiff_cmpstr_lmbd", last_diff_cmpstr, -Fix_epu8::perms); 
MYBENCH2("lastDiff_mask_lmbd", last_diff_mask, -Fix_epu8::perms); return 0; -} */ +TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][000]") { + BENCHMARK_LAMBDA2("| lambda", last_diff_ref, Fix_epu8::pairs); +#ifdef SIMDE_X86_SSE4_2_NATIVE + BENCHMARK_LAMBDA2("| lambda", last_diff_cmpstr, Fix_epu8::pairs); +#endif + BENCHMARK_LAMBDA2("| lambda", last_diff_mask, Fix_epu8::pairs); +} } // namespace HPCombi diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 8d2db5d0..79f642f5 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -60,19 +60,16 @@ std::vector rand_transf(int sz) { std::vector> make_pair_sample(size_t sz) { std::vector> res{}; for (size_t i = 0; i < sz; i++) { - res.push_back(std::make_pair(HPCombi::random_epu8(15), - HPCombi::random_epu8(15))); + res.emplace_back(HPCombi::random_epu8(15), HPCombi::random_epu8(15)); } return res; } class Fix_epu8 { public: - Fix_epu8() : vects(rand_epu8(size)), - transf(rand_transf(size)), - perms(rand_perms(size)), - pairs(make_pair_sample(size)) - {} + Fix_epu8() + : vects(rand_epu8(size)), transf(rand_transf(size)), + perms(rand_perms(size)), pairs(make_pair_sample(size)) {} ~Fix_epu8() {} const std::vector vects; const std::vector transf; diff --git a/benchmark/bench_main.hpp b/benchmark/bench_main.hpp index db4841cf..4cde8e0c 100644 --- a/benchmark/bench_main.hpp +++ b/benchmark/bench_main.hpp @@ -38,34 +38,39 @@ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ - for (auto &pair : sample) { \ - volatile auto val = pair.first.mem_fn(pair.second); \ +#define BENCHMARK_LAMBDA2(msg, free_fn, sample) \ + BENCHMARK(#free_fn " " msg) { \ + auto lambda__xxx = [](auto const &x, auto const &y) { \ + return free_fn(x, y); \ + }; \ + for (auto [x, y] : sample) { \ + volatile auto dummy = lambda__xxx(x, y); \ } \ return true; \ }; -#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ - BENCHMARK(#free_fn) { \ +#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - volatile auto val = free_fn(pair.first, pair.second); \ + volatile auto val = pair.first.mem_fn(pair.second); \ } \ return true; \ }; -#define BENCHMARK_FREE_FN_PAIR_EQ(mem_fn, sample) \ +#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - REQUIRE(free_fn(pair.first) == free_fn(pair.second)); \ + auto val = \ + std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \ + REQUIRE(val.first == val.second); \ } \ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ +#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ + BENCHMARK(#free_fn) { \ for (auto &pair : sample) { \ - REQUIRE(pair.first.mem_fn() == pair.second.mem_fn()); \ + volatile auto val = free_fn(pair.first, pair.second); \ } \ return true; \ }; diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index be83c3b4..70d5d140 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -215,10 +215,11 @@ inline epu8 sort8_perm(epu8 &a) noexcept { inline epu8 random_epu8(uint16_t bnd) { epu8 res; - std::random_device rd; - std::default_random_engine e1(rd()); + static std::random_device rd; + static std::default_random_engine e1(rd()); std::uniform_int_distribution uniform_dist(0, bnd - 1); + for (size_t i = 0; i < 16; i++) res[i] = uniform_dist(e1); return res; From 825841a9f91be06bd80fc55ee741737c8a5b108a Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Wed, 1 Nov 2023 16:07:13 +0000 Subject: [PATCH 079/113] Move old benchmark stuff into experiments --- benchmark/CMakeLists.txt | 2 +- {benchmark => experiments}/compilerinfo.hpp | 0 {benchmark => experiments}/cpu_x86.h | 0 {benchmark => experiments}/cpu_x86_Linux.ipp | 0 {benchmark => experiments}/cpu_x86_Windows.ipp | 0 {benchmark => experiments}/cpu_x86_impl.hpp | 0 {benchmark => experiments}/cycle.cpp | 0 {benchmark => experiments}/inverse.cpp | 0 {benchmark => experiments}/length.cpp | 0 {benchmark => experiments}/python/compare.py | 0 {benchmark => experiments}/python/compare_bench.py | 0 {benchmark => experiments}/python/gbench/__init__.py | 0 {benchmark => experiments}/python/gbench/report.py | 0 {benchmark => experiments}/python/gbench/util.py | 0 {benchmark => experiments}/sort.cpp | 0 {benchmark => experiments}/sum.cpp | 0 {benchmark => experiments}/testtools.hpp | 0 17 files changed, 1 insertion(+), 1 deletion(-) rename {benchmark => experiments}/compilerinfo.hpp (100%) rename {benchmark => experiments}/cpu_x86.h (100%) rename {benchmark => experiments}/cpu_x86_Linux.ipp (100%) rename {benchmark => experiments}/cpu_x86_Windows.ipp (100%) rename {benchmark => experiments}/cpu_x86_impl.hpp (100%) rename {benchmark => experiments}/cycle.cpp (100%) rename {benchmark => experiments}/inverse.cpp (100%) rename {benchmark => experiments}/length.cpp (100%) rename {benchmark => experiments}/python/compare.py (100%) rename {benchmark => experiments}/python/compare_bench.py (100%) rename {benchmark => experiments}/python/gbench/__init__.py (100%) rename {benchmark => experiments}/python/gbench/report.py (100%) rename {benchmark => experiments}/python/gbench/util.py (100%) rename {benchmark => experiments}/sort.cpp (100%) rename {benchmark => experiments}/sum.cpp (100%) rename {benchmark => experiments}/testtools.hpp (100%) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 66227de2..1f52983f 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -33,7 +33,7 @@ endif() message(STATUS "Building benchmark") set(benchmark_src - bench_epu8.cpp bench_perm16.cpp bench_bmat8.cpp sort.cpp inverse.cpp) + bench_epu8.cpp bench_perm16.cpp bench_bmat8.cpp) foreach(f ${benchmark_src}) get_filename_component(benchName ${f} NAME_WE) diff --git a/benchmark/compilerinfo.hpp b/experiments/compilerinfo.hpp similarity index 100% rename from benchmark/compilerinfo.hpp rename to experiments/compilerinfo.hpp diff --git a/benchmark/cpu_x86.h b/experiments/cpu_x86.h similarity index 100% rename from benchmark/cpu_x86.h rename to experiments/cpu_x86.h diff --git a/benchmark/cpu_x86_Linux.ipp b/experiments/cpu_x86_Linux.ipp similarity index 100% rename from benchmark/cpu_x86_Linux.ipp rename to experiments/cpu_x86_Linux.ipp diff --git a/benchmark/cpu_x86_Windows.ipp b/experiments/cpu_x86_Windows.ipp similarity index 100% rename from benchmark/cpu_x86_Windows.ipp rename to experiments/cpu_x86_Windows.ipp diff --git a/benchmark/cpu_x86_impl.hpp b/experiments/cpu_x86_impl.hpp similarity index 100% rename from benchmark/cpu_x86_impl.hpp rename to experiments/cpu_x86_impl.hpp diff --git a/benchmark/cycle.cpp b/experiments/cycle.cpp similarity index 100% rename from benchmark/cycle.cpp rename to experiments/cycle.cpp diff --git a/benchmark/inverse.cpp b/experiments/inverse.cpp similarity index 100% rename from benchmark/inverse.cpp rename to experiments/inverse.cpp diff --git a/benchmark/length.cpp b/experiments/length.cpp similarity index 100% rename from 
benchmark/length.cpp rename to experiments/length.cpp diff --git a/benchmark/python/compare.py b/experiments/python/compare.py similarity index 100% rename from benchmark/python/compare.py rename to experiments/python/compare.py diff --git a/benchmark/python/compare_bench.py b/experiments/python/compare_bench.py similarity index 100% rename from benchmark/python/compare_bench.py rename to experiments/python/compare_bench.py diff --git a/benchmark/python/gbench/__init__.py b/experiments/python/gbench/__init__.py similarity index 100% rename from benchmark/python/gbench/__init__.py rename to experiments/python/gbench/__init__.py diff --git a/benchmark/python/gbench/report.py b/experiments/python/gbench/report.py similarity index 100% rename from benchmark/python/gbench/report.py rename to experiments/python/gbench/report.py diff --git a/benchmark/python/gbench/util.py b/experiments/python/gbench/util.py similarity index 100% rename from benchmark/python/gbench/util.py rename to experiments/python/gbench/util.py diff --git a/benchmark/sort.cpp b/experiments/sort.cpp similarity index 100% rename from benchmark/sort.cpp rename to experiments/sort.cpp diff --git a/benchmark/sum.cpp b/experiments/sum.cpp similarity index 100% rename from benchmark/sum.cpp rename to experiments/sum.cpp diff --git a/benchmark/testtools.hpp b/experiments/testtools.hpp similarity index 100% rename from benchmark/testtools.hpp rename to experiments/testtools.hpp From 764482667064b35193e5573781e95ad400cab9dc Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 1 Nov 2023 16:25:18 +0000 Subject: [PATCH 080/113] Remove unescessary copy constr. and op= --- include/hpcombi/perm16.hpp | 8 +------- include/hpcombi/vect16.hpp | 8 -------- include/hpcombi/vect_generic.hpp | 9 +-------- 3 files changed, 2 insertions(+), 23 deletions(-) diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index 5b74b239..d8fc99e4 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -47,19 +47,13 @@ struct alignas(16) PTransf16 : public Vect16 { using array = typename decltype(Epu8)::array; PTransf16() = default; - constexpr PTransf16(const PTransf16 &v) = default; + constexpr PTransf16(const vect v) : Vect16(v) {} constexpr PTransf16(const epu8 x) : Vect16(x) {} PTransf16(std::vector dom, std::vector rng, size_t = 0 /* unused */); PTransf16(std::initializer_list il); - PTransf16 &operator=(const PTransf16 &) = default; - PTransf16 &operator=(const epu8 &vv) { - v = vv; - return *this; - } - //! 
Return whether \c *this is a well constructed object bool validate(size_t k = 16) const { return HPCombi::is_partial_transformation(v, k); diff --git a/include/hpcombi/vect16.hpp b/include/hpcombi/vect16.hpp index 49c3e760..5ea4702d 100644 --- a/include/hpcombi/vect16.hpp +++ b/include/hpcombi/vect16.hpp @@ -33,19 +33,11 @@ struct alignas(16) Vect16 { epu8 v; Vect16() = default; - constexpr Vect16(const Vect16 &v) = default; - constexpr Vect16(epu8 x) : v(x) {} Vect16(std::initializer_list il, uint8_t def = 0) : v(Epu8(il, def)) {} constexpr operator epu8() const { return v; } - Vect16 &operator=(const Vect16 &) = default; - Vect16 &operator=(const epu8 &vv) { - v = vv; - return *this; - } - array &as_array() { return HPCombi::as_array(v); } const array &as_array() const { return HPCombi::as_array(v); } diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp index 5e1e1558..2ba82a75 100644 --- a/include/hpcombi/vect_generic.hpp +++ b/include/hpcombi/vect_generic.hpp @@ -47,21 +47,14 @@ template struct VectGeneric { array v; VectGeneric() = default; - constexpr VectGeneric(const VectGeneric &v) = default; - VectGeneric(const std::array &_v) : v(_v) {} // NOLINT + VectGeneric(const array &_v) : v(_v) {} // NOLINT VectGeneric(std::initializer_list il, Expo def = 0) { HPCOMBI_ASSERT(il.size() <= Size); std::copy(il.begin(), il.end(), v.begin()); std::fill(v.begin() + il.size(), v.end(), def); } - VectGeneric &operator=(const VectGeneric &) = default; - VectGeneric &operator=(const array &vv) { - v = vv; - return *this; - } - Expo operator[](uint64_t i) const { return v[i]; } Expo &operator[](uint64_t i) { return v[i]; } From 5a282901f5e8a60acc2c3f63539da10ff04e0159 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 1 Nov 2023 16:41:33 +0000 Subject: [PATCH 081/113] Suppressed g++ wrong warnings -Wstringop-overflow --- include/hpcombi/bmat8_impl.hpp | 3 +++ tests/test_epu.cpp | 9 +++++++++ tests/test_perm16.cpp | 3 +++ 3 files changed, 15 insertions(+) diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index 394651b3..f92eb59b 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -449,7 +449,10 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { std::distance(prod_rows.begin(), std::find(prod_rows.begin(), prod_rows.end(), row)); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" std::iota(perm.begin() + nr_rows(), perm.end(), nr_rows()); +#pragma GCC diagnostic pop Perm16 res = Perm16::one(); for (size_t i = 0; i < 8; i++) diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 2079a60f..ac165b6d 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -257,6 +257,8 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { epu8 x = epu8id; CHECK(is_sorted(x)); auto &refx = as_array(x); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" while (std::next_permutation(refx.begin(), refx.begin() + 9)) { CHECK(!is_sorted(x)); } @@ -269,6 +271,7 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { while (std::next_permutation(refx.begin(), refx.begin() + 14)) { CHECK(!is_sorted(x)); } +#pragma GCC diagnostic pop } TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { @@ -283,7 +286,10 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { auto &refx = as_array(x); do { CHECK(is_sorted(sorted(x))); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" } while 
(std::next_permutation(refx.begin(), refx.begin() + 9)); +#pragma GCC diagnostic pop } TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { @@ -298,7 +304,10 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { auto &refx = as_array(x); do { CHECK(is_sorted(reverted(revsorted(x)))); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" } while (std::next_permutation(refx.begin(), refx.begin() + 9)); +#pragma GCC diagnostic pop } TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index fbb8b16d..8d13f868 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -27,7 +27,10 @@ std::vector all_perms(uint8_t sz) { epu8 x = HPCombi::epu8id; res.push_back(x); auto &refx = HPCombi::as_array(x); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" while (std::next_permutation(refx.begin(), refx.begin() + sz)) { +#pragma GCC diagnostic pop res.push_back(x); } return res; From 9f14f46631d1a989fdb030278f445c5a79c18dc9 Mon Sep 17 00:00:00 2001 From: reiniscirpons Date: Thu, 2 Nov 2023 01:32:52 +0000 Subject: [PATCH 082/113] Basic rewrite of benchmark plot generator --- etc/bench_plot.py | 109 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 91 insertions(+), 18 deletions(-) diff --git a/etc/bench_plot.py b/etc/bench_plot.py index f619bf42..d878a534 100755 --- a/etc/bench_plot.py +++ b/etc/bench_plot.py @@ -4,6 +4,7 @@ import re import statistics as stats import sys +from math import isqrt import matplotlib import numpy as np @@ -50,8 +51,9 @@ def time_unit(Y): Y = [y / 1000 for y in Y] return time_units[index], Y + def add_plot(xml_fnam, num_bars=4): - global color; + global color current_bar = 0 Y = [] Y_for_comparison = None @@ -74,7 +76,7 @@ def add_plot(xml_fnam, num_bars=4): Y, 1, align="center", - color=color[:len(Y)], + color=color[: len(Y)], ) total_cols += len(Y) + 1 plt.yscale("log", nonpositive="clip") @@ -85,7 +87,6 @@ def add_plot(xml_fnam, num_bars=4): # print(Y) # width = 1 - # plt.axhline( # stats.mean(Y), # color=color[current_bar], @@ -116,24 +117,96 @@ def add_plot(xml_fnam, num_bars=4): # plt.ylabel("Time (relative)") # plt.legend(loc="upper left") -def check_filename(xml_fnam): - if len(xml_fnam.split(".")) < 2: + +def determine_subplot_layout(nr_plots: int) -> tuple[int, int]: + """Determine the number of rows and columns from number of plots.""" + nr_plot_rows = isqrt(nr_plots) + nr_plot_cols = nr_plot_rows + if nr_plot_rows * nr_plot_cols < nr_plots: + nr_plot_cols += 1 + while nr_plot_rows * nr_plot_cols < nr_plots: + nr_plot_rows += 1 + return nr_plot_rows, nr_plot_cols + + +def process_result(result_soup) -> tuple[str, float]: + """Extract data from a single xml result entry. 
+ + Returns + ------- + result_name: str + The test case name + result_time: float + The test case time in nanoseconds + """ + result_name = result_soup["name"] + if "name" not in result_soup.attrs: + raise ValueError( + f"Malformed benchmark file, result record does not contain 'name': {result_soup}" + ) + result_mean_soup = result_soup.find("mean") + if result_mean_soup is None: + raise ValueError( + f"Malformed benchmark file, result record does not contain 'mean': {result_soup}" + ) + if "value" not in result_mean_soup.attrs: raise ValueError( - f"expected filename of form x.xml found {xml_fnam}" + f"Malformed benchmark file, result 'mean' record does not contain 'value': {result_mean_soup}" ) + result_time = float(result_mean_soup["value"]) / 1 # time in nanoseconds + return result_name, result_time + + +def make_ax(ax, test_case_soup): + if "name" not in test_case_soup.attrs: + raise ValueError( + f"Malformed benchmark file, test_case record does not contain 'name': {test_case_soup}" + ) + results = test_case_soup.find_all("BenchmarkResults") + result_names, result_times = zip(*map(process_result, reversed(results))) + bars = ax.barh( + result_names, + result_times, + align="center", + color=color[: len(result_times)], + ) + test_name = test_case_soup["name"] + ax.set_title(f'Benchmark "{test_name}" runtime') + ax.set_xlabel(f"ns") + return ax + + +def make_fig(benchmark_soup): + test_cases = benchmark_soup.find_all("TestCase") + nr_plots = len(test_cases) + nr_plot_rows, nr_plot_cols = determine_subplot_layout(nr_plots) + fig, axs = plt.subplots(nr_plot_rows, nr_plot_cols) + for test_case_soup, ax in zip(test_cases, axs.flat): + ax = make_ax(ax, test_case_soup) + return fig + + +def check_filename(xml_fnam): + if len(xml_fnam.split(".")) < 2: + raise ValueError(f"expected filename of form x.xml found {xml_fnam}") + +if __name__ == "__main__": + args = sys.argv[1:] -from sys import argv + for x in args: + check_filename(x) + # TODO more arg checks -args = sys.argv[1:] + for x in args: + with open(x, "r") as in_file: + xml_text = in_file.read() + soup = BeautifulSoup(xml_text, "xml") + fig = make_fig(soup) + plt.show() -for x in args: - check_filename(x) - # TODO more arg checks -for x in args: - add_plot(x) -xml_fnam = args[0] -png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" -print("Writing {} . . .".format(png_fnam)) -plt.savefig(png_fnam, format="png", dpi=300) -sys.exit(0) + xml_fnam = args[0] + png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" + print("Writing {} . . 
.".format(png_fnam)) + plt.savefig(png_fnam, format="png", dpi=300) + sys.exit(0) From eeb2b41af8f57672018d141ea01973b19b500e94 Mon Sep 17 00:00:00 2001 From: reiniscirpons Date: Thu, 2 Nov 2023 04:22:56 +0000 Subject: [PATCH 083/113] Improve plot generation --- etc/bench_plot.py | 122 ++++++---------------------------------------- 1 file changed, 16 insertions(+), 106 deletions(-) diff --git a/etc/bench_plot.py b/etc/bench_plot.py index d878a534..6e6b080b 100755 --- a/etc/bench_plot.py +++ b/etc/bench_plot.py @@ -1,13 +1,8 @@ #!/usr/bin/env python3 - -import os -import re -import statistics as stats import sys from math import isqrt import matplotlib -import numpy as np from bs4 import BeautifulSoup from matplotlib import pyplot as plt @@ -16,7 +11,7 @@ matplotlib.rcParams["mathtext.fontset"] = "stix" matplotlib.rcParams["font.family"] = "STIXGeneral" -color = [ +colors = [ (238 / 255, 20 / 255, 135 / 255), (0 / 255, 221 / 255, 164 / 255), (86 / 255, 151 / 255, 209 / 255), @@ -27,97 +22,6 @@ # Filenames should be: name.something.xml -> name.png -def normalize_xml(xml_fnam): - with open(xml_fnam, "r") as f: - xml = f.read() - xml = re.sub("<", "<", xml) - with open(xml_fnam, "w") as f: - f.write(xml) - - -def xml_stdout_get(xml, name): - try: - return xml.find("StdOut").find(name)["value"] - except (KeyError, TypeError, AttributeError): - return None - - -def time_unit(Y): - time_units = ("microseconds", "milliseconds", "seconds") - index = 0 - - while all(y > 1000 for y in Y) and index < len(time_units): - index += 1 - Y = [y / 1000 for y in Y] - return time_units[index], Y - - -def add_plot(xml_fnam, num_bars=4): - global color - current_bar = 0 - Y = [] - Y_for_comparison = None - labels = [] - - xml = BeautifulSoup(open(xml_fnam, "r"), "xml") - total_cols = 0 - xticks_label = [] - xticks_pos = [] - for x, test_case in enumerate(xml.find_all("TestCase")): - results = test_case.find_all("BenchmarkResults") - Y = ( - np.array([float(x.find("mean")["value"]) for x in results]) / 1 - ) # times in nanoseconds - X = np.arange(total_cols + 1, total_cols + len(Y) + 1, 1) - xticks_label.append(("\n" * (x % 2)) + test_case["name"]) - xticks_pos.append(total_cols + 1 + (len(Y) / 2) - 0.5) - bars = plt.bar( - X, - Y, - 1, - align="center", - color=color[: len(Y)], - ) - total_cols += len(Y) + 1 - plt.yscale("log", nonpositive="clip") - plt.ylabel("Time in ns") - plt.xticks(xticks_pos, xticks_label) - # plt.legend(loc="upper right") - - # print(Y) - # width = 1 - - # plt.axhline( - # stats.mean(Y), - # color=color[current_bar], - # linestyle="--", - # lw=1, - # xmin=0.01, - # xmax=0.99, - # ) - - # current_bar += 1 - # if current_bar == num_bars - 1: - # Ys = zip(*sorted(zip(*Ys))) - # for i, Y in enumerate(Ys): - # X = np.arange(i, num_bars * len(Y), num_bars) - # bars = plt.bar( - # X, - # Y, - # width, - # align="center", - # color=color[i], - # label=labels[i], - # ) - # plt.xticks( - # np.arange(1, num_bars * (len(X) + 1), num_bars * 20), - # np.arange(0, len(X) + num_bars - 1, 20), - # ) - # plt.xlabel("Test case") - # plt.ylabel("Time (relative)") - # plt.legend(loc="upper left") - - def determine_subplot_layout(nr_plots: int) -> tuple[int, int]: """Determine the number of rows and columns from number of plots.""" nr_plot_rows = isqrt(nr_plots) @@ -168,21 +72,28 @@ def make_ax(ax, test_case_soup): result_names, result_times, align="center", - color=color[: len(result_times)], + color=[colors[i % len(colors)] for i in range(len(result_names))], ) test_name = test_case_soup["name"] 
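+    # Title each subplot after its Catch2 TEST_CASE; the x axis shows the
+    # mean time per benchmark in nanoseconds (see process_result above).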
ax.set_title(f'Benchmark "{test_name}" runtime') - ax.set_xlabel(f"ns") + ax.set_xlabel(f"time, ns") return ax -def make_fig(benchmark_soup): +def make_fig(benchmark_soup, plot_width_inches=7.5, plot_height_inches=5): test_cases = benchmark_soup.find_all("TestCase") nr_plots = len(test_cases) nr_plot_rows, nr_plot_cols = determine_subplot_layout(nr_plots) - fig, axs = plt.subplots(nr_plot_rows, nr_plot_cols) + fig, axs = plt.subplots( + nr_plot_rows, + nr_plot_cols, + figsize=(plot_width_inches * nr_plot_cols, plot_height_inches * nr_plot_rows), + ) for test_case_soup, ax in zip(test_cases, axs.flat): ax = make_ax(ax, test_case_soup) + for ax in axs.flat[nr_plots:]: + fig.delaxes(ax) + fig.tight_layout() return fig @@ -203,10 +114,9 @@ def check_filename(xml_fnam): xml_text = in_file.read() soup = BeautifulSoup(xml_text, "xml") fig = make_fig(soup) - plt.show() - xml_fnam = args[0] - png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" - print("Writing {} . . .".format(png_fnam)) - plt.savefig(png_fnam, format="png", dpi=300) + xml_fnam = x + png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" + print("Writing {} . . .".format(png_fnam)) + fig.savefig(png_fnam, format="png", dpi=300) sys.exit(0) From a58d6287959c8e01f57043be60fa56af66caf90e Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Thu, 2 Nov 2023 07:26:44 +0000 Subject: [PATCH 084/113] New implem is_permutation_eval --- benchmark/bench_epu8.cpp | 11 +++++++++++ include/hpcombi/epu.hpp | 4 ++++ include/hpcombi/epu_impl.hpp | 4 ++++ tests/test_epu.cpp | 12 ++++++++++-- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 91ae319c..1fc50b92 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -255,4 +255,15 @@ TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][000]") { #endif BENCHMARK_LAMBDA2("| lambda", last_diff_mask, Fix_epu8::pairs); } + +TEST_CASE_METHOD(Fix_epu8, "is_permutation", "[Epu8][000]") { + BENCHMARK_FREE_FN("| no lambda", is_permutation_sort, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", is_permutation_eval, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", is_permutation, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", is_permutation_sort, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", is_permutation_eval, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", is_permutation, Fix_epu8::perms); +} + } // namespace HPCombi diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 5f1fbe88..1c6e393a 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -689,6 +689,10 @@ inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept; @par Algorithm: sort the vector and compare to identity */ inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept; +/** @copydoc common_is_permutation + @par Algorithm: uses evaluation + */ +inline bool is_permutation_eval(epu8 v, const size_t k = 16) noexcept; /** @copydoc common_is_permutation @par Algorithm: architecture dependent */ diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index 70d5d140..dcf436f9 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -489,6 +489,10 @@ inline bool is_permutation_sort(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); return equal(sorted(v), epu8id) && (diff == 16 || diff < k); } +inline bool is_permutation_eval(epu8 v, const size_t k) noexcept { + uint64_t diff = last_diff(v, epu8id, 16); + return equal(eval16(v), Epu8({}, 
1)) && (diff == 16 || diff < k); +} inline bool is_permutation(epu8 v, const size_t k) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index ac165b6d..f2181625 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -695,7 +695,7 @@ TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { - CHECK(is_permutation(x, i) == is_permutation_cmpestri(x, i)); + CHECK(is_permutation_cmpestri(x, i) == is_permutation(x, i)); } } } @@ -704,7 +704,15 @@ TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][080]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { - CHECK(is_permutation(x, i) == is_permutation_sort(x, i)); + CHECK(is_permutation_sort(x, i) == is_permutation(x, i)); + } + } +} + +TEST_CASE_METHOD(Fix, "is_permutation_eval", "[Epu8][080]") { + for (auto x : v) { + for (size_t i = 0; i < 16; i++) { + CHECK(is_permutation_eval(x, i) == is_permutation(x, i)); } } } From 2a6e3dc52ab599807a1efb4d99b8fd308fa3edce Mon Sep 17 00:00:00 2001 From: reiniscirpons Date: Thu, 2 Nov 2023 17:44:38 +0000 Subject: [PATCH 085/113] Add features for displaying speedup and naming plots to benchmark plotter --- etc/bench_plot.py | 92 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 9 deletions(-) diff --git a/etc/bench_plot.py b/etc/bench_plot.py index 6e6b080b..77ae1a62 100755 --- a/etc/bench_plot.py +++ b/etc/bench_plot.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import sys +import argparse from math import isqrt import matplotlib @@ -61,7 +62,7 @@ def process_result(result_soup) -> tuple[str, float]: return result_name, result_time -def make_ax(ax, test_case_soup): +def make_ax(ax, test_case_soup, plot_speedup_type: str): if "name" not in test_case_soup.attrs: raise ValueError( f"Malformed benchmark file, test_case record does not contain 'name': {test_case_soup}" @@ -74,13 +75,33 @@ def make_ax(ax, test_case_soup): align="center", color=[colors[i % len(colors)] for i in range(len(result_names))], ) + if plot_speedup_type == "slowest": + reference_time = max(result_times) + else: + # This is the first element, since we reverse due to horizontal plot + reference_time = result_times[-1] + # TODO: fix type issue + result_speedups = [reference_time / result_time for result_time in result_times] + ax.bar_label( + bars, + list(map("{:.1f}".format, result_speedups)), + padding=5, + fontsize="6", + ) test_name = test_case_soup["name"] ax.set_title(f'Benchmark "{test_name}" runtime') ax.set_xlabel(f"time, ns") return ax -def make_fig(benchmark_soup, plot_width_inches=7.5, plot_height_inches=5): +def make_fig( + benchmark_soup, + plot_width_inches: float = 7.5, + plot_height_inches: float = 5, + plot_title: None | str = None, + plot_speedup_type: str = "slowest", +): + assert plot_speedup_type is not None test_cases = benchmark_soup.find_all("TestCase") nr_plots = len(test_cases) nr_plot_rows, nr_plot_cols = determine_subplot_layout(nr_plots) @@ -90,10 +111,12 @@ def make_fig(benchmark_soup, plot_width_inches=7.5, plot_height_inches=5): figsize=(plot_width_inches * nr_plot_cols, plot_height_inches * nr_plot_rows), ) for test_case_soup, ax in zip(test_cases, axs.flat): - ax = make_ax(ax, test_case_soup) + ax = make_ax(ax, test_case_soup, plot_speedup_type) for ax in axs.flat[nr_plots:]: fig.delaxes(ax) - fig.tight_layout() + + if plot_title is not None: 
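+        # Note: args.title comes from the argparse namespace built in
+        # __main__ and is used here instead of the plot_title parameter;
+        # both hold the same value when make_fig is called from __main__.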
+ fig.suptitle(args.title, fontsize=16, weight="bold") return fig @@ -103,20 +126,71 @@ def check_filename(xml_fnam): if __name__ == "__main__": - args = sys.argv[1:] + parser = argparse.ArgumentParser( + description="A tool for plotting HPCombi benchmark data", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "xml_filenames", + metavar="file", + type=str, + nargs="+", + help="The names of xml file(s) to be processed. If multiple files are provided, then plots each (with same title)", + ) + parser.add_argument( + "-o", + "--output", + type=str, + help="Output file name. If not provided then a custom naming method is used.", + ) + parser.add_argument( + "--title", + type=str, + help="The title of the plot.", + ) + parser.add_argument( + "--speedup", + type=str, + choices=["slowest", "first"], + default="slowest", + help="Speedup display type. 'slowest' compares to slowest benchmark. 'first' compares to first benchmark.", + ) + parser.add_argument( + "--width", + type=float, + default=7.5, + help="Single subplot width in inches.", + ) + parser.add_argument( + "--height", + type=float, + default=5.0, + help="Single subplot height in inches.", + ) + args = parser.parse_args() + print(args.title) - for x in args: + for x in args.xml_filenames: check_filename(x) # TODO more arg checks - for x in args: + for x in args.xml_filenames: with open(x, "r") as in_file: xml_text = in_file.read() soup = BeautifulSoup(xml_text, "xml") - fig = make_fig(soup) + fig = make_fig( + soup, + plot_width_inches=args.width, + plot_height_inches=args.height, + plot_title=args.title, + plot_speedup_type=args.speedup, + ) + fig.tight_layout() xml_fnam = x - png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" + png_fnam = args.output + if png_fnam is None: + png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" print("Writing {} . . 
.".format(png_fnam)) fig.savefig(png_fnam, format="png", dpi=300) sys.exit(0) From ad56ebb863e14833af9607b3bf5846b0c731bf49 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 15:14:19 +0000 Subject: [PATCH 086/113] Merge + IsSorted matcher --- include/hpcombi/epu.hpp | 14 +++++++++++- include/hpcombi/epu_impl.hpp | 24 ++++++++++++++++++++ tests/test_epu.cpp | 43 ++++++++++++++++++++++++++++-------- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 1c6e393a..e309a78b 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -228,10 +228,22 @@ inline epu8 revsorted8(epu8 a) noexcept; inline epu8 sort_perm(epu8 &a) noexcept; /** Sort \c this and return the sorting permutation * @details - * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 + * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ inline epu8 sort8_perm(epu8 &a) noexcept; +/** @class common_merge + * @brief Merge two sorted epu8 + * @details + * @param a, b: two #HPCombi::epu8 + * @returns void + * after executing merge, \c a and \c are sorted \c a[15] <= \c b[0] + */ +/** @copydoc common_merge + * @par Algorithm: bitonic merge sorting network + */ +inline void merge(epu8 &a, epu8 &b) noexcept; + /** @class common_permutation_of * @brief Find if a vector is a permutation of one other * @details diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index dcf436f9..8406e3a1 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -213,6 +213,30 @@ inline epu8 sort8_perm(epu8 &a) noexcept { return network_sort_perm(a, sorting_rounds8); } +constexpr std::array merge_rounds + // clang-format off + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +{{ + epu8 { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7}, + epu8 { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}, + epu8 { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13}, + epu8 { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}, +}}; +// clang-format on +inline void merge_rev(epu8 &a, epu8 &b) noexcept { + epu8 mn = min(a, b); + b = max(a, b); + a = mn; + a = network_sort(a, merge_rounds); + b = network_sort(b, merge_rounds); +} +inline void merge(epu8 &a, epu8 &b) noexcept { + a = permuted(a, epu8rev); + merge_rev(a, b); +} +// TODO : AVX2 version. 
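+//
+// Usage sketch (illustration only, mirroring the doc in epu.hpp and the
+// Epu8::merge test): for two arbitrary epu8 x and y,
+//     epu8 a = sorted(x), b = sorted(y);
+//     merge(a, b);
+// leaves both a and b sorted with a[15] <= b[0], i.e. the 32 entries end up
+// globally sorted across the pair (a, b).
+//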
+// TODO : compute merge_rounds on the fly instead of loading those from memory + inline epu8 random_epu8(uint16_t bnd) { epu8 res; diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index f2181625..7b07f441 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -18,11 +18,15 @@ #include "test_main.hpp" #include +#include #include "hpcombi/epu.hpp" namespace HPCombi { +auto IsSorted = + Catch::Matchers::Predicate(is_sorted, "is_sorted"); + struct Fix { Fix() : zero(Epu8({}, 0)), P01(Epu8({0, 1}, 0)), P10(Epu8({1, 0}, 0)), @@ -279,13 +283,13 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8id)); for (auto &x : v) { - CHECK(is_sorted(sorted(x))); + CHECK_THAT(sorted(x), IsSorted); } epu8 x = epu8id; - CHECK(is_sorted(x)); + CHECK_THAT(sorted(x), IsSorted); auto &refx = as_array(x); do { - CHECK(is_sorted(sorted(x))); + CHECK_THAT(sorted(x), IsSorted); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" } while (std::next_permutation(refx.begin(), refx.begin() + 9)); @@ -297,13 +301,13 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8rev)); for (auto &x : v) { - CHECK(is_sorted(reverted(revsorted(x)))); + CHECK_THAT(reverted(revsorted(x)), IsSorted); } epu8 x = epu8id; - CHECK(is_sorted(x)); + CHECK_THAT(x, IsSorted); auto &refx = as_array(x); do { - CHECK(is_sorted(reverted(revsorted(x)))); + CHECK_THAT(reverted(revsorted(x)), IsSorted); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" } while (std::next_permutation(refx.begin(), refx.begin() + 9)); @@ -320,7 +324,7 @@ TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); - CHECK(is_sorted(xsort)); + CHECK_THAT(xsort, IsSorted); CHECK(is_permutation(psort)); CHECK_THAT(permuted(x, psort), Equals(xsort)); } @@ -336,8 +340,8 @@ TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { for (auto x : v) { epu8 xsort = x; epu8 psort = sort_perm(xsort); - CHECK(is_sorted(xsort | Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); - CHECK(is_sorted(xsort & Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF))); + CHECK_THAT(xsort | Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF), IsSorted); + CHECK_THAT(xsort & Epu8({0, 0, 0, 0, 0, 0, 0, 0}, 0xFF), IsSorted); CHECK(is_permutation(psort)); CHECK_THAT(permuted(x, psort), Equals(xsort)); } @@ -370,6 +374,27 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { 14, 15})); } +TEST_CASE_METHOD(Fix, "Epu8::merge", "[Epu8][022]") { + std::vector> sample_pairs {{ + { epu8 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, + epu8 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} + } + }}; + for (auto x : v) + for (auto y : v) + sample_pairs.emplace_back(x, y); + for (auto p : sample_pairs) { + epu8 x = p.first; + epu8 y = p.second; + x = sorted(x); + y = sorted(y); + merge(x, y); + CHECK_THAT(x, IsSorted); + CHECK_THAT(y, IsSorted); + CHECK(x[15] <= y[0]); + } +} + TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { CHECK_THAT(remove_dups(P1), Equals(P10)); CHECK_THAT(remove_dups(P11), Equals(P10)); From 1d7920a572f8f4227fc339e8f2cf13b4b52e7663 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 15:44:35 +0000 Subject: [PATCH 087/113] Remumbered the tests --- benchmark/bench_bmat8.cpp | 6 +- benchmark/bench_epu8.cpp | 20 +++---- tests/test_epu.cpp | 116 +++++++++++++++++++------------------- 
tests/test_perm16.cpp | 84 +++++++++++++-------------- 4 files changed, 113 insertions(+), 113 deletions(-) diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index dd7efccc..49f4a483 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -65,7 +65,7 @@ TEST_CASE_METHOD(Fix_BMat8, "Row space size", "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size, sample); } -TEST_CASE_METHOD(Fix_BMat8, "Transpose", "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose", "[BMat8][001]") { BENCHMARK_MEM_FN(transpose, sample); BENCHMARK_MEM_FN(transpose_mask, sample); BENCHMARK_MEM_FN(transpose_maskd, sample); @@ -84,13 +84,13 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs", "[BMat8][002]") { }; } -TEST_CASE_METHOD(Fix_BMat8, "Row spaces inclusion", "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Row spaces inclusion", "[BMat8][003]") { BENCHMARK_MEM_FN_PAIR(row_space_included_ref, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included_bitset, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included, pair_sample); } -TEST_CASE_METHOD(Fix_BMat8, "Pair row space inclusion", "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Pair row space inclusion", "[BMat8][004]") { BENCHMARK("rotating pairs implementation") { for (auto &pair : pair_sample) { auto res = BMat8::row_space_included2(pair.first, pair.second, diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 1fc50b92..d6c96858 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -152,7 +152,7 @@ TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); } -TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][002]") { BENCHMARK_FREE_FN("| no lambda", horiz_sum_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_sum_gen, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_sum4, Fix_epu8::perms); @@ -164,7 +164,7 @@ TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda ", horiz_sum3, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][003]") { BENCHMARK_FREE_FN("| no lambda", partial_sums_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_sums_gen, Fix_epu8::perms); @@ -175,7 +175,7 @@ TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", partial_sums_round, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "horiz max", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "horiz max", "[Epu8][004]") { BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); @@ -191,7 +191,7 @@ TEST_CASE_METHOD(Fix_epu8, "horiz max", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", horiz_max3, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "partial max", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "partial max", "[Epu8][005]") { BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); @@ -205,7 +205,7 @@ TEST_CASE_METHOD(Fix_epu8, "partial max", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", partial_max_round, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "horiz min", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "horiz min", "[Epu8][006]") { BENCHMARK_FREE_FN("| no lambda", 
horiz_min_ref, Fix_epu8::perms); // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min4, Fix_epu8::perms); @@ -217,7 +217,7 @@ TEST_CASE_METHOD(Fix_epu8, "horiz min", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", horiz_min3, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "partial min", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "partial min", "[Epu8][007]") { BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_min_round, Fix_epu8::perms); @@ -226,7 +226,7 @@ TEST_CASE_METHOD(Fix_epu8, "partial min", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", partial_min_round, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "eval16", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "eval16", "[Epu8][008]") { BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", eval16_gen, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", eval16_popcount, Fix_epu8::perms); @@ -240,7 +240,7 @@ TEST_CASE_METHOD(Fix_epu8, "eval16", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", eval16_cycle, Fix_epu8::perms); } -TEST_CASE_METHOD(Fix_epu8, "first diff", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "first diff", "[Epu8][009]") { BENCHMARK_LAMBDA2("| lambda", first_diff_ref, Fix_epu8::pairs); #ifdef SIMDE_X86_SSE4_2_NATIVE BENCHMARK_LAMBDA2("| lambda", first_diff_cmpstr, Fix_epu8::pairs); @@ -248,7 +248,7 @@ TEST_CASE_METHOD(Fix_epu8, "first diff", "[Epu8][000]") { BENCHMARK_LAMBDA2("| lambda", first_diff_mask, Fix_epu8::pairs); } -TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][010]") { BENCHMARK_LAMBDA2("| lambda", last_diff_ref, Fix_epu8::pairs); #ifdef SIMDE_X86_SSE4_2_NATIVE BENCHMARK_LAMBDA2("| lambda", last_diff_cmpstr, Fix_epu8::pairs); @@ -256,7 +256,7 @@ TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][000]") { BENCHMARK_LAMBDA2("| lambda", last_diff_mask, Fix_epu8::pairs); } -TEST_CASE_METHOD(Fix_epu8, "is_permutation", "[Epu8][000]") { +TEST_CASE_METHOD(Fix_epu8, "is_permutation", "[Epu8][011]") { BENCHMARK_FREE_FN("| no lambda", is_permutation_sort, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", is_permutation_eval, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", is_permutation, Fix_epu8::perms); diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 7b07f441..0deb8899 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -204,9 +204,9 @@ TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}), Equals(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); } -TEST_AGREES2_FUN_EPU8(Fix, permuted, permuted_ref, v, "[Epu8][011]") +TEST_AGREES2_FUN_EPU8(Fix, permuted, permuted_ref, v, "[Epu8][012]") -TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { +TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][013]") { CHECK_THAT(shifted_left(P01), Equals(P10)); CHECK_THAT(shifted_left(P112), Equals(epu8{1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0})); @@ -214,21 +214,21 @@ TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { 12, 13, 14, 15, 0})); } -TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][013]") { +TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][014]") { CHECK_THAT(shifted_right(P10), Equals(P01)); CHECK_THAT(shifted_right(P112), Equals(Epu8({0, 1, 1}, 2))); CHECK_THAT(shifted_right(Pv), Equals(epu8{0, 5, 5, 2, 5, 1, 6, 12, 
4, 0, 3, 2, 11, 12, 13, 14})); } -TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][014]") { +TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][015]") { CHECK_THAT(reverted(epu8id), Equals(epu8rev)); for (auto x : v) { CHECK_THAT(x, Equals(reverted(reverted(x)))); } } -TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][015]") { +TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][016]") { epu8 x = Epu8({4, 2, 5, 1, 2, 7, 7, 3, 4, 2}, 1); auto &refx = as_array(x); refx[2] = 42; @@ -238,14 +238,14 @@ TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][015]") { CHECK(av == as_array(Pv)); } -TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][016]") { +TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][017]") { for (auto x : v) { CHECK_THAT(x, Equals(from_array(as_array(x)))); } CHECK_THAT(Pv, Equals(from_array(av))); } -TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { +TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { CHECK(is_sorted(epu8id)); CHECK( is_sorted(epu8{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); @@ -278,7 +278,7 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][017]") { #pragma GCC diagnostic pop } -TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { +TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { CHECK_THAT( sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8id)); @@ -296,7 +296,7 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][018]") { #pragma GCC diagnostic pop } -TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { +TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][020]") { CHECK_THAT( revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), Equals(epu8rev)); @@ -314,7 +314,7 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][019]") { #pragma GCC diagnostic pop } -TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { +TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][021]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; CHECK_THAT(sort_perm(ve), Equals(epu8{9, 15, 1, 5, 6, 10, 12, 3, 0, 8, 11, 2, 13, 7, 4, 14})); @@ -330,7 +330,7 @@ TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][020]") { } } -TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { +TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][022]") { epu8 ve{2, 1, 3, 2, 4, 1, 1, 4, 2, 0, 1, 2, 1, 3, 4, 0}; CHECK_THAT(sort8_perm(ve), Equals(epu8{1, 6, 5, 0, 3, 2, 4, 7, 9, 15, 10, 12, 8, 11, 13, 14})); @@ -347,7 +347,7 @@ TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][021]") { } } -TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { +TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][023]") { CHECK_THAT(permutation_of(epu8id, epu8id), Equals(epu8id)); CHECK_THAT(permutation_of(Pa, Pa), Equals(epu8id)); CHECK_THAT(permutation_of(epu8rev, epu8id), Equals(epu8rev)); @@ -360,7 +360,7 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][022]") { Equals(epu8{FF, FF, FF, FF, 4, 5, FF, 7, 8, 9, FF, 11, FF, 13, 14, 15})); } -TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { +TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][024]") { CHECK_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); CHECK_THAT(permutation_of_ref(Pa, Pa), Equals(epu8id)); CHECK_THAT(permutation_of_ref(epu8rev, epu8id), Equals(epu8rev)); @@ -374,7 +374,7 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][022]") { 14, 15})); } -TEST_CASE_METHOD(Fix, "Epu8::merge", "[Epu8][022]") { +TEST_CASE_METHOD(Fix, "Epu8::merge", "[Epu8][025]") { std::vector> sample_pairs {{ { epu8 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1}, epu8 {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1} @@ -395,7 +395,7 @@ TEST_CASE_METHOD(Fix, "Epu8::merge", "[Epu8][022]") { } } -TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { +TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][026]") { CHECK_THAT(remove_dups(P1), Equals(P10)); CHECK_THAT(remove_dups(P11), Equals(P10)); CHECK_THAT(remove_dups(sorted(P10)), @@ -419,7 +419,7 @@ TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][023]") { } } -TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { +TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][027]") { CHECK(horiz_sum_ref(zero) == 0); CHECK(horiz_sum_ref(P01) == 1); CHECK(horiz_sum_ref(epu8id) == 120); @@ -436,12 +436,12 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][024]") { CHECK(horiz_sum_ref(Pc) == 203); } -TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][025]") -TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum4, v, "[Epu8][026]") -TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum3, v, "[Epu8][027]") -TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum, v, "[Epu8][028]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum_gen, v, "[Epu8][028]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum4, v, "[Epu8][029]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum3, v, "[Epu8][030]") +TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum, v, "[Epu8][031]") -TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { +TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][032]") { CHECK_THAT(partial_sums_ref(zero), Equals(zero)); CHECK_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); CHECK_THAT(partial_sums_ref(epu8id), @@ -476,12 +476,12 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } -TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][033]") TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_round, v, - "[Epu8][030]") -TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]") + "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][035]") -TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { +TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][036]") { CHECK(horiz_max_ref(zero) == 0); CHECK(horiz_max_ref(P01) == 1); CHECK(horiz_max_ref(epu8id) == 15); @@ -498,12 +498,12 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { CHECK(horiz_max_ref(Pc) == 43); } -TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max_gen, v, "[Epu8][034]") -TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max4, v, "[Epu8][035]") -TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max3, v, "[Epu8][036]") -TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max, v, "[Epu8][037]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max_gen, v, "[Epu8][037]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max4, v, "[Epu8][038]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max3, v, "[Epu8][039]") +TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max, v, "[Epu8][040]") -TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { +TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][041]") { CHECK_THAT(partial_max_ref(zero), Equals(zero)); CHECK_THAT(partial_max_ref(P01), Equals(Epu8({0}, 1))); CHECK_THAT(partial_max_ref(epu8id), Equals(epu8id)); @@ -520,11 +520,11 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); 
CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } -TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]") -TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]") -TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][042]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_round, v, "[Epu8][043]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][044]") -TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { +TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][045]") { CHECK(horiz_min_ref(zero) == 0); CHECK(horiz_min_ref(P01) == 0); CHECK(horiz_min_ref(epu8id) == 0); @@ -541,12 +541,12 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { CHECK(horiz_min_ref(Pc) == 5); } -TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min_gen, v, "[Epu8][034]") -TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min4, v, "[Epu8][035]") -TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min3, v, "[Epu8][036]") -TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min, v, "[Epu8][037]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min_gen, v, "[Epu8][046]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min4, v, "[Epu8][047]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min3, v, "[Epu8][048]") +TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min, v, "[Epu8][049]") -TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { +TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][050]") { CHECK_THAT(partial_min_ref(zero), Equals(zero)); CHECK_THAT(partial_min_ref(P01), Equals(zero)); CHECK_THAT(partial_min_ref(epu8id), Equals(zero)); @@ -564,11 +564,11 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } -TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]") -TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]") -TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][051]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_round, v, "[Epu8][052]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][053]") -TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { +TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][054]") { CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); CHECK_THAT(eval16_ref(P01), Equals(Epu8({15, 1}, 0))); CHECK_THAT(eval16_ref(epu8id), Equals(Epu8({}, 1))); @@ -587,18 +587,18 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { CHECK_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } -TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_cycle, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_popcount, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_arr, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_gen, v, "[Epu8][034]") -TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16, v, "[Epu8][034]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_cycle, v, "[Epu8][055]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_popcount, v, "[Epu8][056]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_arr, v, "[Epu8][057]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_gen, v, "[Epu8][058]") +TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16, v, "[Epu8][059]") -TEST_CASE("Epu8::popcount4", "[Epu8][048]") { 
+TEST_CASE("Epu8::popcount4", "[Epu8][060]") { CHECK_THAT(popcount4, Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); } -TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { +TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][061]") { CHECK_THAT(popcount16(Pv), Equals(epu8{2, 2, 1, 2, 1, 2, 2, 1, 0, 2, 1, 3, 2, 3, 3, 4})); CHECK_THAT(popcount16(RP), @@ -611,7 +611,7 @@ TEST_CASE_METHOD(Fix, "Epu8::popcount16", "[Epu8][049]") { Equals(Epu8({0, 1, 2, 8}, 4))); } -TEST_CASE("random_epu8", "[Epu8][050]") { +TEST_CASE("random_epu8", "[Epu8][062]") { for (int bnd : {1, 10, 100, 255, 256}) { for (int i = 0; i < 10; i++) { epu8 r = random_epu8(bnd); @@ -622,7 +622,7 @@ TEST_CASE("random_epu8", "[Epu8][050]") { } } -TEST_CASE_METHOD(Fix, "is_partial_transformation", "[Epu8][051]") { +TEST_CASE_METHOD(Fix, "is_partial_transformation", "[Epu8][063]") { CHECK(is_partial_transformation(zero)); CHECK(is_partial_transformation(P01)); CHECK(is_partial_transformation(P10)); @@ -645,7 +645,7 @@ TEST_CASE_METHOD(Fix, "is_partial_transformation", "[Epu8][051]") { CHECK(!is_partial_transformation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); } -TEST_CASE_METHOD(Fix, "is_transformation", "[Epu8][052]") { +TEST_CASE_METHOD(Fix, "is_transformation", "[Epu8][064]") { CHECK(is_transformation(zero)); CHECK(is_transformation(P01)); CHECK(is_transformation(P10)); @@ -666,7 +666,7 @@ TEST_CASE_METHOD(Fix, "is_transformation", "[Epu8][052]") { CHECK(!is_transformation(RP, 15)); } -TEST_CASE_METHOD(Fix, "is_partial_permutation", "[Epu8][053]") { +TEST_CASE_METHOD(Fix, "is_partial_permutation", "[Epu8][065]") { CHECK(!is_partial_permutation(zero)); CHECK(!is_partial_permutation(P01)); CHECK(!is_partial_permutation(P10)); @@ -695,7 +695,7 @@ TEST_CASE_METHOD(Fix, "is_partial_permutation", "[Epu8][053]") { CHECK(!is_partial_permutation(Epu8({1, 2, 1, 0xFF, 0, 16, 0xFF, 2}, 0))); } -TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { +TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][066]") { CHECK(!is_permutation(zero)); CHECK(!is_permutation(P01)); CHECK(!is_permutation(P10)); @@ -717,7 +717,7 @@ TEST_CASE_METHOD(Fix, "is_permutation", "[Epu8][054]") { } #ifdef SIMDE_X86_SSE4_2_NATIVE -TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { +TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][067]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { CHECK(is_permutation_cmpestri(x, i) == is_permutation(x, i)); @@ -726,7 +726,7 @@ TEST_CASE_METHOD(Fix, "is_permutation_cmpestri", "[Epu8][070]") { } #endif -TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][080]") { +TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][068]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { CHECK(is_permutation_sort(x, i) == is_permutation(x, i)); @@ -734,7 +734,7 @@ TEST_CASE_METHOD(Fix, "is_permutation_sort", "[Epu8][080]") { } } -TEST_CASE_METHOD(Fix, "is_permutation_eval", "[Epu8][080]") { +TEST_CASE_METHOD(Fix, "is_permutation_eval", "[Epu8][069]") { for (auto x : v) { for (size_t i = 0; i < 16; i++) { CHECK(is_permutation_eval(x, i) == is_permutation(x, i)); diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 8d13f868..ed7c54f1 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -122,7 +122,7 @@ TEST_CASE("PTransf16::image_mask", "[PTransf16][002]") { {0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); } -TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { +TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][003]") { 
CHECK_THAT(PTransf16({}).image_mask_ref(), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).image_mask_ref(false), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).image_mask_ref(true), Equals(Epu8(0))); @@ -152,7 +152,7 @@ TEST_CASE("PTransf16::image_mask_ref_ref", "[PTransf16][002]") { {0, FF, 0, FF, FF, 0, FF, FF, FF, FF, FF, FF, FF, FF, FF, 0}, 0))); } -TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { +TEST_CASE("PTransf16::left_one", "[PTransf16][004]") { CHECK(PTransf16({}).left_one() == PTransf16::one()); CHECK(PTransf16({4, 4, 4, 4}).left_one() == PTransf16({FF, FF, FF, FF})); CHECK(PTransf16(Epu8(1)).left_one() == PTransf16(Epu8({FF, 1}, FF))); @@ -171,7 +171,7 @@ TEST_CASE("PTransf16::left_one", "[PTransf16][003]") { {0, FF, 2, FF, FF, 5, FF, FF, FF, FF, FF, FF, FF, FF, FF, 15})); } -TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { +TEST_CASE("PTransf16::domain_mask", "[PTransf16][005]") { CHECK_THAT(PTransf16({}).domain_mask(), Equals(Epu8(FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).domain_mask(), Equals(Epu8(FF))); CHECK_THAT(PTransf16({4, 4, 4, 4}).domain_mask(false), Equals(Epu8(FF))); @@ -193,7 +193,7 @@ TEST_CASE("PTransf16::domain_mask", "[PTransf16][004]") { Equals(Epu8({0, 0, FF, 0, 0, FF, 0, FF, 0}, FF))); } -TEST_CASE("PTransf16::right_one", "[PTransf16][005]") { +TEST_CASE("PTransf16::right_one", "[PTransf16][006]") { CHECK(PTransf16({}).right_one() == PTransf16::one()); CHECK(PTransf16({4, 4, 4, 4}).right_one() == PTransf16::one()); CHECK(PTransf16(Epu8(1)).right_one() == PTransf16::one()); @@ -210,7 +210,7 @@ TEST_CASE("PTransf16::right_one", "[PTransf16][005]") { PTransf16({0, 1, FF, 3, 4, FF, 6, FF, 8, FF, FF, FF, FF, FF, FF, FF})); } -TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { +TEST_CASE("PTransf16::rank_ref", "[PTransf16][007]") { CHECK(PTransf16({}).rank_ref() == 16); CHECK(PTransf16({4, 4, 4, 4}).rank_ref() == 12); CHECK(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}) @@ -231,7 +231,7 @@ TEST_CASE("PTransf16::rank_ref", "[PTransf16][006]") { .rank_ref() == 4); } -TEST_CASE("PTransf16::rank", "[PTransf16][007]") { +TEST_CASE("PTransf16::rank", "[PTransf16][008]") { CHECK(PTransf16({}).rank() == 16); CHECK(PTransf16({4, 4, 4, 4}).rank() == 12); CHECK(PTransf16({1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}).rank() == @@ -246,7 +246,7 @@ TEST_CASE("PTransf16::rank", "[PTransf16][007]") { 4); } -TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { +TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][009]") { CHECK_THAT(PTransf16({}).fix_points_mask(), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).fix_points_mask(false), Equals(Epu8(FF))); CHECK_THAT(PTransf16({}).fix_points_mask(true), Equals(Epu8(0))); @@ -274,7 +274,7 @@ TEST_CASE("PTransf16::fix_points_mask", "[PTransf16][008]") { Equals(Epu8({0, FF, 0}, FF))); } -TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { +TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][010]") { CHECK(PTransf16({}).fix_points_bitset() == 0xFFFF); CHECK(PTransf16({}).fix_points_bitset(false) == 0xFFFF); CHECK(PTransf16({}).fix_points_bitset(true) == 0); @@ -292,7 +292,7 @@ TEST_CASE("PTransf16::fix_points_bitset", "[PTransf16][009]") { .fix_points_bitset(true) == 0xFFFA); } -TEST_CASE("PTransf16::nb_fix_points", "[PTransf16][010]") { +TEST_CASE("PTransf16::nb_fix_points", "[PTransf16][011]") { CHECK(PTransf16({}).nb_fix_points() == 16); CHECK(PTransf16({4, 4, 4, 4}).nb_fix_points() == 12); CHECK(PTransf16(Epu8(1)).nb_fix_points() == 1); @@ -307,7 +307,7 @@ 
TEST_CASE("PTransf16::nb_fix_points", "[PTransf16][010]") { } TEST_CASE_METHOD(Perm16Fixture, "Transf16::operator uint64", - "[Transf16][011]") { + "[Transf16][012]") { CHECK(static_cast(Transf16::one()) == 0xf7e6d5c4b3a29180); CHECK(static_cast(zero) == 0x0); CHECK(static_cast(P10) == 0x1); @@ -318,7 +318,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Transf16::operator uint64", } TEST_CASE_METHOD(Perm16Fixture, "Transf16::Transf16(uint64_t)", - "[Transf16][012]") { + "[Transf16][013]") { CHECK(static_cast(0x0) == zero); CHECK(static_cast(0x1) == P10); CHECK(static_cast(0x100) == P01); @@ -327,13 +327,13 @@ TEST_CASE_METHOD(Perm16Fixture, "Transf16::Transf16(uint64_t)", } } -TEST_CASE_METHOD(Perm16Fixture, "Transf16::hash", "[Transf16][013]") { +TEST_CASE_METHOD(Perm16Fixture, "Transf16::hash", "[Transf16][014]") { CHECK(std::hash()(Transf16::one()) != 0); CHECK(std::hash()(Transf16(Epu8(1))) != 0); CHECK(std::hash()(RandT) != 0); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::operator uint64_t", "[Perm16][014]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::operator uint64_t", "[Perm16][015]") { CHECK(static_cast(Perm16::one()) == 0xf7e6d5c4b3a29180); CHECK(static_cast(PPa) == 0xf7e6d5c0b4a39281); CHECK(static_cast(PPb) == 0xd7e4f5c0b6a39281); @@ -344,17 +344,17 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::operator uint64_t", "[Perm16][014]") { } } -TEST_CASE("Perm::operator==", "[Perm16][015]") { +TEST_CASE("Perm::operator==", "[Perm16][016]") { CHECK(Perm16::one() * Perm16::one() == Perm16::one()); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::hash", "[Perm16][016]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::hash", "[Perm16][017]") { CHECK(std::hash()(Transf16::one()) != 0); CHECK(std::hash()(PPa) != 0); CHECK(std::hash()(RandPerm) != 0); } -TEST_CASE("PPerm16::PPerm16", "[PPerm16][017]") { +TEST_CASE("PPerm16::PPerm16", "[PPerm16][018]") { const uint8_t FF = 0xff; CHECK( PPerm16({4, 5, 0}, {9, 0, 1}) == @@ -364,12 +364,12 @@ TEST_CASE("PPerm16::PPerm16", "[PPerm16][017]") { PPerm16({1, FF, FF, FF, 9, 0, FF, FF, 2, FF, FF, FF, FF, FF, FF, FF})); } -TEST_CASE("PPerm16::hash", "[PPerm16][018]") { +TEST_CASE("PPerm16::hash", "[PPerm16][019]") { CHECK(std::hash()(PPerm16::one()) != 0); CHECK(std::hash()(PPerm16({4, 5, 0}, {9, 0, 1})) != 0); } -TEST_CASE_METHOD(Perm16Fixture, "PPerm16::left_one", "[PPerm16][019]") { +TEST_CASE_METHOD(Perm16Fixture, "PPerm16::left_one", "[PPerm16][020]") { CHECK(PPerm16({}).left_one() == PPerm16::one()); CHECK(PPerm16({FF, FF, FF, 4}).left_one() == PPerm16({FF, FF, FF, FF})); CHECK(PPerm16({FF, 4, FF, FF}).left_one() == PPerm16({FF, FF, FF, FF})); @@ -378,7 +378,7 @@ TEST_CASE_METHOD(Perm16Fixture, "PPerm16::left_one", "[PPerm16][019]") { } } -TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { +TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][021]") { CHECK(PPerm16({}).right_one() == PPerm16::one()); CHECK(PPerm16({FF, FF, FF, 4}).right_one() == PPerm16({FF, FF, FF})); CHECK(PPerm16({FF, 4, FF, FF}).right_one() == PPerm16({FF, 1, FF, FF})); @@ -388,10 +388,10 @@ TEST_CASE_METHOD(Perm16Fixture, "PPerm16::right_one", "[PPerm16][020]") { } #ifdef SIMDE_X86_SSE4_2_NATIVE -TEST_AGREES(Perm16Fixture, inverse_ref, inverse_find, PPlist, "[PPerm16][021]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_find, PPlist, "[PPerm16][022]"); #endif -TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][023]") { 
CHECK_THAT(PTransf16::one().fix_points_mask(), Equals(Epu8(FF))); CHECK_THAT(Perm16::one().fix_points_mask(), Equals(Epu8(FF))); CHECK_THAT(PPa.fix_points_mask(), Equals(Epu8({0, 0, 0, 0, 0}, FF))); @@ -415,7 +415,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::fix_points_mask", "[PPerm16][022]") { CHECK_THAT(RandPerm.fix_points_mask(true), Equals(Epu8({FF, 0}, FF))); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][023]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][024]") { CHECK(Perm16::one().smallest_fix_point() == 0); CHECK(PPa.smallest_fix_point() == 5); CHECK(PPb.smallest_fix_point() == 5); @@ -423,7 +423,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_fix_point", "[Perm16][023]") { } TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_moved_point", - "[Perm16][024]") { + "[Perm16][025]") { CHECK(Perm16::one().smallest_moved_point() == int(FF)); CHECK(PPa.smallest_moved_point() == 0); CHECK(PPb.smallest_moved_point() == 0); @@ -431,7 +431,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::smallest_moved_point", CHECK(Perm16({0, 1, 3, 2}).smallest_moved_point() == 2); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][026]") { CHECK(Perm16::one().largest_fix_point() == 15); CHECK(int(PPa.largest_fix_point()) == 15); CHECK(PPb.largest_fix_point() == 14); @@ -446,7 +446,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::largest_fix_point", "[Perm16][025]") { .largest_fix_point() == 15); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][026]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][027]") { CHECK(Perm16::one().nb_fix_points() == 16); CHECK(PPa.nb_fix_points() == 11); CHECK(PPb.nb_fix_points() == 8); @@ -454,7 +454,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_fix_points", "[Perm16][026]") { CHECK(Perm16({0, 1, 3, 2}).nb_fix_points() == 14); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][028]") { CHECK(PPa * PPa.inverse() == Perm16::one()); CHECK(PPa.inverse() * PPa == Perm16::one()); CHECK(PPb * PPb.inverse() == Perm16::one()); @@ -468,12 +468,12 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::inverse_ref", "[Perm16][027]") { } } -TEST_AGREES(Perm16Fixture, inverse_ref, inverse_find, Plist, "[Perm16][028]"); -TEST_AGREES(Perm16Fixture, inverse_ref, inverse_pow, Plist, "[Perm16][029]"); -TEST_AGREES(Perm16Fixture, inverse_ref, inverse_cycl, Plist, "[Perm16][030]"); -TEST_AGREES(Perm16Fixture, inverse_ref, inverse, Plist, "[Perm16][031]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_find, Plist, "[Perm16][029]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_pow, Plist, "[Perm16][030]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse_cycl, Plist, "[Perm16][031]"); +TEST_AGREES(Perm16Fixture, inverse_ref, inverse, Plist, "[Perm16][032]"); -TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][033]") { CHECK_THAT(Perm16::one().lehmer(), Equals(zero)); CHECK_THAT(PPa.lehmer(), Equals(epu8{1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})); @@ -481,19 +481,19 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::lehmer_ref", "[Perm16][032]") { Equals(epu8{1, 1, 1, 3, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0})); } -TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer_arr, Plist, "[Perm16][033]"); 
-TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer, Plist, "[Perm16][034]"); +TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer_arr, Plist, "[Perm16][034]"); +TEST_AGREES_EPU8(Perm16Fixture, lehmer_ref, lehmer, Plist, "[Perm16][035]"); -TEST_CASE_METHOD(Perm16Fixture, "Perm16::length_ref", "[Perm16][035]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::length_ref", "[Perm16][036]") { CHECK(Perm16::one().length() == 0); CHECK(PPa.length() == 4); CHECK(PPb.length() == 10); } -TEST_AGREES(Perm16Fixture, length_ref, length_arr, Plist, "[Perm16][036]"); -TEST_AGREES(Perm16Fixture, length_ref, length, Plist, "[Perm16][037]"); +TEST_AGREES(Perm16Fixture, length_ref, length_arr, Plist, "[Perm16][037]"); +TEST_AGREES(Perm16Fixture, length_ref, length, Plist, "[Perm16][038]"); -TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][038]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][039]") { CHECK(Perm16::one().nb_descents_ref() == 0); CHECK(PPa.nb_descents_ref() == 1); CHECK(PPb.nb_descents_ref() == 4); @@ -501,17 +501,17 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_descents_ref", "[Perm16][038]") { } TEST_AGREES(Perm16Fixture, nb_descents_ref, nb_descents, Plist, - "[Perm16][039]"); + "[Perm16][040]"); -TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_cycles_ref", "[Perm16][040]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::nb_cycles_ref", "[Perm16][041]") { CHECK(Perm16::one().nb_cycles_ref() == 16); CHECK(PPa.nb_cycles_ref() == 12); CHECK(PPb.nb_cycles_ref() == 10); } -TEST_AGREES(Perm16Fixture, nb_cycles_ref, nb_cycles, Plist, "[Perm16][041]"); +TEST_AGREES(Perm16Fixture, nb_cycles_ref, nb_cycles, Plist, "[Perm16][042]"); -TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq_ref", "[Perm16][042]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq_ref", "[Perm16][043]") { CHECK(Perm16::one().left_weak_leq_ref(Perm16::one())); CHECK(Perm16::one().left_weak_leq_ref(PPa)); CHECK(Perm16::one().left_weak_leq_ref(PPb)); @@ -519,7 +519,7 @@ TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq_ref", "[Perm16][042]") { CHECK(PPb.left_weak_leq_ref(PPb)); } -TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq", "[Perm16][043]") { +TEST_CASE_METHOD(Perm16Fixture, "Perm16::left_weak_leq", "[Perm16][044]") { for (auto u : PlistSmall) { for (auto v : PlistSmall) { CHECK(u.left_weak_leq(v) == u.left_weak_leq_ref(v)); From d338ad0ee2d2509eb587cf2985c9fd14b944fc76 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 15:59:32 +0000 Subject: [PATCH 088/113] Renamed epu.hpp -> epu8.hpp --- benchmark/bench_epu8.cpp | 2 +- benchmark/bench_fixture.hpp | 2 +- experiments/sort.cpp | 2 +- include/hpcombi/bmat8.hpp | 2 +- include/hpcombi/{epu.hpp => epu8.hpp} | 2 +- include/hpcombi/{epu_impl.hpp => epu8_impl.hpp} | 2 +- include/hpcombi/hpcombi.hpp | 2 +- include/hpcombi/perm16.hpp | 2 +- include/hpcombi/vect16.hpp | 2 +- tests/CMakeLists.txt | 4 ++-- tests/{test_epu.cpp => test_epu8.cpp} | 2 +- tests/test_main.hpp | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) rename include/hpcombi/{epu.hpp => epu8.hpp} (99%) rename include/hpcombi/{epu_impl.hpp => epu8_impl.hpp} (99%) rename tests/{test_epu.cpp => test_epu8.cpp} (99%) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index d6c96858..68a113e2 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -23,7 +23,7 @@ #include "bench_fixture.hpp" #include "bench_main.hpp" -#include "hpcombi/epu.hpp" +#include "hpcombi/epu8.hpp" namespace HPCombi { diff --git 
a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 79f642f5..8d086551 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -18,7 +18,7 @@ #include #include -#include "hpcombi/epu.hpp" +#include "hpcombi/epu8.hpp" using HPCombi::epu8; diff --git a/experiments/sort.cpp b/experiments/sort.cpp index 52245864..aceb8515 100644 --- a/experiments/sort.cpp +++ b/experiments/sort.cpp @@ -23,7 +23,7 @@ #include #include -#include "hpcombi/epu.hpp" +#include "hpcombi/epu8.hpp" using namespace std; using namespace std::chrono; diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp index d3b509bb..6d33892a 100644 --- a/include/hpcombi/bmat8.hpp +++ b/include/hpcombi/bmat8.hpp @@ -31,7 +31,7 @@ #include // for vector #include "debug.hpp" // for HPCOMBI_ASSERT -#include "epu.hpp" // for epu8 +#include "epu8.hpp" // for epu8 #include "perm16.hpp" // for Perm16 namespace HPCombi { diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu8.hpp similarity index 99% rename from include/hpcombi/epu.hpp rename to include/hpcombi/epu8.hpp index e309a78b..c40f7f3d 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu8.hpp @@ -726,6 +726,6 @@ inline std::string to_string(HPCombi::epu8 const &a); */ } // namespace std -#include "epu_impl.hpp" +#include "epu8_impl.hpp" #endif // HPCOMBI_EPU_HPP_INCLUDED diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu8_impl.hpp similarity index 99% rename from include/hpcombi/epu_impl.hpp rename to include/hpcombi/epu8_impl.hpp index 8406e3a1..b6da4688 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu8_impl.hpp @@ -13,7 +13,7 @@ // http://www.gnu.org/licenses/ // //////////////////////////////////////////////////////////////////////////////// -// This is the implementation part of epu.hpp this should be seen as +// This is the implementation part of epu8.hpp this should be seen as // implementation details and should not be included directly. 
#include diff --git a/include/hpcombi/hpcombi.hpp b/include/hpcombi/hpcombi.hpp index e1795c31..8deb0990 100644 --- a/include/hpcombi/hpcombi.hpp +++ b/include/hpcombi/hpcombi.hpp @@ -18,7 +18,7 @@ #include "bmat8.hpp" #include "debug.hpp" -#include "epu.hpp" +#include "epu8.hpp" #include "perm16.hpp" #include "perm_generic.hpp" #include "power.hpp" diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index d8fc99e4..d6a02376 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -23,7 +23,7 @@ #include // for is_trivial #include // for vector -#include "epu.hpp" // for epu8, permuted, etc +#include "epu8.hpp" // for epu8, permuted, etc #include "power.hpp" // for pow #include "vect16.hpp" // for hash, is_partial_permutation diff --git a/include/hpcombi/vect16.hpp b/include/hpcombi/vect16.hpp index 5ea4702d..47904d6a 100644 --- a/include/hpcombi/vect16.hpp +++ b/include/hpcombi/vect16.hpp @@ -23,7 +23,7 @@ #include // for hash #include // for is_trivial -#include "epu.hpp" +#include "epu8.hpp" namespace HPCombi { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9c852358..4722a9ca 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -33,7 +33,7 @@ endif() message(STATUS "Building tests") set(test_src - test_epu.cpp test_perm16.cpp test_perm_all.cpp test_bmat8.cpp) + test_epu8.cpp test_perm16.cpp test_perm_all.cpp test_bmat8.cpp) foreach(f ${test_src}) get_filename_component(testName ${f} NAME_WE) @@ -44,7 +44,7 @@ endforeach(f) add_executable(test_all ${test_src} test_main.cpp) target_link_libraries(test_all PRIVATE Catch2::Catch2WithMain) -add_test (TestEPU test_epu) +add_test (TestEPU8 test_epu8) add_test (TestPerm16 test_perm16) add_test (TestPermAll test_perm_all) add_test (TestBMat8 test_bmat8) diff --git a/tests/test_epu.cpp b/tests/test_epu8.cpp similarity index 99% rename from tests/test_epu.cpp rename to tests/test_epu8.cpp index 0deb8899..c1e08e21 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu8.cpp @@ -20,7 +20,7 @@ #include #include -#include "hpcombi/epu.hpp" +#include "hpcombi/epu8.hpp" namespace HPCombi { diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 3c93e6a2..33c9ed63 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -18,7 +18,7 @@ #include -#include "hpcombi/epu.hpp" +#include "hpcombi/epu8.hpp" #include #include From be2f558e03f1bc04b92762ea4c4655403c5accc4 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 17:11:12 +0000 Subject: [PATCH 089/113] Silenced clang complaining about gcc -Wstringop-overflow --- include/hpcombi/bmat8_impl.hpp | 5 +++++ tests/test_epu8.cpp | 12 ++++++++++++ tests/test_perm16.cpp | 4 ++++ 3 files changed, 21 insertions(+) diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index f92eb59b..d536a79a 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -449,10 +449,15 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { std::distance(prod_rows.begin(), std::find(prod_rows.begin(), prod_rows.end(), row)); } + +#ifndef __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif std::iota(perm.begin() + nr_rows(), perm.end(), nr_rows()); +#ifndef __clang__ #pragma GCC diagnostic pop +#endif Perm16 res = Perm16::one(); for (size_t i = 0; i < 8; i++) diff --git a/tests/test_epu8.cpp b/tests/test_epu8.cpp index c1e08e21..cd3ab5b7 100644 --- a/tests/test_epu8.cpp +++ b/tests/test_epu8.cpp @@ -261,8 +261,10 @@ TEST_CASE_METHOD(Fix, 
"Epu8::is_sorted", "[Epu8][018]") { epu8 x = epu8id; CHECK(is_sorted(x)); auto &refx = as_array(x); +#ifndef __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif while (std::next_permutation(refx.begin(), refx.begin() + 9)) { CHECK(!is_sorted(x)); } @@ -275,7 +277,9 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { while (std::next_permutation(refx.begin(), refx.begin() + 14)) { CHECK(!is_sorted(x)); } +#ifndef __clang__ #pragma GCC diagnostic pop +#endif } TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { @@ -290,10 +294,14 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { auto &refx = as_array(x); do { CHECK_THAT(sorted(x), IsSorted); +#ifndef __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif } while (std::next_permutation(refx.begin(), refx.begin() + 9)); +#ifndef __clang__ #pragma GCC diagnostic pop +#endif } TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][020]") { @@ -308,10 +316,14 @@ TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][020]") { auto &refx = as_array(x); do { CHECK_THAT(reverted(revsorted(x)), IsSorted); +#ifndef __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif } while (std::next_permutation(refx.begin(), refx.begin() + 9)); +#ifndef __clang__ #pragma GCC diagnostic pop +#endif } TEST_CASE_METHOD(Fix, "Epu8::sort_perm", "[Epu8][021]") { diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index ed7c54f1..5e8f5255 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -27,10 +27,14 @@ std::vector all_perms(uint8_t sz) { epu8 x = HPCombi::epu8id; res.push_back(x); auto &refx = HPCombi::as_array(x); +#ifndef __clang__ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif while (std::next_permutation(refx.begin(), refx.begin() + sz)) { +#ifndef __clang__ #pragma GCC diagnostic pop +#endif res.push_back(x); } return res; From c298c95f7fa23c9ba7af0feb05b9d731b927d14d Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 20:07:15 +0000 Subject: [PATCH 090/113] Preparing for xpu/perm32: - created build.hpp for TPUBuild - epu8id and similar are now Epu8.id() - improved doc --- benchmark/bench_epu8.cpp | 4 +- benchmark/bench_fixture.hpp | 4 +- examples/pattern.cpp | 4 +- include/hpcombi/bmat8_impl.hpp | 18 ++--- include/hpcombi/builder.hpp | 115 ++++++++++++++++++++++++++++++++ include/hpcombi/epu8.hpp | 76 +++------------------ include/hpcombi/epu8_impl.hpp | 39 +++++------ include/hpcombi/perm16.hpp | 8 +-- include/hpcombi/perm16_impl.hpp | 8 +-- tests/test_epu8.cpp | 80 +++++++++++----------- tests/test_perm16.cpp | 4 +- 11 files changed, 208 insertions(+), 152 deletions(-) create mode 100644 include/hpcombi/builder.hpp diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 68a113e2..74abb2ab 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -1,5 +1,5 @@ //****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // +// Copyright (C) 2018-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -32,7 +32,7 @@ namespace { struct RoundsMask { constexpr RoundsMask() : arr() { for (unsigned i = 0; i < sorting_rounds.size(); ++i) - arr[i] = sorting_rounds[i] < epu8id; + arr[i] = sorting_rounds[i] < Epu8.id(); } epu8 arr[sorting_rounds.size()]; }; diff --git a/benchmark/bench_fixture.hpp 
b/benchmark/bench_fixture.hpp index 8d086551..6bc746c7 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -1,5 +1,5 @@ //****************************************************************************// -// Copyright (C) 2016 Florent Hivert , // +// Copyright (C) 2016-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -35,7 +35,7 @@ std::vector rand_epu8(size_t sz) { inline epu8 rand_perm() { static std::random_device rd; static std::mt19937 g(rd()); - epu8 res = HPCombi::epu8id; + epu8 res = HPCombi::Epu8.id(); auto &ar = HPCombi::as_array(res); std::shuffle(ar.begin(), ar.end(), g); return res; diff --git a/examples/pattern.cpp b/examples/pattern.cpp index 2241b23b..50656b8a 100644 --- a/examples/pattern.cpp +++ b/examples/pattern.cpp @@ -68,8 +68,8 @@ void make_subsets_of_size(int n, int k) { template epu8 extract_pattern(epu8 perm, epu8 permset) { epu8 cst = Epu8({}, Size); - epu8 res = permuted(perm, permset) | (epu8id >= cst); - res = sort_perm(res) & (epu8id < cst); + epu8 res = permuted(perm, permset) | (Epu8.id() >= cst); + res = sort_perm(res) & (Epu8.id() < cst); return res; } diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index d536a79a..eb01f386 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -254,14 +254,14 @@ namespace detail { inline void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept { static const epu8 bound08 = simde_mm_slli_epi32( - static_cast(epu8id), 3); // shift for *8 + static_cast(Epu8.id()), 3); // shift for *8 static const epu8 bound18 = bound08 + Epu8(0x80); for (size_t slice8 = 0; slice8 < 16; slice8++) { epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */ epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5); set0 |= (bm5 == bound08) & shft; set1 |= (bm5 == bound18) & shft; - block = simde_mm_shuffle_epi8(block, right_cycle); + block = simde_mm_shuffle_epi8(block, Epu8.right_cycle()); } } } @@ -277,7 +277,7 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const noexcept { res1 = epu8{}; for (size_t r = 0; r < 16; r++) { detail::row_space_update_bitset(block0 | block1, res0, res1); - block1 = simde_mm_shuffle_epi8(block1, right_cycle); + block1 = simde_mm_shuffle_epi8(block1, Epu8.right_cycle()); } } @@ -292,7 +292,7 @@ inline uint64_t BMat8::row_space_size_bitset() const noexcept { inline uint64_t BMat8::row_space_size_incl1() const noexcept { epu8 in = simde_mm_set_epi64x(_data, _data); - epu8 block = epu8id; + epu8 block = Epu8.id(); uint64_t res = 0; for (size_t r = 0; r < 16; r++) { epu8 orincl{}; @@ -308,7 +308,7 @@ inline uint64_t BMat8::row_space_size_incl1() const noexcept { inline uint64_t BMat8::row_space_size_incl() const noexcept { epu8 in = simde_mm_set_epi64x(_data, _data); - epu8 block = epu8id; + epu8 block = Epu8.id(); uint64_t res = 0; for (size_t r = 0; r < 16; r++) { epu8 orincl = ((in | block) == block) & in; @@ -466,11 +466,11 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { } inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const noexcept { - epu8 x = permuted(simde_mm_set_epi64x(_data, 0), epu8rev); - epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), epu8rev); + epu8 x = permuted(simde_mm_set_epi64x(_data, 0), Epu8.rev()); + epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), Epu8.rev()); // Vector ternary operator is not supported by clang. - // return (x != (epu8 {})) ? 
permutation_of(y, x) : epu8id; - return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8{}); + // return (x != (epu8 {})) ? permutation_of(y, x) : Epu8.id(); + return simde_mm_blendv_epi8(Epu8.id(), permutation_of(y, x), x != epu8{}); } // Not noexcept because std::ostream::operator<< isn't diff --git a/include/hpcombi/builder.hpp b/include/hpcombi/builder.hpp new file mode 100644 index 00000000..283b2785 --- /dev/null +++ b/include/hpcombi/builder.hpp @@ -0,0 +1,115 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HPCOMBI_BUILDER_HPP_INCLUDED +#define HPCOMBI_BUILDER_HPP_INCLUDED + +namespace HPCombi { + +/** Class for factory object associated to a SIMD packed unsigned integers. + * @details + * The main purpose of this class is to be able to construct in a \c constexpr + * way various instances of the \c TPU SIMD vector type. The behavior of + * an instance of \c TPUBuild is designed to mimic the behavior of \c TPU + * if it where a class: + * - calling \c operator() on an instance which acts similarly to a + * class constructor, + * - calling a member function such as #id acts as a static member function. + */ +template struct TPUBuild { + + /// Type of the elements + using type_elem = typename std::remove_reference_t; + + /// Size of the elements + static constexpr size_t size_elem = sizeof(type_elem); + + /// Number of elements + static constexpr size_t size = sizeof(TPU) / size_elem; + + /// Array equivalent type + using array = std::array; + + template + static constexpr TPU make_helper(Fun f, std::index_sequence) { + static_assert(std::is_invocable_v); + return TPU{f(Is)...}; + } + + /// Construct a TPU from an \c std::initializer_list and a default value + inline constexpr TPU operator()(std::initializer_list il, + type_elem def) const { + HPCOMBI_ASSERT(il.size() <= size); + array res; + std::copy(il.begin(), il.end(), res.begin()); + std::fill(res.begin() + il.size(), res.end(), def); + return reinterpret_cast(res); + } + + /// Construct a TPU from a function giving the values at \f$1,2,\dots\f$ + template inline constexpr TPU operator()(Fun f) const { + static_assert(std::is_invocable_v); + return make_helper(f, std::make_index_sequence{}); + } + + /// Construct a constant TPU + inline constexpr TPU operator()(type_elem c) const { + return operator()([c](auto) { return c; }); + } + /// explicit overloading for int constants + inline constexpr TPU operator()(int c) const { + return operator()(type_elem(c)); + } + /// explicit overloading for size_t constants + inline constexpr TPU operator()(size_t c) const { + return operator()(type_elem(c)); + } + + /// Return the identity element of type \c TPU + constexpr TPU id() const { return operator()([](type_elem i) { return i; }); } + /// Return the reversed element of type \c TPU + constexpr TPU rev() const { + return (*this)([](type_elem i) { return size - 1 - i; }); + } + /// Left cycle \c TPU permutation + 
constexpr TPU left_cycle() const { + return (*this)([](type_elem i) { return (i + size - 1) % size; }); + } + /// Right cycle \c TPU permutation + constexpr TPU right_cycle() const { + return (*this)([](type_elem i) { return (i + 1) % size; }); + } + /// Left shift \c TPU, duplicating the rightmost entry + constexpr TPU left_dup() const { + return (*this)([](type_elem i) { return i == 15 ? 15 : i + 1; }); + } + /// Right shift \c TPU, duplicating the leftmost entry + constexpr TPU right_dup() const { + return (*this)([](type_elem i) { return i == 0 ? 0 : i - 1; }); + } + /// Popcount \c TPU: the ith entry contains the number of bits set in i + constexpr TPU popcount() const { + return (*this)([](type_elem i) { + return (((i & 0x01) != 0 ? 1 : 0) + ((i & 0x02) != 0 ? 1 : 0) + + ((i & 0x04) != 0 ? 1 : 0) + ((i & 0x08) != 0 ? 1 : 0) + + ((i & 0x10) != 0 ? 1 : 0) + ((i & 0x20) != 0 ? 1 : 0) + + ((i & 0x40) != 0 ? 1 : 0) + ((i & 0x80) != 0 ? 1 : 0)); + }); + } +}; + +} // namespace HPCombi + +#endif // HPCOMBI_BUILDER_HPP_INCLUDED diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp index c40f7f3d..18a45f44 100644 --- a/include/hpcombi/epu8.hpp +++ b/include/hpcombi/epu8.hpp @@ -26,6 +26,7 @@ #include // for make_index_sequence, ind... #include "debug.hpp" // for HPCOMBI_ASSERT +#include "builder.hpp" // for TPUBuild #include "vect_generic.hpp" // for VectGeneric #include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde... @@ -49,73 +50,12 @@ static_assert(alignof(epu8) == 16, /// Currently not really used except in experiments using xpu8 = uint8_t __attribute__((vector_size(32))); -namespace detail { // Implementation detail code - -/// Factory object for various SIMD constants in particular constexpr -template struct TPUBuild { - // Type for Packed Unsigned integer (TPU) - using type_elem = typename std::remove_reference_t; - static constexpr size_t size_elem = sizeof(type_elem); - static constexpr size_t size = sizeof(TPU) / size_elem; - - using array = std::array; - - template - static constexpr TPU make_helper(Fun f, std::index_sequence) { - static_assert(std::is_invocable_v); - return TPU{f(Is)...}; - } - - inline TPU operator()(std::initializer_list il, - type_elem def) const { - HPCOMBI_ASSERT(il.size() <= size); - array res; - std::copy(il.begin(), il.end(), res.begin()); - std::fill(res.begin() + il.size(), res.end(), def); - return reinterpret_cast(res); - } - - template inline constexpr TPU operator()(Fun f) const { - static_assert(std::is_invocable_v); - return make_helper(f, std::make_index_sequence{}); - } - - inline constexpr TPU operator()(type_elem c) const { - return make_helper([c](auto) { return c; }, - std::make_index_sequence{}); - } - // explicit overloading for int constants - inline constexpr TPU operator()(int c) const { - return operator()(type_elem(c)); - } - inline constexpr TPU operator()(size_t c) const { - return operator()(type_elem(c)); - } -}; - -} // namespace detail - -// Single instance of the TPUBuild factory object -static constexpr detail::TPUBuild Epu8; - -/// The identity #HPCombi::epu8 -/// The image of i by the identity function -constexpr epu8 epu8id = Epu8([](uint8_t i) { return i; }); -/// The reverted identity #HPCombi::epu8 -constexpr epu8 epu8rev = Epu8([](uint8_t i) { return 15 - i; }); -/// Left cycle #HPCombi::epu8 permutation -constexpr epu8 left_cycle = Epu8([](uint8_t i) { return (i + 15) % 16; }); -/// Right cycle #HPCombi::epu8 permutation -constexpr epu8 right_cycle = Epu8([](uint8_t i) { return (i + 1) % 16; }); -/// 
Left shift #HPCombi::epu8, duplicating the rightmost entry -constexpr epu8 left_dup = Epu8([](uint8_t i) { return i == 15 ? 15 : i + 1; }); -/// Right shift #HPCombi::epu8, duplicating the leftmost entry -constexpr epu8 right_dup = Epu8([](uint8_t i) { return i == 0 ? 0 : i - 1; }); -/// Popcount #HPCombi::epu8: the ith entry contains the number of bits set in i -constexpr epu8 popcount4 = Epu8([](uint8_t i) { - return ((i & 1) != 0 ? 1 : 0) + ((i & 2) != 0 ? 1 : 0) + - ((i & 4) != 0 ? 1 : 0) + ((i & 8) != 0 ? 1 : 0); -}); + +/** Factory object acting as a class constructor for type #HPCombi::epu8. + * see #HPCombi::TPUBuild for usage and capability + */ +constexpr TPUBuild Epu8 {}; + /** Cast a #HPCombi::epu8 to a c++ \c std::array * @@ -189,7 +129,7 @@ inline epu8 shifted_right(epu8 a) noexcept { */ inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ -inline epu8 reverted(epu8 a) noexcept { return permuted(a, epu8rev); } +inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); } /** Vector min between two #HPCombi::epu8 0 */ inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); } diff --git a/include/hpcombi/epu8_impl.hpp b/include/hpcombi/epu8_impl.hpp index b6da4688..eeb5da64 100644 --- a/include/hpcombi/epu8_impl.hpp +++ b/include/hpcombi/epu8_impl.hpp @@ -59,11 +59,11 @@ inline epu8 permuted_ref(epu8 a, epu8 b) noexcept { // Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { - uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); + uint64_t res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound))); return res == 0 ? 16 : (__builtin_ffsll(res) - 1); } inline uint64_t last_mask(epu8 msk, size_t bound) { - auto res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); + auto res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound))); return res == 0 ? 16 : (63 - __builtin_clzll(res)); } @@ -128,7 +128,7 @@ template inline epu8 network_sort(epu8 res, std::array rounds) { for (auto round : rounds) { // This conditional should be optimized out by the compiler - epu8 mask = Increasing ? round < epu8id : epu8id < round; + epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round; epu8 b = permuted(res, round); // res = mask ? min(res,b) : max(res,b); is not accepted by clang res = simde_mm_blendv_epi8(min(res, b), max(res, b), mask); @@ -139,10 +139,10 @@ inline epu8 network_sort(epu8 res, std::array rounds) { /// Apply a sorting network in place and return the permutation template inline epu8 network_sort_perm(epu8 &v, std::array rounds) { - epu8 res = epu8id; + epu8 res = Epu8.id(); for (auto round : rounds) { // This conditional should be optimized out by the compiler - epu8 mask = Increasing ? round < epu8id : epu8id < round; + epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round; epu8 b = permuted(v, round); epu8 cmp = simde_mm_blendv_epi8(b < v, v < b, mask); v = simde_mm_blendv_epi8(v, b, cmp); @@ -231,7 +231,7 @@ inline void merge_rev(epu8 &a, epu8 &b) noexcept { b = network_sort(b, merge_rounds); } inline void merge(epu8 &a, epu8 &b) noexcept { - a = permuted(a, epu8rev); + a = permuted(a, Epu8.rev()); merge_rev(a, b); } // TODO : AVX2 version. 
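// ---------------------------------------------------------------------------
// A minimal sketch (not part of the patch) of how call sites look once the
// file-scope constants epu8id / popcount4 are replaced by the Epu8 factory,
// assuming only hpcombi/epu8.hpp as patched above; popcount_bytes and
// is_identity are illustrative names, not HPCombi API.
#include "hpcombi/epu8.hpp"

// Per-byte popcount via two 4-bit table lookups, as in popcount16 below:
// Epu8.popcount() holds the popcount of 0..15, indexed by each nibble.
inline HPCombi::epu8 popcount_bytes(HPCombi::epu8 v) {
    using namespace HPCombi;
    return permuted(Epu8.popcount(), v & Epu8(0x0f)) +
           permuted(Epu8.popcount(), v >> 4);
}

// The identity vector formerly spelled epu8id is now Epu8.id().
inline bool is_identity(HPCombi::epu8 v) {
    return HPCombi::equal(v, HPCombi::Epu8.id());
}
// ---------------------------------------------------------------------------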
@@ -453,10 +453,10 @@ inline epu8 eval16_gen(epu8 v) noexcept { return from_array(as_VectGeneric(v).eval().v); } inline epu8 eval16_cycle(epu8 v) noexcept { - epu8 res = -(epu8id == v); + epu8 res = -(Epu8.id() == v); for (int i = 1; i < 16; i++) { - v = permuted(v, left_cycle); - res -= (epu8id == v); + v = permuted(v, Epu8.left_cycle()); + res -= (Epu8.id() == v); } return res; } @@ -470,11 +470,12 @@ inline epu8 eval16_popcount(epu8 v) noexcept { } inline epu8 popcount16(epu8 v) noexcept { - return permuted(popcount4, (v & Epu8(0x0f))) + permuted(popcount4, v >> 4); + return (permuted(Epu8.popcount(), v & Epu8(0x0f)) + + permuted(Epu8.popcount(), v >> 4)); } inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) && @@ -482,13 +483,13 @@ inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { } inline bool is_transformation(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 // (v = Perm16::one() or last diff index < 16) @@ -499,22 +500,22 @@ inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); // (forall x in v, x in Perm16::one()) and // (forall x in Perm16::one(), x in v) and // (v = Perm16::one() or last diff index < 16) - return _mm_cmpestri(epu8id, 16, v, 16, FIRST_NON_ZERO) == 16 && - _mm_cmpestri(v, 16, epu8id, 16, FIRST_NON_ZERO) == 16 && + return _mm_cmpestri(Epu8.id(), 16, v, 16, FIRST_NON_ZERO) == 16 && + _mm_cmpestri(v, 16, Epu8.id(), 16, FIRST_NON_ZERO) == 16 && (diff == 16 || diff < k); } #endif inline bool is_permutation_sort(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); - return equal(sorted(v), epu8id) && (diff == 16 || diff < k); + uint64_t diff = last_diff(v, Epu8.id(), 16); + return equal(sorted(v), Epu8.id()) && (diff == 16 || diff < k); } inline bool is_permutation_eval(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); return equal(eval16(v), Epu8({}, 1)) && (diff == 16 || diff < k); } diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index d6a02376..bd70fdb9 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -60,7 +60,7 @@ struct alignas(16) PTransf16 : public Vect16 { } //! The identity partial transformation. - static constexpr PTransf16 one() { return epu8id; } + static constexpr PTransf16 one() { return Epu8.id(); } //! The product of two partial transformations. PTransf16 operator*(const PTransf16 &p) const { return HPCombi::permuted(v, p.v) | (p.v == Epu8(0xFF)); @@ -129,7 +129,7 @@ struct Transf16 : public PTransf16 { } //! The identity transformation. 
- static constexpr Transf16 one() { return epu8id; } + static constexpr Transf16 one() { return Epu8.id(); } //! The product of two transformations. Transf16 operator*(const Transf16 &p) const { return HPCombi::permuted(v, p.v); @@ -159,7 +159,7 @@ struct PPerm16 : public PTransf16 { } //! The identity partial permutations. - static constexpr PPerm16 one() { return epu8id; } + static constexpr PPerm16 one() { return Epu8.id(); } //! The product of two partial perrmutations. PPerm16 operator*(const PPerm16 &p) const { return this->PTransf16::operator*(p); @@ -216,7 +216,7 @@ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { // being defined (see https://stackoverflow.com/questions/11928089/) // therefore we chose to have functions. //! The identity partial permutation. - static constexpr Perm16 one() { return epu8id; } + static constexpr Perm16 one() { return Epu8.id(); } //! The product of two permutations Perm16 operator*(const Perm16 &p) const { return HPCombi::permuted(v, p.v); diff --git a/include/hpcombi/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp index 3e3f96ee..368425ee 100644 --- a/include/hpcombi/perm16_impl.hpp +++ b/include/hpcombi/perm16_impl.hpp @@ -20,7 +20,7 @@ namespace HPCombi { /////////////////////////////////////////////////////////////////////////////// inline PTransf16::PTransf16(std::initializer_list il) - : Vect16(epu8id) { + : Vect16(Epu8.id()) { HPCOMBI_ASSERT(il.size() <= 16); std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin()); } @@ -43,7 +43,7 @@ inline uint32_t PTransf16::domain_bitset(bool complement) const { return simde_mm_movemask_epi8(domain_mask(complement)); } inline PTransf16 PTransf16::right_one() const { - return domain_mask(true) | epu8id; + return domain_mask(true) | Epu8.id(); } #ifdef SIMDE_X86_SSE4_2_NATIVE @@ -64,7 +64,7 @@ inline uint32_t PTransf16::image_bitset(bool complement) const { return simde_mm_movemask_epi8(image_mask(complement)); } inline PTransf16 PTransf16::left_one() const { - return image_mask(true) | epu8id; + return image_mask(true) | Epu8.id(); } inline uint32_t PTransf16::rank_ref() const { decltype(Epu8)::array tmp{}; @@ -349,7 +349,7 @@ inline epu8 Perm16::cycles_partition() const { } inline uint8_t Perm16::nb_cycles_unroll() const { - epu8 res = (epu8id == cycles_partition()); + epu8 res = (Epu8.id() == cycles_partition()); return __builtin_popcountl(simde_mm_movemask_epi8(res)); } diff --git a/tests/test_epu8.cpp b/tests/test_epu8.cpp index cd3ab5b7..ac689cc7 100644 --- a/tests/test_epu8.cpp +++ b/tests/test_epu8.cpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2016-2018 Florent Hivert , // +// Copyright (C) 2016-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -40,8 +40,8 @@ struct Fix { Pw(epu8{5, 5, 2, 9, 1, 6, 12, 4, 0, 4, 4, 4, 12, 13, 14, 15}), P5(Epu8({}, 5)), Pc(Epu8({23, 5, 21, 5, 43, 36}, 7)), // Elements should be sorted in alphabetic order here - v({zero, P01, epu8id, P10, P11, P1, P112, Pa, Pb, RP, Pa1, Pa2, P51, - Pv, Pw, P5, epu8rev, Pc}), + v({zero, P01, Epu8.id(), P10, P11, P1, P112, Pa, Pb, RP, Pa1, Pa2, P51, + Pv, Pw, P5, Epu8.rev(), Pc}), av({{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15}}) {} ~Fix() = default; @@ -222,7 +222,7 @@ TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][014]") { } TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][015]") { - CHECK_THAT(reverted(epu8id), Equals(epu8rev)); + 
CHECK_THAT(reverted(Epu8.id()), Equals(Epu8.rev())); for (auto x : v) { CHECK_THAT(x, Equals(reverted(reverted(x)))); } @@ -246,7 +246,7 @@ TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][017]") { } TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { - CHECK(is_sorted(epu8id)); + CHECK(is_sorted(Epu8.id())); CHECK( is_sorted(epu8{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); CHECK(is_sorted(Epu8({0, 1}, 2))); @@ -258,7 +258,7 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { CHECK(!is_sorted(Epu8({0, 0, 2}, 1))); CHECK(!is_sorted(Epu8({6}, 5))); - epu8 x = epu8id; + epu8 x = Epu8.id(); CHECK(is_sorted(x)); auto &refx = as_array(x); #ifndef __clang__ @@ -268,7 +268,7 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { while (std::next_permutation(refx.begin(), refx.begin() + 9)) { CHECK(!is_sorted(x)); } - x = epu8id; + x = Epu8.id(); while (std::next_permutation(refx.begin() + 8, refx.begin() + 16)) { CHECK(!is_sorted(x)); } @@ -285,11 +285,11 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { CHECK_THAT( sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - Equals(epu8id)); + Equals(Epu8.id())); for (auto &x : v) { CHECK_THAT(sorted(x), IsSorted); } - epu8 x = epu8id; + epu8 x = Epu8.id(); CHECK_THAT(sorted(x), IsSorted); auto &refx = as_array(x); do { @@ -307,11 +307,11 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][020]") { CHECK_THAT( revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - Equals(epu8rev)); + Equals(Epu8.rev())); for (auto &x : v) { CHECK_THAT(reverted(revsorted(x)), IsSorted); } - epu8 x = epu8id; + epu8 x = Epu8.id(); CHECK_THAT(x, IsSorted); auto &refx = as_array(x); do { @@ -360,12 +360,12 @@ TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][022]") { } TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][023]") { - CHECK_THAT(permutation_of(epu8id, epu8id), Equals(epu8id)); - CHECK_THAT(permutation_of(Pa, Pa), Equals(epu8id)); - CHECK_THAT(permutation_of(epu8rev, epu8id), Equals(epu8rev)); - CHECK_THAT(permutation_of(epu8id, epu8rev), Equals(epu8rev)); - CHECK_THAT(permutation_of(epu8rev, epu8rev), Equals(epu8id)); - CHECK_THAT(permutation_of(epu8id, RP), Equals(RP)); + CHECK_THAT(permutation_of(Epu8.id(), Epu8.id()), Equals(Epu8.id())); + CHECK_THAT(permutation_of(Pa, Pa), Equals(Epu8.id())); + CHECK_THAT(permutation_of(Epu8.rev(), Epu8.id()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of(Epu8.id(), Epu8.rev()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of(Epu8.rev(), Epu8.rev()), Equals(Epu8.id())); + CHECK_THAT(permutation_of(Epu8.id(), RP), Equals(RP)); const uint8_t FF = 0xff; CHECK_THAT((permutation_of(Pv, Pv) | epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), @@ -373,12 +373,12 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][023]") { 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][024]") { - CHECK_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); - CHECK_THAT(permutation_of_ref(Pa, Pa), Equals(epu8id)); - CHECK_THAT(permutation_of_ref(epu8rev, epu8id), Equals(epu8rev)); - CHECK_THAT(permutation_of_ref(epu8id, epu8rev), Equals(epu8rev)); - CHECK_THAT(permutation_of_ref(epu8rev, epu8rev), Equals(epu8id)); - CHECK_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); + CHECK_THAT(permutation_of_ref(Epu8.id(), Epu8.id()), Equals(Epu8.id())); + CHECK_THAT(permutation_of_ref(Pa, Pa), Equals(Epu8.id())); + 
CHECK_THAT(permutation_of_ref(Epu8.rev(), Epu8.id()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of_ref(Epu8.id(), Epu8.rev()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of_ref(Epu8.rev(), Epu8.rev()), Equals(Epu8.id())); + CHECK_THAT(permutation_of_ref(Epu8.id(), RP), Equals(RP)); const uint8_t FF = 0xff; CHECK_THAT((permutation_of_ref(Pv, Pv) | epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), @@ -434,7 +434,7 @@ TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][026]") { TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][027]") { CHECK(horiz_sum_ref(zero) == 0); CHECK(horiz_sum_ref(P01) == 1); - CHECK(horiz_sum_ref(epu8id) == 120); + CHECK(horiz_sum_ref(Epu8.id()) == 120); CHECK(horiz_sum_ref(P10) == 1); CHECK(horiz_sum_ref(P11) == 2); CHECK(horiz_sum_ref(P1) == 16); @@ -444,7 +444,7 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][027]") { CHECK(horiz_sum_ref(P51) == 90); CHECK(horiz_sum_ref(Pv) == 110); CHECK(horiz_sum_ref(P5) == 80); - CHECK(horiz_sum_ref(epu8rev) == 120); + CHECK(horiz_sum_ref(Epu8.rev()) == 120); CHECK(horiz_sum_ref(Pc) == 203); } @@ -456,12 +456,12 @@ TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum, v, "[Epu8][031]") TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][032]") { CHECK_THAT(partial_sums_ref(zero), Equals(zero)); CHECK_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); - CHECK_THAT(partial_sums_ref(epu8id), + CHECK_THAT(partial_sums_ref(Epu8.id()), Equals(epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120})); CHECK_THAT(partial_sums_ref(P10), Equals(P1)); CHECK_THAT(partial_sums_ref(P11), Equals(Epu8({1}, 2))); - CHECK_THAT(partial_sums_ref(P1), Equals(epu8id + Epu8({}, 1))); + CHECK_THAT(partial_sums_ref(P1), Equals(Epu8.id() + Epu8({}, 1))); CHECK_THAT(partial_sums_ref(P112), Equals(epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30})); @@ -481,7 +481,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][032]") { CHECK_THAT(partial_sums_ref(P5), Equals(epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80})); - CHECK_THAT(partial_sums_ref(epu8rev), + CHECK_THAT(partial_sums_ref(Epu8.rev()), Equals(epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, 117, 119, 120, 120})); CHECK_THAT(partial_sums_ref(Pc), @@ -496,7 +496,7 @@ TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][035]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][036]") { CHECK(horiz_max_ref(zero) == 0); CHECK(horiz_max_ref(P01) == 1); - CHECK(horiz_max_ref(epu8id) == 15); + CHECK(horiz_max_ref(Epu8.id()) == 15); CHECK(horiz_max_ref(P10) == 1); CHECK(horiz_max_ref(P11) == 1); CHECK(horiz_max_ref(P1) == 1); @@ -506,7 +506,7 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][036]") { CHECK(horiz_max_ref(P51) == 6); CHECK(horiz_max_ref(Pv) == 15); CHECK(horiz_max_ref(P5) == 5); - CHECK(horiz_max_ref(epu8rev) == 15); + CHECK(horiz_max_ref(Epu8.rev()) == 15); CHECK(horiz_max_ref(Pc) == 43); } @@ -518,7 +518,7 @@ TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max, v, "[Epu8][040]") TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][041]") { CHECK_THAT(partial_max_ref(zero), Equals(zero)); CHECK_THAT(partial_max_ref(P01), Equals(Epu8({0}, 1))); - CHECK_THAT(partial_max_ref(epu8id), Equals(epu8id)); + CHECK_THAT(partial_max_ref(Epu8.id()), Equals(Epu8.id())); CHECK_THAT(partial_max_ref(P10), Equals(P1)); CHECK_THAT(partial_max_ref(P11), Equals(P1)); CHECK_THAT(partial_max_ref(P1), Equals(P1)); @@ -529,7 +529,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][041]") { 
CHECK_THAT(partial_max_ref(Pv), Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15})); CHECK_THAT(partial_max_ref(P5), Equals(P5)); - CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); + CHECK_THAT(partial_max_ref(Epu8.rev()), Equals(Epu8({}, 15))); CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][042]") @@ -539,7 +539,7 @@ TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][044]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][045]") { CHECK(horiz_min_ref(zero) == 0); CHECK(horiz_min_ref(P01) == 0); - CHECK(horiz_min_ref(epu8id) == 0); + CHECK(horiz_min_ref(Epu8.id()) == 0); CHECK(horiz_min_ref(P10) == 0); CHECK(horiz_min_ref(P11) == 0); CHECK(horiz_min_ref(P1) == 1); @@ -549,7 +549,7 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][045]") { CHECK(horiz_min_ref(P51) == 1); CHECK(horiz_min_ref(Pv) == 0); CHECK(horiz_min_ref(P5) == 5); - CHECK(horiz_min_ref(epu8rev) == 0); + CHECK(horiz_min_ref(Epu8.rev()) == 0); CHECK(horiz_min_ref(Pc) == 5); } @@ -561,7 +561,7 @@ TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min, v, "[Epu8][049]") TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][050]") { CHECK_THAT(partial_min_ref(zero), Equals(zero)); CHECK_THAT(partial_min_ref(P01), Equals(zero)); - CHECK_THAT(partial_min_ref(epu8id), Equals(zero)); + CHECK_THAT(partial_min_ref(Epu8.id()), Equals(zero)); CHECK_THAT(partial_min_ref(P10), Equals(P10)); CHECK_THAT(partial_min_ref(P11), Equals(P11)); CHECK_THAT(partial_min_ref(P1), Equals(P1)); @@ -573,7 +573,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][050]") { Equals(Epu8({5, 5, 2, 2, 1, 1, 1, 1, }, 0))); // clang-format on CHECK_THAT(partial_min_ref(P5), Equals(P5)); - CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); + CHECK_THAT(partial_min_ref(Epu8.rev()), Equals(Epu8.rev())); CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][051]") @@ -583,7 +583,7 @@ TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][053]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][054]") { CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); CHECK_THAT(eval16_ref(P01), Equals(Epu8({15, 1}, 0))); - CHECK_THAT(eval16_ref(epu8id), Equals(Epu8({}, 1))); + CHECK_THAT(eval16_ref(Epu8.id()), Equals(Epu8({}, 1))); CHECK_THAT(eval16_ref(P10), Equals(Epu8({15, 1}, 0))); CHECK_THAT(eval16_ref(P11), Equals(Epu8({14, 2}, 0))); CHECK_THAT(eval16_ref(P1), Equals(Epu8({0, 16}, 0))); @@ -595,7 +595,7 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][054]") { CHECK_THAT(eval16_ref(Pv), Equals(epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); CHECK_THAT(eval16_ref(P5), Equals(Epu8({0, 0, 0, 0, 0, 16}, 0))); - CHECK_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); + CHECK_THAT(eval16_ref(Epu8.rev()), Equals(Epu8({}, 1))); CHECK_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } @@ -605,8 +605,8 @@ TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_arr, v, "[Epu8][057]") TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_gen, v, "[Epu8][058]") TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16, v, "[Epu8][059]") -TEST_CASE("Epu8::popcount4", "[Epu8][060]") { - CHECK_THAT(popcount4, +TEST_CASE("Epu8::popcount", "[Epu8][060]") { + CHECK_THAT(Epu8.popcount(), Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 5e8f5255..d58fd6c6 100644 --- 
a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2017 Florent Hivert , // +// Copyright (C) 2017-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -24,7 +24,7 @@ const uint8_t FF = 0xff; namespace { std::vector all_perms(uint8_t sz) { std::vector res{}; - epu8 x = HPCombi::epu8id; + epu8 x = HPCombi::Epu8.id(); res.push_back(x); auto &refx = HPCombi::as_array(x); #ifndef __clang__ From 1336a65540c583a0c99bdd941ebc50cc1867636c Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 21:25:37 +0000 Subject: [PATCH 091/113] Fix inclusion guard for epu8.hpp --- include/hpcombi/epu8.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp index 18a45f44..5131b622 100644 --- a/include/hpcombi/epu8.hpp +++ b/include/hpcombi/epu8.hpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2016-2018 Florent Hivert , // +// Copyright (C) 2016-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -13,8 +13,8 @@ // http://www.gnu.org/licenses/ // //////////////////////////////////////////////////////////////////////////////// -#ifndef HPCOMBI_EPU_HPP_INCLUDED -#define HPCOMBI_EPU_HPP_INCLUDED +#ifndef HPCOMBI_EPU8_HPP_INCLUDED +#define HPCOMBI_EPU8_HPP_INCLUDED #include // for array #include // for size_t @@ -668,4 +668,4 @@ inline std::string to_string(HPCombi::epu8 const &a); #include "epu8_impl.hpp" -#endif // HPCOMBI_EPU_HPP_INCLUDED +#endif // HPCOMBI_EPU8_HPP_INCLUDED From cbe29f33d413225de094ab2ed8468b9fcb8490db Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 21:55:48 +0000 Subject: [PATCH 092/113] epu8 conversion is now fully generic --- benchmark/bench_epu8.cpp | 2 +- include/hpcombi/builder.hpp | 52 +++++++++++++++++++++++++++++++++ include/hpcombi/epu8.hpp | 48 ------------------------------ include/hpcombi/epu8_impl.hpp | 4 +-- include/hpcombi/perm16_impl.hpp | 2 +- tests/test_epu8.cpp | 6 ++-- 6 files changed, 59 insertions(+), 55 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 74abb2ab..2cd8c687 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -99,7 +99,7 @@ inline epu8 std_sort(epu8 &p) { inline epu8 arr_sort(epu8 &p) { auto &ar = as_array(p); - return from_array(sorted_vect(ar)); + return Epu8(sorted_vect(ar)); } inline epu8 gen_sort(epu8 p) { diff --git a/include/hpcombi/builder.hpp b/include/hpcombi/builder.hpp index 283b2785..a8c3ecd2 100644 --- a/include/hpcombi/builder.hpp +++ b/include/hpcombi/builder.hpp @@ -16,6 +16,14 @@ #ifndef HPCOMBI_BUILDER_HPP_INCLUDED #define HPCOMBI_BUILDER_HPP_INCLUDED +#include // for array +#include // for size_t +#include // for initializer_list +#include // for remove_reference_t +#include // for make_index_sequence, ind... + +#include "vect_generic.hpp" // for VectGeneric + namespace HPCombi { /** Class for factory object associated to a SIMD packed unsigned integers. @@ -77,6 +85,14 @@ template struct TPUBuild { return operator()(type_elem(c)); } + /// explicit overloading for #array + // Passing the argument by reference used to trigger a segfault in gcc + // Since vector types doesn't belongs to the standard, I didn't manage + // to know if I'm using undefined behavior here. 
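    // (Illustrative usage, not a line of this hunk — with this overload a
    //  vector/array round trip, as exercised in the tests further down, reads:
    //      auto arr = HPCombi::as_array(x);       // std::array<uint8_t, 16>
    //      HPCombi::epu8 y = HPCombi::Epu8(arr);  // replaces from_array(arr)
    //  where x, y and arr are illustrative names.)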
+ inline constexpr TPU operator()(array a) const { + return reinterpret_cast(a); + } + /// Return the identity element of type \c TPU constexpr TPU id() const { return operator()([](type_elem i) { return i; }); } /// Return the reversed element of type \c TPU @@ -110,6 +126,42 @@ template struct TPUBuild { } }; +/** Cast a TPU to a c++ \c std::array + * + * This is usually faster for algorithm using a lot of indexed access. + */ +template +inline typename TPUBuild::array &as_array(TPU &v) noexcept { + return reinterpret_cast::array &>(v); +} +/** Cast a constant TPU to a constant c++ \c std::array + * + * This is usually faster for algorithm using a lot of indexed access. + */ +template +inline const typename TPUBuild::array &as_array(const TPU &v) noexcept { + return reinterpret_cast::array &>(v); +} + +/** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric + * + * This is usually faster for algorithm using a lot of indexed access. + */ +template +inline VectGeneric::size> &as_VectGeneric(TPU &v) { + return reinterpret_cast::size> &>(as_array(v)); +} + +/** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric + * + * This is usually faster for algorithm using a lot of indexed access. + */ +template +inline const VectGeneric::size> &as_VectGeneric(const TPU &v) { + return reinterpret_cast::size> &>( + as_array(v)); +} + } // namespace HPCombi #endif // HPCOMBI_BUILDER_HPP_INCLUDED diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp index 5131b622..b2eadca0 100644 --- a/include/hpcombi/epu8.hpp +++ b/include/hpcombi/epu8.hpp @@ -19,11 +19,8 @@ #include // for array #include // for size_t #include // for uint8_t, uint64_t, int8_t -#include // for initializer_list #include // for ostream #include // for string -#include // for remove_reference_t -#include // for make_index_sequence, ind... #include "debug.hpp" // for HPCOMBI_ASSERT #include "builder.hpp" // for TPUBuild @@ -46,10 +43,6 @@ using epu8 = uint8_t __attribute__((vector_size(16))); static_assert(alignof(epu8) == 16, "epu8 type is not properly aligned by the compiler !"); -/// SIMD vector of 32 unsigned bytes -/// Currently not really used except in experiments -using xpu8 = uint8_t __attribute__((vector_size(32))); - /** Factory object acting as a class constructor for type #HPCombi::epu8. * see #HPCombi::TPUBuild for usage and capability @@ -57,47 +50,6 @@ using xpu8 = uint8_t __attribute__((vector_size(32))); constexpr TPUBuild Epu8 {}; -/** Cast a #HPCombi::epu8 to a c++ \c std::array - * - * This is usually faster for algorithm using a lot of indexed access. - */ -inline decltype(Epu8)::array &as_array(epu8 &v) noexcept { - return reinterpret_cast(v); -} -/** Cast a constant #HPCombi::epu8 to a C++ \c std::array - * - * This is usually faster for algorithm using a lot of indexed access. - */ -inline const decltype(Epu8)::array &as_array(const epu8 &v) noexcept { - return reinterpret_cast(v); -} -/** Cast a C++ \c std::array to a #HPCombi::epu8 */ -// Passing the argument by reference triggers a segfault in gcc -// Since vector types doesn't belongs to the standard, I didn't manage -// to know if I'm using undefined behavior here. -inline epu8 from_array(decltype(Epu8)::array a) noexcept { - return reinterpret_cast(a); -} - -/** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric - * - * This is usually faster for algorithm using a lot of indexed access. 
- */ -inline VectGeneric<16> &as_VectGeneric(epu8 &v) { - return reinterpret_cast &>(as_array(v)); -} - -/** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric - * - * This is usually faster for algorithm using a lot of indexed access. - */ -inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) { - return reinterpret_cast &>(as_array(v)); -} - -// TODO up to this point in this file, everything could be generic to support -// larger perms, such as Perm32 in the experiments dir. - /** Test whether all the entries of a #HPCombi::epu8 are zero */ inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); } /** Test whether all the entries of a #HPCombi::epu8 are one */ diff --git a/include/hpcombi/epu8_impl.hpp b/include/hpcombi/epu8_impl.hpp index eeb5da64..bd0d0731 100644 --- a/include/hpcombi/epu8_impl.hpp +++ b/include/hpcombi/epu8_impl.hpp @@ -447,10 +447,10 @@ inline epu8 eval16_arr(epu8 v8) noexcept { for (size_t i = 0; i < 16; i++) if (v[i] < 16) res[v[i]]++; - return from_array(res); + return Epu8(res); } inline epu8 eval16_gen(epu8 v) noexcept { - return from_array(as_VectGeneric(v).eval().v); + return Epu8(as_VectGeneric(v).eval().v); } inline epu8 eval16_cycle(epu8 v) noexcept { epu8 res = -(Epu8.id() == v); diff --git a/include/hpcombi/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp index 368425ee..3f0d7373 100644 --- a/include/hpcombi/perm16_impl.hpp +++ b/include/hpcombi/perm16_impl.hpp @@ -278,7 +278,7 @@ inline epu8 Perm16::lehmer_arr() const { for (size_t j = i + 1; j < 16; j++) if (ar[i] > ar[j]) res[i]++; - return from_array(res); + return Epu8(res); } inline epu8 Perm16::lehmer() const { diff --git a/tests/test_epu8.cpp b/tests/test_epu8.cpp index ac689cc7..f85870fd 100644 --- a/tests/test_epu8.cpp +++ b/tests/test_epu8.cpp @@ -238,11 +238,11 @@ TEST_CASE_METHOD(Fix, "Epu8::as_array", "[Epu8][016]") { CHECK(av == as_array(Pv)); } -TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][017]") { +TEST_CASE_METHOD(Fix, "Epu8(array)", "[Epu8][017]") { for (auto x : v) { - CHECK_THAT(x, Equals(from_array(as_array(x)))); + CHECK_THAT(x, Equals(Epu8(as_array(x)))); } - CHECK_THAT(Pv, Equals(from_array(av))); + CHECK_THAT(Pv, Equals(Epu8(av))); } TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { From 815acef91b4d6c1305176a3e13a5d7e4150af1e1 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Wed, 8 Nov 2023 15:41:28 +0000 Subject: [PATCH 093/113] Check for required compiler flags and builtins in topmost CMakLists.txt --- CMakeLists.txt | 42 ++++++++++++++++++++++++++++++++---------- list_builtin.txt | 9 +++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) create mode 100644 list_builtin.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 043cc4a1..4c5b0469 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,20 +48,42 @@ include(CheckCXXSymbolExists) include(CheckIncludeFileCXX) include(CheckCXXSourceCompiles) include(CheckCXXCompilerFlag) +include(CheckCXXSourceRuns) -check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_FLAG_ARCH_NATIVE) -check_cxx_compiler_flag('-mtune=native' HPCOMBI_HAVE_FLAG_TUNE_NATIVE) -# TODO check for -funroll-loops + -flax-vector-constexpr -# TODO only check for and set the flags required to make HPCombi work +check_cxx_compiler_flag('-funroll-loops' HPCOMBI_HAVE_REQUIRED_COMPILER_FLAGS) +check_cxx_compiler_flag('-flax-vector-conversions' HPCOMBI_HAVE_REQUIRED_COMPILER_FLAGS) -add_compile_options(-funroll-loops -flax-vector-conversions) -if (HPCOMBI_HAVE_FLAG_ARCH_NATIVE) - add_compile_options(-march=native) -endif() -if (HPCOMBI_HAVE_FLAG_TUNE_NATIVE) - add_compile_options(-mtune=native) +if (NOT HPCOMBI_HAVE_REQUIRED_COMPILER_FLAGS) + message(FATAL_ERROR "HPCombi requires a C++ compiler accepting the flags: -funroll-loops + -flax-vector-conversions") endif() +add_compile_options(-funroll-loops -flax-vector-conversions) + +file(READ ${CMAKE_SOURCE_DIR}/list_builtin.txt hpcombi_compiler_builtins) +string(REPLACE ";" "|" hpcombi_compiler_builtins "${hpcombi_compiler_builtins}") +string(REPLACE "\n" ";" hpcombi_compiler_builtins "${hpcombi_compiler_builtins}") +foreach (builtin ${hpcombi_compiler_builtins}) + if ("${builtin}" MATCHES "^#" ) # Comment + continue() + endif() + string(REPLACE "|" ";" builtin "${builtin}") + list(GET builtin 0 builtin_name) + list(GET builtin 1 builtin_params) + list(GET builtin 2 builtin_return_value) + check_cxx_source_runs(" + #include + int main() { + std::cout << \" from list_builtin.txt: \" << ${builtin_params} << \";\" << ${builtin_return_value} << std::endl; + std::cout << \"Actual return value: \" << ${builtin_name}(${builtin_params}) << std::endl; + return ${builtin_name}(${builtin_params}) != ${builtin_return_value}; + } + " + "HPCOMBI_HAVE_${builtin_name}" + ) + if (NOT "${HPCOMBI_HAVE_${builtin_name}}") + message(FATAL_ERROR "HPCombi requires a C++ compiler supporting ${builtin_name}") + endif() +endforeach() ################### # Project Structure diff --git a/list_builtin.txt b/list_builtin.txt new file mode 100644 index 00000000..7bddfcaf --- /dev/null +++ b/list_builtin.txt @@ -0,0 +1,9 @@ +# List of intrisics used in HPCombi +# Format: ;; +# line starting with # are comments +__builtin_ffs;0;0 +__builtin_ffsll;0;0 +__builtin_clz;32;26 +__builtin_clzll;37;58 +__builtin_popcountl;3;2 +__builtin_popcountll;0x300000003;4 From 1dd240b34efa7540debe0eb565e8aa86b65447f1 Mon Sep 17 00:00:00 2001 From: "James D. 
Mitchell" Date: Wed, 8 Nov 2023 17:31:09 +0000 Subject: [PATCH 094/113] Fix spelling --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c5b0469..68963033 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ foreach (builtin ${hpcombi_compiler_builtins}) check_cxx_source_runs(" #include int main() { - std::cout << \" from list_builtin.txt: \" << ${builtin_params} << \";\" << ${builtin_return_value} << std::endl; + std::cout << \" from list_builtin.txt: \" << ${builtin_params} << \";\" << ${builtin_return_value} << std::endl; std::cout << \"Actual return value: \" << ${builtin_name}(${builtin_params}) << std::endl; return ${builtin_name}(${builtin_params}) != ${builtin_return_value}; } From 7cf15be1b61298f0e3821cecdb68e34bfc88ba61 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 8 Nov 2023 17:26:14 +0000 Subject: [PATCH 095/113] Fixed installation --- .VERSION | 1 - CMakeLists.txt | 33 ++++++++++++++++++--------------- .VERSION.in => VERSION.in | 0 doc/CMakeLists.txt | 4 ++-- 4 files changed, 20 insertions(+), 18 deletions(-) delete mode 100644 .VERSION rename .VERSION.in => VERSION.in (100%) diff --git a/.VERSION b/.VERSION deleted file mode 100644 index d169b2f2..00000000 --- a/.VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.8 diff --git a/CMakeLists.txt b/CMakeLists.txt index 68963033..95ffe601 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -105,34 +105,37 @@ if (BUILD_TESTING) add_subdirectory(benchmark) endif(BUILD_TESTING) -##################### -configure_file(${CMAKE_SOURCE_DIR}/.VERSION.in ${CMAKE_BINARY_DIR}/.VERSION) +################### +# pkgconfig stuff +configure_file(${CMAKE_SOURCE_DIR}/VERSION.in + ${CMAKE_BINARY_DIR}/VERSION) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hpcombi.pc.in + ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc @ONLY) #################### # Installation -set(install_misc README.md LICENSE list_intrin.txt) +set(install_misc README.md LICENSE) foreach(f ${install_misc}) install (FILES ${CMAKE_SOURCE_DIR}/${f} - DESTINATION share/${CMAKE_PROJECT_NAME}) + DESTINATION share/${CMAKE_PROJECT_NAME}) endforeach(f) -install (FILES ${CMAKE_CURRENT_BINARY_DIR}/.VERSION DESTINATION ".") - install ( - DIRECTORY ${CMAKE_SOURCE_DIR}/include/ - DESTINATION include/${CMAKE_PROJECT_NAME} - FILES_MATCHING PATTERN "*.hpp") + FILES ${CMAKE_CURRENT_BINARY_DIR}/VERSION + DESTINATION share/${CMAKE_PROJECT_NAME}) install ( - DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/ - DESTINATION third_party/${CMAKE_PROJECT_NAME} + DIRECTORY ${CMAKE_SOURCE_DIR}/include/hpcombi + DESTINATION include FILES_MATCHING PATTERN "*.hpp") +install ( + DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/simde + DESTINATION include/${CMAKE_PROJECT_NAME}) -################### -# pkgconfig stuff -# configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hpcombi.pc.in -# ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc @ONLY) +install ( + FILES ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc + DESTINATION pkg-config) ################# # Packing stuff diff --git a/.VERSION.in b/VERSION.in similarity index 100% rename from .VERSION.in rename to VERSION.in diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 8049284d..b48e8279 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -28,7 +28,7 @@ else() COMMAND ${CMAKE_COMMAND} -E echo "Done." 
) - install(DIRECTORY ${CMAKE_BINARY_DIR}/doc/html - DESTINATION doc/${CMAKE_PROJECT_NAME}) +# install(DIRECTORY ${CMAKE_BINARY_DIR}/doc/html +# DESTINATION doc/${CMAKE_PROJECT_NAME}) endif() From f40d0b736a087b5757fef42f3451706cd3cc379e Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 8 Nov 2023 17:41:24 +0000 Subject: [PATCH 096/113] Merge --- CMakeLists.txt | 7 + CodeCoverage.cmake | 746 +++++++++++++++++++++++++++++++++++++++++++ clang-format | Bin 0 -> 3144809 bytes tests/CMakeLists.txt | 3 +- 4 files changed, 755 insertions(+), 1 deletion(-) create mode 100644 CodeCoverage.cmake create mode 100755 clang-format diff --git a/CMakeLists.txt b/CMakeLists.txt index 95ffe601..dedeade5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,13 @@ if (NOT HPCOMBI_HAVE_REQUIRED_COMPILER_FLAGS) message(FATAL_ERROR "HPCombi requires a C++ compiler accepting the flags: -funroll-loops + -flax-vector-conversions") endif() +# Code coverage stuff +include(CheckCCompilerFlag) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR}) +include(CodeCoverage) +# Code coverage stuff ends + + add_compile_options(-funroll-loops -flax-vector-conversions) file(READ ${CMAKE_SOURCE_DIR}/list_builtin.txt hpcombi_compiler_builtins) diff --git a/CodeCoverage.cmake b/CodeCoverage.cmake new file mode 100644 index 00000000..097ae77e --- /dev/null +++ b/CodeCoverage.cmake @@ -0,0 +1,746 @@ +# Copyright (c) 2012 - 2017, Lars Bilke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CHANGES: +# +# 2012-01-31, Lars Bilke +# - Enable Code Coverage +# +# 2013-09-17, Joakim Söderberg +# - Added support for Clang. +# - Some additional usage instructions. 
+# +# 2016-02-03, Lars Bilke +# - Refactored functions to use named parameters +# +# 2017-06-02, Lars Bilke +# - Merged with modified version from github.com/ufz/ogs +# +# 2019-05-06, Anatolii Kurotych +# - Remove unnecessary --coverage flag +# +# 2019-12-13, FeRD (Frank Dana) +# - Deprecate COVERAGE_LCOVR_EXCLUDES and COVERAGE_GCOVR_EXCLUDES lists in favor +# of tool-agnostic COVERAGE_EXCLUDES variable, or EXCLUDE setup arguments. +# - CMake 3.4+: All excludes can be specified relative to BASE_DIRECTORY +# - All setup functions: accept BASE_DIRECTORY, EXCLUDE list +# - Set lcov basedir with -b argument +# - Add automatic --demangle-cpp in lcovr, if 'c++filt' is available (can be +# overridden with NO_DEMANGLE option in setup_target_for_coverage_lcovr().) +# - Delete output dir, .info file on 'make clean' +# - Remove Python detection, since version mismatches will break gcovr +# - Minor cleanup (lowercase function names, update examples...) +# +# 2019-12-19, FeRD (Frank Dana) +# - Rename Lcov outputs, make filtered file canonical, fix cleanup for targets +# +# 2020-01-19, Bob Apthorpe +# - Added gfortran support +# +# 2020-02-17, FeRD (Frank Dana) +# - Make all add_custom_target()s VERBATIM to auto-escape wildcard characters +# in EXCLUDEs, and remove manual escaping from gcovr targets +# +# 2021-01-19, Robin Mueller +# - Add CODE_COVERAGE_VERBOSE option which will allow to print out commands which are run +# - Added the option for users to set the GCOVR_ADDITIONAL_ARGS variable to supply additional +# flags to the gcovr command +# +# 2020-05-04, Mihchael Davis +# - Add -fprofile-abs-path to make gcno files contain absolute paths +# - Fix BASE_DIRECTORY not working when defined +# - Change BYPRODUCT from folder to index.html to stop ninja from complaining about double defines +# +# 2021-05-10, Martin Stump +# - Check if the generator is multi-config before warning about non-Debug builds +# +# 2022-02-22, Marko Wehle +# - Change gcovr output from -o for --xml and --html output respectively. +# This will allow for Multiple Output Formats at the same time by making use of GCOVR_ADDITIONAL_ARGS, e.g. GCOVR_ADDITIONAL_ARGS "--txt". +# +# 2022-09-28, Sebastian Mueller +# - fix append_coverage_compiler_flags_to_target to correctly add flags +# - replace "-fprofile-arcs -ftest-coverage" with "--coverage" (equivalent) +# +# USAGE: +# +# 1. Copy this file into your cmake modules path. +# +# 2. Add the following line to your CMakeLists.txt (best inside an if-condition +# using a CMake option() to enable it just optionally): +# include(CodeCoverage) +# +# 3. Append necessary compiler flags for all supported source files: +# append_coverage_compiler_flags() +# Or for specific target: +# append_coverage_compiler_flags_to_target(YOUR_TARGET_NAME) +# +# 3.a (OPTIONAL) Set appropriate optimization flags, e.g. -O0, -O1 or -Og +# +# 4. If you need to exclude additional directories from the report, specify them +# using full paths in the COVERAGE_EXCLUDES variable before calling +# setup_target_for_coverage_*(). +# Example: +# set(COVERAGE_EXCLUDES +# '${PROJECT_SOURCE_DIR}/src/dir1/*' +# '/path/to/my/src/dir2/*') +# Or, use the EXCLUDE argument to setup_target_for_coverage_*(). 
+# Example: +# setup_target_for_coverage_lcov( +# NAME coverage +# EXECUTABLE testrunner +# EXCLUDE "${PROJECT_SOURCE_DIR}/src/dir1/*" "/path/to/my/src/dir2/*") +# +# 4.a NOTE: With CMake 3.4+, COVERAGE_EXCLUDES or EXCLUDE can also be set +# relative to the BASE_DIRECTORY (default: PROJECT_SOURCE_DIR) +# Example: +# set(COVERAGE_EXCLUDES "dir1/*") +# setup_target_for_coverage_gcovr_html( +# NAME coverage +# EXECUTABLE testrunner +# BASE_DIRECTORY "${PROJECT_SOURCE_DIR}/src" +# EXCLUDE "dir2/*") +# +# 5. Use the functions described below to create a custom make target which +# runs your test executable and produces a code coverage report. +# +# 6. Build a Debug build: +# cmake -DCMAKE_BUILD_TYPE=Debug .. +# make +# make my_coverage_target +# + +include(CMakeParseArguments) + +option(CODE_COVERAGE_VERBOSE "Verbose information" FALSE) + +# Check prereqs +find_program( GCOV_PATH gcov ) +find_program( LCOV_PATH NAMES lcov lcov.bat lcov.exe lcov.perl) +find_program( FASTCOV_PATH NAMES fastcov fastcov.py ) +find_program( GENHTML_PATH NAMES genhtml genhtml.perl genhtml.bat ) +find_program( GCOVR_PATH gcovr PATHS ${CMAKE_SOURCE_DIR}/scripts/test) +find_program( CPPFILT_PATH NAMES c++filt ) + +if(NOT GCOV_PATH) + message(FATAL_ERROR "gcov not found! Aborting...") +endif() # NOT GCOV_PATH + +# Check supported compiler (Clang, GNU and Flang) +get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(LANG ${LANGUAGES}) + if("${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang") + if("${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS 3) + message(FATAL_ERROR "Clang version must be 3.0.0 or greater! Aborting...") + endif() + elseif(NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "GNU" + AND NOT "${CMAKE_${LANG}_COMPILER_ID}" MATCHES "(LLVM)?[Ff]lang") + message(FATAL_ERROR "Compiler is not GNU or Flang! Aborting...") + endif() +endforeach() + +set(COVERAGE_COMPILER_FLAGS "-g --coverage" + CACHE INTERNAL "") +if(CMAKE_CXX_COMPILER_ID MATCHES "(GNU|Clang)") + include(CheckCXXCompilerFlag) + check_cxx_compiler_flag(-fprofile-abs-path HAVE_cxx_fprofile_abs_path) + if(HAVE_cxx_fprofile_abs_path) + set(COVERAGE_CXX_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path") + endif() + check_c_compiler_flag(-fprofile-abs-path HAVE_c_fprofile_abs_path) + if(HAVE_c_fprofile_abs_path) + set(COVERAGE_C_COMPILER_FLAGS "${COVERAGE_COMPILER_FLAGS} -fprofile-abs-path") + endif() +endif() + +set(CMAKE_Fortran_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the Fortran compiler during coverage builds." + FORCE ) +set(CMAKE_CXX_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C++ compiler during coverage builds." + FORCE ) +set(CMAKE_C_FLAGS_COVERAGE + ${COVERAGE_COMPILER_FLAGS} + CACHE STRING "Flags used by the C compiler during coverage builds." + FORCE ) +set(CMAKE_EXE_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used for linking binaries during coverage builds." + FORCE ) +set(CMAKE_SHARED_LINKER_FLAGS_COVERAGE + "" + CACHE STRING "Flags used by the shared libraries linker during coverage builds." 
+ FORCE ) +mark_as_advanced( + CMAKE_Fortran_FLAGS_COVERAGE + CMAKE_CXX_FLAGS_COVERAGE + CMAKE_C_FLAGS_COVERAGE + CMAKE_EXE_LINKER_FLAGS_COVERAGE + CMAKE_SHARED_LINKER_FLAGS_COVERAGE ) + +get_property(GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) +if(NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG)) + message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading") +endif() # NOT (CMAKE_BUILD_TYPE STREQUAL "Debug" OR GENERATOR_IS_MULTI_CONFIG) + +if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + link_libraries(gcov) +endif() + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_lcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# ) +function(setup_target_for_coverage_lcov) + + set(options NO_DEMANGLE SONARQUBE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES LCOV_ARGS GENHTML_ARGS) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT LCOV_PATH) + message(FATAL_ERROR "lcov not found! Aborting...") + endif() # NOT LCOV_PATH + + if(NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! Aborting...") + endif() # NOT GENHTML_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(LCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_LCOV_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND LCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES LCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Setting up commands which will be run to generate coverage data. + # Cleanup lcov + set(LCOV_CLEAN_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -directory . + -b ${BASEDIR} --zerocounters + ) + # Create baseline to make sure untouched files show up in the report + set(LCOV_BASELINE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -c -i -d . -b + ${BASEDIR} -o ${Coverage_NAME}.base + ) + # Run tests + set(LCOV_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Capturing lcov counters and generating report + set(LCOV_CAPTURE_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --directory . 
-b + ${BASEDIR} --capture --output-file ${Coverage_NAME}.capture + ) + # add baseline counters + set(LCOV_BASELINE_COUNT_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} -a ${Coverage_NAME}.base + -a ${Coverage_NAME}.capture --output-file ${Coverage_NAME}.total + ) + # filter collected data to final coverage report + set(LCOV_FILTER_CMD + ${LCOV_PATH} ${Coverage_LCOV_ARGS} --gcov-tool ${GCOV_PATH} --remove + ${Coverage_NAME}.total ${LCOV_EXCLUDES} --output-file ${Coverage_NAME}.info + ) + # Generate HTML output + set(LCOV_GEN_HTML_CMD + ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} -o + ${Coverage_NAME} ${Coverage_NAME}.info + ) + if(${Coverage_SONARQUBE}) + # Generate SonarQube output + set(GCOVR_XML_CMD + ${GCOVR_PATH} --sonarqube ${Coverage_NAME}_sonarqube.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + set(GCOVR_XML_CMD_COMMAND + COMMAND ${GCOVR_XML_CMD} + ) + set(GCOVR_XML_CMD_BYPRODUCTS ${Coverage_NAME}_sonarqube.xml) + set(GCOVR_XML_CMD_COMMENT COMMENT "SonarQube code coverage info report saved in ${Coverage_NAME}_sonarqube.xml.") + endif() + + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + message(STATUS "Command to clean up lcov: ") + string(REPLACE ";" " " LCOV_CLEAN_CMD_SPACED "${LCOV_CLEAN_CMD}") + message(STATUS "${LCOV_CLEAN_CMD_SPACED}") + + message(STATUS "Command to create baseline: ") + string(REPLACE ";" " " LCOV_BASELINE_CMD_SPACED "${LCOV_BASELINE_CMD}") + message(STATUS "${LCOV_BASELINE_CMD_SPACED}") + + message(STATUS "Command to run the tests: ") + string(REPLACE ";" " " LCOV_EXEC_TESTS_CMD_SPACED "${LCOV_EXEC_TESTS_CMD}") + message(STATUS "${LCOV_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to capture counters and generate report: ") + string(REPLACE ";" " " LCOV_CAPTURE_CMD_SPACED "${LCOV_CAPTURE_CMD}") + message(STATUS "${LCOV_CAPTURE_CMD_SPACED}") + + message(STATUS "Command to add baseline counters: ") + string(REPLACE ";" " " LCOV_BASELINE_COUNT_CMD_SPACED "${LCOV_BASELINE_COUNT_CMD}") + message(STATUS "${LCOV_BASELINE_COUNT_CMD_SPACED}") + + message(STATUS "Command to filter collected data: ") + string(REPLACE ";" " " LCOV_FILTER_CMD_SPACED "${LCOV_FILTER_CMD}") + message(STATUS "${LCOV_FILTER_CMD_SPACED}") + + message(STATUS "Command to generate lcov HTML output: ") + string(REPLACE ";" " " LCOV_GEN_HTML_CMD_SPACED "${LCOV_GEN_HTML_CMD}") + message(STATUS "${LCOV_GEN_HTML_CMD_SPACED}") + + if(${Coverage_SONARQUBE}) + message(STATUS "Command to generate SonarQube XML output: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + COMMAND ${LCOV_CLEAN_CMD} + COMMAND ${LCOV_BASELINE_CMD} + COMMAND ${LCOV_EXEC_TESTS_CMD} + COMMAND ${LCOV_CAPTURE_CMD} + COMMAND ${LCOV_BASELINE_COUNT_CMD} + COMMAND ${LCOV_FILTER_CMD} + COMMAND ${LCOV_GEN_HTML_CMD} + ${GCOVR_XML_CMD_COMMAND} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.base + ${Coverage_NAME}.capture + ${Coverage_NAME}.total + ${Coverage_NAME}.info + ${GCOVR_XML_CMD_BYPRODUCTS} + ${Coverage_NAME}/index.html + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero.\nProcessing code coverage counters and generating report." 
+ ) + + # Show where to find the lcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Lcov code coverage info report saved in ${Coverage_NAME}.info." + ${GCOVR_XML_CMD_COMMENT} + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_lcov + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_xml( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_xml) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_XML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Running gcovr + set(GCOVR_XML_CMD + ${GCOVR_PATH} --xml ${Coverage_NAME}.xml -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_XML_EXEC_TESTS_CMD_SPACED "${GCOVR_XML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_XML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to generate gcovr XML coverage data: ") + string(REPLACE ";" " " GCOVR_XML_CMD_SPACED "${GCOVR_XML_CMD}") + message(STATUS "${GCOVR_XML_CMD_SPACED}") + endif() + + add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_XML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_XML_CMD} + + BYPRODUCTS ${Coverage_NAME}.xml + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM 
# Protect arguments to commands + COMMENT "Running gcovr to produce Cobertura code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Cobertura code coverage report saved in ${Coverage_NAME}.xml." + ) +endfunction() # setup_target_for_coverage_gcovr_xml + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_gcovr_html( +# NAME ctest_coverage # New target name +# EXECUTABLE ctest -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES executable_target # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/*" "src/dir2/*" # Patterns to exclude (can be relative +# # to BASE_DIRECTORY, with CMake 3.4+) +# ) +# The user can set the variable GCOVR_ADDITIONAL_ARGS to supply additional flags to the +# GCVOR command. +function(setup_target_for_coverage_gcovr_html) + + set(options NONE) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT GCOVR_PATH) + message(FATAL_ERROR "gcovr not found! Aborting...") + endif() # NOT GCOVR_PATH + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(DEFINED Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (CMake 3.4+: Also compute absolute paths) + set(GCOVR_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_GCOVR_EXCLUDES}) + if(CMAKE_VERSION VERSION_GREATER 3.4) + get_filename_component(EXCLUDE ${EXCLUDE} ABSOLUTE BASE_DIR ${BASEDIR}) + endif() + list(APPEND GCOVR_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES GCOVR_EXCLUDES) + + # Combine excludes to several -e arguments + set(GCOVR_EXCLUDE_ARGS "") + foreach(EXCLUDE ${GCOVR_EXCLUDES}) + list(APPEND GCOVR_EXCLUDE_ARGS "-e") + list(APPEND GCOVR_EXCLUDE_ARGS "${EXCLUDE}") + endforeach() + + # Set up commands which will be run to generate coverage data + # Run tests + set(GCOVR_HTML_EXEC_TESTS_CMD + ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS} + ) + # Create folder + set(GCOVR_HTML_FOLDER_CMD + ${CMAKE_COMMAND} -E make_directory ${PROJECT_BINARY_DIR}/${Coverage_NAME} + ) + # Running gcovr + set(GCOVR_HTML_CMD + ${GCOVR_PATH} --html ${Coverage_NAME}/index.html --html-details -r ${BASEDIR} ${GCOVR_ADDITIONAL_ARGS} + ${GCOVR_EXCLUDE_ARGS} --object-directory=${PROJECT_BINARY_DIR} + ) + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Executed command report") + + message(STATUS "Command to run tests: ") + string(REPLACE ";" " " GCOVR_HTML_EXEC_TESTS_CMD_SPACED "${GCOVR_HTML_EXEC_TESTS_CMD}") + message(STATUS "${GCOVR_HTML_EXEC_TESTS_CMD_SPACED}") + + message(STATUS "Command to create a folder: ") + string(REPLACE ";" " " GCOVR_HTML_FOLDER_CMD_SPACED "${GCOVR_HTML_FOLDER_CMD}") + message(STATUS "${GCOVR_HTML_FOLDER_CMD_SPACED}") + + message(STATUS "Command to generate gcovr HTML coverage data: ") + string(REPLACE ";" " " GCOVR_HTML_CMD_SPACED "${GCOVR_HTML_CMD}") + message(STATUS "${GCOVR_HTML_CMD_SPACED}") + endif() + + 
add_custom_target(${Coverage_NAME} + COMMAND ${GCOVR_HTML_EXEC_TESTS_CMD} + COMMAND ${GCOVR_HTML_FOLDER_CMD} + COMMAND ${GCOVR_HTML_CMD} + + BYPRODUCTS ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html # report directory + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Running gcovr to produce HTML code coverage report." + ) + + # Show info where to find the report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ; + COMMENT "Open ./${Coverage_NAME}/index.html in your browser to view the coverage report." + ) + +endfunction() # setup_target_for_coverage_gcovr_html + +# Defines a target for running and collection code coverage information +# Builds dependencies, runs the given executable and outputs reports. +# NOTE! The executable should always have a ZERO as exit code otherwise +# the coverage generation will not complete. +# +# setup_target_for_coverage_fastcov( +# NAME testrunner_coverage # New target name +# EXECUTABLE testrunner -j ${PROCESSOR_COUNT} # Executable in PROJECT_BINARY_DIR +# DEPENDENCIES testrunner # Dependencies to build first +# BASE_DIRECTORY "../" # Base directory for report +# # (defaults to PROJECT_SOURCE_DIR) +# EXCLUDE "src/dir1/" "src/dir2/" # Patterns to exclude. +# NO_DEMANGLE # Don't demangle C++ symbols +# # even if c++filt is found +# SKIP_HTML # Don't create html report +# POST_CMD perl -i -pe s!${PROJECT_SOURCE_DIR}/!!g ctest_coverage.json # E.g. for stripping source dir from file paths +# ) +function(setup_target_for_coverage_fastcov) + + set(options NO_DEMANGLE SKIP_HTML) + set(oneValueArgs BASE_DIRECTORY NAME) + set(multiValueArgs EXCLUDE EXECUTABLE EXECUTABLE_ARGS DEPENDENCIES FASTCOV_ARGS GENHTML_ARGS POST_CMD) + cmake_parse_arguments(Coverage "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(NOT FASTCOV_PATH) + message(FATAL_ERROR "fastcov not found! Aborting...") + endif() + + if(NOT Coverage_SKIP_HTML AND NOT GENHTML_PATH) + message(FATAL_ERROR "genhtml not found! 
Aborting...") + endif() + + # Set base directory (as absolute path), or default to PROJECT_SOURCE_DIR + if(Coverage_BASE_DIRECTORY) + get_filename_component(BASEDIR ${Coverage_BASE_DIRECTORY} ABSOLUTE) + else() + set(BASEDIR ${PROJECT_SOURCE_DIR}) + endif() + + # Collect excludes (Patterns, not paths, for fastcov) + set(FASTCOV_EXCLUDES "") + foreach(EXCLUDE ${Coverage_EXCLUDE} ${COVERAGE_EXCLUDES} ${COVERAGE_FASTCOV_EXCLUDES}) + list(APPEND FASTCOV_EXCLUDES "${EXCLUDE}") + endforeach() + list(REMOVE_DUPLICATES FASTCOV_EXCLUDES) + + # Conditional arguments + if(CPPFILT_PATH AND NOT ${Coverage_NO_DEMANGLE}) + set(GENHTML_EXTRA_ARGS "--demangle-cpp") + endif() + + # Set up commands which will be run to generate coverage data + set(FASTCOV_EXEC_TESTS_CMD ${Coverage_EXECUTABLE} ${Coverage_EXECUTABLE_ARGS}) + + set(FASTCOV_CAPTURE_CMD ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --process-gcno + --output ${Coverage_NAME}.json + --exclude ${FASTCOV_EXCLUDES} + ) + + set(FASTCOV_CONVERT_CMD ${FASTCOV_PATH} + -C ${Coverage_NAME}.json --lcov --output ${Coverage_NAME}.info + ) + + if(Coverage_SKIP_HTML) + set(FASTCOV_HTML_CMD ";") + else() + set(FASTCOV_HTML_CMD ${GENHTML_PATH} ${GENHTML_EXTRA_ARGS} ${Coverage_GENHTML_ARGS} + -o ${Coverage_NAME} ${Coverage_NAME}.info + ) + endif() + + set(FASTCOV_POST_CMD ";") + if(Coverage_POST_CMD) + set(FASTCOV_POST_CMD ${Coverage_POST_CMD}) + endif() + + if(CODE_COVERAGE_VERBOSE) + message(STATUS "Code coverage commands for target ${Coverage_NAME} (fastcov):") + + message(" Running tests:") + string(REPLACE ";" " " FASTCOV_EXEC_TESTS_CMD_SPACED "${FASTCOV_EXEC_TESTS_CMD}") + message(" ${FASTCOV_EXEC_TESTS_CMD_SPACED}") + + message(" Capturing fastcov counters and generating report:") + string(REPLACE ";" " " FASTCOV_CAPTURE_CMD_SPACED "${FASTCOV_CAPTURE_CMD}") + message(" ${FASTCOV_CAPTURE_CMD_SPACED}") + + message(" Converting fastcov .json to lcov .info:") + string(REPLACE ";" " " FASTCOV_CONVERT_CMD_SPACED "${FASTCOV_CONVERT_CMD}") + message(" ${FASTCOV_CONVERT_CMD_SPACED}") + + if(NOT Coverage_SKIP_HTML) + message(" Generating HTML report: ") + string(REPLACE ";" " " FASTCOV_HTML_CMD_SPACED "${FASTCOV_HTML_CMD}") + message(" ${FASTCOV_HTML_CMD_SPACED}") + endif() + if(Coverage_POST_CMD) + message(" Running post command: ") + string(REPLACE ";" " " FASTCOV_POST_CMD_SPACED "${FASTCOV_POST_CMD}") + message(" ${FASTCOV_POST_CMD_SPACED}") + endif() + endif() + + # Setup target + add_custom_target(${Coverage_NAME} + + # Cleanup fastcov + COMMAND ${FASTCOV_PATH} ${Coverage_FASTCOV_ARGS} --gcov ${GCOV_PATH} + --search-directory ${BASEDIR} + --zerocounters + + COMMAND ${FASTCOV_EXEC_TESTS_CMD} + COMMAND ${FASTCOV_CAPTURE_CMD} + COMMAND ${FASTCOV_CONVERT_CMD} + COMMAND ${FASTCOV_HTML_CMD} + COMMAND ${FASTCOV_POST_CMD} + + # Set output files as GENERATED (will be removed on 'make clean') + BYPRODUCTS + ${Coverage_NAME}.info + ${Coverage_NAME}.json + ${Coverage_NAME}/index.html # report directory + + WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + DEPENDS ${Coverage_DEPENDENCIES} + VERBATIM # Protect arguments to commands + COMMENT "Resetting code coverage counters to zero. Processing code coverage counters and generating report." 
+ ) + + set(INFO_MSG "fastcov code coverage info report saved in ${Coverage_NAME}.info and ${Coverage_NAME}.json.") + if(NOT Coverage_SKIP_HTML) + string(APPEND INFO_MSG " Open ${PROJECT_BINARY_DIR}/${Coverage_NAME}/index.html in your browser to view the coverage report.") + endif() + # Show where to find the fastcov info report + add_custom_command(TARGET ${Coverage_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E echo ${INFO_MSG} + ) + +endfunction() # setup_target_for_coverage_fastcov + +function(append_coverage_compiler_flags) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COVERAGE_COMPILER_FLAGS}" PARENT_SCOPE) + message(STATUS "Appending code coverage compiler flags: ${COVERAGE_COMPILER_FLAGS}") +endfunction() # append_coverage_compiler_flags + +# Setup coverage for specific library +function(append_coverage_compiler_flags_to_target name) + separate_arguments(_flag_list NATIVE_COMMAND "${COVERAGE_COMPILER_FLAGS}") + target_compile_options(${name} PRIVATE ${_flag_list}) + if(CMAKE_C_COMPILER_ID STREQUAL "GNU" OR CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + target_link_libraries(${name} PRIVATE gcov) + endif() +endfunction() diff --git a/clang-format b/clang-format new file mode 100755 index 0000000000000000000000000000000000000000..2955c447f585bf96b86c1da914a44250207608ab GIT binary patch literal 3144809 zcmeF4d3;sXwfOhBHwot^fIvnDO#%uDSQQzsg4`sifQoI5_G#4w(Dnu#9vD%id9P-Z=zqn zdEb%0dU&u2?^7Fy+{ePOUzwNU;yeAt-<&t&?mMlgvG}#x@RZFDxpzkci1}OOF2t|6 zcyeA*UU9+o({He)Z-^!IZP+;SXJIEWYB(*WzLuP}_aS{_eElHD26aF&du5o%VOa*Sqh&_a5g- zEPm_%X!UoAQ-JH4;I-S|Yo|`X=E})aZRu4i*W#ofNaa}3WVjOUP`U8 z9(Ve`zj3(wQ8WT}DFL0uPxy23{QKrzd{^nri|<%)*B#u;zk&P&k0-NNB|rJ;rH58V zKD_wJ8xHTilxJD|ERZy=^)ml9X{U{L59giPKd$l*YE%fEsvFGPw*REv@%-eSrRl@y zjn<9}_}sAIzOp;-z9|19@G6~YRh4=oqfWgfmtfMd-a{9SY^!owJ8?9_udUBOzGv3_ zdH3CU-$i3C8r`8@bUW(&l+*B=HDW|tOM~cV7MMtTzPs+7amStWivM#)ng8O8E)vqJ zp*$D5tlyuj#;1tXSogwb`G+W2?|*QysxLMlb^q5*fo=+PQ=ppy-4y7iKsN=tDbP)U zZVGf$pqm2S6zHZvHwC&W&`p7E3UpJTn*!Yw=%zq71-dEFO@VF-bW@<40^Jnora(6Z zx+&02fo=+PQ=ppy-4y7iKsN=tDbP)UZVGf$pqm2S6zHZvHwC&W&`p7E3UpJTn*!Yw z=%zq71-dEFO@VF-bW@<40^Jnora(6Zx+&02fo=+PQ=ppy-4ysz6gYDIuLCC+YJrml z$~f#+L4RMhT)#6vtmi3%V~fI`3YVd7^@TH)r=>!vB98CbSf?rqIYTI=ifg0W6Z9xw zIA?*sRcVvnq5gV(My?_C{Vo;uS6&&&RCv-BRKib9;cG&zHo_B8%qjR_y)lt-@d;}g|FxSOgO*L3C?Rf z!TGbV{|fw}<3d;9`dvn!`*CmbxeGqc=6x)m?&O)p zr+oUE7x?V!uXf5Sv(L9h^*PIJsQ)~BUYYf8wCek(BR}tL^H#Z9$N6vH=53Ix$Y&RL zPv`{iacy|F%U4OEFT86%GyrDR$;eot%bVOcL!(&PGx51Qsy|;|AG*Z1Pbc^$b%Jj! 
[GIT binary patch data for clang-format (literal 3144809 bytes) omitted]
zFYk!w<>8aPYWqvNJ0sGuQHfMGI^_(X$JukiIWI;X5_hn561_FxMDp#OE95*MrJJ}} ze=oR_KJ_?fiud40I$Rd|ASSiRc%DLCt!*{MC2f}*=N?VqJep9rzmwrEL^tdSwv!0Y z5a8IHL3`GNkCj@-DAgLwcV@WRQ_5Wq!~+Con+~&9J_zrEF9GkC3&49A`V9+jrZT|T z&*FJ?hC2&*U!a{I0p~Q}oXvl0W)QdTFgPdZaJ~_#Y`PeI*U;V&ZD-)KgJ+GdTQb}x z_^Xlkf@2LA9T`JDwFUoO{7}ninIE(AIGZOvU)E$N-QwvtN4Rao28xIa^y+d{iw}tm zU1bjGkDL`JW7PUPo#DO%`LT@tM0aWW8+$~XkE;W@wcIhNjhAzjdJpR7C}}Woj*`F- zUZ?ZvK<+)%5n1^c&OrEkeWI^F71^|$cEWw4TX=5xM3uZ3ySv7eTzLn!+);C~{bD$u z8jACez&ZO7G#nhvau=c}uLynrHD#cId?5}0yR(PzZ`jchO)Y79#X{71(^N$E0X^LMh%B@RwsQ_&GUagFW!b zNqlyRdFt~x*=uKB7_ra#Zw7L=pF(yr{~zReMxnAvau6D_=X)C0Mz~$Xc15-s`@eg? zs_*|kjofoFuWQ(Mx8$qjB@^!`W?o5*k28UEp8hCnNG4@if!j*FsO_!mm9`bv%e=7k z>hNpli9w4_Y+MZ|?`6B+Vt%}JP$AA6@BL_Bn4APZ=zhKIc!@>(%1p%UVxu4z?QlK`Fn_ultI63{hn~!i&zV zH=Z+(ad<3apgDexYSCjWhA)$GB=GDwvtsAI^)dbE3(v+?rCm?Gtkdp_%g&*l$Rq{Y zEkcIMJxj-k9kgSMTG$({@nYR&?##OSW+yR4j1=ro3IGeY3q6x1pD{GBi~e>1idliXf=y#{Yp2)vq3N$~z3 za3nUAp#vqK=d2EtJ(%Rx2Ky2GlIwyF)SrJdh+oO66+8^-z)OfF8uGne$FB)lJBD_w zv|;_(<9j@j8;^Kay>rAmZ{Lw2otLS@Hyv6Sws#kC^e_Ja`yunxBoTN4>@~==3;U- z1;+F3r^NmOeBkvG zd;ndOloh2X;cp+$oZJUwX()a_y_$V=S3i@wojnmftS1OQT~n1N$=Q+h1X<5J`a11b z%Gwyd-&l)l(TMHsQm&42$mRYz=C4*J4Os=91vZfvuL4I8eM!EbVJ~iwXROWmu08wg z1%daM1v&uT+@(&%vh{Jz5?=wsPqMoPRwI`b%6! z{CL?~jgM4mWgl`iYtOgI zUTDU8>zR?czd=`sE<(>Fi%w0EInAD)9`suy>&nHPA=8YkeTLsfoJrV@ENW*zz4%;` zuX+jN=^;Lpxy!GwOTVXo`^h^v!SnA=QJVU+-{ZWjQ?EAa_WXauoqK##)wTHdnMvSG z!Yhym31|qYBtflUc%-6Cf{y^&UIIRACD3Xoplzd|MwC3znh>=NU}@9d^3b-KSS(oS zHNE%#2+&6;)}mDVE(unh5Urwu35xUkuD$0ZC&L76Z}0E-M?NQW&VH`7_S$Q&*JkaP z42Etv3#pWaCs^ML{q$peK~jm!HT5&0CwF~(A_`=$moc`omSk+$K6M@$dDnscKy0JX z_s(junPZPE+Goaec)xkhV@&s9GmR~0wVCf*e`TIUlnYSqL-a>7Uy)lg6YH|%Oeg0_ zZJ!()+4=i!BO?kN9a`J`a%5pDbXA57VD|54PfITsMd?-cZ)pQwu`CDv@@@1wA~!hg zw!g%k9^mSW4aiEoe}v}^{GD963qM~KtOLe&O<(>aauf4}1Q@E?^N zH1;_~A@007I@Ma;m~PcJvcEm6$C48>mT}}AdCIHDvh^ABeB)v>7Rg=W!v-m5jid|w zp(4&0x%huB;VC@xmcy>|1!umnIbW>JX-(1Rz3cccd3mgb#i4$CD|Q8`FZ=2>lo21A z(2xg9k!#n^-JK$`QRSTIIZ(^BoC9s-+T*|L%O!X%{_DMat(@Q3-;g8L=DeArWrOkX z{Asi;@}J~U5IX+v2TdLEn=GTeKI2%bP|F3_ab*7%9J>9f9bal6XKOvaw-_JiKxcfq zZuI=;^z($PpAKx8Tae{)0vDNa(f$a=HBdj;{;|oXH5Wrey6vc3@pEXbe=ZWq*@hjX z+ML_P-)x`W%S_G)C&$OJ4wZTk9w9oT3uv=WWHxL|b<9=9>y#57KTu9m$5*|{70)kz zwbTBL9*ni0x|o>$!K%k5E?=#{m)2V|Q}nyPL%tdf+&sXx6Fr`D=SC-dW`@LSqGLmD z*D?Os^pEaleGL}=MVn4s_iyXjZz(77RMLj`CdqwbvJ)A6X8P)s)S}EI-|T?(HYhB>Wo%_+pl3)WR0LxRVnreRVn^6gIGJQD*F{XCg-*v zeWu2<*F#J~0x=1(Io-{j#O)7zLM6bs>AkTt+MiItEx%MRIy&U08;uURnsIkNQLJ^y zN$8N1wf;D{D;fRq`S|5=AJhKMNh-{=EG5g7W!H1N0;J*S`34KfaN;ULd$9akU ztZOr6WIndSzen%`%?bU7p{FM9Q_y$IiS7)22XeXOK_;gh?V#_Fc4GUi*ZvDe{y)n# zHmAQW{nkAG@yuZq<)pt{8$V_*YX*JJ4UB0gbi8xvMM0sX(TuMxI=(h%eE2av2Ob0m z0uKidS4Q#R2M^&%@OtJs4?Kt-e-Y>NtHFcM8qzS=8nR#D?_)i3ch5`VPZfp2#M5?8 zvO*7m2jPo*X`0Vy9v!85Wo0z9GUm1@%(o!h zIKKNb=7ZGN@WzkHoe#fz(dE54{w44JKTDUMe*s+u~yT5=h#iQMJ+8T&<8Pm=$32tL(w_b9yL%Txb zT9?9@>X2Ra_){eZZEABdIQepP{DRY18lOXXDJOf;7hUBX8eh!!zJ1Nmm$+#B zt|XQ6KcVwSZZQ1je?sT`Uo~`|{R!wik2%O%h^6yez>loWGtqfw6b4PFrE`m%f#gq) z=E87tZf#+l)p714$l|X0>OrUGWuFZ{mwnyILn^s3>XA<+_tTm37`Jhrl03$;XWK2O z<}uc^;>u$@hWQgGS)kv$@))P_zARBqy%IY7XYv@=z?&7cD6+ih*)>hMau%Pg<4pU% zkh56p*W?}%bQF@a7+c<2`!qR=7qVB~3Vy^dBsOR9*(W0#jXW1WXK@L0Ytr|CoccX7 zTXgl=8QxnIf;HRVNS6Pvu`iPG;=`oG6{yIFjg#Qw{9v;be43YF|XF^<17 z^in?BZj$!29oXFM;V$E6@3tpMo+tE;+@Z0FeQ-ld@79`M&=gy z${r-}4vQ`zI(OEjo;z!2;X0XfI&;Q9xehuOSoiL2;3SOQSFbz0H<)`h-uP=DAKdbp z;*jj?B6C#%k0#1HXNu9RPs0~S_3i(hMQSNSe?gIBdiSqhyqh~f$R{_IJu)snwut>C zro0-0jxY*u{Y}$^ob3#(bGZLZU|a=^MNbc7VQ)11upT4(u*fK}^9e;|72%~J*SxhX z!(8@8zZXH9%lUrFxW@F+!Hq)0d%aWsgLU`8($ruoW60tj6`MWQPBG_LVl6{G_(OEe 
zR>3D&Z@N8A$9;40LW74nKcvB19J*=6k1O811tzi{QowC5UWY=z(6X$TTE=u2z>%28w(0SV zSN7k3PJHQd^3sqCEV>T~pN!o<-)FCscqk{1dM72%03eswmm`cYFE$D*jv(WWQA0K~?=y zbU!V9@{b~Wrsd1~J^U}vg*?x5!xtXhyJqfB4Hnba!^~ZL#6#HIb3H(do*!13C zW^k^p2+?QrkM2h=$6Z=cy<8>rn%?{T4Dk_Y_I|!wba&z#AUYGy0`s!bl@(0hlr@O{ zMaOcJtWD?MsXXuuooSk^OARie&aJe!{5*f*3FO+}vfnKp?=M`-_>c3ux1O9YkmIH> z-ug!$E1iR_Q0r&vQ-i(=_1qk{F7E;}2Wj&)+SovznBI0SfM+@V|133FOaE8U|DR&| z^3hk`6*Y0;66W5UF#@;89xbi^%=gSVe$F_2s-@pJCYy2mET-)e)`Xt_a|RFRYIy!- zMw{xM?xsVB_BOA+xqJ6RqVMgaQRmwyq01-fKOSA41zr9--yFJphjIKX>GI=0s0~|I zNz9oFj*Zgg)4Z#Wfk!M|cKrFDqsxQT`y_N(M*l*W&obX$x`;!UyJ&Cu6o27D%4@nz zs%|;L`2Q_t{Qodr(&jAMI1^oNqW`Heef=xx^6Z$lW9hO=!}IiXIp%*rmmeRBPnY)) zzxM&(9J-wQFQ&^Iz@fu0m+`;Qr_J+^^w~=farosWz~i6cmltR_o|#|%l>UV-hn;~g zGigug@@dK&y7adcGXCb6@&7Y)`7PQw6J7TFNp1LgOke*>y1X~0?O3|}1O3bXbUJ>y z;=zBOU*16f@%ZJ7+*3QBZw_5P2_F8H{PLOK8(I{fsXs;kU&6B>1|Cs(Gr z%hi&@$%FpGiynmhO)C@0Z*2vSqn|PMRwoCA)x4c?w8xP%^Po>TTZWM{cOz$h#CX2P z?^Dj=M*h+CBl>fZ1rsRi=p>zUxabs`D7#Rf2_%0+YV%aqRoh586QpYU)26f4(K(bs z&rmA)dK_6&%BbjB*xg=8p9jo6P14@$$OY1djlRN07E85K=iSSgh2EZLOm2E>bH*K| zx8G5Bd9A+)NSE!iTEPyqVNLuU0LyFO3ZC6w!JFDg$??JrNBo(}12 zK5hJ#JcYgOAWsU6kmq{oa0h)|71LMw0PUOPIqc>4}K7Co$>AT ziJtI4UQcSD&s+ySa~;()^|_GS6R6!kY+Gyp-du%H%d#c=7BKKv)mUDbNut`kM;ZM&Y>f;fR*m9XKejMVbK`dVCVtl)# zth-zZx;Xuta;iq*f3>^mI?B4sh;L01`MUz{ve*r!>^#b9xZ+EabGc>wH#VB*JyEzi zWAIb9Ejlh+`rB>Vko+r78&{x)XUwBA*#DyV(uf|d zE+%KnJ@oqlc|RQAB!`x_1f%{t-!jiE`VFJ6%nYH2-8;_c1<=oi$0Uef#CF?v#mR7X z!pSd;_ZNj z&#dn%@W4+y(J4vXRr<9mgfC|ccFYuFgT7_B>Qfj3O1o%j+WPBxBgHR zYKq3z9AeMrz7BM5CZF1{X#1k?6FqV7_*X^8|26Ym9UXscTW!Gf6>W!4Z=MDmWDE)) zmd%Vo^tj^t&`Nx%qkq?91pYF{I`K=3-r?9r{I~wieRs3aHExzW8$|awSUr1xEUeg5 z>(I?>SZz135X{k`OORWB z-mhI?8MQ-5yHeh1_c(MEJAd6SvP6^9?hh|CxTW16(e4$ob3@PBTlRc2x5JcuGCBsq zYj~)!xikOm#-}Sy`(%8ZGH#gGp+~J2Uq8veQuBf4&r2RO&*9+H4eu@QM;4BcHx^)Z~^-2q+t`gC0yCg{3^CSvhYPJD$!i_4oj;zreKFA16}YY4f^xZGw*wWlD-_dfT#*MTI6ikX!n# zOYk9OJDcjXPEmBrp`~j|^TJJ9x2)+I-LkXhz;`bX*z!fQc09M6=QDXq^MHZ<8Pl5c zqvI96AU=ZQSgStqt-wc6&zZFo7mltqo#~qMC~%|cL=|Yd8xJj|xW32YBkWqw#RJi^ zn^!&9$E!a49X_JN(Nn`io08SkpVD{W>4s7**FAKR=#49L(Zhnj9u2oBuX5_xNowjg z>ijdXF1pdccx#z?UjGST{Ts$Bd_SAq0<+mKmblLolY8~R2Xd~yD7Y2=vX}BVy2}8E zFmTuF12_o3{;3NFoMWd}G9NwvYc2}rGxsFqzr6Xxa?MvJmTRh!YiGx_TMiz|ft}{_ zz-}OaR0nqK+XdpASinAoKiArwo`T>Q_S~`1&QFQu8ih~UPbBW*b>rtH`OqZa-(!qd zV#`-fAhw)1-*2&JYQBrCb~a=Bz`aMY7IX0hlDcB=6yCUs`cg*LY|a<(>x%NmYBB(72RQvOiNM=L^OC+6&;veKzpS!bfft z_6S`T9$?D$@7v_}F>}0@F?Mn0a^@oaiGQ4#hvA)9_nSxmew&U*7Iq&OpV-mO55&x4 z2Qq`)gAn+PzXv&OyU1zCL)(!@o_&En9bO`RVyln4mZh$Bqp^EB`<0xT9UgWrW0SoZ zI$N9LTF2SEclNt@ZOEC~*_%Ggxb>Q2Zx&t^aNYUaW@OuUZ1&zN_O!*>YN6!$MizKS z_KuFvnmvPP7`=xOm$pIIkCyK^i=08Mqi4@j1^A`EbPV}u^Pr;cdit#LsII$es~b^3 z={P*jmi^Ay(KGw@9QMUx=;_ESoG;yFrV@7uELcO^^NzXJ&|=om;^-PGG;2uKa*0Px zt!M0W;O((CtEDmR23TX7H@<&Sa1U#DKYdFZx!82a5NjW19;rDh$a>noGks)`^|SUB z{3LItYz8*{bovq7rR1snFY1;?+t+*>ej@a2vo4?oZ8O_Y%sQK`j#_*xqpqwW;m6yk z>wNPg_xNe2pRJbtDwjIOPKsPeoz2vdGT*9Q)SbzmlSO+OVxwcsvX|M&@bc|jI=(Ek z`8;qTwq$#C)K{DJxepn)&-R+t+(4V~=F}6=gR#ew%d5(<@n&foZyo$W{9u~8BhRKs z$5~B3!)TwptMjD2+n~$gv2%cTgrjrNFvo{c_{W!{^AP?aZCkf8zNpPvrOabNB{p_6_7-XOC?M4vyW|jW3D+(Q6LB zdBM|c#_C?97LJ$B_%EPAhWj9Sl_9#HpX0zj&D?h!iUM0e2Hly_s@D| z7wdf|>-{cheUoRDzU%1`cOTe((IrI3F-6C?zxA@(FcFyOHGb{nzL-rXKKCf`xkqb_ z9_XZ5OZT|jWPOygH^f`>GWHVg%#?92b=P4W5>q93XOG=J_u47_Vk$HpoNM}O_eST) z{@t;69C;;;&sHpsa?%WLYR8)Av?z{7{h>I7KajqsA%l53KCM4u-U282CURae;}bus z#WDW=b@1_*^*)-`{FhZu{6w0LpK$6+Tu|(Iwu~_2@QgOkkq*s(D_ z*V1-7cn#~A0h6zyfp-$KS3d&XT*_>Gc_kkrdjxbI8SD>AY)Wk0Ky2LJRN6pqiGM_Y z-Sr^mEBcOJ8ZGDig$$x;^!_QVAM)O!C%hFn^~TxYQN+ z*>{+>^|*mwF7k|q@tX$5weYqfz_=F=vPPXU>SDb{6Wrf{Q`q^QdvS0N^*{Qpt3I$P 
z;XY;E$K@AmKL0-7dvORnnw;+oFV=WI%sT9%j4kuWkH}`d5-(7yeO-w2$;X$Z)9cG8 z#-PGRU!M)HZDq|&VD3$G!LFYGlvhz!@FZ9k2 zeuS=5=D#*4!OX9ix%`;<2_A*F^!n;w0c}t=s?*s%_b-}8EB(=&3Uxx;bBrH-CUey= zeU>&E+i&hq?1L$NN=(SUs85QfDdYg5srTJ$g8rI}4S-xOGPcy_Vsu!KM90QHB{?F4 z|JDQFkLHQ#)Olha?~VD9I@OdHdJ+GrTJYdQ#&P>k_0kf$8j)pv&1)Q=ob7Y-3i{~> z1Ri$O7e@9*@w>Vt+P`!A_A&28x9h%pMd+Xop4xjK+3K^qk*T*kahgr&9rbT?-=nsq1*_p{)yQ%Z%f%Bt zCNRSfQTE(+WFV16wXA?He)q#l%jv~`GS3a?YI%LT=-r*RTFD(2p`OTl;R)b8Do2TI z*THYWv9$fZ{Li1*L29`VSoRD(24p717@MGLFKyKElyW}Cm0KC;mby)=Z#iY(*g`$W z*Vu_`+$QkZd!`)w=B}D~NAXoH_V?7UA$O9c3O+z*oW!2QdhNZp`$$YaFu6Y{j2=9~ z*`|a3#il3q-0u~>!pnjGhT2PmCy+xc`Tx2X4g5;j4}EjEC!94tCdc1%|8QS}BT=NOYGO7>})lZ6~J&>rl>@JVcd*dfNL zale&6`9Jp}Z1KR*=82xR!sf2KJpK|==G@m#PD?2xaS`&=tR_zI@n}rt1 zUl@-T$rbIUMZMOxvhkeXi!H9)H19scJGZ^<$jZdjxa0Y} zYdqt;LG{&tXgu5PYfb&YXBg98_Y?*^-O~fN>?v&GIna0~#u@MUnDPD}##>hE@ABl|3!Flk1-U%N&@g)zj5?*{Y67bc)Ph;hv&`Ip}5+6}ANbPj|HIC~$LoED* zRkL4tg0HT}mIW{IVu$NMb`c+k!}t(?R;dlU2NPq1oy{iqf=$i^_=>hi+OJ)XjU~NH zVwj>j8|>T@gGGCe>pkG`&&+ewIZ-=zw7leL2#uc~YF?KRe5$s8J|!PEU$NJ91KVWy znb_;(PNmU|&9T?T<2Mekk$RFdB3tq?z|TCiu}G!s+zg`Al6ZhunVaOdd^SQ(4%(Hn zWzeBbS#oXd&inJzgNu2u`OcH+!5sKV z(%IxFA0R&H7=GCTe~DR;y*R1F=m+aOt}H<{7=Av@%I-UPHhq(hv#w0lcEqJh5FPB4Ov!S(OXs$1Ar*5R~cYfVS z)#8ku{keDS*O3?gW#B9R!SUjH-UBug^Ch%79=a{&Oj1W&Pp%c1R_5^pYq;O60GI{X zhkem}!RMruoX8=@LB3auj|g{Mh4&hIMvmy|NS+<~4*2d!RMUUVd7|kSe{UYSP`<>K zKi*g@Ipki9@ky%SJVCn-{>3M0E9b&P(Q=|Ys=1}8H$F)A#5&6VPM?`Gnn!?_Hsok` z+$8(n{%qO}IPF3|PP_YPcN%b)`TC-Hs5>Ypc#t?N_j{Rd;5=iO<}RUy<@8e^cbn%M zya)1C%XP!>pBQlMjyh%SE`HLiS+7}4JwK;eyCM_G+C9!1+z*Y(+Wi7+ z_m%PPwcF<>?vACCIvK26V&1yiS+{$rzlb|~Wq&p6Hnn*<_>;DzOe^cPjWW)9eHVJ- z9J)4ME`AMva`Cq!d&lBx-@77bn;7qQ)~-dMHa?xT@{51DgWtqOWLS1jHGt-~X(sAS&-|M6(ymA3~^`@7UZ z-q3YwEnR1Z)S|p z+t^`vvUMM^#n`;EROUwQTV`DCRk}dl?y5tV6?@wp{N@M@RKau&# zS%^pQ~`*YWP^@+C7ybN7Ml zYn!;|U9so2c(Aa>V8LwUcVv0dD=DzUhE&|B_MoBSy3iO9@S|G_N( zeOtg?F7VCva;M_O-zLA3(0f+%r(@d5CD*^`N2*yPoyDdtA8qB)Rvu^8PH$fRti;Sa zMNd)Ans|!6eG%p7O#OD_GV*%Qv3&dYpQl10=t1t2%pm&}I+?k#5zVfYu#2l?u#x|7dbKcExilpdXk>3E<~T>?YMt>je~fI@zjJegk~@I?EBM-a6@2{* zD)`21S)=%%LC-S=EAIXwmeG|L%rCZQfzRA1eCC^H$2e0~;jb&_0vYS&^b<%gsG-#tG% zzlG-cb=tcdyt{G!oXqXhR*%N})|tV3F0*(V zwG7S^@`JTPH;f6|f5*tXUQ>Ln=T3>|YmJ}i8}WUu_g|!*U2dT#sz;|qu0>xxe$?>afX503 zm}B7h>`FWFtSN!x-b(3H*45Jj&-|jE^7D!~Q=5A+%EtT9eWo{m2YF~akK7MaHlBM8!DIeu@R&2v;IZ~X^ZZ5j zsk|mn@F@KX9_e$%*5N@73*Y&{dT3qnsChEHI5Lao+u^_ z-|_b6$1f&;yR9jORhM~szA*wnl)1${ZN=DkKJAh9y7q(HihIKH`#5+l_JG%sLDuft z(N7ik_@JNiD|uJyF>Cd}Btu*9;=e&(ukrnLuL^$ZG<>+|Yy*?6=bC3#9M~NGJ+OKG zDg&F>2Lqdt1~%tj5S;T*z$WK>1Cu=$n&*-MV6#TUCbh8YyB-6RMvsBTb)KHetpba* zpoYQs`TvAS?QR(PhEFD~$bYFFxm)K`o0c28ZMMeWBOj@SO{S zhvBz>pg&E2DHjF9Yr1rwch_>$Bt&{rT-s)~WE@^6$=WskaAMr?1Cf zr`#84*6CLA9?Lr2>TNks_-$=^Fu)uGx6L#B_O|5Sby~_=8i=RvPBJhaKhiv3MVBXK z-TVHP8rHNcYf-l==cK;;M%|09LKeId`H*};driI|C%$IS&c67X0^)0a$ejU8Zz^t) zT)RsSa33!^yQQ~yTGA`?x@*a2wX|`z{ThrHO<|Wie*J$VYg)*fcC}tjLl*Lbg^M;UT`_IP&Q~b zv>^43jpE$9(V-tN?g<>u=(SsnD}Lqa`N0aYIj057_7s0&+sotd zp`GxjCgD%4$#T|X*)yNK{&MP+eT})+N*y!**BD3H-7^1Cz4LG9o$do3SPuUyEBy!h zpibF&v^T!5kLRV2bm@aT^rHR#l6TB!`%cd#!4>SGXflJ1!QlE zw)u?o^Hbh=hV}Ndjd#nhqwLpsU)xt!;9qt(?+z#Tmfgy`7074h$Yy1w|G-$NQ--_~ z5P2s$maj|uSuz%1Z~JR_r{}z9dT<4DZ#i;r*>O2HelpwJa*>ri-1QXg<8JBW|M-FR zpx!f(ar0pfNUF_{``zGEQU3oEUw1{)z-lk*A6H{Z? 
zY%aFXF#1zR_bonS_>~;2)93!QK3}&?Y~-B%MDMc`nwIs`!FoBlvafwv)AIKx`Z?3N zRQhWl>y=#nv~)R=2_?iT(MX0p+lS# z)d`<2{;Ci@y+-uH_dTq-yU7JK zj{5g_$L{a>c=q~S&iuZU4;-wYu6A~QT)bZV#d5hjA_APoO#VjWhhn>mJaDjS8fR1P zlmF)OMD5G!p$+NN9-i5d$vy5LA|H~s3O+Wv`2>BKy9zU!v%%;6QQOOE>Wb~$iTe_L zTY&L7I%$aoTa0g@_$bI+r2P*$>ju1|_h%QWqg~9w;$HVBrWlw?U!t#^NFVsu5(kIQ z#|K>?n}e4EX+!M(E19GC302{vT0Ih1e@8X`XI0pW-j1wZS8aXv#Ilt2jlW^v`k^%}`B7Khzk7!_>-y+x zL`N?^+Tq#QZC0GUATLFwDgPv!!m9c+pWcK>HC~bt! zQ#*Z;KHBJnzOuc(E*~_OI=Q~_eehfN@dF3z&Lw9%_oEC4&!WE-8WDP&IectG!SK|E z$Du!?r!v0&LQ9qJs?hRbs^@p_N6_(RG@rODal;eLTVQpUr=c<4+-Eb+oO`nuw7;r? z6&89k@Nop($efbF$+B}x=1R_f!O6u@oMa15@MCl1q!s!*9ZsCOf)ibzbC2M}8Qb^y zKa76vL-ET*H#Uwt?7qQyQ2fqsgf72{UTO4yUPx|6#sw_YU~|8g@4cdK-%-oNl>EE# zKm1_4=%dGUDemnFGY`o#DRWv1y~+Gy=k;mkbv5nExZ2U(x#v|)99aK(HTI)5r>--v zc4$N9rR|i!^zdF4d_|c%qZb_Xhs6KR&hZz@`cyvueVbXMwJ#W-GT|4k)W@&ApZ~nb zN^*u9FS63F&|gsgLuevA23tc#U*6zNtjSL5%l)#Fr|f0QN;!p{a4>EBS=$B2HYY(V z0mh-oUxx2gPax`dE;%C6@#^}V#Si3cHn9nNX-E1>8|v@5KHB#&WIxk?Q791gt(X4u zSZhxIJ6-*Y4`0buChko7#x5midT?XfEsD0g0{^V?Crk{Otb2ilE{~2{%1fR#XCA+Y zzTNXUhF_3-E+x>Uj785S>es!NF$wI2&WaeTPsSCEhjWk1DU$;{vZDCWK6&55kMvE+ z?T77~Gspe3_eJn6{Wv(fhj+)`_T&4U+BJ^-QSdJR`@13w%eXtmR?J`TrN*CHz7=ui zFO9B;B5y-ug{%i?wj{yO?8VrrQae{3JO-T#J=|5xy(_icX{-7EqHfz-{*3S@Kkbwe zm!Obg%RH*5=T7d|iMC}?za9D)K0g*a(h9woq&2@onKPXqgr22ur~VM?PbKbGXgvWu z*o~2mvhV-p&i+0_GFQ3lCXZ(UPw|bfowB~Mm~%i5`-bB03)F}13+(%|M98J`9p@{BdY-;xiXp9`O#1fO4IB`-S+F4y3fAbR>d z1xa{2bnk9)C@ezMmvzO%ivHMEQI@uV@ihaZBJp9BxRoqa8tHok5pCzYfPZx}{jv#eoV)!4v>0N0CH*Ke_x9iq?8 z;ByoFy4AYo#3LznjVW>mhBYF2xU23v-W2vV>ccxEH@}p(tw$Pv!~e0eot($gfCWxP zaxR{4CzdqZKK@ep%PDF0o&2|h;>YV_f1AafBEMyE1O}iQv4>s&#g1a>I~n6?`jFv+nQSWOQ9;cR!k# z(ES4b(OaNTaH08}FV)v@gfdBqo^H`Q?&jOeK2=bcp4QL_JdOa*^P%WYui1i@ngtovRC1Qd&(Jo zt&LxxtyE98++BdJDbH3TZ7XHuoP;qK3r*Oms?zdXL2F1tx7OwCsh(N3r-r}UnN^pm zmT(d=Iy;L`mYt_g1`eZN-kKQF?^<{_O`WW|Q_J)RY)jvJVlTuOWW(W0#pX0EaNZp4 zcd!JyDIXv8J3w|6pP|59)#5(4Fn)a>gV-s>&tOsoe4nzydyq|a`yF1T*W~kj6I$)? 
zve$kTDbU}sub_iJkhdaD=R)@7c|-hOn}?ms_dr5O{K~{$gzW_%vP*+oRvDSt^ELCV z&C=(MxVcisG5#Xz<+NWKd=Vch-G;g}xQ%hh7=FPRwH}|fa%#>Le45!G4x#^jihFOj zq5nk(to6V9;mxsn{7%|(^f*hP87EJQ=y4>!tmLkd+|%y-=3++`KQGbAzK%{7I5mIo zrN~C%3x4vVglBQLJ$H^ho~0_QZ-T$WmwWdo%F9{L;9@rI7ITIy;?4j|-0eR~ao}PV=gXFSCwu2#@@9cZ^amRn=e1WyDc8*96iQ62eH#e|MzYFc}Vsk z#dE+r2OXVSy$ z0qm_QOTVmIEOfC0mH(t`zUjr&ta)O#Kk1rZ&zg65YAoOESZMfV@y+I0klDvKeOvx5 zk3D4``iY9w$XmLNXx^q?-dhdt?fF>y?~cDFqMs9-Hi+Tyr4{M80AHF_S;pB~bQ^bU zjBG3ehp)e7_)r6MD*LF=U+lUMNPZ#fl7ET^>AG?J=xgXNmIvL>yV$k(|6(k+#2L$t zF=ILQmH&S{sON=KdC-$@4$Omo#aP`u==ra(?+XsaUqUIJ<4f@DymRxQ|D^8ec~GB^ z+z0SP&4ZvDIb%Od?sDB`M}h7q+b_DfjOO*g@-Vzga4Y`IGuTI4J*}6PB~@R#6B-N@ z6CZB)*IN70S^fMN^~fwf02kDY9c(zqw>KCo&hsUH3AVurrp#G4M_7%u&AC#vC~(6_$HE zg?YdU+iD@YzrsMVr_kZ4-=>{dp4yI1BA&n4L*uczosOTvi$6I(jqeNS@KX=+Qh$CL zzx*fVrwb39hM$tt>67x)1^6M|MeJ1|8uyO;xLxijT>PTpgNM&Ce6S-!Z9snBUiS>~ zQ@e|MN{FZLBnBY1-j?4)`R4>v?;!Q0j_>EiJ@&q0WCjz%EPK75^|TsalTn+~f*t$B zAJx$IQ=DPt`wYJSiT6&t;~wzHIk8{7qwrvf6CC;r?DsKqDB5qvvgd3wmhI88jAAUQ zj784&t?YAqqJ5X)_sss=3p3dJ706TC&qnTc;#S&X z;#M-Q&e3rMW!Na->p|g}5}%dJ`P$A*T!4Q$?aJdkiF2L8J6hDVVeI`}lCx^f9K_AZlTVvLQnp9odLjSVo+vqn$x1p5U zrj)hK9>+Wd|2>zVROhIZHfJY$++af!DOa3SeswatPPN#r3A|4+@6&j{miM_QLmAG0J*=@a&`*42l|25#L7dq!^@yVyx* zDb-cY+H~%ev*Ag3{J}4}s+r#`Ym|vwEe4IVw7xxcZaLVtcUz#!5GbmraS9k_DyoMb!V$mP|0>sC_ok4C7V*u|ezYHCFJmMw(iy+72RF0=``EELd!P?k&W(ZnZ<5qIZ+Dpa9^ik67Gv8u<@W&dJvE z^Nbw$zHJe=3Qm`@R^)6V`xmUL*|kFvbUmrqRI8(J9%ivtVC<_n{vL1{{M*lf7HCc!_}v>lT_oK zzi7Uys3Q(E?<_TPKYcE<@`6K7Mn>ELEj;2I+E7iOvuZpQvpKieW3lVF`kVEDN7rpS zE9QNXr(!Q~wa<=uf2pU!=6zkvd-tB##u&t&!&)n}%R@6p0kd2sFdNm?938)VFO|K7 z9F7}_fvRZtuJ318b=tD$w9in;bOP({MgN7o)!(*|COp@J-V=W0?0K&+j#u#AZ1VUE zf48)0M#muR_MSA2yDs1^e!$!@wy_;ubdx6opCJ`|+@o~vkj21i9JqD%9eCRjE=RK99qZGIUAjb)%-`sd(#tnL9walPE_~q@{H~p&l>+v z#$U4RKWB`+)W6R5%J|22T|?a(V(?@ROPGU7!e-65Hf6vIpubLhE}o~oN3P8au7Um< z%I}{s*O%PjhZX`~%32UQ+dtz&c$007NtXZZTeB8?(%;RL_v!LU%{zfwmh~w6J$J=`JJhj_^)w2OoXKLbi{eQ$lPfg4dg( zcs-1*z>}pGI(Wrbs&o^y9p%x7hO+0sHazC@sc5FZA$NVulsI<3bNr=?*=N zGW2kVLl1ZL(nH&Ah8`pi)V9)l>EV<9cIe@_*P(~>$e0cWCW)<{jK zbEEw>F@~{}6S_O}lD8rUf1>Q4v;W{9ve=Tu1QdVQuoI(+zj zb?G1B;hQ-l%6#i6FZcBHE1%fE{H^r6`QymcFlUqisWH)+=I^f@PeYyBJ#Yog7-R) zXFqu+UkuaEEc}MP08QkcEIKIq=lq_kbtMT&4fAXjd=vh6N5`Ujzsy-{o3HhzZR~w7 z@O**i9Ua;Ce)$S@?<3R~{V}|+3;2nzS7jb$@+jjwSKXU!;VX^r(P3ixN*H73Au~of zmzB_m#QNm`i!H}j{aADl9{&H2fYE_)wy!8B&O48Ll{&$h+yyFkeoi0HI0cXAFE z{9VgEx`Lbcfp0ZBIMJbm6S}99E4r}i7F8&|PI6zAiLXC3*Pg`U5A;oNFLyjGg?45! 
zra6qqvV8lU@is7C`5$eyTOeKdPUJ3f?~PvC2e?&FYdU-AN+I_@*=btICFa8L)@AG8r4-Kk+>PdXk zZG5i9mhY>nZ#?or)icTJopssZe&*zc#`~eoqh8hXos{nX=%(N5533ITgt3dC?5p^6 z9%XHhPDp966UH{66Ydee|3zMJS1YtIc5qt5k`qZP(!k53N5d_QZActJ?>*fAWmoA7Flx7w-;e{jLPN z;U@2>t}@oBl3urR<>^ zb@U15^h4I~qpaVj-@1R@^F37umsvv^hVVDMNChwA_lQ9$4I?-cj6lbC)H|$MeXVL+ z`$My~y^UUe1a_dw#U_{fE_@$1Grv2~yFAMJm3iRjzVH#|E_fDPsYKs?iJK7p{)@m_ zc+3)XC)Iot+;s4Mzoyx-XWZkirrf`M9XrD6Q*-&wO7Op{fXV6Bt?U5O1pKQq)oQc+CR^KJdU6;eoy@)N{iBYWnzJOF#aHt|}h?yXQ;)IRATqdvzWDC;P7O zKikFs&V{D_Cp@l{F*!W$2LAt7@i_lc!{ffM+y58vxDQtK<8jOS@wj4mTyY(bj7^5$9beVU?>^G=@8fqL@;jE_9p~LZ{O&rw_2+lR zef-WoGrv1T{ZGvAj<0g@yW&&%9XROacgR_B`Q45<{4N=Om)sv#1M|D$D8GAP-Sfv* z_2+l|9+7kgem6db-=W)kl=T{y-}(CZ-Q&P19>4oK?}hiBNv0n`xzxuZ8*|=Yb#O0n zz9Iw0=JCxHJC!v{=VU}Tw{x7tD63h)no484+Zy$u{V}<7b=i)~g4v14<|hx?P2_oX z{6U<2Sh{}uWx+q+hW}P1QXprTCgfr+)_7;bp38!fMdTD%1TQ+s9q{Ce2uL1=3#JCk z$)ggUs6y+vVjJRJ&Yuf{Tal$-UEM#%U--fjV%)2<1{wdd7v9oO*;oIIKC8#$W6U#l zT=D$rQ_EB^ZM47VT4G=V_+iTVU*e7B44NF>b0s#io!{w;)r+D>ERRH{shV55AE%FQ z_QtX-f6t1uRZo!Li^=0Aa@}Pfd?@f0M~2sVqzbms&ujM`l=H;~@^|d?SVzaP*VNU1 z{yFaMnH+#;*TF+`X}cP^Y!z)Cr)_D&qMnC(B43yB&4aycljJdGzHa|;ja&A$cz3Mk z-fZ#|<|i1LQsmn35NwLc%AUZ3(~fO|8~^3q#JrbY#}|k&`x}k9n1L%w09pPw{b6@;?X(b)4H6^8EyZg zkCocgDIvC?jk52wo{C4#y}j^r@Hin39z{;(Y_{+ivb3D9LkN3&ZGjheTZia3@9!o*xNAGsj zrkw8c&xR()Ll5szb|6@vjz;2*MPT3~epBG&z~J}MvAAho=ATEorWYP9t^3TDJMDL@ z=QRKPcT|3f#8D=wZJMSZMfU*Q1{&WkkFRUzl={XE;-lrMUKftM{jQ85oP*y8cQifk#oLDy(Z2C&Rqv{ zZ;jq{@Px#C^uO!i6~4(`2W3litOL1g;ic)2xRm!ie;mJ?C>b~tA(X8tOZ_&M-9d8{u{K3T!=euY_@?yx|a2P%<^z#wt z4UEJ+kMaJWqC@&~)8RB%Up8&J*XdGV9e16Qm$sLGo@3VOR(RXRvZukr>~o=4xgP=t zNhb%Yrdx8LO6;UVXG7`FO)vlFiv!b((2|>8Rse@jLN8Cy=9%cF<2!~1Ew6wZYk=URv$slum2@C3$J|dtm=O?+4-oZo2uh(9IB& zkI$}Fg#*=*FcBQ+L$pJ@-0f9l1Y5_U=R2 zGr|)$Wo3KQyHw7mo5IM7a*nlkY;4@{%*MtH;`pXBht9wH3v;=zN2Po@uYVhp7M>xJz9^1J(V`#Z3Wfn?zG(kf4^dQos8e;3;SZX9DAzUb}V{{ zGsVtsMF$ZtcGk!HdVHz!;8XSZ9*SOp7^cb9^dA#D%Qs?5#n-N;W~QpCp6OH7>wLsi zWK3x*PlmYSY?I@}*}mqAvwe*?+tI|?Dl0gYIhr`zvClo4Xg_yKob6~|1Mx~DlM)lq z^_V!@#|Nu|pRhMg=Y2Qy_N}b62BDqN*eN6@eKoSWjcw35XNX=|Vt$7_kUnOT-pht0 z&0#OQ0DNb&mu-P=a|OSvTW4CMcF)X7shK$r9IMrPX4=G@vo3nZ)zr?+t^(JRJJ02R zXGdBKOFoKBv!4*3FMrR~Ur5~WS70qTl)X{*Mf61)f19oA!C$@J7x%F*UPxTDGY;7o zWh|}WU+^b*%L8xNc`~-Ve&0b^3vv!}&Uo(J@iz>K6KDJ{gm=6?nR_72_$BwO7dw3d zz8Z=6hz!CvcQF3dcIc$ngMW1*v3?1}`g!rWF!v6~+~@~CZ|v{rCXo+%V~{2Om~+|` zi9OE5H}?c(Zyavok5w!5)C%utg^p(8=NyecR^q4IZUybli6Q-t_+wRoKkipm%Wr-W z*(ka$C*GKR{tL^;`+F`CAJDl`pK0Qat5(^e5{WHZQLOJ;k@;7x@`dy`txxH3&f$Ar zOELKb+3y&4C;T<{(WLJ3CBnN#H#dMcv3VIeXms;MKaXq-$HXJHM&pH=Breev>k}w8 zv5&|(r^G&fpYu<=*r7$gGqI0#tj$2NjA2yskM=lz!2NUGpZL<{pabgupVTH4(>}-JNIF0l6V=(;naz)H3_}yAz|rNbJ@Cp}(|aama_b`- z&*9HcS$WrguUE}(I=2kuUo!A?FJ6B#g<^LjI~ zOLA4xn0#cBImiH$kfD;xm!TUUy^L6ug?6T|LDe9Or0kyAv3~VTxs%1(B0Mj>`L|_> z8{~d?&fKL{$QFLeW0y?HvxZ1cnBKPD^bT#7_BI1E(W{E=+P<0?5#-kJ1Z>_Ps}N_$ z9(cxlALmr7BJ^$1ef$^g3!iWIRA1UgUi~mO-|!-%OFe@Aa~3ka=u+?X*!#WEM=xz) zqicgVY1(L-7~IPEz6g$3qp2ZutLO|1JNJ60h#akThG|B3Cp;pXHaoemu#5h4b~NGvX%k*Dk;=`&7rK1zI?z|`tosix-a3$J5TA4y90H9PZe=@&Boro z3Z;JZZbT0-^|8nJM)bdP(B{rT@g*oi4pWqudk8fTCl=xb;3jhk|2?90GQZ+C_pW4| zTzNHXYaVNCE^BQLYp#U7>?-y$ujYBeKkUm{6D57|VgH*v?@7jWI-d6t-(z{+2fX`# z$@5m8VT=dkj8Wk49^=xo#MIs#67aqHUB(vlMc((u-J#RGje2@dK{j&aIVX0{{XUny zMsRgHpWV~NExTbZ`<9cpM$3%Ii&@BvBJ15Zu)Mfd^3}x1i))ojUYz~2$i|Oo!;u%4 zpkFCxZbYCzJF??}ylu#i?EAg4<4|PBTa4Uh z?j0YE4@w62tE4x-25)>F-iVB(aZ=do$aQ`GHL>?V7@1D&vsqfETfE4~bO+8L)5*Ot zBE$9hMVnZ0@wt&$@t47`k!vLeDYN-LWK7P6mG$J=uw60Yb>!BlD~8{pA4ZPd9-G7C z{hzw!Sk7X|Nn#(`_FHqerT83$J=htFvBSXo>VYx(gu;5_%lGjExtd`c2~84V<)IqT1S~hy!H5YF0yQXWHnsvhU}- 
zI?gK09w+A^6Eo9_?Bmq$?iM}S?xszPj1EfZTmGEyW-ld%5W8m^cF&>|&R9;D{Z|hb4`}MqL z`d#ko_a0Zj?zUp>q$2Z3Ue%rWdB~YTu|M8SU9*PmpuJFqQqeb~?*gv<^T#S^BHnv} zYh<3U@&_EXUvt{bHbTMXjO8^4yKRxNvdZ(^s+bFd}nJUCJ7 zyCv3a+#pqO_h0?&;Tc_i>`s#RR@<9vCu&*c74&@atqfhrG0x5}(~j)-(3@`0kzb^( zYUZimb2gUu@~-{igwU?lW4rZx^mopCIV0@keRwtU$OIMK@~V063Lktz!}~ z#n7m2alR%0;4IE`<9xI4YexocpXdiybAWpZ@V|<^_)2Vy=G@`X<(szv|C5n|vZ?)P zY3oadmOA+El>Hn3|BX4k2Mmy>fQ1$OH1F!hVVha4D&GSZ?Z7Apn(e@j6H8<7p;LGt zor2JsrmK!btxK@+gA%^=#Pw>!v!5~d5;W}*yHwh#w(r@e-WTS`?slurja5*Gp12;n ztjKNFjtz}<=%e${#d^@in*FjbmxSm_KG>;Jt7}#8Sae>A%qd*u$V}$GKk*ZlJO8A9 z*G}XhzH2!MJU=2ZSY_nJHOP;SyqL=wpaz;OlRIWr0rx8yxGg`W{XV;~w0|(#ejEB> zN2epYFGsJ_PM=?)jdSEfHc=;LNuKNd^`H7>b zW6&S4jsm=sH6VOE{DdlWaM#A#YGZz~CJMma%cY*s@4?&Mnn(2aLwEeoMOWe0Pl#@b z`>~pLMJ{FSo!yPyu_Zzu#PRp&{x0FS=p4`w(C>D;C|U2%@cf>pC-ARBt``~Jf)_;h zr_y!Lpcj)h?=$)YfvK#?jtR1+Q=3~qFLIL#UP`&9_x?JgNyFKk(VcVo=GC9?PS-H* z|Nc~(R?VsSTPy_EE5N<9zsrMv9_IqFMacgO*?RM1i@J9Kmum8S+KRD7`Goc@fj*aW z<~X6pb*jI||7M?miPPurKR=KS#+;G!I5Rjl^AhI9`Dc4h!c5Jl#b!PM`hEP5{?IP$ z9=rKI5jaGBGq%r7oEg_QL+sd2oic0`%ULrU$R&I|=MJ0tB_6-AVfj zKDSwLEil#iHnZ9|czNVgwXC%~|h9Z;s_B{nz{N7?bc5Lx;xps*hw&NbfpfyWzr&T`f=Fz&JMH)9ZZx$Qb-(C30)IvF@m-sBJ8qS z-VI%%?bBtP`|S>YH}|G>+pV>wfeTfod;W@e=1j&cG7kFcwW?#!rMiAgVX&+g9d}|e zJ{g{EDid4xXl&g(elb&R{~GU&uZBu2;=8pi`%SCR-EJ7###XRBud*;0z=t3CSjU6p zRxp-3RW~_1`;`|uJGs(}Uf0BBh2hgaMSdsvbbFLf3x5`VY;g{Wto9;%C9(G>BLkr4 zS~(Qit54T;9dd)nOrm4i0UWh#QR4|_Tr0Gd-uxrp{SLiqn0+3@x&-`6`2H>CjJ%)PIcJUb|Kg0&qH*;3nZZ2pLti~%$%j3e zy&QRmvFSc!Y+rSatq7YIb6J}cxQOp*n)m$W$BctL$nyC^e&AKF*MSOtdpl=F8JkM) z^0ROJo-vF+t~M+HuQHx-l-Bz|$teMfQ znM~G97I}vTp&#~e-ZuMRiO3--v(f32KhB$}^Mi}6ZZ!R4&`%NjfdyU8qTkI*?fhVf znmUgDzRvGz{4s6xk@gMKx~t(Q-&kekg=eD|WSxjiBX$sY%mMp>;*;Uw>f{Vt6=WgX zgpu3FP)2mJH(~QwgB-+}{u35{rhjHnl(P?Z+n&R;Z`Mz` zmV@fZOO%KH)T+#0Kpi07bMSQ8Gomz#UoJ5t@p6`r)5* zp>JGK9`j4d0iMKwE-MF~Vm~!7^)}=8yD>ZgojUjvd0OzRz^~-15`DA83mO<3*`*G6 zMSV2p=K-^{Yw{Wd2HA>pFZ|&E7#{Pv-j&%CTpx9L5^(2IY=oIlsv|5IX1& zhcVsbl)p#Tp+#)K+)pWeS4x<%bwVr98*>?*l*L>YfWu9UPhgS>Ot|czA50FC=lI9y zq?(Ww{-$A*5#={>pYdpOR_DG|@hiWJ_TiJ$e@t$Bxi6%Vb`7jDHLM%M)q_%^0C z7xTa9&pyY!sGSqIcLX|4x~Q%Z8%9GqabBIDOJLu%+-28&m$Ei~*GDE^u5F>Y=T8|pqmc_+2Gs9o- zw*+2E$g6q=c?F-7aHQ;DfE?vzFV)V>rQM&xH;%Da4m7W%e_&qMP}T!OOpw$y z2N=!+hUJ#8VG;f{b0*g}N=!nCGaNjraM2EAM9vjSoHa7Atz5@=@S!<+K-f3kNRPxPnR z^)ikpiuw0|@0#EUM@B$~|gWXes3yn6R_<$GOxd}VtJZo{?wuz_^i|4H{%G6(CGO4R zqpq+0|IcS8fzM3X0ttbjCIpuRT#6!)R?Q?>O~9pM&}v%~;A@)@tHrvZViHo@gkU9t zTkzWe_O_X6=~r5)q__97*n2NaU65*Tmsks_j#Xl-e-HC_xGC`z4rCI3H@y6+rBpSDfR{1ESaPy7+H4+4wiQ1eR}+DEe7D!RNu=f>ba2-`NEb$P?uQS8B+VA~h?Q}8vv z!jXg~f z`R-V+k2p_x7Ig^v#(I5rtoH>k)?4oI8*+WbR7BSbej+jnO~W@=B63uEavvqA6(#?p)Nm+{}T2bV(UPP>ke5+B%rdkL&Dp_W4Q)_btO zQaR)MO?)9~#**2}twv_Bch1^n)y>?KK`kU}@>A*ivbHxGKL|yA*!8)csj;vc__Gt8 zYtC3kMaTtkaoOf<2+g1PeyVw^VOmq1yyeLP0z_9`gx|2QDHGHG1qznbGn}HRb zNoRQV;DY=(^ z_DHzTdhr>r2IBUc5>p0xr|+}E+f>?lbBW*9O$M_Lv>uC1`J?TdPm62${s!!Sm2&$a?iIH#@B`{X_z(_ zKN`N$Fn+Ymzi+#|wWjK_obQEl*~heSE;JperVc6oro3e83BsSs6QSPA>(q98ZRXYs zPpUPlb?YO})R9d7KDbxBL4HftFXqFF-1HS=X z=gr{#$_1BBZ;`3KqLA>7ONe=*-F#A*J*x5lU1?vK(>|8s`^T^tHqXXl3_%-KQaOuC;p zXHDqAz2_{|p0m<{<_v%N@w@u>lRNnxCVFAF-MPHmS6(wNN_?te4D%Od4*~k!Ylh~} zUvnAp%-js-Pcmuy5^AC{e;LdlYf!P~Ip>WF8hgmQ}#jA_=`jFDvp#GCxLMzJNkTG_jhVT|cm1Dx8&>d=B+ z|KJJv`rg9M_&anph(6EJ8tFbCLiXKe(}nGu>_r#(z}VM+ef+MQ(8(RYHG0OyHVo%I z$oa+idfYavk%g+?w;jC1)6LB;-IVhETkeCmuOV)MC$F>jJ|O6%dp=cX zsElzn;a4(~soObUxaVAI#(f^RZL6{Kai;C#9D;qroihUWb054s@fLITcjoDB@beD% z`MGd&31=Gfp8Q^_wcbVzI{8lT`j~yB;NKfJwqsba0M-b{fpPMDwYU zueoxlM^*(Qlc4(}Ukin|uQeLd*_%^KjWNlr4C?gZ-z?5&{>J|mos+T2ho6bhGdj;` 
z__Zy^WQ#a^4c_B^M#a?WZM+{aQ?PlB+?m*TGuI?F9Hcf{J8OybH0_rpJ23vi_sn6UxPi0 zvT4u4ZzlWPUI(g&h-M?%KQN;yZ`=M=yB+FuE={N2+;FZz+q2Ym0@watBfcLC?Kn2x z0rHt&{=4mSZfWMMO5OYAe#1T&-RRR^w%>xj(&{bNBYe~?)<|mdLQCRl$r9ylWdgUo zm*XY&h4{^NrEeJ*wZ4>aoY=ym*hTDVvinbotfkM@!;C3?WySo@`2G`mAD#BV1Z0nk z$5zfg7Y#Xk!bV4q%fARM6~cp~*M!1p#1X^?|2fFGRD09y98vJ#?0rjvhEgMmZQ$a0 zeEd4UliYzq{7}RJW`BZ>`6fO&)mqm5wiAP=s{V$aQC=xH1_wsNMMwIPf8eMU-7a4K zDR)opG~XHxUHV1fjfe9AU{tZ8?$73kjSB6?pSD}Nk=6^@>e=$AcsT^hEuP`*x%!0oKYGy%M!WP{;$(uI zd#%)+_8GOa_wR$2!0UG8b{laG(Z^3e>YqOP;$HL7%6suNqjgkzs&r9`T0VTJ z{zN+|kr4F$5$_d4vsJe2bk>L4y?J+O;vTI5Zd)Pd={)XB=RIkz=?8o?fRBd$?4a-8 z*grjaZRexosNVQao|CTo(e?O29!kV!&|X(Z=J&?!S+>rpSf%t5^BwT_lB<_YLN6JN zUIKpiGu8&yiS`uj#f6nMg>882! z@*Vwcl+kdB3zx0GxPA1Uzvi0TPQvFJaP8Suo=sH)Zu;ihXe~@xO&wMPf0voEE1O(N z$<}*rGQz|Sz+vy4e%T-T%(=1mIY6UIMH{0cmodNlz<0dAP0t(PS>L^K)q2r-MpSy@ z-z5`;Yhw4U=!Z6LUA;}X6+bw9ORbn?wx9kaS#mmk&%mG8TvKuq&a}RY_Pq6e8u${- z=>v?_Jk40+?QYi>&Yt`s4c7O}=(M*yzu3TeldQSj&rj;}{E^vx@V~{u|6hT*U;Hya znQs{>vT-z~zOZHgnA*oCzRk0Za5l7{oLS}Gixxx&edkkkkWFY-c@X`@cDy)1e=;rm zc7fUcz6 zVAa)FLVPiRZd^{zU|4%{O(XZsM?x#`i`X^IRTD4^d9~3ude<2AB&})Q^J93Ha~ZzOKHyZx~F%knu-9ttepv1$r&!@5B!^_&B- zOKrd53v_Io>g+SvO&dp$*DHf5yPgETU|j#H^uN!SVxJ*Bk2(5xp2culU&`9(!Sg)6 zG{IBNyA9m`NNb^u^U;yt@V~!#(OTc%T!@zR?i$*9gm*nW?W1<^cG~^sZ1M$-=!>G4 zd-=c5Z|*vq-*@v{dw{%sKNh*Pm4>ggOnEOOw)}Ykai}6QFe_)V@5Y%AWQ;6fo$sLD zcGDT5Lv8-r^z)G&%UCCuVHfoHhe>W>#Um>H>BOENxqB~3ktQqk}PLlYN=x+-{3*+uYf-5?sRH%&g9vd-?*Y7&}nQ6sm`2Pb$OZcz^`@o zhJ@@2^lkvHYF?+E1basxrrVAGNi2CB_*F3+K>)tyB@pGW~9q;vL zo7nJhzwf=@{M1PC@;Ge!&`h-bIq0_3P2Sq9wbPJIX(To*{|Yf*_6oIDQ2#=p_Q z`OWTqJ-_rTi7Edt`1s+U87t41-_?~zk~bRT{~t2t7GMdS29}G#%jwRhkRFPxUB0s5 z+=i!JS*txBV~};5sIRU$xA&V&cIqA3^2W7kmN2*XKx@K>_YQpB6{TMG<4m__5mYfpwuf$gh=yeN#)!jatam_7U#gM0;;OWi%*vYaBIUtP#@9$?q6JdlT4m%(Gv8pEZgxIXb(u2Sa+h?thJY z8?n#X8<6}Ya*dh-6E&B&|JbSVVUL--zHIDg`KAhJFPE_@-fID);LF8s%}$ixDG+%U z_`Gr8GqJ~4POgIV=2F%wwY?WP*4KC5_q?J0auWX*A!mubM=4(>Uj)9_oT;y$!W)t~ zWlL1w*|2jBces5y{+rau5MY)ZJAyCQyH>z^HvaLW3IC(mpa0z~#SlkDG9ELhm-*~i z;quJRZFUYO^zmWs(FFO}TJS^U#xhu^!3Q>HORUltR_vfqHW~YuEb`W5j~+hHnFsN! zp{^3@G_|4omos;H<=MbPehK|Q2Oi|J zn0p5FL4MOteBA^uAMd-L#y05vW7o4+Kx?8o zs=gN7^!Mys{QsKI35@Bb>%b*rcmukKlWDJ6)#r5QrXk~A%UFLUZFS4G*Nm!jx^vU! zB{;GzlUfUXpH&Wvm%F0cc{6~|9?O|#LmmA$pO=SSs$8|RHrdxFbDi~~?Wg2H5<_Yu zMx;G5Hs40c9i3t9exCaAZ_UT2Faw+;L$oj2%RTXYC}wg`4ZKs=GwHwnUABA}r^s(R zUF|!3dMDK~v9HDDw~hE!pW1h<$>mum*S-Um*Q^@~y5tit^(Cwmjh2y%WnW*|ljCN? zoPVKB!}#G|%h^bd+iafW`M$OAwlfdXZA1tAkXPSc4^1nVZ5TC?e4$0Pqb6d0TFxkHxR$o6+_$YOP(nE|%uS%rrM;{Fm?B%SYAuU~l1X+Q&TkLin-q|7DjJ)Bk_`zPyf zUhNAfH^M-sCD5jL*r0~Z7HFXve~v}2Oxb;B-DsI-jx6(KTv8VFU6RAIJ~J>=_QSuk zW^5ss*~uH}?X%4`;6wV)wuqxy2DLy~#~4#KzXelYx-ocuHS6fhI}G+VvoD+9f4cT$ zdW-oJh=B)*$s`e*8A6O^D6ty$ggrEzc&EQXb%^8#j54>%PgPvm#z}X4BRzU0_R{v- zoEjNBSzm}J+cnji>2ti!fk^nG9do#n{tpL3EjLm(?jvAw$)m(MX|m11n(wSnIKlKXr+@+dReswdvL z%Y5(QJsW|!i?~ud=aIFKGFlFBzp|8g4zNW1V<$Rs_%V_1@qRU9&LrQFT$Y8U|3xbqYt`HE5&w zm%yyJzRinq@w+vg6)Jt@#6#CZ7uP`_*Fq=PKrge1?X#cGpJ3-RO<~UoIcP0W#-h9d za5wv@0`h@AczBQSlUsg&G;c65Z}guWV!}Dh$rstnqrIf^?PW3cuy1VrQpR0)Lw0?j zocuEIU}+yD?S4XA8JzQ~JVBi$>Ag3P_Ud>in|CtEQ|#{@lbk{AFKF{y6Q#RpJkpc> z@cu*KPw(twJ?iiNcisD{2|gtbXVv&dH*hWQJY(VW$TaDQ&c12ZIoiFQ_lWhj9EFxd zPtw6N%&{G7IEUgP-V<&0y|6a-cQlZ}NUCJV}mC*FMgXR9wtkd+xk8F(rhZ z;_a9H`iH~@um4x%0P&+4@D02#eb)MML5?|s{n+X=!AvR%1S)o*|7IK3tc;=g6$kiT z_Q;icg6G^gvld?m^LIlXG9igtj+xg^(s>JAn-h)o&~R}O9F&lgY#2)$Ew1Z)>{9Y! 
z@sF&}1gEv5-T5DFw8$qmo-umxO6Jw3JMGt|_kTb8fs7ji_@wg;bK&^OwfK%xINQ)z zsW`CuzL55)JNd+B`dv+bf`zjlj?Uz)hgYGy3UgReg=uxe{l0&AM6j1~hO6>*P1=Zu zXQ2HQUS9v0uV4#&y&$KFI`mz>+H~?DCyut~W^^P5y{sk|mD*O9D!y}!|5fmm=IUPN z(3!8%kp$)tnKh-EbMJG2{UBqizb?0)wIO9V@WdFyI`i?ypJTVq9MAqPGpVBr-ns_= zSkq8rJ^Pi5A6tCoo^|J3GxJX78f6RjhUdL!NI)#}@vp%}ig> z(GQXzYh>P%hx&}9`1cfJ(z}`PnO*Cf@m1NjT}tFR#&JLMSjJqn5u+)CHsfm>#qXJb z{<`;T|MmlFpFS))%Abk7Y|~Ky9+(w3ADW9b^b=1h%bDnm`LoWg#;Y+5yv8Gc zp~dn{_EKUmYn;qQS2-W-law|3&3JdP4wZr5EPH;X6Iqcbzy*5(POJ_ZCxnk?=vKUX z5A!P@H`)6Z1B)wvMJ)qrKUX&f zsFANRSnTU%t?wGO*hih$QIU)2XEm^XSLdGr`y~Dw*ah+YDLQ=w{x@hN%y;~_G8tEY zzU)F^${u8Flpn)`s}&jL?3Xtq@Zf9*J|FNc>4t9%@Ciq0mHt&31~dqcm9KLJa4COQ z^TAqf!)mzK46da!N6JS!Hp_W#3uAhb>l)L??zK$xnljcb-{Mrp?&NYrxu@D(4^T58 z-tO`09uaSfpflQIU(jdl``+(=?3RU0mb{A_ukc`xxl426>zK}%4P@AJmV+&*z}pYeqmpK|)* z^Z1ax4TEUQrhy^8hPL_In{PJU!n^n%UKsxxwWYC7Te+_Gl-n%2>6*#9?l&4%b8jEl za=qu5`5S)qys^8PPb>HBd|7mtRbANbFYRG7o-_nN_uipk3w!|-(y{?i+w-}eUhv?U&~+X{qQDty6N2WjVg z+PRi9b$^i>nyz+o>~nWLPuhqac= z)^%xdR}Q%?3+6mId*+;tcjqh)j>AtpMS4sJ_z_IxGBi}oH5ydoBE(v92m0D$(Cub3 zr9=5I%gFU<;x}i67T*j#x%u0(KSoasuy<#BR(O?Z=L0#lvCx5^?v9sSsxx)$`0NYg zS0`W}jf!j|UXs1WIZH1{|HMRI$XS!nFp++P=$1KiL(u?9A?OiZR<08pX$CjyYsVvj zRYrMvR64!l8S=-8&$XY}^Z)0uCz9AovEu_!F81ii5Vt>_mnVJpW9DEV>z3|?z`y!3 zsI~8%0hqx#a+$PINR3OL?^Q$aW^#c9PbTfy^86WRTv@E6hgLh|N{huVZ9dn@nfkwl z|90>n1Sgk{kFZudHTHLtcPSk}@X0s9-u+!|=NVxe&SiqrXeaBOY#W~#l|0otv?(Z9)azj zj4e2lT0sfyOLhEQ_zGU|a(%=8^l1AGpW|bhWl-aYT5{}LZoS?(vHyOf>i zzebZUt;1lgs>?#IVQXb2fAy%*aLZBUoaF&e&Lwr}Uoywe$}q=tST(ijXYjl2yc=`S zgDYlWuh>2fa4-ME2VY7E`{CU$7bdR!;7imypfCA0#`wqX`VBmB92@o+=eci(_Rvx6 znl*}J${&Q^X5^Q6_v`Tbb9)mSCUbxL0Q^zjJwGSQrz+^OM?RfAKOPL4C*`Yq|KsOt zn$0h&4MmQ<@|2mUHkV&s6u;~p+`}&i%x-=e2fwuWGbg5*X-Nkrg}RP%uZ<6RhLI)t z+{@dRi`-b){tz)BV)AthtJx2Vq^#Ju%m|w|2WXtT$-{QPLXcWBWZpSAZ~0 z@bYwfzLwKQ-H)7e`Z&)H|Btb-V0~R<0q3pN!51a$?^Eu&)*Ri-=U%qq?DZq(ueewH zW2q%G#wZ?-4{Q8oBO3NIz6WT(cA{fT=7Z}mL2L1C>HWLno+(Aw^4gS*xrBF=ZxSF* zU!*fX$Q{xB>WNh?4bRc`g*|PbP22fAb0O_{&(y{}Qw4v=x9#QJ#J8>d@uI=bxiR=W zx^o2j!;$Zk*v1iVpUE1-g;g!o&S_8|3+UJDt2;MMo3%zY%g=zGFGDxAz)?w|bQ=EBh9>x}8rlC8>xs_tQVv%EZSCb8 zuL9`k<2*C>6Z%Dm*q8-=ln3TrbNmPC@aP$l_x#4n<-l5Z{$sOuy!_L<1CxzSb>I5o z?7AQR{BFkPoLh6IlZWDs&uohoFJ!%_y&B)Weg4;&$PHIJ`w1<7{x3hCK)o#RE;(W1 zYnE=dm;12=Q>U+Z^Qv$qdDU9WJ_bI?_U~pH4Z2nezqaRXwQ(N;ftZ{*1GDmKB7$zDj%J{7LoCp-*QA0`=+m zscxip&UXhTHYhgY{eJFwqv6))jm|3MRu}lpe!L`Fm*>o*#XK%Wm+)Y3J1>#_tnwjH zU)7PXuTM%d?6cOlvPReb@sV&P^Xi*0=V&4S1LhceJ^zHYUFRAce(I_8+B|At0B;@b zmH_Xh^|k5m?hWNW#frdqdmaExGN;=M1j3@m@C94RS}a-Tp+I<>0u|pW4y9WL3K6>mYoT$MfyX zr{;*7N3*b@>NAPq(El#U=z3xynxAj*?t0M<^xsvHA3iob)N-7*n`UI&u(r`&JMG#1 z)1Lab%rol6kAn521aAg^`B*q+gv+ zyA;|KJ{gx;XUomrM=l|&%L&qSmoz#Z&XiviWy&__S|ck@crj*FRiCNe1jCNiA%LZ5-2kQu$_lKwwNx3JtcOk}g{oSD$chg=^s z!AQQE-=a&8Cbu!~9z7JfYpQA$<@5W1^c47Nuy_eMCq6ugoD(l_Fmf8CC@*p zvGX!gq*J^h7+4#X($C^$Xm;P{sTZfQde8q+xB%WT@W#KF!+Y8ElTmFfd~?uKjT%cv zAatmT{AP`XU5LfwM{Ir?UyDUrOt6zpE=0X;;$4Qk@F zQ&Yk7h25 zR%i!mA=!uo+rlrV>Uto*Aiv18nKFII-3jWQ#{FdK!>0-_y;vKy&KWO3jfa`O0 z@_ipV>f{nj4^jTE@|U!a&N?1@x*J}6G*-Z>SI&m`eC1lBvlad?BY!N=xp#V9EH>4s zzAgGq@Yn@D?e`e7mG48(_Ug}#iU|L$Uvm7|t$9Y*t^9A>%zS8WOPsmYd5^XHS1r(- zxLQo|O>9N>op|Ub=4cMSiDLW_iFTe(EB=wI)fR2obBpdX!)|*^?)DSRHT!(?_^&px zZF|)Ms6y|(k$cJ;KYo`Fe@6f38Xu;{Mo*2J1biA!{#h5k4V^*#&oe@~S7N^s_ln@x z$o=Sg?Uk3!GZvhVfi`;T&)klmJ3cSkJEQmbUE~(}oxOb8Yku@wCjJ*6WA!uU_gYQr}&*0#;YMh?y(cM-en{ge#be^>A(0hMm=8onpXyOqi zA8o(SnI*ixo%ioun;0&gs6G2Shg!Uq8u=3M_l4mT=I8#z=!+XT!}bXI4f4PLh&JSV zFP&<1Swqt5sR?Y?j8Pls(Z+UagQy1Pnh95iQ`ryeOH5r=R-iNOMn!6eS;gA~8sGiUZ`T>I{HNo^xVEC+A9gd;6aR zwt;epR9`}|_j}TeDe}MLGnkFv*53bRfoHWXd;Euu#>zis8OgTJZl+YUrW(aZ$$MME 
z+~thYc}nT^x4^o>dS(;y=`FUh!D{*~R#yEmR&X*>SDbR(}F&(`qT`BTto16{3nOI#a8!ijq? z^vNmSAJ+5!3vut~>V5ZKh?+42_CK=c{nc^r=jeU+UI<-j!1qV@yuUo|{jqxAy(ish z!1vR8-mi*#KcM&Bd!Y|b@%~vo?|&!m{jO(u-@O-lujl=F@AmA+(>%`O9gl`YGf(2D zFN@pn(?c_Jw2sw?8>XVE4q0j2T5U;#vFgbYemv~Jz51%L?7n9!^$H|K@jII{yE#lwqFtFv$U>zrB@j5v& zo#P&DYLu8<YiE#1Z2v*#HP2yeOIt;9dJqgD8*Sk#TT zCjYpO1K>>f+Dk2jq2ycq_0#+nwLD+WHOZdrN1tmv_(|n26(bLTvvHH_8vg>${^F0m zSbD5-PX@4&b7kXA{+Z*0&Zs{&Xk5Me7G9ma?9q{Co_iX+tpacOENr|5xpstSlso!a zaF@mN=azGR@oDHS#p+FOALsXvKlu($iNC9jP4W241V01e@2{U0*mw(^cw`Uu>PF#j zB6%ex;IOcoSlHo5TpUJi93BG)WwA&05F^@H`pF}EUIxZqcnpHaAb13Zcs$<@{UDb|0(8_+YO!n}%4%%}0LGm~z zCL3LuiB*@!bOr)^_Q_h#4q0O~#IRvhciU?t+vnIrV_kbF7k|9$ur}Y=UiMHH;{whDV33iu}85e4r@(8~{_@7Maq>8=N<%SCH^xy zCJZ-<@dc(>mm4Eh=fqkZ7-uaWeWUO_=Gw5G!|T{BXG9Lr_U-uI6@%{EwrYfx5!(qc zry4`%Sw`_o~qw*0mT#1e& zx+^6o)2RO2Wl7Z~(F(4W4uW@vZ@GLm*8|mOMcdCKU*YhT(WHeL(SVr|4R&TsXYEXO z@SWPRAG_`h`VMjaiTW#IOzLmuVW+>DP1FXb25SkvP4yS@_w+Z5YoWoPt-lO(oK3!G z8i|uN9D@$;8=Q}7Vx-Ii@48lK$1+CS@ytTz%$qk8{11fhB4AT&WG1vPoSuyD^j`S> zHM}W&R~?T{)0_+66QBv{M>Bw<`G?3;&VljQ*tDaxAH(wC%_5l@FMB03Pc?U=`niGP`+abjLR2rs!HS%yMZz;L^V4Dgzmo0i9RzjA;DbNu0k*y`Y)+WrJKdTsc<7 zy$*0)hU{#E4-UcuOBWi^T9f@c$WCv(nXFU7gW~;KdxR$sAL3Ktp}G$qB+LFM@vy|j zgJg&!&xrNA@=S6y`|)o?1&8FB<|D_I6_RJl8B7P}HhiH2@zm#?m)l%;LdaK#Zbo+; zU|zCq8X4PB0L^-Hq_tD)Owe6tf^l<{>CTb2t|i6IPbTv-(7L9%`QPx5_~~~iVre1% z+kQV+mNcJ;J$)4YX(RXaz3A72rz)oaU)^%+1}8@;sl2?qwtU+~tV7y!9=FeRYl{5c zC)cTsk9CN5%WZlM*!*kL6TDl_9Eg9}mrAwx|}s$vReG=t?{B)t?}{n zx*%?yKOMck3O@Q9hiWg*Mb0?7nxjjdYFwOMlG0y0`REU7XCD4&wPoi$g6~tcRqpS< zt$oDNy|(7>ciQuG$Wyh~I;j8l=Gte_IeWmho}8);<*-&FZ%RLeNBf!gebCfCc*?<- zY3rV+YHw(Qcx%*vyk)OfYAd=P#ua&sCp$wT}86|TP65u2*qE3F;- z55|h`dpBoV*WAYv4QiBVX@C<4UJs7qesYfvroV{YDODra$-*eF`V+1#`8 zDe%45H?AX-J?`?mDDNi;+veE+F~Z8bJAx0WHObK{OOcyp$j$5|Y%}=r2zJ|cXy7^c z?PKC$8Roc-R_JRxJmAP}M_<_v{fAv1AO=xfz`C$y5NEF?QC|dGuPk76%{8ko--~{` z7aBiWQV^ETxdr;?K)#06*o0fqZ<~2`JNGveJK0>ohj}xQGsofSWMIu=-phce1v^MO zz4*+a-CExn^+xqeml{cQ?&2*S9jqdQN+d%70jZ{0wwd z!>blGLu40@=eO<$?0mtK^^h^FTkRQ6&W>`Tv?hOzYZ>^Ze3z_p^bq0Hq92@mBw%Tk+%{T2FIq$#b;uy4X9GYAu{$>sblHsH1dxxtR z?R52`uX3%Dy|?o&ex|X&)>TIL(pBEQya%St&e+ta=_A8Gs$(0xv69~>9-0YV6+vHx z(Am|{+g0Quf0@{y8z=Moh?DgnvvcAT?w%IaEOF~Y=p2cu+6TgCFL>UW^x zqZBzKTSfHH3e7c3W-{MlTV|TkFk{>d-E4*~s(42B&r;~53Yu9B%|y7~iY%?-x^$`9 ziPCTLqqT?3XeRNII%sJKGEVEHN2AJP^0S`S+OizGfOWH$`<@&(h?8WIPg3f_^a=dl zHY`8-G2c6Wh%5&_N2Z&$OpoD5*vm(@n_w)BgRzz0lIsJ(DEP|y@KZdjRm5-Rjr(l) z5xm0BL43@u`W=TK!GxcpOJ{Fv2Ug)|``tz~1dhsq;U(xZ9tW+QpEVy_WwncgUuRJ} zoN+fZFKyTk&CE+0GR~VDukQ!o=W^z!)SaKOJ9hPdAN{w}{}J#Q{-Md5m>(@?p33OI z4je4tBj2s&qmKDlz{lS2#C#PnC+fqd5uUSQxjSUTk`codGid7rBcSU7yn-=;|kgRa$dnNpJ~+ElZ*Y+7f-j+ejfgatVQd{P7A-kU@B)nRZjWI`Syp=^ULsWh<~Jmig&X8w?)N2`k#3w z@)cQDCEj5@u61?NI#(wZ4x5m#P4H}=qamN*AJ#qrv}+}7&$5Um!Pl6M*5AbuVk`LIr>W4fcgr4@ZGTjbA5>T zO|37jUUemN9xsSi#_20f(4(WTq`8Q4)$ALBEQ~-AKxN+ zh~$^y>Ml*_F3%Ht7fk8BbQkBp=Bf+(i2Ti#74Xwhe~>dP2i9LCQ+K#>E?$4}Wa_+r z^q1cPpWvzju7miJLLbJaDo(DtK>ELp*!D91i{7%(`$Ti1x7V=`rR(bY66kS)drvuJ zm!TueR`l-Gpf8{8-bYtiY0HC)PL>Dxc7D{U?M3O%+E3U@&GY_v#6V9T&Y0|dDyQ&& zb`zs&F0 z4A~NXbh`v}yhL=pLFjygu_Y|d9QBb;>zoPZ<>K{{Q|NNay?X?FYVWf0Xyg-dWOk45 zvfY(ERp@nD_>WrMb^9oBzrx}9(JsDo9(VMpc3019hZag$!}IvWkU0jt(u&NHAJf2} zDV{&*>JnPlrRzDq%=~U&=C8>iX+sW{$H~GrVDa?2Qh0tXc97&;D{UNu*2{oLag=}y zPrP1N2`thpq}OQ-l7TbP=|jL)%KM&PCpg<&-71^srCVix*NC>GTl|{56X|#H@>%+w zC!Y=ZchR+~T$pV6jQnuqvl;DAJ};16A)VKg&w>XSJpImcVX%PV*W|58zq=k9*I5b! 
z*%M=-slL9?`1RD$aeDhc|MQCuosp+IZ5r)s4duM|;_pFcr?Qc4Tgv7AgRbooZ%g^$ zea9azTdJ|~U)_E}_*T!PAK6j!up@qG_meWhu-DKUc#?SHVwThOe%~X5##j8IH}AMw>b#;rLw% z+)Lz{K|DK{ci^{&u;F=5XKnZ5Gxk@hW{mcU9EKmYZ(ce>E;>SXtS2U$hK`W;Q=^l- z)ne#xH+#)@GnP(!Uk&m6baGb%im%-eAik!WXUf;oxm9B#>ydlpsO-kKwi~|d9Af)0 z*~^8j2x-65V594q0;8dEp5i*ABbne`G}nqhwM>3kWI;J}-hS8@Eyu@Ki66F7epvAd zyb**(#lxjMzkutl@NgA0S}Q(5zbdq8v>;jtjmie<4PQ#64)`8o?bKeRD(uBF=BXDC zu?~1Vgsdx`fS*7$|7(Bb#PI5fuZ>^-jO{D`x_srXUw;8|L-Q>f zsmHstP#TR*(|NdgteZ>dLph0B*A!D0?ssXA2)Jq^?%GV;HM?^As?-x--t(-ByT7oW z%O6|H_{)%Gk`s~_TUi@~6X8nucwmC;l7eWCkz&VA%CND#HYY5$Ry|Ct#?hfhMZQmc zeritb20yzu2aQgh*Lp4bf-OV9_cPdkh2TrJcj0$sBl)7w!EbrkNs_NUxK53nNn4WR z$U1vZyXJTc@cj|^w}Z>VqvZ4=zh>qpu9VMFFzdeVX}|Lp`kKK#jp09S+o~YC9h>g2 z%xPa3hk!qw)u*_eU~a>Y(pS%R_)4{~ir|q#c;#w%<|=sS%kUMoe`_4R%BRoM@l}y! zbWUO~mQ$5fi=XE}h%cp8jZK=&N_*DEF-(Cf4(=oSj%=8+IvSg*Jpg&gl7eWgST<`8_zwQpRy*F9 zH2JFVT-sHlea0C-u|4_+PyFO|r}dDs(Lv*Yoy8~B8a z)^}plLbR*=y&8N5m-G8nIdS4a>6XfCEVPW>qv6jq)-c&qAG!Qg&Xcp^L=JXZOuD0Yg&+oL*W;sZX) z^WyELalEZ{W~s~DTj1-Lpx3>OBNG{$z&Q8)58|!H#Tzs`?cZIzErDjY@>_DdzdR!^ zU-NWvf#_j^Gz#`FkvId$ZqK6-CFeh%2@;ZuvXpM-Y$J`JC9aYGwJ*pS7mT zP9K7-s$TN9%UTDU>uQ&jXn#&u)2CO2o0eomC0h*EHML!My|ro#&jymw2NKZ%mi*|l zI-Ze?p+;#gIp3W%*vVb&$7svg>dJr_)aF>$*nWu_R;>Fv`0NbYdY^Sxd+_$N)~OH4 z!|m6dVYjcg)rb6ztI>5sfG`6xS0xkVbY z;vD;sX9tlzg~)*&JTqyit{V&Qf#&y`#IGki`2g`exepmTiurq=HSS$xo9KTF_OlZ+ z9o8`)AHgT+4)UjnPpjb5TKLq0W`(cQ`QM#-2Dc5EC*i>>ov-NnKsU_y0kdZ>icjNV zerKA)r{tASYlX(7|0(AFeSV+LR_wwSl)b1kaO3R7US~+l#-7jqCh@d#Mzi6W03XSH z+gAmyyqqA#sC?veiN;#Nhw_PJpB?49$hFU$*uQdD&^gg%&|SL}|Ig$d;cy~4R;H_0 zDgNKe^-QkIUd*<0U$D*4t#Z((Uc_FsfMth0|2_8W4x0u79R_~qCHT&^Kx=~MICNAC zJY9Ty_UjJUe%%4BGyzKcVssonAKeKsKz6A7GLVt8tV9(^BZP3OGW*<;+!MQq42;His)sSJ1|7j2ya9iaeRRk<*I78`aZFob~Nv)HgNvUfr@Y&pDtsyvHy z)~Fm;&!1@P7EX+=gte!@i_xJI#nba6CY^vS>^Fcz!bJ2Fknh;pY}V*V+6e zTlu-h!&6pMRyaC+8f(|7ZRKldQ)fbabiEJT*iZd=>AK`_n4FPzVy~5L8dL56UvlgBsT1zHc#7moFmTtmHW?W_=vS6TRvyb zh^1c5^6@E&4J)$DhW+S22PPN|Z?P9yIRTerAB7T)5gD2OH}-FO$NrWbJ(BjTve=u( z_g}!R-EZc(4a50gdd%&2&LH-+#`XJlKmBG8YFKer!b$!9gnp|`r{AoNJ^g-kDg9o- zzIVR+j=gW~LrZ&T%L}^CGzf6+g7I~Dmi+B1)^`KiX$A*cpKAHmIm7|twAqWwqS`~t z`!9IsqLdLcj6cufT#Yjen*o1n2DSV`v^Sn>!dZMR%YpW@SyiFj+M&iYzibKTVM7ChxxOXn zT<>q+&(|+aOtI&Ov-bAg?X%}F>mKL(&cDT;?$sar4((@GpW85ZnuGss)1B}6JnO-7 za1VVRi(Oh<;M8&>C%eUl=h)zBHjVDPJ-3-U%J%Q}+jH6Lve49?49=pK%oP4dJ!(WP z=r9ZXr@~vK&D33g`gF#MZ1%JSPAuM2#NLb@?A4Gymd(D5cJ>nljpI+Kra}7TrHx0~ zmr)p7yeFHzmLc{Nd<&dtjg@_^{ViR?#?~JlmR^5JpZx?M@?6@tjBq+@(7o>dmf1Ew z$-##X0wLF4NWx2m_Tek;DN8oQ@B&n3^uHA*?pB~Q6qqaxpW%Fzi* zb)HLJD7U--x?>NSeWuH8=uXh4at4&2v5$97b*9Vp^qmRZ{hfpHf0GApXWV^xppOIFmM{lkVq?lkCj=MQx*0-w~cKa(KRtb^n0hXn2rwdc>3TW$)p(XfHr} zdY7|Q(gXA-+zWsD5Ys$Go`+LwPCB1z&53_AM%ev!%L>adx~!DYBFju3X{8v8#*mXl zB4uRy@Pw#+Cd!to%Zx46e3o36ahdjCaPGjyf)nKDpRiiZ25WU<*uEBbe$tj_$nm3% zvU7`a;Gc?ZmUDj67jMaq>O3Kfcp&EzwNzf^%Z^h_8?4+T=X!&i@)zTu1n9u$6t2xzP<+GlSJ#FhzLHwZ5>~_&9HSMfhF0#)O zE6YUQ-)Jm)?r~$~TfDcAwM6gw;7327IpNsTn$u=vTv-8iE}`3U!OYyU&zSxA{h!CA zHdd|l&Rgm^GtAr1?!}><^DV{#>zIh@K)!SdauWP(WR23?eieF*Kl|p^DTym>{D1=s zJ56)voqZF3J)7(Q4cb`lts`pxbJE5O{m{k>MjUPAeh%6oZ{$OdKSUe$e|L>^X#;)3 zp$+nUK0Fm|90GUowDB}DtsmNWtuJk)4nP~PH2>qYk@64F#$|D|5wAO7N6r2N_iWyF z_q{$d%-g@-bJlqk^|WIz8R%;|V;-Fi`J#8Wj|9i8e`TBngI}PbhJ8XGVdvGc|J$xd zcssQkiCLJc1^I_T@JC9dfIf%R$JMvpZFH8Sw+1u^`GXrW6GQ8aL}OiM06Q;Gb$QP2 zf@om3FKUfojd%mP9buoFn_@rz#-N5w(}}fbuGw;#v8L*>T6{xRIr;at4Mu!{xRVVF z{j_20+pvTNH>~A+?}JN?mGGfmn^&-D?J4DdDQjgZdsKe_Zp4p|^6i~x03K}FdaC_( zhq0HvGu2xMt7e7PLO(H?gotFr?}<0d7w=qGzJ&5_M@OFJdK>stzFc7PQ;nQk?TBH)p!fP)6KxHU&~K+ZoVxym3tRvEIKb|%r}g1 
zv-YqdJDb_V)>YxbT*fEe^AXZ7%*TZ2ZK+h!xW2WjbUNfb>NcvAA=UU(!C}pi+9cs~B$Lq9}j~Lpgb<%YMTJ_G)S@ve%di$zH zZX}DI)aOO?xte<8Q`mbW-KCQMTcpqN+xqGy(fI!2+r5u=bx!KXHzyDuNrdkP!FMKn z=YzJK{L6q9YN77&lojx&#T+WPrhY-$^qHx_q?u+?5;&nAh1D^anBOGM=PBiEo}WV# zwb$m<2gXzHhVwdfHV*rZN7mj_8m;9VnvC(r`f7C6&4Y}|sm7D(+FM5LwTQ9iFxE-#IIp2iYI$%*O4aP< zM?xz)7@NjgW>O=O@lITny(#`YROZ0u_xQOP&fGNh!INqNP=|Bk1&hAD>D$-?%fQj^ zPsBD1F_MxDb4aj)@u=@>sL9gFcvLsuJ3}*xd7Q-9_cPW2^T=Gzo>*2s-Ac8REHjvt z!#tK?%V{3V8CTHYe4L?4^%ab3NJ^;X0-k@LINK{`Ab9;PcSf&geBfZD^mwl>IQ5QZ zm+`LbzKOi6GlPVaDsX*1aQz%4BMs!s3yLm>& zrZ#w9XF9B@IV-BOZ`RgiMBAtNbKd=#5$@33qnmDLzTUqi6mFk3Bd3G;Kgj$y&nVfn zrslTjT5{0fzbU^khb658N8T8QAEk}~coLrBpWPVuz!ZEm8=#5T!T;<@WecX~qz005 z;34wQzaADy*39NsFs<*sNgXHbZTO2ON5cqYRauz6;L zi#PF%C7wyjV_g7m7hOTkX)8jUy(I!4y^Edr!HZ@XA9%}y;O&DK^Z6ZWd6#t|0*|zl zceUoCN5VGGBvppRGuzGKN#Yj|hB9j=X@D-a_ud5MAspgm~+my zBKI5FBE4_3i|=8J?$~rEbi59@-$P#AVGgnJ?Y(;~XVVJbk(w*QvP0jc-gpyd>{Thh ze(~ZJVdgfoyexW<`kNEWv!l)AG#Oh%$H{MeVSMM8PZ-}@GtAg(9FOn&$ny>A(sKQW zT;I&~-%nnaUTL0RAz$Bk;1GX+n@-h;%;20LWYExy9Gl5}JN@#jt*hczd%h}yGE7~Mt<5g1@tU_0BXpM-bKyH&KiD44nFimxi471ozE`bzk^Tf zJBiolzhhkQd&|83J>sJC8!Fe`{=LlWzR%~JDd*gI?v!)y{MPrfu3OIMz#0E^XTn)O zxwEDrZ(T*T(XfoqI@+Gg*bgK3Hd7-?`od7)(|N@5rSJ5mvi}AjBl9_l`8?m9&$`KV z>E)x1>C$J%!#9;aBiGZbKfd`TyB5tWjO%|ISF8S8xJqzwHTeHNt{$~XjejVHPjXTFxyx}wb9W~6`N?R9Av1_1W2Rxy(L2Y|D zZqH;saT1-Y5ML)ST5=k+YG49=E$-j}xP zm$WfjW3=UtAG@rEn%Alg*Ddd!kbK&P-YR*^S)TRCz$c`0Oe$Y+l6*TK9YpfCKiRv6 zF?f231FMoSJN8rSLcr`8rEA zG7}d+5gfm>@%-|N=uC4+1e`{(BRfrSEVd;ajF@t*Jh70gpl1@k|O zvw(eeq#wJvrZJl}O1NGC-ib5q4l?H+&L@HIigR{0g40M+Z17YMSE3K^chGLj2VE^` zgjaT>^K8iEJHQ&22mWX~QVPE9Ip)3p&RqY4a8L7KjfYz{?D4xyWQ7m;8Tu<736XTyP;iR@&%**VW&t+StO{+ZX01fmyI-q9f{zMZCQmEt$bw6(HA% zBe=G&`kO1gf%RQ@$aedCm;O$b+xlzlN(zCelnR`)gt|8YE||-j&JuKMdy%bw79{wc3kB-?5Kl7Vmq;#|*gZqx72AF#0(Shf6 z7oPtDo|~A%In1Fq?s;*veDeIAT$9Sn6`LB1O;J5Lubl(H(sw>Lxc#=#Z@k~lt3`@_ zH>HdmkT&c^&d5v=9gw4L*X7mqX7HeT%EDn2x@d>roTzgtE-<&`Zr*AvtiTt!j{R}#bWg1{vJ-mw z-uPcW(ZJ8vpopZsT;_%hvG&*zc*x8KH)>-q3U2JDeg14S=^#^YSWAxw^ z{R&%7k1IFVBZhD3wG}r(Qj7#IG~4K;gsErx`d)k{JHT0gu&B>g<%W@y*5)6!tE`{?Po_mb`Oiiex|y=CTOLt+ z8WZoXM`$^5?+(7_%| z(Wc~)YAkLY06%TukQh*BD|&t3c|q6e&L`-JG0E@XkBk5pnk(4^ZM$e!4-LiV3D{rOktYd@qB z(b-YZ!Ru`kjD}wYj14)^L|{?QCh(J7>Eo;$bXxgjaWHI9n>qCNX0kD*iTyB3$VHsO zJr4Y7Nn7;YO{K}k#(LTdkoy}6<;`adHV7%8~AgFR}8=S@pSp~ z@K<-r{)(BSE6R{p;@3)YhQr95D}duW^r`;Tuh*ygn@oRO)t}F-7%_-L^3)&gUPQb5 z7`OT!e(vMxkJI0W{$#r!`CIOA+ZX+4U+_8fA$%1R^N@TQooGx^pW55&j5C#S2GOsa zaRzwiwYV_=KRWdY@Q@tg{DV&An(PLaEZ}=y^4;)PRMGEi$;NKM`r72>jU#-amJ#6O zeRKtlyBT}k8+Q)l);wUBY|NprczA6;X*WJbfe#0^PGD1>#H+w1cslVTe*jE^$57wE zqxo42Jer>^TzeIGGJMGuuTFlv@dNIE0M0%Ho|)jHo$=-YR~vG2BJGthALL$=3m=Gh zbP~dL8Vdb2*L+RBj$ZrOtHFWtK~6k02Yx7qFK&cCW}_41=csY~9N4Zcz_w9+j-{Qx zGKw?DBkigyQ0;5TA)oP}evdV#7F^3YPSjP;Gt5qSgE|8}Ytf&f4X*S~2O!ir0Vkr|PS5|z-98o{Y=d1tDFZZlS@)4slht*Gc z>%k{pKpx)dORgX48(zQ899;i6aC|@<LwRO+ z{YAdS`XxTIb1t~*L=Ig9jtugK8ffziz7h3_%O4Jp@FmobclybO4j znS<={B>RT$d><@Qa8#rQS6v7kf_?ZkkA(eI--y0Ke}eyinF;mZ zGyL_BzVN#hKLZDX|A)Lg6xh~rhT8CiE2A~09~=(lGrWF&@k=Y-G6wO^Q2U*4+ps6r zKhC`a@KphGUCn-gyp+(PQh23`_C|nzpDEalZrJ_F9_)S__Q}AP2>gS`J`#S#9m9p+ z-|fI32(J2t#smD;<%_|+1N%bL0Ee8}1`d;4*fmZE_QCbF=wXUIS&Y$|vwg*@j8k*a zx&GA^x;~US{D6C%$bMowKmO*Tg*iV4$2ObE{R&SHPJNdXI`)sKVWd@B-%`(%_Q3Fu-i~`7_W=8FIlwF0>&R3XuzeYAY7Et+S8t%mjG%f1W*RhM8SE# zYd`ZOGa*5HPCvijANfq4{p@G&wbx#It+m%)d#yFii#mSktBU-KukPV?U$iOWV151J z=avB1krhXtvyG41e8yANd??oAJJbGGU#6|Yht01ke@@eLGig_+l?Z4h@}LcxWxsJ1 zUL@|P;LGxx>CAB`UcA6%0WK$Sr2|*aYVG_!#v(A=vr64F!OdA{4_BT z0>08xmV96u`g+%XwUIKdEX$Qzulq4ic}@QvbIiXnxXp?y{@to>OO|Wpzw33jzpMXV 
zN8PLX!&h3>cZ_xwUDE5#uhV}M1NFPZ(vy#{#AH)#o3hl{EGA8FSIcsp*cO0eyZ&nq zv>U%|T8H~S{a%De-9riFP((ivTfUW8^;%R}!AN`fhFs*o*zDcl4_-iC#phXjj_hRi zse6#?QZJEusno;1ta2mw;rDOl+J-{4_t5Vce0C?ga+rP|?hn%%-m`HYz`4A9Z{d5{ z3)cdZSU>NXqd)+qH(&-y#5nnjY;lvlN_P)k!8LI(<+@ljXZlul5K825f zE7j*N`ow0C!Fvn1Qf=?1?SLPhYCBhNTgH2cu?t><@wGmHuL|&N33qhJ-cx+%dV*d2 zT-xWE(4)-gQ{44R&s9$9u++ljZyTF2kpyq?Ad7@QTvI>f?tT(nic>%m2amoZ&bFe)~@R+5>Hj+s<8vx3-&H8%N?F zgS}Ah=WOLZb}hbxUhy03tJSw`7v0$G`IgMRS@OLmc+&XaJKyB07|DI@TZmmZa=JU+ z#2n0Ky`odP^SR?TUF(oMW&5G+idUwpI{^5&&z1enci5MSeO9*_#(Pr96;#`a!+3?m zBfzaeGrB*+cfr|Ia5n`V6q=k$OdDHxTwqLdWK7+C^`(yRtg@2=zrqH&@*GrZ4ZSzxwjt`@7F%`q3}6 zl%f&8XN;1^MfqAz)6Um&*Hh73U6isLPl4uY%CrqzBlNb!j*-t?uI%f$k{;=8Hq*{X z%lr}8j$XHPM7fk+ejeXVXt5Q$k|VKj#ECG|V(c5Lz5jfwt1XSbdV}kMZg5pFhVF1V zf$MsK1=#*4uCKb zT6)n4H3!JhZ#q^BcBSrP&0?%#Z(X_0u{bq-{)qp{cvmpqZw$Mc`R;{gKI%3%d5o<) zye8o7q$_y78@kC7_-Xfl0((BN2e+4}em{@M zl{=i+)V|p~2lruRuP(H~zHx=31!5@(E%elf1s+&I->saHwEj?Yd-0((XnrWL3y)HD z`0c6AAnJ7Yt2KPPdb^`Fgj{zWzW%l>w1M3sZO5f%5EjGIfU9PJ8Vm-+!BTv^~seiRUkLEEYVyC4~`Sx?- zj1?g>nZMn^`b3;lT8!KK3cQ^xvgHIQZ=M z+;6xNii6;`M2;8Rk>lW@jk@TVs?TZk`7n7$FJf~hKUm24rF<^5^~gwFwo5!?#zwqD ziGL%pmN)~1zH5EQC)H~06W^L|I$FE@|4t&WD94~$+}2h1wJ)N5qufa@)ve}x+kZrv z*q91w|6z2F1w1={+sGZF_K8mPzC3<)+=u!O7Q#P%Aa$u@p^rjzjJZ7bR99k2w5EkG z?k(oD5ko@FOYZ1ky?wpLe&ECg9mgkU=)Z9RTwBlzojgyh4 z`bFEPy0@6f{er$`HANk?6`79a6q(L%2iL5hF|F7|P1H>Trwi$)iGCvJXCeLAo@&}` zyYk&*ViTP4QRz>uDualoajxSjK z@$4z9|2|twzkh_5{=kUUroT*nn!GKkf4qNo>K`ANUA4#Ts^@Gj7yIOt)u(39!`|;; z&eQm|=#NFS)2xmwh3SPO(pK)7EboC``ICx|%n9OkwhYy-oGdS&{5fmP8#AT7l~@}} zm!eJhy~xG&sfku(RGeo%GU_`&(eIF08LOK+&sM9J?SAzcC1bpzQ$};An*GFQ)Ypht z^Ve@lJ}&(1KQi5_X^P4-RfFJ zzr%Si{8##Ne{b7zv4vV>KZ3p8G63C~b{go*`CEN|v=BKZd!#(}Bv# zC-}1R{_Aie{))ML`wr~}e@kBDt8m>y9q}I)JXU-|JRS?ctL6|q!prn_19&{hd%@Kb z+EZ(5kG==0g_j9z*TQd6w{M^i*+;L}8ppU!*!6WdGE@J(;XR%ITRNt(Cys4v1dbEs zQx>0$MK6;(^EX>X*IsW!*G_>x4Sl;M($KYuMf13hBRMl1$@hZ$R%pw>J+T0;SW*i| zVB4NM0N7z7So4jD^|MnRedb$y$|D?tNMV})HvSh>SHc_1dnyU zQO6S5Io2718{(pgPAPlZ&T;;xcNl`(T#HWG`HeNQSKO#RkfWX7+Qdd0fO)PC&wS?U zdVwcR-?u8f*FRn0e);9)3io%={xa?D>D&cit#v5eXX&_?Z+Nr+ z{rv~*_H=c@ z+R=52FRSqm58x>9O(C*U;47l7hd-F=R=Nu9h_ITjf5XT`*o@2gCI=l?%6-@AeaM-iahFz0EGwXVgW|`JM7s`f zH&tSoq%-H7w>PMM9MP_mQ;At}BXUoZJsx?@_vt2P6WkL704_4@YkH(#TGx}TcNMgHp zzte9vX9G`XTm$D<>fZKx%L7x?optz%3(p;y1-hY^ezLW20I4Jfs?47}OPHkve!4u4_ zqB$Ac^D!>Mh8Wj$;&=tmx4t$)Y~;kc$#U)S`qf%k)vEty2iAfG8@V@3 z>rixZTZ~I|=Tp$ihtSGLVYSJMnvqzm;v?OgZ*))imV1MTp{Xv~PV_8a1I-F; z$CMF|OsDbMUTEAo1$#B;O5PZJdAh~3@S=w*y1|y4_0xOoZ!81 z3NcWgxCPtwG;G)3#db}cD*q#KjexQZ4qG!Rp@BV9$%- z-HUzZQkvP_Li}y{UTpi4v*0>#YP2IUZ(G6~9c!rHQod($OBgEy{JbZ;2)}+Y-@ox$>Nuz)&)wx!Fzw13v$hc9 z$%!6-{}}xLYhygA@9zlEK4%xsU+h(|73MoSX3?)#cu`;t3!e>+0rfKP3r9jTQ@Ub6 zZKW(&KK_3VbIRAi{8$e#KhzE8{?H@y=+PR@t^uopV)pQy!E^h(cc5PrblerE%!q#& z?{IWrcNq5;=YYTBT(TV+`WCcSv7K|OzS_98znG$8xviTmx|+6Z3NhBC?thNZVx@j^ z7=DAld|{K^!!)6TRn)b8Q_!KbaV7_}2tEv*Wd2CjIWegHtS>1)k*2xs9SMz< zHfC3#a~budy%x^H>UkGjCgt+?D(x8hlg9qgO+7`&@M+=MvJMO#FStCN^6s#Tj`cHv zAqE-ne1DhQ5+rkzwd~ep_OzVwB1aoeP$%{N{<^*e?C?X$-xUI5@A}o%wsVn(!Rt%l z{zqctC|TGoZc=yoDEvP-Zqmtsj&)VsBy=o=t3d21(XrHXsE&nP)MMi)x!9|YHQ18@ zjdjVP!JhY#g+cO%{UR~H=u`X@ziIrVAFlc)`d3%%qt&Up{`EVNr9pnN;)ll>uHB5^ zy4ELmaZ5jKvG~bN^e5AVcwuLW#s$thaLZ-6J+ zA!oCJy-d(Ppn-Q%{%NYd54e`Tl{ffboAVxi!?%ChpM92lTa@1qv0e_^$qi6~ee)pp z%}DH@S- z*ZSJFAM{m@Zt>+{yS#gBg^D{Tcck`{yTXFK)KO~LydU_@@E2rRvy`=)Et@5-a3kmE zT0f2R3uGX19}=`7m5tl=Gw3_`rcs@t$9W3)KYoG9^=N^^y@kDX8FV^?wvxhzRBk5M zdtv!sCl{6f*JNk;J?(kr!`gGpH&1qyZ<_32zf{EE6ux&X;D7#d_&=LHdIjH!&EN!k zY76Z=e7D2x{J!S?aRf11#TJjgunYUV?0dVPjeYT>T<$vkp|*g1>8&r}O&h5HVxAu3*}2F2}<{^ 
z>7(auFXSB76y~@hJ^-U=M{-C!&N-6gh%ZM+7u!~PspF^E{R^=B6R!t*|CJvdv(iuE zEbC^CZ|Bp-9{#kH`6EVgZYO$03jPMupkvX^mg6Up4gdHE{CbsNd#uV`@g-~1{Pf+I z9rXPG@=w>(hkE`+>PwDv1;@~~sm#B`M@!Ouk6drIgL9O$?$8gU)-Au-Y$$v0xmSs(l%ut|{v8k-wqw{jXpXU zuK@?PA8?%D5!&jYZ4>h$^z}0EnsOjIWe<;P7wwJa>u%vD1zGGK1x;h4q@idez&MSWk%x{z=TO z@RJYVlkA;TeivpKQzIEZ}W!2(OvWMvZ61c!8niPU@+kN5%tvkQCH~Xfq=d9XVkeC4wSwa7RFuW zzWDn0W6x?q_qU?^lY94xNcWX}W@2yY{!l7TTFi{ga-OygnlNjQD=F|U9VX7(=(i8> z2@S$~Hk94%xUw<;Pq-SB#6uhGiDH}zMl}|_Y@*bs?#jSgdpZEWv~>gTeg+NXkcXp@ zm<&nkjuUsEw&8zZGP%TlExAC_q5r%2lbmvL=2HldNMZe+2(<5ymh*2h)$%rq6BucCx=_l;Ud*}gumjSIq9 zZ-YJeQ@<=N-1XukW_v{N)Kdr!L9VmL(V8t6}(5AsN@&vwld(@u7>u=0XC;vw{-?wlk@uIw= ztm1R>+OTimz;lD5fotb-^G_8T=m!m0paCm15CIJ^|4&5Ww;1L!&coSbs6386oo7ov z$8`9rq4}ZU)H_wD`$XtoVtn@pj+P8ZN0Q8Y&Yo5`26kdA12Dyj29P5ochH6q8Fj zbd4~#&ji*8G4zZzB4;$hFIUp8z}L=N5+3=Kz`12lw)ZnXahL=6k+s1Y^9+_^Q@N=Sdd)34|xXXI+=rLnM&rT&JxB&lF(&+-!2a$e+#=5=Ot$=;&^zdH65^$+u%?$;3K`9HjWRMk&ZxVedE;V*^M^TJ=8d?R}U8RL!ePT_E{ z=LY^4{*sQLjqnmDzuj}}7VtD_QI$L>ftmkQ+vNzsS$-6td^*@Sxfa2HG9j*ts?`q5=x@kz8;f%+C z>CtX5>F;EIVm(WF_c?hg^gDW2k8f}{Sbk5;9*LQOj?(KlpOcHDxAwkH9!T!7$iRME z^a5uRJbppx_=idyoCxsT${eRu zPrEF0D|09M%q8wdeF49bIWgw#Lu7neAg7p|5lc?7&)|`btOLn;imdN=Xt=%)U#ZrD z*0Gf5%kxdH1o{@;LE0m?jnbPgvOkhOuzj(oi}!2|_{SUTtBf{(#h=Lif1#Z++I*Wo zk^A#`7Tr7&TAEG{COOBXNUi9l4tmvdWN^X!1LGgrLU3s4>8AK$R0gs zs;&cK^TMVCzd}wZeg(cSf$z7#iAnLR1kYgl-U?0>zQKPi&k~E{$**#c#iCDt3C|I^ zRR}L~!iUlsXA0v?d5Ku={GC9q^yixt^syEE$=OjP&+nRtwQ;YUxHm&RwM$|*4~1rG zzbHK>Ha7PcrEdy-SQHHARD_%p8hRQ$e@Pw5&36JkUqGgpqEoh@XBd3c zzM3-66N`)~eF4~y$jX9~k6 zp!44$>?`C98($RW)N6+CGMDM({rb8&yqZ=7<^<2X3I^bXZr=9Dfqvi&1{W82Zh4~HEwb?hgc0nh5 z6mH`@)xfnII@zOe8}C`m^DsppIY=uE}nhhwkNsQ&bgl0xy0Yxh;1zTn3Nv=| za6KOSvlsmvrvh6A?Qni{z7Oq6JE{%#lg=38=$f^0PU2(L@|ysk8+`t$W8y3R!AQ;J z$JX0In@-w{D}JA$CW89ppLu2;N7aR;xD4i|ee6$>c=xGcbR;uW#O#CfEj@$`cuo=ftgf#(36kA%Rv z5g5uiFD=BLzmmGseg2IVBlfEOXT@*Xhr&l>|5jeDV8)mWFBJfS<80%VxohqZ4_}xR$E-NqZ$4Zh6yPJF5PnL9YN{U^JPuTOmF{M_FSUt8ls@pD5rc>Wk4 zitkV3L;G`f>;JZRm-r@j&i^6c4?O225`%-akQ1k!Phn4y6GhA#gtxOz6Ut6^Mm@v{yVPrP*s6tHvW&&pSAIBVm6l) z!FSNnvMq0z=5&Wo3(!JJV9b_4{NnEQL?^h~9?1Pw@ki^7tvbA2*01>IoQTynlzv67 z>;(K`Sif4wcdy8O3~9HR`@(WhSA616pVC}&Im^CCF7TE#?TYP<=Qg#9Z%R4(AMds! z0~P?|b^MJwLtLM!js{DAZJhY^!kgVy@XNv^t-*M{mpc=MBeaIksE18;Svm7}iMm_+ z=DJ7q<=se4m5VK9Yu_nua*1y!{1#<{v}IFO*;F_2UN;olwT7*Ir@D(G`G2(5kP@Yh zzX#sdN?Ylp9B%Pn^P}_T5c^;|_|3s*btO6x=e-S{tm=Y@B|DcZny@-nG_6?!twiKA z$9CcYjh9$_MjiYe8cKK}si*I>ld6g+Tzr++}uY@+_{X*VH(#|2;p*$bj zu`k3IB4?By@0dP2Xs?+*efGsieD*^h`{=tVBGr*yi7zGfxziuvIe>0-Roz8;T?=*d zsVlJ>fYXzXPrdA0M1DE>6}w~}zup^(9Vx%Zv~2lRzI?IZA6{(Pljy>C;PupZ2D-e& zZaT&P3*H~-`iS3`|1Hrab1HWze#hS*_!~jQwqBn9hIl()&vxcq2|0_&# zG!%Wy(eP?u4n#+lc!CaOc4LynuerCK^ZAhYH6qV-8QGm>EQ%X<75em@Ablb)p!E|U4V&f&I4lgmDpI|Sqq z&17v=-F2g@8rfL0V&P-;)|kqz$VTX9MN~|UqL(PEBfCDPtiYhF9dX+DpE92o`~t9> z9nIl>hL`)e@Rtrd?d4Cu(M24o0W*oMT4If=ETlc#*);{$|9E9NdR49nTSgynPpc)7nO z^&Q&3r7w>*kIqJeku zBAqWpGuC?g0v7usXsh!*unK(gz9iZ*a0hiN;S)2$wec^|9^>waI#=C%W@nvmX3P=a zqN=k#;B1Ow-3P|s#IxrhJmRXl@DrnM01v8e34VvCq3w5Rzlq=9^wf{>?N~>9{mh!? z=p}W&!)yNFtFAxe175vPVw(wV&x4oBy?I5`+X6IAoZw)Zwu4K>FQMthLGxA;tLANm zyo=ynT{O64e(kgi?Dh;|Cp-^)%tr@v*36vwD(esVcFa8EgMQl_#AK9r8NkR~`6@p? 
z=qtJ9WA&~K+mg(Yz$o*yi2R&__v)DHg8H5Hop|3ltEyn<%(~_ErWiFpGl>OhM<&zm zip)bb@L}i;*k%RRBG2}P(eQEN@$6y!2(8o|UQ-bDqx$9ZOi?9e*du1>-#6@7TvHHP z(?vsz`gX~Jbxb;{j*<#7ssvRS>nB&hG$KdXZpK| zUl|Yh?4o_vS{3Vvwv-HO0`AT_(E1E$R@S5FG{pDPWgz31HCqL)>%g^qD|OcK9{7l} z%6s~c;E(fh6$4G~_p5d@s|sc;39L=l#iFR{fAy1K^t;B#(a^g_n`?w+M``=Ob75--Oy~tAhV8-PI z+DSLs$r`H5uGNWM?aU0wF3V6QZ||Hztxh`z5Z&oM=mn9$49p~JiJ6A;;$1H6TztD}$Y!S*e(!qByP(#L8) z^N%0FTuKZp(eabzJM7ugmtE)=xm9^ary(2sz2s1J0eNxqiECs(kgSc9SXvTyUg{?! zN6YZtkn_(J+Tctv?kb$YZvrmEoVVV!3QUo`418vSg+qMDtZ z^{RJP71YmqP4PBWZ-_Ro1UZy3bEoQ$ICJ?~v^RSRJP%oF@Vq)?p1>3Q{>bQ8JKvuf zuD>_nUCf$1%oyr_A3I0nm-KZo0Q;fCH3d;YFy&9P3^dlgeP*cK6h5Tp0NhJVUYWa+ zC2I;Y`sq9~1D;V5JzR8Qdp@!vg0(03S;Uw!8I$mpOzH&76av*L9L{8nb&RpfstsI3 zy)5eOfJYf}xQco*=IEv1cGgaXGgSv1i42Y;UZ?aGG9KEf0v;Lfj5wW#2@k^l6DP0< zE$ul=-ZErI75yvtM2`!Sc{8H)??fgE9j;`ZX3iS) zD!u+<^e1Uo;RBhKMY}Suf{z;7?W7Zn-maoi>Sa<-jZtXbpb6SFXkrI^&7eOStMDA@ zW3g$)pgR>C|6HebF0j z^et;@A-K!GSz@i~d;0wrZTv*$r8Xe%^7s{b=j2!9T{?0=e>0-&vQDz74SgurT6lD9ly?)n&$LbJAM0?{K1#^ zhjTv0*g@>;rVMndnMZ1x7v1uT@6hN!_zs2ZdYZ`Tg~)KJmq$Iu)t*m1#^r0GUel<+ zp3h_zpRwI}$Yhas7W%WuFV7+)*9*i5^(V;iow^Wrc=R;*vJKhRElXgf7Q?P%;H z%aXM5e~~eWEMTs;;`bnX^b?%h=bifpa$bwhq~s6Z zkHn5ZjDe%F=T66;lzr0s`FpP3Cs}kkx#9r)Q}nMr3CKp`6S!&1;OQk4%H*QNj-`8X9>_7IPIi2!>&;KseiZ3cQ755A@?dMD-6O>B{({m zrORF6-I?$X;Sa*U7s8XE?S>L`!i+;Qm)O#%BRtu_;aXYi@CnZ4<_azwnLm;3xvY8b z=&AxIx}-55tfl-6ctt?Z)pb+uut*=QpL~(~V#6``K|1`v063HP&Jh6pV}TZ)Dk!K1QX zQ~FP%=w`sVHtN{|TXntC`BTxAMDG&+x>|l^ZKngTtZgrNmbINnxvcH|Jj>dy?KUT} zmRit{yZa1cKT)=n(irW08h%X`X2%NKOx+gBz3glc^VEr_aBUp+zw`URJ@p#aGGA`+ zF1E|HgF#6yo_q%&WoxA7jIr5C><3)Pj_GF8;OmIw&%w;DS$9{6;A)A-2;(v86aa40n z{EJiN{}lg>U)<;4h_AEC6QSx_iNnbEsxH1-!F8XbF7~S3*MooY+c5gBmNxomyX0I; z&I84cDK^|KVs>{v)G4wdzX&9c=D^zoEe54EoT7pj*MoGLiVGgca$b+ z=N-UVYG*wHKXp2+*d|(c*7>H#9Pt$|I_G2Fo1l>u5%5vwyoqN|I(RJu_TZSGO5PME zsdFsJzj`*FGcs(4Ey$|idP0Zd2QToz-wNPwopjj5e%zod01 znfGdK8M=Tzmt#$?6y`m+4@;l2=5(#mM=bbCJMnZ6j$Tu6F#Hwv@{vjp>nt0kmozpDzoV=rctIKudGEacmDeCy#`d)>!A_I+*F8mAkY z6?*9nR?84_Hui!e3-Ka)eTN@-ukTW?@orS_@4CkZGyH2G0oUW9C1j0?*IcXfkoRq^ zH(qrP>SX;}*(1pOadt8HE&R#vg{R0FG4iE(ft-yRX3lm-|1V2?v|fPXozJZ$->LN7 zG_$6_Qz!cYY}q$B-rg~j{eMls2E{lIpjWASb?AXnfw59Al6`bWR#&~Xdc76)vtqNN zEzYV-SX+XR48cdLcC?hWWrKzVFQ>2Hk|fV5iFse7F*dP7M1rS9huK>oo9kvC@-3=4 z<0~~qwQ0Q1WQ=XtzT|xTEZ-6j-yRvJO%Q*1VnZJ#r%=J2^8a}5Y|F2$8(y-yJ&)o! z07p-4XPC5!G2rLfEbI>TJC}>ip+D!^pI}GffBO-1_DuG()LHQ?Yg)Z$%(tQ2eqtJs zFLh>92b=mt#?`Qsb-v_`*d1c>V`yh7xRGyowjabcT}4|$bD}p${Uh~N1vRt9mPUPO zYiUhYXZ<$Lq=gpRz{457+s8Lzmyz$zMb{MUJQV1Md3ufcT*`cYGD`A&2l|8mN?w!E z;PebQeU^RBdz>>27Q7A7aVh$`oY#rIZjA?5f&PNOS<2X==<~sI)dlOJg(}89fIi1U zQ)3tJTux4A&L#%f53xTwjvtfsg?+@1&1PaW-+hXlcz-eWIQb5o%mH`u4bS$)*lb=4 z)cb^b?^V|=e+~GfX@C8h>Vi)>b9kNa&az&!kSpNa-k*L(gAdwXaqB0Xm&lwkCl7*a znG>GvhvBhC|NWVhF^k0(rO)l_LG@~xk43(S%|PZ=%7lN5zbj=D!zeBqeZqjDmAq6^XAg8E`!m|< ze5d-ST3Bkw`~OBu&;Q~wf?E39=_Fi#@S`h-m2 zeis@@vE1^-Iq)EK-k)!8sDpN*>X*y>41ms;jjmEOAoKYebCpFu)LSuNNsXFA>SVhD zbRu<@_EP7KpgOlwNBaH*`a2CTiU95ic+G3@drlbsA zTFvuk{$1}NZ&+_3xU!nLfU#=Yq=9nI zw{@W2f5`Yc`b{_to<+`-GcSUhXOSUe4)656h7D+G)>$9pYwC{-S+B1V>^=QGcJ?c? 
zbzC;QRu7*U9l#Cq_~4o!3-aULyU&E_ zFtjnJC8OmGG_pkYht@knafoaQ!6`g3^qClU!8nIkhCbtO6#SmN_d2fAyKuduuDY3X z7~hQ1hkcPX=fHJjQ{({0in>EGM%}gsZS9JF$GAM({9|BW^*_e#tHVs@I=ZVHCUIxQ4N>-28V#QqDZKP&WE z<}$LrreFqgM(`-|T4?<2MgK1Dau2rN6&Zk9%4DyI92`j-!msx7 zt8_2)FpYa}A#l*I=o&K5QHPmFQxtw~k@mIdu%ffbnO%#t16_+vRpdQ62t~ejoHJJ1XV2PE;$v{_J{mi)5gYC8tn+1{?_@-auBNwl2p!&N&&JvmJ{~-Fr3(tL1#YG9 zFkWCQ*s)W$?aF%M%o=<|*mKacS_fBbvR^4Y)+Fm3{07#!{!Pqlo$DpoF3DXRp_gUF zLNDRSCBY%M6n#VJPvPR+4qrw!bi3qZo&JJw0X=#+k5V)zZP;l;WKMAZ!UHPnbUjk$ zG4l^Wz6RJER)6}ih4JqF!z5%RlA z=z#S#6<9=uzqYuh;5}rU$nb4KSFHKH^eJ^tBD0M4$G}g=9O`QSeRQMO;kQQnOYkw- z(u16TF!R}hPx(&jiJX4}c`mvh&-Q0ITdW3t(FY!c9@aw-16EBPsOmC?Hx^YDxMu14 zq}0C^xo}9}56CON>v(WckLPWFaU zS9Jaw=w&u*QO;&#sCz5&_tw#`sd32}aJBdsjnmh06>^$f@yHV5rj4#@&ibUzSAXcR zuYTq^A9{6D^lgrI_DRjN;m0+|&U1&)`e>txcOHCHB+qPttQ#xxO6kJL9f|iWGG2W9 z)VzY5_0{#u!FkCxa;b`}QhT2w&87A}4>0xsJhI+SKo2=LkcUBEHyvXRS2sTk|Gss} z5#MWz&iG`_v*&17550VPSkZeEWu7I_o}#J19#7_F%%SRnG5rH;N#>NbW#DWHG$^zp zJfI04z7G7s%Q}{zvz}YL)3^FPZg? zd2dH7ttvQuSht^8$WzvY-0nQrMelX8HufJ99~*M9VGk!}R7r~Xj2xZ!*p4b)mSyl= zK;A1^7Qx&vq(8gX=*tkh`#|p1^IZ$`B=8AON~iA~@UW67u{&a8p^Y=}B$0JaaFQR)$JLUXl(9WV5d?Mhl@~ur^foGDJEx*`gwdbP)j=_b2WZ_PckGOUQ5i zoF)-lGWYc4J^IGG=E3@V9pP$Xrm?Y>j?bjI5^+q ze~t4!)yHJxd5e0kxxoF-so})<=udot0mLa7NW6kU#4U&=#s~S2!@|i;X7SVp=Q$2b zYH#UBEaItVS1j`9!VGn%W4+wzXt`Cp(h_ENo#5Se&I7}^KO%QE{{1+2H8wjMF5OBV zZ05x~L%*+)!aceUIoISJ_U2`>ZZ7bykoy_G>F{rqSWr^F&8F=6>Wo@qCSluWzg2TSB!tF*u1ofy3fos(>S|M*tMm_E=_Ii#4oBVlp&3}T{`oE_(KU-1uE z$elJL_L9-|2N(Ps9sRWN0!J(R9;5tb&azt=+q|K=-5IzPKIReUECpI3Mr%U_@;W03 z&T3+-H};V;QMtFA;_S3^SPsn_>p!JM?E^3S990Ywv@dtR0 z;6m;-*{CP}luc)K{%VhPWVgYaME~#F*KA2@O$)OLEa~X*-em4ZYT88ZvU3MQ8&3q3 z2ARV*=~vFDy8j5vyhVTn_M%V(%k!f{#|X{RTTV7{`NNN zmCUhME*kG}7pH1&{M-k6v7t#0l*Q-$yGmJW`?;67Fu!Edy084>7V>`HKy8_WvPJm| zCq+pa{(Mp<^Ce}Au22@OmuZxhP*%#gE5TL1sWM68C*mxdjfX z>)nhmE;Ncy%(4o`V|jja5<2Fb9AI=X28n;NF#m~3>u676GLM9}x3H#9aBn;eKh?IV z@C^d5(4w^Ig_p`)S-K+#*o33;sSui_9qVvMhxlu?(_c9_ix}eQn8A0M?Cq=Yi#d~Z*jH6|M(H#& z@vE72uDUs^8r?JVkZ%@yJkdRom7X2!F)QI+CAocFHrnH^*xXiNiX0W@7J4M7>@u-G z)Bt1hL~WV$XX07ze~HWySPl>u+BPK7WyeRxYU}zg6zwo+cr&w==+;MyQOWjXI${c^<{J-ya-7j$uT9fiYjkB9cZAgY-w$^=!P~RocfXub2Jw^wa<ec9n;)iXwY)yv18bhVnv^7EjPK^CgQ_JtYW9!O=K>nr*_SM3i~v2tu(NvCm8ek zP3U0QH>86T)VmfPRESJ|x}VFLq;)tMzxbrK!dqLhU;ZQTv`2n3y8O-8z@zB!%3uZ0 ze&CUPouYxC4R^^n#Ftq;*AI6I-`3y7syJ=x|2JY)J>eJXdD(DRE#DQ(_(m|7f%V#& zHq|C~Cx3@5ZbiP!8Gc2$HsONk574k0d%U(x)e6p?uPJ!|G zSDG7rq+>relZOk^D@EVpe2F-&u?N}TAN8Vd=9o<9_p!GSo0s6SvpsMt=Sl)sXBqgF zy>v*~&tetML%s#yB6~vWfpZy)(!mCm-Mm}I8y0Ur{u5v%e(b{q<12fD?G(H(otTEm z7MC^L;akUd)3GO7iLvV(WUZ8#z>-HSb(m%L!tK~LR%%D*S372Ym%rTACuWzeZke5r zKb#F8%~bR&jWX@+^Mw8UmtW9cM=H6iksmIJ!?jYyNFc5%^Hvn$P(1Dd{(l#KxD~$o z&=k$Zq3@@=&13K_5&lOmFW|XH)!$PqWq0wv#I`ljRxnRELU{{wldL7SIpF_xne(u_ z+uhynm8R0ql_%z+C*O{~T!7v@2mSdr;(Ogne6Mij6ZrvcN#5t+cNt zlx;HZJ-2e7=S}qG3sYr(!CmL6noD3te{P=fU;d5BeY7_eZC>wiNA!{XUPPOHJl~th z?QM!U7{)wWsB0D7f?O%=sj^#W19{rnPIjQ36eWA`1H3-hjlR*AN^D~zwuh1Dxust& z`oA7e$|(D1j3x1(G?u<0?c{zVoYaC}Zx496@K+tb|9|48IRL}|c8(iE+WFt%r9RMB zA@tIkrniOvPg?|Zmjd0Hpu6rgqCtNq{<@d{GiYaR$k_hp>)^-#>cqvBtcJ%cM zqnx-Yj|AE|$(~?UXL)=V{FFawloLl~ccA`!?gl?D8HD2Pcbhc0V zOrxAQEJpc!d?Rl$%8AEf%;R?S3uK<&4sltGekFH;+-K71unV7ZzA(-xCr*n&Z}ZW? 
z0xq<-~O{%1|EXVG~TlyLSob51H~S+EMnRha!0|*KE#dv zw)jk!-{?YRch&EKxuswBPiw!ucGE($wQ{Z5dQfEhLxa|B+AwJCCef{AomG7M`ArWI zw{(Nqa!~U5DES`nc`b})Pa-m1d%eKE!trLLDfgG`J31#E zIEYL0h_Sx05lb9d|2~Py5`|6@jqHwbUnvj6c4x+(s>fx+h8G%}h5b!j5wTWoRQ9{~ zwLOhDx0J7$Jg*PBrr?J65(7+Zp=pfuyd`{t*hszC-{RIx*3LXZgTbBm)mz*aQ+%a& zBKc|20dt7gxHqT``Thjod)ZI5viI7JEFNE+K1rTMXO!=HiU*_Ozr3WmFYQC0NkgC6 z*QL*d|0PH-di9&ggE04{G}@t!6SQUMR~P;>ApfEN&(b8{qSh8awrrD(X*=^KW7*zq zEIWI_#UHKT3>UxnC*xvoU<{3-+x3DAEA;r;om1Vx<5~di9D#PabjO}? zv(VpXl8;M|2_rt^@uEWqjb$G=5}W8r`t|PgHWPfacLme6(6gcQyVJ7WqLbE^CrsK({cQ`(M-7^sZO|kOU4Vo0b z_djXxCZj!MPH=l7XXM#a2;QfXmxnk#!R;Hc{tEr&bM{l8lkZE$;C^X>`)y!T4O#)mwXZ(J zXytbHg$2-2b|<0k~?-Q`w{ON+-qekCp#06CvnIUVnro?#2E!T-!|;0 zqI;jzu-|e|*EaCxtM1Gg_oBvNLqxu=PpuoE>KoOQkk zK3r^(BY>fra|ik@=9`h6&tflHZ+U8<3;Wi3+pHnPwv)ZLeotyZ&^H$9yMw>^S>T(e zLca+!`i~lX(Q9e1e{;!Sl)q`An zgXV2F<;J%{d;iR{>Gz>W$2HET_sjchoJ}{%`>#2h{uOmtr~lB|w7)!@c=LncNwM%I z&UT+bc44c!z&b)VT;CP9Z&{j}Xv z;O8d%W@^cCg`8e*BYvQbvmUWctz-|QpSRxu%yWVLcK9j06nR_TH#{zm?@F zW)|mp>pt{y;taP&Vhc1`+X_Yh%Ur_fzM9z%KX@{Rckz zsV%qPRebV!^sjh)$|Y|7gTTjpGmdy-1AmXcZef0=u}?AU`3;EoLw>|;Kd~Iud`#uJ zlsJi;^{V+ujn?ObSl}0~2Zw2)^YJHeyeh`wUJZ;Vc=slBwJIjpy?TxQ{F=nZrM!%K zkMtEgaL;g5$bI5_8?(`&mPKSa+;_0IN+;fA5x>2C^ZYg16CCySox~HocxyN~2k+&A zbI#oU0X&@O0T0hHzWLzc{s119G4A=`;r;*~76$kI-bx@8MXlB&%*O_;QKk&ztH#Jp+%iW zq<#HPxdGchf|s7?V^%l%z{eqkJ~%5;^wEBow!!&|!!7&LdE|$6LifeMn8W__AvMoe z!`7WS;o%#c0B ztL_K)uvZD{rF(&8w8u6y}$DTfBSmmle+{8%sL|Jh6VfA>;;Qp!hYgSz*7ALBRl--RXqpgcUABP^1CYiqu2bd37$8>-$^C&M0O{5e()pz##%G?=@T3)Ea5|Mw>>gs zkhAz_A_l8`=&^lRgZYkzgYb&vU1sGcn8LitJg9xso_?;i zcM>OxTnmxW5tWO=xvQ0`xz2{!JvLKx{F?(oA|vu?Pk|^_MskIM0lZtgHs-Rp9(wScV6mZt?^~m*lUk+_ZM(+D)gy zc?3FCKDd;#+7@^-IxRT2mXwPP4gaqEq2Ii%98*$T8S`eeF`sz4lE>sUdzya;9#f8j zhy91NCAw{i@B_>EiX;PvVdT6LpMnZ-XjzGDSz%h{MHWe}2l57yyCAOZ6n^;PpD*7U z-%X`Vd?;n^#E0WXd_Toj!*hV&b?J!h^S%omOvNq&e=+n$=p$HPtq`5PGnV)Wi6ySv ztI^+uPIY^A+2m7{i|uI;{7ToWMHi1#=l3_Uj%v&AYhO!S!TSpF?b3O$4wC^xYZ$uo z)IQK@B=AK+r_s=94Dzo(I^6*LVf(m3;8MP@ecgM|cRG;aUj}S|l6$(ekJj;Sd15=c zzgPIfwCwHJ(5BR;xL!VGI{LBSrcLa%BnQuJJ&fr>Ph$c%U1Li0ywYtb@vxFCqq0b0rI|0DhT|AIEM?iW|5g>F>b11$ez- z8PPQ->;#1B2jBkH6_mM2Z?Ek zuDB9s`^D>Y{qq#-OZa;_`)acm3(wTgR)X8VllJADDgAm!;G9W6gL;Z@RX^yZ(vQqr z9KZ+ek!^SE$y);oxgLwvryoNdcK_CJ`fq4>kS2`mq7AL&vsO-)z#haQqSF(B)P=To%~2WwM&SEYP9 zJW_J3w=zCrxn<`+ZJIL=pR%>c8XI~m`|R_%w0(-TW@g=2{|TMo%YGBR-*I#hlSX7i z+X8aJp-ZU$E&Oi-emRra1^gR;efRA@%wBx54#U+zUOhdmtqxi;2QTUsc5ZfpjzhWzHMAf%+uIktU>wadBh-XQfX+?^8~T5edS;*JV$7QQTgp%uV(BfoM7#vHT;w-W=br#1K} zefP8me;!!-v4J%xYu!6PeRI53cryO#ycav)wbu8%UTERzK-&f_)Pe`$SNp$Z8M>B! 
zEcaT6TFagP zLszxXwaFA)DR`4y<nXz&#S-ui<- zbe|^%VlVEa?8PBEQ2EeyFFZ}zgW(^Yx>;BmHsNO`D}Dl89?sa3hZXu9;I0LmSo>7; zxpKyaJ+LJV--F1qn=j=!I_?PEdr1y5<=4%Z3b zzXaTukm2Ys4bonrS!*a-CAn&2J$q`j_QJkpHz$+VuaNfcV6A)kPW(wzuxDF=#rXmE zBgl>Hj6pX7rX22>qyx8el_^{L5S>!=ftTq={4JTA>{5A0-(vf9@a`A9tK~efyS}-S zHGk!aIpFa&@OdkEoed4n!j7%epfT1{v?2bf#yB^~I9UTS&J)1Uh)nu5eLRBA8T*;C zs|LfD3P1iE^Ei<@qQ~~8!-}0r)~Wa!zZ>}-cOtSo(FE_n5e19E$9QMBe=6;C3hn{%8zZ{kInmZ3}w-r zxiTNNm9mGhv~7c58*NmWv~fdPpExFX%`Qhz0uNsEkV@;a z`KALt?MHSODtrz0I2i9+^uwK+hT5s7qqTqLY(0(N@}zcusm1M|AvDJQ6=-1*?ZifC zyS?+KWQ(u=>F>dVu?lNHTiBDVKsuU6U>T|o2@?^MpJPf=g)F#x~&CDT#w^?ls*mu=Y6g+|#MWdr-` zDd(qMX_=~BMvrb#{u9jGseStXC{ygAH|qJ?T7cs@;PKAetLFwq7kHNEo_M9fC*@8Y z`-}4q_JPeV!P{cxo2fX_laQdm{Sa1$5tenz_Z{# zmOkWuQX#n1%5ESRSNMj;Oa5`ja~aPb-mm9Z{Fz$QEQ>EMwZgL_STm8VolyU#F9UW{ zIcu^FARhtyFu|$pLj_*9*fGJW;Bzm0#{(VabH8o|u$KUPegO7D18xmE9p!N6Gat66 z;x^AMwoS5<_dw<+$4Wd}?tvjwo7bXuNNkBkoF|CfSv)pwbG<2|a;K>=`#rgfHP*VB zTpHMA8nf5SGvxy<__1R<9SQw%SAW@@*Ik>&E?hWi8!)9hos)9RrjnBI%A`_L|B{m3 zHT(|Xw<>8zxRxCW3?=k4lRLGdkBFQLLl658aP@Xybg_QyRfZ$?h;tpmT{(fj5*P(; z%c#iB$im9l^T#I*iGt41t0O3%XNqk*$@;NdiNAsTllDu|c?$76<^GVOZK3OQ%5S31 z!z$CcACqi~ zEh&QkeTMAw9?#X+9DZNMnsfHmcKb`i-2ODyD73lHmzc;Hu#&Ar@ZmAO_Ny*VlQpfv~iyypMFTr(F@3gw=eo9Y-61Ao%i}J zNyp8xZHcK{?v^@3;hB;bbd%tbx<9z!pYQ^8JLpezU71tjST}r$Oljv{jV0b%c^rOM z1Ri8gMR!byPpUM^w?QYOBU$)P))IGI!Nt{cS<75rMh<)dzw17yF#=1_oIZ4oIki$Z zcuqU#5k>QQA>9@Bz{rgwIzrbos0e>fDJ{f1c znLI5!9BwPPlr=XG+{l`OmgZQkT7%$Aa46@G));ceLKAuN%RRITenoz|;iYP>t&YbV z*^3ITOeo?W4d?12izc4$x9d9v$g1GM+EUoyM6cQ}suDv-0* z)~RrX3&;k}TIJ48#Bgokr7^5!b3~gJ+d~9&6$w9@Io&yFZM@^Bhs-xjJj>lv8{bR( zGWpiHk0fQllI^9UaBD`mO_>yfDiQ zFa3#qiTn|?^Z2C41MO7ZJmB_2!xL|$4SUs7VYi=|s5R^~X@k!3?JTopf~rd_hQq-A z8h>Ixs)^SIE$07Q`6K4lpd++FY?sK}Xg6j=;*I~#|M}CGPLdcNr|}su`ggYz166D# z*jmM}SYs~#01e0-T70app^lEd^2}WBKhHDt^IM*~&*{qZessSsU&Ho!GmvNb=zr7Q zt!a6S4d27iv)XXy#9oYvo2c~jU zE$l?b(DX?`~Jx6WOG#8 zrvei^roXRen2M0)=Yc7eep36G=8O&Wosxdn<4*KdX`>7{2C}wN)e{O+vgR z!P=r+l8R;$yaez@K&{$s0`$9`5Uq{%MyVzcZ3lwZ2+CG=mw>I!jN*Eunzk-M+ASB` zineyUZ6yKQPKdPyxy&GOe$UtEGoNHK1go~Yzd!QG%zV!0ocDR3_j%vX`=m|0;#ecN zNFGzTNFGzT7>Qi|4`iHVoBr#&<}R2!=wEA+Cm4YJ0D4rH^r*!~{ii?kt?_?B$J!09 z^S@!vow(c{yMFFxTE@rC$#n}@Uk~`xx(df}`dP*qRo#QNomek?!nPMu>E{r9xC_6v z_Vfnm7yD?+V00!uGxgouPtxap?)|i;{dkiXJJVu6_|We%(C>y} zBMryaB!_*bmz>(6*eGf6XfNmNJz@Fxzx&?!k|*|!FDb|FXhSB&3$Vf9i=Erjj>qTW z)4oZ0CPVD=yoD2tQ7#`Xf{#j;`=E7n`G>??tpns5r5U3#W-Tb01Am$cPYlC@_A{Pr z=^WOGdAx4-i>@zysD$66j47UM>)VVQ)tCiFY&PvxmqPv?#E&E+!BblqFS7LtWFr2q$@%!3?`GZf+jeX##pB+@@7VUPpP>m+Cxri8*!{^{c0f*N6sH zFGS}^q!-1?{O#G)k&$2N5qzjYe7@7+P0_`7mMA}D1-7elMCfn+3iR|6Y%;Zx4G|n- z#H5qRZXHU0PCXgtY()5Y+8VVZM5^%> zzx+yQ{0eA&4m3X-+Mk6isKgiG($z@CVQ@WSbl2TQdy#42v23bt5qOHBgT*v&_YQd<_K)5dURcvn0br_S+`=rY zIg|NoO?#IPUfU-=(*s|>r4bJ_;ZbJTIRD(OVT|O3QU32o_V;Kn^w1#&An_z8Y&uq7SELq_W@Ut_?$KvX1JHd^7 zEN>=!EZSqDRkx2Id_5=OW5Ew|K zhXeR0=MO*KU-AI$wiKa5pm$-HmsYnM^=JCZkF_#hM!LUr4$m9(oc1Rhe}97+vXD33 z)Dkj)v38;XefHC_3+J_rjr`b+Z{URtd`(RI#&SegTS&0ez^yDJiuGY`?duyJryr<@Q z%QT;p42vYqUC)X*L$`&s<-K;dy?(RI1<$qfdj`IgVsswuLDk(E&KeWX*=?O`gj`%K zOsuu^>s8Ab7$$RKU z${Rca-#=>mbFDCb`MxuL5q#JdbM%eSJPqPTWp75X6WXXV*a?repN-Np#m94yvDwJj zEadl?u-g`1Hcj;yyC?ZW%@y1iU>fH)R~V%$it&}Bx2qq;*&6Xrt3NB9x?UQGHNyvf z!d=|GOD|;(moS%^$UbsEnjD_k4Bt?GP-;$)>!(hhS6kER(Y8E%o}5=I#NVYeOP$P5 zalW^flShG{zmPb}AOlrErs ze(PtTFEd>84TF@9^wQi_#!J#`ETizHyhPsN#Lsn(lny&1R5fzeW``~ z^&o!@>;fx1tP+{d-p)9)BGkbC>7wub&_?96gkO#|UBmr=r?PI%XW$^ zOSaf5&IzrezL?&5=hLe5!+Vd54#*GNX0&{XduSz-UD^wtU2fA}<9L7UUyp&uiT>Uo z{RXuczUvQ7Gp$|HK^4Cbtoz36+mHO4W9mR;7v1h7iY?-^V54Lb#Q7Y8odn<8%uhZR)UAi 
z!N+CnrAzT8TyhI|a&qsRyu^M^W>Y!mfw{}9-Cq0rB75zP(T!DWn1|+AnAi(J)t&+; zZ4DQOwh&8cUBo#>WM&0;R1D}C_;L0U@v{WHk(-E5#M47@&fyU!dB%uc-b=ipH?H$; zapDH(_GR+RK3NgUd-gHTIpdFow%X9ezK_0v-Bfj`(OA<6y+onG2ys`PA(8)HHK=l{ zjRPL`(M$hsl>Wvuw)wxQGdv1ByESUGzSc~mWt@HfVqCbIISJN6Z~1dCZ8Srz=#}G) zvBX`S+E{zBFS=<{<~2%fKJ!3DY}O*=5;3th4>Zv9%8b_K%V&@e7~`IwSYEn+Q|l1V zraWKAMXlq}jTiWrwCy#PZ24d)wGBI7Z@bKh-EprOYPi=Ex*eV0q3IKKeplbWma%ou zbn2c-UK{I2@D}{OHRw+|Z<5MyQ};~`I@M|P>$YztAMxlvaE!ELX_*?sGsqanZ~Xx_ z4feqxGAlmQxBCb-eT+JvhpF><6dCp&GHkET6|&aybMNO&QZnBRHI0jgylK0q;1hkp zJ2L9SFPisq$3^~S_s`hC@A4^Z6zxf-o#l@?zl{w`4q4lfcgTZQ+T}x6qOF&f8EZB) zT@%eGC+WR;-cYOOoHI6fzVz>Txx=rLFTV}hrM)lTg7VSwbH`rQmD7097{0f9uZczq z(rg;dxZ7CMI`k`{$RcmcVm`;YQz_zIJ3exk(Qf?FxV5XfVpHCPysM0V>AcD)+;B?7 zXZ+QCwxTz0!ou@(*HNTQ{QA-HkyEjYE;2@2zVYWr78&h|^XXiE1UzK(Uv!9!i**id zyvQ=nJ|lu1?$W9K48OxLXugQ~7cL1zvyj1^z&IP26iaWzevsc?{I-j{sOX-#p(T5% ziH7~=+SYkLjjyd;4*&eRe>Js+CZ`b-S%&XQ{glv;{&z9YAMx2gl5=O^!RO7P_K>y9 zKC`{g!&%m>P~dFh3DudQ$a>)2$2;Ei7A_#Mc}gd7&8k@1nvcQ@$G{7PhfZ)%bZTp~WUka3iQ8JWWxJ`XGf#A&W4 z9-W8cBQ{%;>nN1KT{f(7$B+Lip4AOZ)lLq-#SxjGsr`$mLO$!;JQu;4Pw~ zT@4P8NL_MW;OpR!J`_e|IomBH1Q~%5gTLks4?Mbz>B{l z93W5R+Q-e1pEhGSCnjIJllzC=M;qG?KJ}7NrF4|**#k}RrRT|+%H{kqbMg=MZ0cZ6 ze&~2RbE<^rR>vK_RCzhEyXvgaG2tU=?I-Gfb>#CH(G7jYd7jE&6eq5H7a#Xbe=Hq1 zZiSZw&b}lR{`6Nuf5s2~lu0fbx`=3C|EIIr`#GTq>m9jZEOlbjTRMm0M<`D_9?{yZ zHru6}N7kF|@9~WI0`;!aTe`i-1NLm^Pl4g5C&D1u7lE@#9nb3+D`g&k9SNN#;6DVf zPMt$%;=R=0$y2T(mon3(aW6EEA1T%e-_YLCSsdvSZ8}?g5%Z@lIl5IELvp9>V)UsE z%EvKwDc+io?8h0fOMWD^(YwTVdOGm^&H{IgRsSBkzH|fiBc76*$X7mv=WR_loS-)< zE+-xj|7=Osp^ts39RJaO*=KWJiKeZ2iNDl(O99rm0sRhM6Qgdgt+#bU$I_kmA?sG- zuamy4JQtnMOU02RUsCFUHF?l6(pl>v=paMUMbO8V`mjwr=#E2)UpPGe2s~cCTgmbm zW7MSiH+2p*H@S5g#g~U}jIXV{B>@-k*l-mLCg?_(ut&FMw><{|GUq|Y9s%fRpbz);4J7Av^apaTzTb3=K+FnV>Cw_C~(@1om?<40o zc;`mDu+v+SRjRcto^#8c$eeE?bDGSSpYgeC*zC}M+q`^i_&ka9 zTk&@v+cvx=EF_=C<5^P6Z*w&$+T?-ev#)KX}duk@%Reu3pVZfQ;1?Q04 z+?4&H`=lFul(1$o;w9Y$87-6YxmR={HA9~?LPh-E%^r|UjZ;%{0sB^S?qJT>G2cGi zCg$5pd~gRmK(g1xX&rN)9*x^L75@C}Ut$h6ZVl>UxwwtcPBnzlj;FLUp8NY=erJwT z=8rR`MINk(cH<+J&V1UlYaff?XD%Z4okI*fy3Vw5=LdIjkLN%)c+oabsH|4kJZnxxlk4d6o z#Z-j9_z;hMhotDX3pn+g@c!K+X>0Z}#&Yo91&lS|^;O=#i#S4j4sbDM;401>(6$dR zw3F{77kh{GpTCm!o#0jVC|i5t^S{COn~=RO{yqo(I^T{@iEJC{?4|b|97X;_U5x|Z znz0!k@+pSPkxL*gxjkH-lj6BNs$rE8tZr|nfZS1A*Md?o8|B>k1 zOW)>7yYFjxPkk>UR@+Ith$}k#D0MHYVMEFUVT1}@3fOP$$egw=#v`R zFLeH?JSLtbzl?G(Wgjd6*N4BP+&yoCC*_3muzNNBZ^;i%ZhM{D_L#9Q#&GyoPB=?# z>G!piwsX`rPQR>$+craO!T(QE+WOTNJ!dbvM;G{2`#Acv@c9GUzRVor+mx$25;;eH z?$Xiti_+|vgya>nDygo+>Bte&J}ag8q+huk+h&#)vgbPQV$Y)^um`Zi!j=pML|KTLY111Ze>(czsm{Y=cHh`~hWk2*Ttzw>Y(RZ%lT&j~hD;id=tMZjo z&uS?)b0QulL?ca3PNeYgfolJFXO>3r0UoI)-&{H$yq5aH$I79bZ2Ax2`?Td4zR&IC zf=hd$ER&tcJ|Jyj@9FLPbiyX4Yw%5QYRqVy?S3Hy$rsk21yL)Zief|WQC>=C8 zx6{r&p6TH4T;UHII`*Yg(2-6BhpDvW)R1#^pn)_+?spO{pG@Gg5?sc>W#KSziQReT zF#oZw=q1|@<0k`uLGUM^oA4F{R|a+U4)YA!3sW=4f&W((v7&PM3yqlS2OEaDDL2m` z7n3?ZB}Tg~_m??sa?dnFM&o_d0J?uh?i3@Yx*R@yXZRUan`yIrgraW?`nKg6^!+&F zi6_0Zh@4t`O|ru;z8haFSYGFu-jTm4PVP9qvY0(@V3N&k0aJt={185|?yr_deI(O& zH~7iPrQXO;BbIyI6UrTF5NF_Q9I&orAI=8W*}N-X)XoRZXcluRF7wAGHF0*A*xWnh zY+UKsQ3t*=uJQ+3vj-mVN4JopsPC=BTO`NTZZ_vnSB29;;iem-!Ek!W`l>$~ylqHy z#eZs_W`}=>O?PLN5emNL_}f;(Kcqh>$2ov4D*RaBC*p}jBZjd#=y7l(p624`Cap&U zekYw1K!$$PULW?!Noa2m&l9=t1NNf9IH zW2`xZ-~JFX=rH?gD!xbg9oG{-^pLZ#9(%rmc^qmmN{im=Gb;UdFjke>m9Mre-Pt>HXOgBbLL|g&-ZZOh}(x72S1nJ#Sa?AZ{Y_)`0as1 zPE14l-mOo)oOXlZyc}2!c$;Dv$@^aCUyk2Oy}QKT{}k^}_n|7$8UF z8`EA=EJgc9HhZMW4_`zVV-Lu`yB`_h(wimu3SFrV_e<Kltr5>EKh)Q#L2&E?>HywJF|`M4O#F*V&C^8d0v#0{HhYi3vL63m5(ICpY#a z9o40SXTNT&S(_M3u_Jq4$A?bSJr{fV-L273Z5X8w@SWHIu?^Kbc4{-E*7eX_s3kf@ 
zxqOdsert2~=$7oX`>K>bl5HAG>^d976tN9r&Agpx8uDzM&=6cy$fBr;Wl)Q*Jf`jYw;a0 zUh*f-lG$)@2b(Gw}_=r+n#7OspuMd~R@7WEMy5G155K!}AmRqkrLeuv)9ZLHJ@8 z@PC~4KH4Y4yMg;E6jQnl9&N+(etdH$^z{UPih&MR8^PU&if{QA_)BZC$8r}KrPH7z zUpn@nSsg9T_qSJLYvvOtk*!gP>?kJhxEj81Dd(Jav#}eCo?ZEv?X#uMrs{s!ds*wY z4ZW{`SXV;#lMk$jy<3C5X|)O089g1;SFrJnU+Y8h%=7s^WAi-VJIv=h;HPcLYlzHg z>%(or#$+G<=X>Z{jOY6&KQ%9NdODa3d;VeV7h*cQBhbTu=d69FhkT?Y+1@w=-HYFR z_xMYT;YE`9E)ELi=iph;Yiz!YaiaWXkH*UjcnMb1AkH-gvZ7s0vVT}$iua3s= zEemCR(Em=Lckp*s^3L15W5dk34DqC`>{Zct1NkqvgO>rceG*xGH~ly?*B>ud4Fxx5 zti0^E@R!KnxCU5s4kEry@pKD)8K2zJ4D15=<*aaEKwX8?ZQM!kKh%WZjX#^8bKbvF z{CrPRylzrOXe#x7=A%O>uHeeYy{wVsqX%9q`KX+|Y(lI!9X4mPZr!|Bx6sKQs9R{* zx_>2l^XWWuW#|Y#OK!U7CU0ZD+mmFb`jh++ZGND2@StPUr)`_ir(|VKLRQK)i2%PX zBlaZp7i8x3yeGS&c92>Sy-R0BthMteP7k?PP zN6wj*^t)rT9$JdMkFUtKN#)1Ejzm6$lJD!dn7Ov06Q8ig&}e+ykU@Qd-39-3v*cVO z@e}m3O+n5XDel+lg&!vDHc2M@AWz| z5F4Y1dzXG|hK`~$k%!QW;uh;JFnT}Cu2yW{wWa%Q>x8X6$I<<^;&c2j_V~*OiF1AF zhv?KN<|W2wX6#mM>cl;-^SbFRTF2-Qfh)0&kw2NX&nU8p99*8oJ}n%4V}{nyvUM2N zkZ01DE_2tg$ygG!*U*Fg3ch&05ncKSzem2uol56;LW-BP+PF10wF$Qcj9EZGwBHLH zCNlc9`^ov53D3cwXPnwtbsO`NPIz>V?#O{J@uzXG2A`4Y4*R@JH#tc~&pz6Brt9!c z*N4z6p0O63+YX^xTId>!}QI`+O3bBJJ{cP=to-hUXqwxZuhVACaf zyB}S}l59fu%C0&CT;zV;2+5vIw(D;PUd?MeHiYtSw*$9pLwp)s$bNWqc zJHZRF^LQ2)FVxrxAA#GKZMUHpjnn(!HbTx)=Q$^dm3&%sHO}@q-a`A%ANJcAwS1S2 zk-Dd3*Fj_Kss22}XmvTZ-B0XTR5tIO!C%}v1mDbPejfv!X2N5$;I-7uUYZl?W&LGm zD8_Okox*RR6Pr%Y!RAoSAK7@``v=mbXo(oh2G&)4<@?@ocZ(+5&Y2!+o$1-##q(bF ztj$-xcZtncM3Z^sacI96O7~&E8^ao_etE#5JH@HWzGb*}|1;LIPuVuVLtp2$D7IYr zEi;7SNfgoJog;;-PHHl5}zL0;?0S^=p9?K0l41hH}R1d zo34+(DBt|VejPUe-TZ2ZF?}=bwxcJR=(0NNcp32Ndp5e>p0m;QSObG+ZL}-C&rm$1 zB-C`nOSz_To8X$p{h;r2{;-X4Bk#?Ow%uk#kLH$_#^ao~BrhuXRtI~Vb7yz-u6@fWee|l$wcq`urgoB<-OWW{!=$Y*UCr+bWGdHGHBm0(TQ`zT@=#DEksvWT|{DU6) zR9|W@A5o|G$H+16L15m((H8uCm$8Pc@dtbnxWsSP@cCYJhammv9lL!2esADa%nm(% zjbL+NrC;>d;B~Y+ukoTsggf|J7xRsphFwcCi0&#F{)usp($6F0g{dCWa@Ih)R4#TL zx>Rf;_Q85;Yb;}2H%9imzobzoq49O)!~B}Ql_#n?7kZwSnA=+3(b=j@+H4-sCM*0t zbBZvg^8E7{uNyoRa@Ku6YdhOS{!$|)pFa=z7=Na%+n0?q<{&xWnZR!$>y*oxnnv{_ zd@u8~G@`$+u=RK1=)nI3edt|1S9|&Urq5;0H#&1(NSojnoH_5s-%X8}*fQRCWPuYG zT%VY80q?3G;Wn=b`UAiD(3{pQBY}?&c(2A2J~|R(ucc3$R-hyO7k%X#roE@{rHyDi zIfl4|-}W-5)?PlXIB_Gbqxu%jhIww!hj|E({>1!?=vTPhNxNh`u1mBR9A&`3x%-^c z_$}^D!*?+pUONJw%l$J;shPLU>)M>J_1E&FzBv3dg8hVRf`%KDBH7*z3&6YMndb?%Cws-VJ&|v+ zJqzjY5M#Z7ZGRY_Wb(YWLu;DXL~wk#&sNTA-i%LPdmz~!PKN1e7bcyP7&;K9&khEY zYY#V-hXj*bpXLbtsHQl)`?chHh)#UyYS!&4*6&JWB|c)W6Ce5x?>xm{e5UlcES_i6 z4&80(Sn{uju)ZGdKelCLU!S3-3DNFt>><|ZFgWh^84u;zwqnx^$xfeZPtZp4<^1k` zo6xZ*oSc67H6Hw$YZG#J4f#sa-He3pW{rZ@StHw4L%vB~jBj$}g=|=z(atrp?7i{g zN#-s+LH4oMxOeI2nA`Qt@jCqE*WxF4^n^wK%kw`NIr;pr`WW-iPsse#x!?VF%>7K> zjaL5%oS5K-^9V~h`^LJsXEWsA);W|%xxemB{M$JR|8`D_e|z(X`nR*g6|CK?%ZxQ@ zH^9F=*Ep~pzKVbQq5R{He|sY`>|)xkM0UD<@Y%@FGHSF~*!r}6CV#$YYfN|{{a!#@ zw_VvN<3J^RMLfW#+@anwd|u=qaZa#kw?FFJZA5K<__dBdJb8?a-uTqAamE4kb7zb% zI%Dh=-KC7tO`BPYRgN-BeP0Y#b+4pWo5{V~$V4m4^|d=Wb-LdxTK!|&ZrVXTW}lB* z4u5{IiuO*7V1&~?c@Bl-St)0s_9^Zt`|{Jb`6e4L`L24fs{59w`o}r_#P6Lrmb0q>>vh%j)8xUyq)#%qDS~H8~PD^uXpmd$CG923ITMRTr;cZ75Zwx&%0n@ z0=LL0>2JILoVG^sAIV1I;4jo9nAO@DTam`sMZUnvd{1rnVdMV(V~#uSU2^pqHz#G>F)8Do zN;~BNbQcV#=F$jkrIY0YIJuBne&XQQkTcHyAL!3=^#66NSL1T!hAcxTRc?rUEV`$w zX-4olCr3m%RLb+VV?SG$QR=4gqRxWGi{wKzwt9}o>&zv}@kZ}$C!W;u7W}OPy}=$Y z$p<)Cd|5dT-33=rU&E1OLDmGjxPC=3@wI0kvHeDhhr6+mZrwS?7{kePRLoXq>h%2* z>@J;4I5rPi1K*QxEOIq|zU%SxNq!YjXX=mGZhO)~(NCj~TwwGbn~u!8kgUGg$JUny(M?2SC+JJAZi1dK-6Yw6_f> ziqc>jxj9Wgozcm;`6HKej{0ioW+rx@=w=RdGY`7qyL{oNF;*veRmxj9g1v@swD%~! zuI@?D4mSPm=%wNnf}@e~&K4Zx(l?5J&@qGHPrg;{tp(6eE4lwQ&`%q={{iSH0zA-! 
zU2jNzs76f|eI)#Oefpa0cN4ywnk!)MpPA17naF$izaDaZVQ;>Zmg&aBlYf8h9mUDF zw!P1DYgKvF#16|da^F14_rdwvdA4sYbL@$+*-FL7>@f>l0?3WPy^g;sm0t$IjpBU5 zspF%{3_l4Rsqa=&pP&Z0@JpVh_VK>~+YgxYV6ffyA+T*qfh|NIaj(wv4Mj(=(8GMl zW#V~DhoOgg@SmPY%l}LtN0FPI8Uje3In5;U~~o#9*gE*RjRwJK^t-d~f}D#iU<) z!U%PLb!Ieln;C7t*C>5)qS21*pB^A4-ih7+PrRcXo{t)H7W{+lhw=e-!5nZt8{E%g z4Jwh1mjkO)ujBK)uUdsCt5N9Q?IL+19V(D~etG@(HhvX0kzqk$TcHg zAo-X(0bdmP5w5>kYiRwIcMZ=bqvQS=k@)&5>t>_9p4-Ts(H6e`9n`HX zCSO4{h|YzNH6W{(lczD9-RmDC+5roO?XOn*tWIjpXeb4O`D|1s8lcRBfl7qfPvE%xyy(G`2L-R2En z#pmKRH+60!pBK7{P*bSzo{rKxuta~Y=&_!#ei`fN@&HV4w9(=+gi7s3~2(f@vE zZx~}=!C!+p+#dT(#x6>XEk5FoUBuYkj9mmxcPEYA&DhukdH%2RnfT_kz*YX3&SYI+n$(ik-3a$Q7r7kYCudUf z()^+8{xiPzZT4FyZMq7aJe_^mER*2f+K2MV>Y09fo!@lt1%93Z`-**vjCXjM%fDP$ zpaJBkW#{~z#{A_E0WNf4==J^hl*-Xgy(;VQC9%-ATV4-^Uq0A<>{kbi_QwxG59FYc zTgrKc8s=j(dRj)%hu&2`JJ^4t`7T{{uvqom__Qj~>vBzRPd8_joI8aaetFkb|NeT; z8!sL_`m;9=UjOisgLfL9U7A1m#0Q@pzKed_-~~bUP;K!gp_|XUG-TIsH-|NUd794R zjc6`LpxU&-IKF!!DrEj4d2e;p5e`_?tgXl>`&#a zYv%WNN^{no!oA)Pnx`~><>Kh;;;RlGw8}HqeFa-O*n7THc{Wi0Aph{!7{hk$ zkiq<3Iuj^4o{t=q52S(h>ExbS?(yhp<$pjm3{_VgUvJR4+kc}CI!5U%+U$oOd&o)OPYk|x#E!>`Hf5}% zX7}dV+!I;LekkVsbD_B&p8uKq5_-6+VU_7GeabXSZ#IVQf(CnTX7AlX8_Boz;9)EI zt&4eH#<(V*Kg~G4?xhbe#vW?s?;ZN`-`}ul;R6kuDjq1?RC9mXrrFDkmOB1cfrETx zoA8;3O`Ds3&p`uyV)%i39$v{fbs9_iS~#TkN2}@Y-OAsF`;Frr{1xR>r^kMOc+WEC ztlFP-wAC8S0_Oq#GN_d~NwtEh6I{%n*1LnY)bCS0BWec2$H0l^p>xu8#g~#NAUe-z zzU6#Bdwh7a^~m3D0;Ul=H@$q#&Q14>TDR%tD|Z&XJZooR{weF)@<*-fHizvj z;`d4BsXJ@=U7z*58Q!v(`4tt7S_iIna!>L)pXu9K2p`jDi~k1yQ+<_oj&hTiT*ra6 zBNdkzD34E%st&v@kDHLkjXy;3 zE4{y$-x9fs1LU^}Jtx->bZdyX@GCYX87{t5+$|ZN<;ZY!gJA5V$?#y344*(BoxH1l zeuNCq=lfthvCpodc8Z;Ajhy11+4e$ovH<6UBlrjv=gmHF&l?u|sp~Sw|KWizk0*C( z^F-B|bNtAXYniooMAuI=p1p6p|5zS-ajnN6l8@F;&aH)R6VcZ29}~ zaa{BH1o(I`g0C2Stoj@HxbGzRxR>{(<8>F9x1`q9lip&CRL(}@MVW>8p+3Vpp6l=R zP4QFn$P9n3%khQDUwnI_-q9Tc>K*mV=FO?R@5?{0@s+K}Qn)O!-GlYBEaK+{^Pg zvN6Jb@I6ow%}C_e7h!XWCY4`rVHfSEfANWK?tBThVpl02lD%C^J*A>rYH^*zGknFB zH9UKRe9r>zctEb^fCHUtDO64tbRA;v)Y|biE1Y54dF%%+;Vjq&<`I6-k1B20-&@%u z*&|e2GrMO2`p#L*OZ#>_@VfnU6T8xVA8MCtX4_|_*JK!@q&pvFd;|Ekw|pbuS$vm1 zkf*af4T(DXPhP@(uWN0;c?I*;e)#5}@Xzqv_AQ>{kA1-RHsF#ykd`>JCYbVxRVC+G zN2~Fl;JecP$xGDuI$0j8TjNgkBb0CT!3*)VTB9IvkGV3hIlzDPq-Q(Pu>$00cEDRq z_==Z(q&&Xy#3{ayP3PQ!rCPGt;l<#kx^7yD=8m5tHm$%2E#ZIg6n{JNdlYrA+MP2) zIpH(D&pFNHu@ZCB8U#}2s&hp0hv@eRcNGXOtqnP@uRHvspN5ljC4=1m#kg~Wqw(J) z;D6(xe)y*yi*NoV{8xVMlilf}JWBCl#mL}+N#}k~i^diq$M`#hEFxz0jFU&Xd`6j< zcpkcud{ZUF1@2ZJ75h_jS6!^!p^5w^-gWfDv34x(-9O^{!9TZLctvh?fuAgJs@=mX)A*fQC-^6l>#|s3AM0$fCsyV)R_&h$UqWXngtuhiUy;w& zv8~as+4}?Hdc>`25^*cVZB*k}yt-3$j)_0mb&iQa;M29wfA=nRYOlEURt7&urWz~6 zvVh6B2pVeTE(Lf{wwbl75P0Ovmu|EV`jqXa9B|#cpc<7U*dIasjH0h#g;5%jjstxq z>xYbK^B6Zy(aIX*0}12v={vJ0xCZ7mxM#G@@bi1(Yg1z|KcSD*vzhLg?szBXWp1rb zBmC;~%J)2xkF~?7$ssxw|M^J#=%bL?quCE*h-GAwOO$TsiXEP5 zwxCZ>_U$pD=8ZAq7CSW!31omfGw{0;0f?K^%Kdf=!5OMz=pkh0&D^N2rg`U!IKNyi`>~# z0&L1V+`_(AOy2!`0iPq%kL>fzW5dINU3QK1qqWphY^42IBL{zqvoDpW_$Y01y+)}m zgRlpR7GV>YljjJ1c1}`!o0?_t(qN*NVq>C~Vq>C~VsNjqWPPHRV(>B}W}jzRzqPb2 zEjn#t3Oo_;_P5}88e6jFm7t!((6?V(#CI!y(0T3Rh~$n z{};v^w0*+9c}R1Cr@C1F>H@ZI(Y$KLqL-+(WW0^b*T?~~dY7r*t7wlf?X1Dm_g z#^@rq^9b^a7@}=sw6TYBY#Spdd?9nuK8z^Gw9m%KPS_Y(;T=5Rvv_Jrkl5{tFF1J{ z7UwQkqHig0Lw6s!_J!_+`#APR{P&K15wT-d{bwGN^>Nt^!-3&s`Z_fH59i?)XBbj> z_<^@i-Z0mA)nfMWEu%WT5*F3|!srviLbnUn|W4G*KdoH79 z#w_}4Bj3V0bEaK4!oXIH?&F*fxbTg*q|cW#E#b>itXVm7k)}lbv*(T2_QY9P!`nP# z%aF7K1)bgrSiu@aUIZ8T*CQ`tM~~NM{_xTGMwIu#n(BA5a~YpEmvlcqEL4bn7x^mo z-6HhbF(zkjl}d8mk*AS(@H^F1YyaD}Jc>|@)VFv$~@cpy=m!8q)j>&u?-!nrI#_M9diL_Cztpe%li|n)U z(~ta`JD8bUu$zoGFWXQ*NK4F$+Er0p>e58g?zH0J%|jkQ=oG 
z8k3Kc_(Lp5W06bMt#;&4`KTFHo7B$MYZ+?uagyyL>IPQtBosUywa^^7FsE zcLsm<{-f55%geU!?ceXI&$hDO;*syrW-!0H&IujHwz2(OBk|8*cPMV4y_`dx5Y-gZ z_Zs%)FR9bbJ7(LLhpd9&a z!1&5Z;2iTge`q`Lv9Z0EthR=7uDAE=Z|tBRisCY|OMMpS3CX)HGRN$i)H`!^1Gzwf zWx>bFsIlTRK6p5XIPkBDFGoK3`WuCovAOfzufMSg7{g~1I~+N-IpZuN6oPLMtL&?B zv-(DSCnIy3GqAOE7ic4~qFtv{M~98bjqYOohou{_J@6krQ|`~|>-?c0b@*1mkGAle z9~?rfHV*Y&e!&;O;pPE2JaSSTUY)?9d@IRtc4Pbh54d&5de8P@46L^!eBNr)#fkBm z2k5uO7hmhjh(g`N+-2XxyuFRS^gZ|V%kcX+HJI{rw{YiYS{BSO>^jE_E1dt6o8xO! z+YWZdVG8fB+54H6;0JrQZ->^V^1bbh%VO3t=N=~gZn3xBaq>%@{wJ0>|8p3}ZLd5# z8MIZ7T-CAm_@wN8XnIgt8f7a0Z?DHlT`^uswl{FH-ICp`}Pt-8fd;iK> z1jug_pAE1l)SvF;>Z@0UpTCP3NB%?OTPooB*D{X!SVKL)?MG(X?d8)ml*6xmNUTA9 z$ZwvI5mkELcg}^gHf``3yU)+a-vj)#Eu&xQuv36dJRlXn-B&(XHEZa1>@zW;EBO5G zrW4M@9Hy@+9}FD;S1WaRZMa4`a0!;d`v0VOAN2|Ge(}e|`_Yn*hW8C8!+U-b-mh`+ zJ`ow*MZX`0cOP*wUp{A-6z7_2wiKbW_)PQ{(ac%sL(t6sA9#NrxIYQKcXoX|djIxE z#5?hz!Rb9K3Ga71cz=bqO$oebo`l}*z3VfU?D+EoRV$uOI{!7UALsK^XuacqnbxoS zgg9^dm^h#D5pllkWH>)P3Fm7ZoGUNk?7xNcr&qlm`qWbgXMOo+2WJg^`(PiPBa>Y^ z|D-riwFfQE!UW)%k|&BgDmG;iC(-#=her+c-4()ft-~|z7?z$lav!Gde;JH#NlvJi zk>rNPi6q96e%Ch+d|GiAH@~HS%z@|SQpYtI_lWKHSB$0q&O~EuW7U#Qr=BJ6{M9O- zZ0EuvZ>QPcRTH=AjA%sn5ABAJP(N4p1od#+IbYQhL6^2=!b{W}T4e0rhhG2oOvQ)B zgsHcS4(Y^M-5dd1rdLend1m;FjN5sfy4LDD=H#JRtBhzHYfgOrp(5@(3Z89-*2AkB zFOjb>GkhcONQZHKu_k!WGjiXQpY&w*=b@DQ*@Rze!Y;9O-UZybgq~&@M$F3eFR{!~ zqpeJ1$ymRsT>Ws#scBK=DH$D&GmH+-#dR#7kvqd^$NqfC*j&+ToN4r0ozzWkO%K_> zwe#B)qc`^}M*9W8tok&8vu72N16Q}4I7ID4f9$h26h;HH?KtzOz-*(vVLA7?%70-_ zvSA5)V1@YpnstMts|WDc+t0Uh-Vs>Q|6eQmw+g#vc_H7s!FAz?jA-o_qzB}L-vLL8 z)oBfMpIl2teC<}~RNtq~jIV9w{zu(|8ss-@`}SJKSH5>`l?VF;`hHk3cXA&6!nx>q z@L0Kr{5mHFHa6^K%_{f6qnVfPqZ};8=$`iy?HDzlmOny2%3+*#y_2KmM>in`-5vm* z+s7MAYQ8}17aPysU*n*A+B@_vJ)>Vgbz^;h9>QI>%wsU0c``n=);{v0jnB)ra_V=W z2ffh7yRut?{8t?##XohIfX1}(pP&`3M_~D-A-hHhJirUSy+55R?kyZ$(H zDlhrpgy}8DPlf3>DKOo!CoR+V;jk8WUQAt8c#r$Nh41kugR>HxJ7zy~Hk4Se^W4+u zZ{Ln>UU*GKY0*RWd7}C=M^${S5k8eiUxoYzUuf5Jctojf z`)}BB7>l;PL|+l_it$z>`4{YI_ir|^x^{=bne!`{i;X|_fxCZQyCeDcLe|>tDt6JwKU_-E-$gmOpk1^K;vEDR=cT_yzLd_SyJ7MqAB| zL$7IhI|rS^I)1Bw_(KJh7*V5F7c(*BWd!s?9DN_3(JyEFzM{) z_-F%maRd0>S|+HnZh7obxW_K8#=U|LBco%MsN&c-YhOKL3}l z_q0U$KlVuvdHep-+J> z|4v`|FL+v1H@@$Che!9K_J4nsJHnE`|F@?_zNWtRZ*z`0`TJk-WhH;V&(o5Y{QYiE z%P8oH+6(r1gRR`n`a_dib@v$cg{)m0IcPrU|A_e02x`dI)t1a6e=4K*k~em6e|>H_ zwTj7i)}640v>QvVeXe&x&kktqMfwSxS~A__yprG>%{YIAS6f4n>0VE1Htmz&2}~tt z`zC5Wz~iD@jMDcP;m^3tIM793nsXsGhDHAq?3na~@R#UMcV`8ONxN$nEueM_xkGxd zlYYv*%K6uQCu74sv=4Z*cb#^r(b7PiIi92S2lGC37Yz=E7^|C_Gsn>H$Cb+)-xd7NbW06F5iXti-0de-2G&<9wA3DQnr=*BlwRWwUGP4bYIFcPZsA1vcmA0 zHDjsiT{6*lLF*HtE=Ysds#^s;HRO2e8}T z9V15bCw*s}$bP=#uieM@`J$tAr(el|!o=rgjM3NUx!!@aZ+QpOexA3bioWpE&D~5c zed?aRkTJ+dYcE7k(E3gIO?+*Keb&^ebD*`nl=uHiyFi2gpy2qZdqsONbswAtZ8xxY z3Qq?=?1Lb5_dfgJIO{x&_AdP!qJMI?zQ@?Qb58zYi+hgWAC&edK>MeK&t*-6(EbEy zKXqN#53(0j9|8J25P_e~Vo&6o+%*radZ6KL=-H`sd#do5kXav>ZV&w~>D`Y(w}0fl zWV-z)U=ZE9yui)ZxL&_QPr>1wgNL5x@qGdK%%vVoq0O%c&aXLjekI8HmNFZHM-BR;aHHd&-CGOkS_+Q|EJux8?C8!L@xZhwpevxSNGNT00+%`A1@EhG%REQzQ!}4+oW95$>34-BW?SJIH7!2I-6V0 zKJHz5;`!s_x6UGW**W_y+WQ@UgXwXK4=E>XBYaLawrhuwhkjrqd;0b-D7G^uOmgsM z#c0T3?28LMN1twv$nC^*(4UYUy06x$f$axo-5a6#45kNl@P1jEabON-!h8n2nOJ6s zoR1=-<8|vE|HFBv6_#FFW*G+xI9phBx-q)QciL#qE!9iT`qHSKo<4qbwdtu}!1s(% z{)Y>*{0~=-`r7lk{KnqrOkMSJvr_vTZX9@=`f8SeOdT;{blEa*y~+HO`x9@@usG{X z{|)qI^4&tGuzq&?8%2y!nDw>iE3^EJlUJ`gj=oa=+_Ln%$9PkqB zu*V#A`slK)-uh)JV+w}kF$Kf0;r`N_fnmK1gAW)se>nle?a$9jz!Cr!U%7v7(LI^- zb{NJ~%lyJ~D^g&vkljB1l5v~rYjNvkOPG z`byvp-aBNT_5a47fB4kK(T`*s7u1IRp^0|;jGpbxcQN{WWo=Q3elO(r0C~z2@!MA$ z89kN6Bx`v-wYIP%KlPzmA?*AE90F 
z-qG{2ZkgbW2Oga9d_BN2zZ)1kh`Md(IzP%vg&VYoR^X@SfU~u@0k`&SI=? z_J=(O#(SOd4)Hx1CO>V2kMHs<`S;HXPR*TXojfZ{L#IV9L^z*M-0roapLtG*gImmT z7<)@TjE2o-{kg1RL#bI$PUL)@iEQ|PX8i)rgJ$GO&s9FPRjfUy_k4P<)SO#(zfm8c z%}&Oc-Q{U{5}V^x?i=)^66ns#=CT>tLU& zeHmkJCC}3u>AyP^SXm|iPdRNW?Dyc0yG{F!k=(gvy{UalyIJ9t=&!!nKK4VH^Rq=} z#%#V9w+-Puy`De)VE^;s{m-GPspq==e%q`!*sHnY%=&We#gYD(ih?VuYQt@}35FWz zk^RB_@!W^R-!Pt^yXt|eoAFhvHb6eGd>_0w`~mzOi97gXemWwS_LzS0D*8{FAv{dud;_#J1CBxsQNB78~q{WrFA{-ruH|)oU zsCpSq2DKa-FT&pmZk79-8J=)DwQiDhnIHZSyoPsA)^`MXzK65pHN;GPH~2$4knz)! zd^SGn!^tNTq96M{5AWc2!w7d)UO73zs(&*k{G`WNGljXyZ{53e@cTm4E{%49Co1e* zs805oe)}ePz2jo)_{5?^);*h4>(f?>FXF<6jA~_jf_}b-zz*Sq0C$rFL=2D&L6a#)O~c z;Bdpv|KGwM*1UEx*B|n^>=7q^qTfb%oi^e-b4DUx$b%9+M4*RBjME@L4twEM##vSC zZMm{HKJvlZx{^)4Q<_^7<6yH_sRpC)R2DPp4WIvOt;meB6~z5V^Z(zdWjWr#t=g_h z?3;W(KVmuKKg#&E(C{qQZ!!JULDR$81Cu^Ad|lnL>X(2C5{7pPU(lcEw)Sk3dP&Y2 zEoL1!YuLV*_{gj@WHM(b;mxCJM7tLo?OKy96Ic)Ygw_>_^?=rd>+RU=ixco~Cs(;r z?}G>Fzj@qKYjIAWngaD2voL@AW4@02UN15_4k8~`rhVIg%;3Ir;c^MJ?3LHDh`9(C zseN^Ti@()ZD`zLv7dp|JYv{+JVJA1>Vtk=jd`vtwB;hCoj!sM9Uh7ayf727NBIBoO zT>bu&3}+sb|BkV`8LON15Y9eqO>SgOoZQ1XQC}W4Uf3tdQ2fd_ycEE{z#7gCTt>W$ zbqaRj^I%Ur@td?Y@CBQuMMK*Ycu(iIO3As+&^LUl68&1A%k_D<>QQ9$NIto15ae^} znj)w7i(e+={So%ayVzgJbWtPu{x|5t51$ZDKLehBZ@&*b?bIf6VdHcDNni_{1h&?M z9MC%53S2*>-$LP-vzy9c#4fStSP8FiX|@QO&FHxA^#^a??_B1Gp0VG>(X=5B9XF^?`ZD2Z*aS8mY(#g)#w{)5_i{eFYe{Xq zLFbasePun+m+mGz#`&wk?kBS%{`R1yyZeyiU*Y^7{Oc%lK3bL5ayRoW1=jxo&xeq` z@|i9v<_=i+TmZhejF^UX*7w(`M!RIkH=uF-Zqq;fPVF*yzTT73Qv>c-az?kVg1gSl zVRc)<)yfQi`)qUMs5)eYetQMnudL!OR^kyy@sCA0;}}He!N!boiooL7N=AWv=10X&(}7tc#ON(Vr{^9djiHC;L(?W zlYKk@P8UYikVu8I1~|1}0^mN`XW+&$Pid_P?Ex?TG3c?E_}lmISzk4Y_7>~)bL_8E zGL6{q8UFTF$d=*6qSdw&p8YduY0Fv2JLqQ%wAsr4cf6;Bw>$uSAq(wi?DeO6&AEH- zF()tQ^8>?6rk!dI-xcSM!%5gH27gcEf7pZnL3;Xw>HgB!u+4Klp89gWKQqc0{qrni z^eLmqU$CdwtPip7R|1>Pt$hpFF6DoGgmGY-kACnw?7{CKKKKy*oHArQ{o-fldyCIs zdS2rD2Ks$^1pQu_Id5b(eP@rq;5_W?J@_5u-*^qbL#`)p@R*kV0`h18n+%e1f-ZG+p%&qW?O+I7v(^F5G_h;twbXMjCBN+SX zENASW;b+){p8-8<>I00q2iczoJqwm^QR8LAz2>|vy=AsvWKWi-z7yDfw#>sj>GQVy zd3^mMzV9jb*Ecb@Q&ZL}*F3dHy!+3r*E`-*!&;XG@5Sdo1MTKgg9qJu{sUUeBBMSJ zTy2>O@6%d=-*>Ssx3J#gFIt;b*x`F<_n`T}V>Vs*9lFSUW@yo@zr?5NzP<-tnn`+A z`)+*durtlldd8G3tTSB6c_HE-xp`)(^&QozVLa?Yc;oBTJ}9j|?5%&5c`c@d~G>zCP_UF z)MEd`FR8z5^XE|w8;2ITuuAU>ur{)hq1A8PJy=yw-;Wu^0+vDI%G9^APVUNuJX!b~i zn3G^>!?!M+RRYUS;bx{}Aa$Or(}8CQa18}NoUezki*{X|+=71wuvc7M+_|wLbQF8< zkZtq9W8qUfJmli1MW@RqS%!b)m&9IG_oNm%hOR%V7Tx`*d^*sFJ=fkr=IYvBd-`p! 
zAAgd$qGPti?(`Oy2-Vw&4UinSsb~P-Sf-kD{R>*x%Bfl9ohL| za%ta8$wEm|KrM~+uy1Pd6}=~EnG!=w^YTDL-HXiR)$Xs9ftgV_G=e4s}26E zrirb?z|UIQtJ3{yv}chwW%rnq7b1@$o)Y{9S>X$h;nyH0$=bX?4bnRCIbVr?ZrgHy zbJ0EKyfS#J7dtdJ(^KDp&P2Wre&6(YW$@OS`fiLJR%Tu@c`tgC2Y3#X=X3{aw$YQh zE5_%z2iVx-ZOQ`xjQr({%sRDgC6K&Qm`ttB4qp2I{8h{ZTrBCPV>>{R@N;4P?~LnpT{~j!mHdib{vfL zeT4OLa+)il-^-!n%b@2=$(i6j(V32Zau}Qo-`@t`WnOC8L9bn&aXlT-=^^S56ndE- z_?BN!^URZfFA3jmLxu0m@Sd21??P%bat}wH{0tWO{xj)cN#PQ2Kg619?*>>8 zorQA0M{JsKa&B0^0W`4&+^*-IB%Q@jJJE#bJU~D0JcBm9^L`S&+x0S#hr<7d(|cyP zAu&&f-n07XJ@^Uf-JMrZbO+rAsk3)(Z#T#>?J6)@J`0S4 z`X5?uC}Cerv2lnGLU}9XkhI4y;J(#~{@%`+X~%1e|EWlKPnDscdZ10$4?7IIZ`N7E z)){8UMHc7n3VreyH(r#m=SBt44Q@v_kggzjZ2!g{ayt6-m41JvYb(j`9Ozq{6Q9>C z_qHf5mVwV%;};bTTPORjd+qx1)%>r7|L(w_yxVKA*vCDn7 znP&m)O`UJqm7vwZ^q@>|n1!!eG`bzVuy*3*MT}X8ZaRPQc{dcv4%NB9I{7?rD1J$P zPg|z%L^r|TDcz`$b<-ZQeW%2=RU6px31o)f$T?A8N%oDN2|k3=N_-3QF)RjV%{_y; z&&Fqnzo2db^9ILrg>zfJCi%me;9(kFqAQqYf9nsiTK+&WIIKB zCj&Yx&d*p^x$Fzk>&%3o9~$&AciA%T9R$X*t>`!5Bb39=(aO5FSC)48=&92@DFBjURQYo(#NEuNw1ThX5beKN{3s4Uklm{9;_}=uI~o;tQ!MF z&aA27oYi4$7rj%1e@r@@cxz~gW7FC6Puz8u_JicLId+%!dz4x*lFJ;zJ*ix7LoQ1m zANde@Jds$-$C1Z2?IMG1`v6({Kw@p~V*Z)nLi7I>u-}195Pmc-^j_U1nvXp=OOaX&*49P1mfo0+-2tEAfJEeDVg3d3H=y5 ztzkMb5&T>lYXg4SV+oynE1wJKN3f~i#)IU$12?wXfbsOsINEh#a$n!#SYq%2`oEts+*gM7Y?Xi!R@fq2PZ{FykAL0_e zT~>Pe!<-+m@0hUtg&$!*=e$-CiW66F%{Jz9mr0fMSa6EoJc-~TMXoz`*R>lYF; z`5ot}{&n7v7TvonTRz18F2iqKGr^dz_`GVD=JB}TLA}x!gVJ2LKvQ~8K32WA2U#b7uKgb4 z>AfQJG$&?tDn13qIo!kjDewsF{MYiY8Pc+sxhQt^^NrkD^?F4}^Nk#kdXW>&K z7ZqDV^5_UOYV(QHhYsKq+u;Ly;LC@Wa|efFTIA@c*2qriWOn_Kmi_RQ#d+ijz*Epc zh%NbgrX=ED;xE77Jve_!7dH+yMrl9inqzwuAGi4p zysZ{qS!7Odco=>H-!1rLSLk=~woLo?4dOGd?~r{}h>yp#8r_wDO9R(-sE z{c&mg2H+D-|6mZ>t`h7C+P3`x-of+V+?prZieD=x0DWwiyd!s`23eB3i8v?nY6tW# zo|`O>3Js(54q^^l;GJExJrCSkoG(lMJugAezMmL#%kt3!pkr$*XNBlbzW?O@Dh>Hs z7#lfd&`$f_VC*)*!*7ZQC`Oqa54ejyq2q}qU|I$ruyZKgyhhox_`f2aY+HWFhb@2C;n64Bt%dMG_)n>P zL50LwIepirK+q2gH!{9sX~}kl1%yirJUY+EZI=Ksgr+s9W`UHSj#+#BHBBp?qr z0Rox?d`LiRRY0QH+<>hCe2gg7+9sg3H$-buv?AIB(Dp*Kaur2WI|OJuxv{j`fmZAT zL2AE2Y`-FEr_;_5pzQ=wM^V58!TWoE&I38QxdEJ+@9&SilAN>8-fOSD_IlcDtxX++ zk;tu)c5FjvK%^uy%h_Ag*;}iF_>z`BotL$ARYNJV?V~__!#jcHd1_xJ9oW~toc4-) z&^J%>&2YX^|B^ZFjN#WwWoK3!u1^@Ap?iLXyCQJM-8WhCzv^sIv{VO=@@lDj{+c6S z`#koV{C*Bytquli64=GhaliPk!04I~yvL@sHCDVG-?B|>)B-F*MwF(=h_9Rqt=-x; zt<6ZGwYQmz;I{RVVBaxg;@gK=chy9bA7&(aNomlmQGC!h_Yjv1oDtSfKA#YDCR?F> zXsn?SybFIN=o|~dlYB#)10!n|Gk=|_>VH$VK0II?xL05LzYUxmfe%;WFA$F^cl`wx zf9KhD(D3Ge`GER_z#{lV$EtQ#V-JZI(U)sPH_Aow&WWc1M?Nt);_o41aJ+NkrHpHp zIW(s>rO-oyXXqR?ukgF_arhC>?X`ukh(7f9iQ>iVwR|?zL+4K??>qi*3VdU1-)s5D zSp%=X2fcqZZTC_)g`5EK;Pz7Fm};v0?G^F~ zV^en1#v6NgJ~7qM(>2NDs;WF=!a-~(TPJRL7@fG~1NpyXV;xM90rHo%*El}rcE;`Q zFJkvcKJluy;Rf*~WhG6cjfvmHZWoQ)xxnxX>Cgqx>SD<;Xs#1_w!ojxdkuK3rz@{L z9@j9UxgT_!F0R2^+8x(0&GktcDL$#Cu1`v~`xyQfJ=%2{;76h0@R|5wY+I(z~_GY?+9Mp zT*Dl%OoLO=ORbAj_RNN>z-bLQtpun0U7Tutvzhy9)3|#ExXWT+%^Fgl7ohEa@TB~~ zBF36P?x2lNo(rFcnSbh9Y{Rem25^Sy!~cCTYv-TWT;?SjQ5^9w#%^Qn+F1kDvRRBT zM0Fi@Yu~Rk<9~JczV_a~YJcPR(q7rex)rl;6_2!!eVbt47P$Mid{1(Mg?+00N%h|p zyt%Vj^@@D?ymQc}6_XCn>XGZySveK{R)#$NoqaEh=6rAf=eEsL6CKbw_?%y0&Nxz? 
z!~N6XVE?YNl6-L7!QIA{*cR18mgQBVUmv1=j6zCQ8q=&kAe%C6d&8h)PGj)H#zPwr~5vu^9>F6mNzvj6p6Q+)EBWc-&0w*;k<7 zL%XOWu~fBZbLwxv|C0?antVCM-xqDE=KMNpW01$Cc3w@dVqjAIvg*w$ciuh=7$50c z&NppYPW_qoQTUapkz(7!j6H5}Z(Z5{?IOMvKlZP61bTU!=g&X~e){;g*B!dwcsMyx zw%~VY^Ef|>x9cm`E(d)f_b*cut?UWKIAGHZ2~ANh5qg7UGCZ^N0QF?pm*aqAM&Y)G ztd@u091rj2e13lce8jW;w;ex=aqR)upLF1~liU2PVV|)Pui&5jpV<84z=5M1^o-T= z;b8IW;h(V|sE|{!y>3^!ZH5=%}zt{Qps17js~h;xw-TDHep@FxHBXTG0Y6~&I0-+4DOWTJQs zzR<_`em&nmioLuVUfEcD(*sfDPYq+;!MjI!M-cyYwY;k_Q$_H-LMyeALm0$a476W>+#zU$BCZf(zRR0kiGt?56f*$b?)8yY18edjb| z^}p*kdIgwrn2&H;Wz$tl$uer6DBnvy0}r3Vt%uWj%)tQn#}=huH!qG#KJQA}5^MQ_ z9czh>QC+~fAb#3?wq0m-gcoa{Jab9)QMU2U^W$8`d&PC6(;5Cwpi6Nx zhMNN$J{KLch1zEi_Dfiwb@jNe%k4UN(&6)|O=mqAN@%Vj#p?VZGhs!7@laz{!nn;i zX0@;eT6q}EjnXCTiqjIJ

zHZ}$d1$9cTGWKqw)a_`eiNRzxxMv74Z?aTp>Q!O-BmCtw|DE;@&Yaj5f39u|`9{VU zv*Lx!fB)B$tKQ7AV;2E?dZ>{r)II1A@on`yVdK?WYs~aq;JJf zYztvq(=I=YydC%`<1QR`=EKAY@r;fAYT9Ua+WFPXC%UjO9%q%QyXu3yO$W z_$biOfPLC^33kgZ`1bIhbi8uma}7&a=Lqo-qR&5rXX6ue%m?#_F{caDe*Z~wW0Dx@ z0%U0ywg+~9$C!(rZlGqxhTTIW#1w^_w`skIGciJ)7at;);_WbZ37i2Px_5}le_eDI zIXt>gfcYP;MW+K+`#yoNv9U>WW{#rW%*U?iTz-bJA&WUG7f`qf@qRP9>DT+!-+Wae zXUfcRy}6vTmgNo2qS4c^3CJmbm$_X%B+~T<eR#-P=QwOufz5~$O9qEkjF*>c5B^A-n7z>|c+*<0{)S^E=%iqUoA~evh zwRQaeIRnY3H%bcJeu{R`&znYP?TtoBJ>TsDe^Wn7%6GNG>X&W%I}ZNYOhg;eg^`I( ztgYsr%TFu#6KzRPc#3?4RyS6n`L&Fd`dd1k^5j~{=i@wdVhi84oCk|cZHopy=8;@iWdkTF)#~ytZAE&Qpp|58fM)Ez5o()r= zFST28QG-oi1vY)Zcdd)YY#!K|6+d5e zHo%xrV0BJX{)o<(b)JB~wkwl4jA!kogVnJ1wXC^`u2{*MTl}t~eKN^?75EK@@OP`v zhIka*%7*an#uZ&UHHmVYM=+-BhbVQJ#OI}xJV1Nhl~|>FBjHz3c)~LDn*HqY{qUV~ z)^9wy`}P?+aLj=(r0#2reT^@q9y^}+n_m# zM*D0dd?auB`FY+EGtpzs2Bv{`G5WZ0FPpp(c#CM0E{9b$jy(RPeVW5hIr}HU8jNGD zMTcSH4w~G(x|y}q-`-wra`$Q@wA1A7Rqu>rFXN}?v_Aicvsat?$Z0*H?{e#a9f}c^ zE+F~+Z~uk-w)mTRtBt``Q<{P!I-8K8x%eDRyXIQ9K8R5=>1+@(9$oVFVxuQbT%AK zTZ20u;6AiX!Rubyux@@r{`>ax(Ev z4-L3}8!^L9?bl^_A?m_TB5z;y&P@DM@w=V-o}13T+^%Q6XD+%3cTQYPJ!|`W?AWF; z_E}zi6a1z9b4EOuF%N>X)-k}+pSmg+8XMQ)7q;a}C>;M9f9qZz?ve@HFwf}&W}f$e z`Jz*U`L-08$9(0~U`9?MJDv4wUz!o`+=t#=`l_QhuSOr%S_sFd2yfYirv~rj6nK~1 zbZYP}^256|7=K;x*8bLk_dAiUBdwnf-Zj)(Jr$bBPJwsNtW$&c>wb8H!FZ$K-MrO- z_b&9IPYLht&zu^(ZS#B5@YHKh4PN{W-SmKLTOfFoZjIL2=>MM*-Zkf)8oV1);9WcO z)Zo2Q@O}#VG*1zp6)Erp%1;fR>-_jzYR1pB_mhi1e5Vzk5`SC9pBnydOM!Rpv{Qrk z3O~GCjX0Ocv!A--=FbM+#D_OcrZ()b=HN}8_@U>|=8n3Tu#t)9dhynA=m_}7Z}Iih z6b$b8JN-0$_|9bMWsOU3=nQ|#d7moXoSic7B~wmq-ZtHkyK22S6qz}sQ|Hx9!D07{ z7ltLvmd@^6=F=fmyCdf65I^}p)U@^K5HVMWsN>t1t3#|q7J9WYQgw)irO8#21x3j4 z7<$Avz+EoqKz{l0G`hQBvCkCdOpa$_TW#a+tX}N4i`D_ zgn*+qXdKwjSw#JS=Nje(Gq<11xyPfx`1;undv37i*pmuRee7i?nfF)RvF$b8eMwwv zw2igV*s_8AbH4BNQy=$a_>S~3XU$jmrt1MaY2>NHakBZJ(Z~Ef zI0{n6zA@*2Z|r18>b?N~;NGJ-8wT(o96Xu=NA8fm;n=wAn~7BVFFpw!`~_I-y*a1E zhT-l1$VlADn2DudwE63#7oSEKk^m3*XYBegNAEK6D+h=PN>?Ywx#QA$^zu&o4EFn+?P32a?t2aK zt;@#Y3kcbHKK2>9)h~vBGHW||njOU#EN^H($4IPE9gCL+;&b3E5_-@XW6sk~eYOJn z*b98baBw~oCPr{rbCkS0oi`|!>`Tyo>KI|hC}Irdf#fj84VlIUeY=~u1^G0ppoI}e zcssGx2Qo&Ly{!5ywb+f(OwM%3KY0b;j8`vM?G-z69(cJAoGs$qqKkfx@LYLZQTCZ> zj;dc87*X%|Ov25tQ|mG6tM}Z5pS_xPCB#@f$oJnP?q?C>S;QVz2FY2+FLV%}*A4s) z4rcjbod~QE_LL3p0P=jW$#a>r#u85wZ2f^PO27ZfdOZogO~q&6E58sNTGW(V`J>D2 zx}Qa7)isRc4$^YsXylujctNZoi@pxd#&+ko=3<$n>rLolZy@jZe4b%G3%EbjhKqUo zbG%if=g8Nl;*aC3XseT#e6l!Go&9~}iwD$N?#>D(R%aMpF-vO@NLd4Cp0EEFKNNaz z8+PNpti>0h*PlZ>@4P%Pt~-No=AN<9{Qf2PHZNK_AU;xa@~wk4)>()B;A~t}STI{U6W*O;}3IEm=lk%Vc6>ZjqK)4E&< zzS_^3mMG;6dE5oWg_@c5fH&bYA3xFGlW}=wxy_*r*w$ z{D%FUsmwe*GTG~64)NHWN$=PXtjbsVztDhuyN$){LB>~I1na3-!*nyWz5SeEB9rlF zkP}r&-Q!phb2igLmIgd1txr_ES?)^-}$Pwhwn;Dg{R?cxP}2i2T<6 zHZix&CA6u=FU6Ts&wA3`zd4=g37i`PlO$uy9obzanmuFGyCuMHL zwb<|ag1eQRWPO_%uB*5<<=ZCr-4y0LBjsJpeK!xk+tieIQTN?)_6@O$J#by_z7su8 zOnF!2zH8>Y^LSUvU6y$hguJazh!EXkQ!H?Ab+xW+raof zUtG>I46?abUC{-#$yKJ$FHyi*qmglmUAG3g zO6*0?`h4LtiK~F`NGAK`u_uES&mdof&Smst@bi6W-ZD7j|Gab07eappx>&C>e(4ha z?}!JDU-wt}BAv57<;2vwc=xw^#Mghu$npoa5Id~CbS`AuN}LIt%2~l2bfBg_&I%^j zX9Z#Vtl-;sblXY+*;i?8rSNfEDa<)R%snG`i#QL0w!&fLw)boSejjYZ7c6X}YT%Ax zAFamcv)c9f)Zz1~!{<}v!)dwZ44&yjJ|FUZCdXsgwhPWyU6J%_Bb;bGW>K%p)?=Yv zzdo*WJ5L{f^c9`$a;648bnj1U9FhkwdR`*`K{JtG;P|JEI=peZBf#ih|I=Mqqdr)p zz^Z-HBQ9yF>QMEp`w}kl(_8mG@^Vr{Z#OE|o%jc6tV=#8-3M@d{o&o3vsFaB-9g60 zBmbFPwMR7S(qk*Jlf~={YB=YOD~MN&GaB#4MwmB;GiuT67G$IP^3z-Ut=PAFq04;N zXNoTNC-9ZpZWm+Ooytm^Hgk0HGJTis`_8u!!U>{R#i zlRliOrqik5%-gsBTYLEi(Mh_!j9<#x%f0E|UbAIA*K9ZV)SkMivwN)!d@*LIu3`x^ 
z{_^XN{W=GHX0vD7+1J9Y_Qy4RBc7nRSU+z%_;GTB+6#^(c`|=aJf2az1)hPQ zL_FgsCun!Cuib3gCAVgb;@1w)4K5UkH51< z*oe-#{r5PxAI-VFxkEWV&Kb7Vd_VJQ3V!LO(84o@ma^xS``3eZPw_lexx1&b-~T$V zd%w?xkDU_jy1xCD#7>^i-mhlP#nkU?AHu!gml1zW>_Gl`POaEf9JE7UckY31cDa1- zB76#3OK9$K=&9+$f1e!tFgag=rAlolzCKa&6JHN?PHKa%-^%>3vm73|@wB*dxuONciCtv&;E^_eTKc8VzWCu8 zqK%XB!@Yg;!_0@e`Qcd}JpKOr@EmFRpTYC@2fN`Z^6f{hNp!D#pSN^|nRB}R*+b{> z@sGQJ$>Sea!530>j$52OogRBuwD$qeTim^DBL6*|qJX>8y}#}F++ctBOb+}a7d|sQ zacpb9Pr+wq^WBZ`nH$Kn?(fuwhR?u{HiY00#f&RHSm5d&emeg5znz?SR5N#v|J321 z$?r!V1F={|xyD5Clvs{K-;Kzq{L|4(i2qEbU(xv^yB#{W0uG%I6s_>RzL|p_k@`*h zIx~Jh-?Ue{=hfsT{YbF5a$y^FsH!A~h8Pic|+bh>I{z>=Bt${vuhf*JHRswVCoSU`=c3f)X>5r3JZGLcLr|^Hh<|kcf)8PL$ z>}UQDVIS;+UHmzE32~*=r}ft3c6U9hup^4ReaAW8Icm$A`RPtM5~kL6wac%`wK^a8 z?C#<6zvmHfy0by`O=5|hA1?t}dk z`4M8X?B@eIrO(6$4e0y@@P70`R{Wzcaj(c$BmUCT(Vb6m$M(i=XT?>2aot{H2lv0D zJB97o!(VXk%kVD-6ZSm2vf|U}TYg4;m#+Quw71*dJ~TcZKb>_Z{#)>nr@O?6``6!V z=;K=ckGl5Azhc+gwm}Yh0N9QJF{V6aM~?|Ne}VDZgXls74Z8-H;_lYKlQfT;n1gC93*R=3 z)Udbxpzgfd!RKv)zt*L>b0S?&(4YUC){m2ul!r__gxyCu|F;n{5To56;ropNW2k?w zC!VK!4utI|Jx$|Z&iIEeifq~(7*-R3huUjHJYM?%mmd-19Rps)Tes(9qxfPz6gS$6 zoNhv%TmU@!+lw3RJH|V`4RiV;XCJf@@8QSC)$}Xa+Q?&0JI|%P?O@rt(f(I-+H>5? z9nH+KE%@6hZCQ1fZBXvr)#14EGG$Lq1+O~Gn)|qX!n%9S=*YaF&gW+|6Njb|Ys-(n z{ZhMqSwi`;nV;2pndg@zmougBnJInm8*g%#hSy#>O+EX*!)_lr-uDFB=5jW1C}?yo zVJ}X7n7s%*oVmq+u+Fgg$lKhf7G@ucf3;GJ*6&C2?Ef}++L7Ogr}VawB=h|`g!Nf? zDfPL8%lywe{Es}sRk9b}LXK3kF~ZG%;(1+4PR{$#s$aiI9jggj#T#ok zI!YDe7`SoyO(8blRG_t#|HW8b&VJNX}Nn z&ARL8@0@oPvX=6)4nAm7h?b*5{92c3~s zQZGnUtBTu|0IHGt`(Hu1$W{^R03@b_KFJTCkaS^?&Gt7HsaT8HYX?!GdwBQlrXicH<;u`z=?2a<=G$LSya^WY@nN$g2N3WBzjR?|Ig%` zEc=^KpWld9?Yu~Qx<7%o9DdQWUX$()J9-y+oI$sa6z$U8Ij%ZN;zjW8W~)d#rx~}3 z41BDd=buh2{(*;_SbU!Q=Mo*Rn~ zEw`Ux#0|!jj-%QdmSM)5@Dt>tGu8!KE{ka|4mP@sjJnI}mTZ|4Bd^IC&HwnSSAQ#z zSUm{6037@8t@Phr)>--6Ch^lo=)#WMS>nX)Z2c8>3h?|1IndWV+@BUdu|DuK(LR63 zvCkjA3VsfQ``FnIe#C#5F~>>dD%jtHpBVUQ4dm3vz)vgq$#LnVWNe%T(;%GVI>1HV*1s94vNmu!MFRvn>q{h9Or> z!|1x6{P?Q7BVEx~f^q2<5qQg;&{ox4o=xq37Uh|0>_@*MdJlKZfk&zi#J}I>$+vD; zj{PcwdRIBrWFmi`T*A-|kwk(upV*Ief5=FLj8AmDyb7@~WB?9$6a9>{tw}vMIe9 zHGX)90WUhK4UctB#H~@213bfA9Z$I+(73Sja z^OIjHl%z$mC4; z8oI;*@xoGLScgF$a|61=F}Hara9z$fcQF4u!O@T5?N`AQAA{Gc-_77`8got}za-BF zB3p*h{#|6t9l@OXM~O*#)O2unHQ)UKe*YfzYb0k{;P;X-f8ggmSI#5@13Mz@wIp&T z0_^{VoGCU3)o1f_S8!ncd+_`B=<}N3X~@xm_4AnL^X8!KZOEI5Pu?U0gF5h$&Qcw> zY}!dSEr*AXW3H>sT$_grZ_~kd2s#O{7m(?*S^@!^M*hfJ37?HTlg{eoMhr(sI!IbdkA`$t}5N9UAgt_*-5uW60*}|D=Frzh-a#g zkw<=KWpHH2?2DdlxELMv8FE6k9*%xQ3?Fbn*Av>v1319GD-Gs#?8T=p-+}&C-Et?^ zHP@!E@6*1D_I37s6WpUm->-n@82*gd(d5QeV;uWc^(<6-+Ti~9Am7NoZvr-hJYSw& zmj7Uovq&S6YvHeg7uY-<8*ex5qS$j$p6#Z675&D4jV&&@pctJ@gW3d&T{9@FQ1_;T zLnmMU@a(DJs|+3bRh$1A=)mF~=wSBm0;AxqzG_VzB;HJTJ?z$=Q=F@H8gZa=ro=6a zdnF8Wy)n4%GU2>!(BdfuI^yryGcEli>p6>Ef7=rIgH>~3cr*4~#mx7xA->fwQ9IYz z^c~viUY-1b*`1*+#t!4yC5ikWUYe-JmMV~cWbx>u)4|ocWTMsgRUK0E2MQSeMjT#TnBpCPzD@J`DOv~nuOgA zQS5ZBjby_bXkRge$7KNj=gcv7gbwVQTrIxOy&wztwi7t3_^G>EmJnLe~kFAvB8cl+R`-p zml@=8e*yc~Y8IWO{ur!>o~6%gHJZcJ4l#%>yFjR zUVE20wU2f3aHsOV9X_PBBLBXtF=efydj5n{Khm!+d3U{PuIZlt`DM|r>UNMb0?k*C z(Y;gC;78P!pG`a`{75+Jty9_O_O_qrEMUHuy}W@R@G@y7zg6qw7=25Q=J8(d@FjJv z1Q+5xoB7*cH>C*MzYgB=>Rjm;)U@Z@D!x%I6Ti-roYoIJ`ZRdJV0gh0U>wT(VZa<_ z?fY?$hocL3vF6bK3l{ajEY_ik8ihJD8#c#?e|V13h41nWo5nJmY=^6 zS>l}ChMQlZ7WH9#8i&cX?>qi21Jm6bN9SZp_%c%WkjI67;&);@V{~q(QGFsluZ{(h&HmA-P_qD%a zh)<3%j%mJ{$BmqkYaYmp4Nc&d;(iIq@U{WSA8O~Q%~ft2Vi-3x&{jT$pWqMg0_FnN zsAw#HRo48{;A-+c@|vf8lw5U$cOOHef?2uUdVZPv{FmAE!%uQNvI|;jL(eLKMvg)w zw?IFl5yk234vx0hc*}2%l^@feLnEV`UuVvpyz_9ro4&7f@m`L9N^zoUFMRsjOmo{r 
zvyF)b%&(34b+HbPA7m7AdqndK)Xfjieh7bBpt~sKhez`INAD(AHPS~beKiI8CteP?-@SX{@1Ehi zCw$*^p)>#7g|{w+2kV{UpWmXr-&ZKz$Uv6-lsE^;;pozq$&8EkBjttFp=@+ype zJR6HY4OrW2BPF@ypzg!Y*u{N}MflZXz_S|vpNW1b*`(+4zsa{IdsTIuL(mX87V|%3 zKDllUB(MIF^f~XIV%2bTn5;@4>Ge zMfbF*XCS_`Teajaq&@pkI*!%Ps>e>>8DvjxoXdSQ$aieNdg(OFT%D#ASn&N0uf5RN zlsbmaflgM7DFhsa;8^mf_7fvfi%b+OXNIVifb6h!YVDz$dY+d^C)op@Mc40>)3^U6 zhpuDL^%cNd#NH_$8QHXrxu`aBCi1Tmy1IO1`ODH%_G43c|2JaGjTjiJPXCZ>9ypjETNwVzqYtZIV@-L%>v%v8w_v%QwI3kiN9qppFVMvj{vNTXf?4l|Eum zEqWr-^=J0EV~d#0bD(Ffz1AH4gS%h)k%N`f{512DZ(aVGG002FtNYhczv>q1ezMoP zlGKRe|2$)4{Y~Ia_@5mxCJH9qpYlg)i$eGI|KbTTYV`ao&uu;_TH>B@n;$=EER@c< zhrUEhUxt=ugM(CkQ1xnxrgi(}ze5}O?OPDQ+DMT`rY%l zPgX^V7p~?UFq8a)BJK(_sJRld$WIJmn{Z!f{!z}94d9}Ue2P)-p7X~V-^Q5t1B38$ zHDl}nCyw2k(|mwha^j04*h?=#2T}0c%3iu3efVr}Ls0buJ!_`U8S&jKisNB?q0|#b z-W|#^x@xNGDmtba3ugkWMSBltiVaqqngZv24e!}E^Y;hwtD4IjGIiI^u?yq-z`_1} zqpM@ASyD}#edm}Zihc3pwm-Nvn1kd`lzo~HeYTO0c{jMdj_2kI$A(P?gYb?1$kPGH zRSWqVg8yft-)0dbLrp_?nnQ0hc|L>p(`i>u`zw&Y-0K$%+VWR&CKvwl4(DX8K0c>f zU#;w6&RI9@J4OCr!3%U=tC~c++4sAFSv2N<=z_j=U?EZQ@9&`A~JC! zawRrkbo~hU=@E1$`7v(3m3l?D8V4fylc!fkN>txq2Kyl&{7y%Hl#+udo;;oJ^{n8< zh6ZonF92W0C&`K85$ex^reb$l-%}pl9Qqr`bB$-YSRq zU90^RMMuzk`+sl}5>C(yJe=%9cWH#~%i&MbORbycbXx3JYzX@sZ|~Z;9_Zrzf3aV| zmEV|AlFkmd8ZR$t09koWB;Dve0pgbBw#?caXfv zeW?HSvA?4en7K7e*(Z8G9Q-Z&I&?MAs^9X#Lp%S-7~Er%wJfh}Q*4j1N#ClS2^ zaCZ=2+XDr46@NzdJ_p}c={IErc9q0=-#+zla|D)8DZ2wmO>#LzB$w$#cX2BmD z##cu>IU{K%C%UuAUh5QDQRUfEz4xb|?|3}3g?WqasRP$h#oxpdR1{+e%wya;pz$kj zx})>av$i+9ZWxp9K;P>KW;d6b*&Rj5V&$+~H`R37GPofG zg+=lqcqjK-fQu?*v;58nuc7wyBF=uc;+F(A;nK6av!OwMZOwIznOa-Z^VNEOTEE{x za}nIq6TS+}RXt$t_A6*i3tlQ)IK*A%$o%XmRCBj*ZEROO**5xdY&+e{lG_}i zJ!=_0jE>^m)sowMAM0S@1GUWDdJF&JCj5;taJ&q>WQK@UVjh*ETjs=>^>xd@_43c4 z)0vqyb6JmO;K<}|cznpVy&2=Ze+^=+L8_n0fA%PR{0981e#&CjMRHSXQ^?u~2Eige zmOf_3*(>sMY`O7wiOv0u1L8eJhI7A+^y(@fUJKB-r<3!Ziq~3jw~M$Dm!2ldXC2kr zm^oQd-rYkV(IpOEH-lFM26%Pyp*ygdWhXs_4^QW#F!ODFbSO2k=-1;dItTLN zZu)v|uW$6T_WHg>-+tI))W2R}85ifU4%!FfpgDVepu;7=^i)4%!X$7v6nI3NL1HT^ zsii)iXY&eQ%d4kO0bMKzdFw-xrH!@!g|q&dLcH*X3Xj660;hF^7?*D|3g& zhi5!op)-Nurm@hbNA9^0FWJZ)J1z8M<0@nf5w1LZt8OX%9zagclD#<<7#f+6zFEw( zNde-vh2uWHYo*`CqV)wMEpfNVxPR==T$EKT^D5! z`T5qdMb3F_9r!SQFv70WZ_MVNh00&yx1es5Pxi@|wi$jMHO6Fy9xN*<#eR_8HQWqk zeluuXd|a1gz9HsovB$-$>{_(Y3TLddbe1fh^*i>U{6m?*@6G=V=C5-(z5fmGb@rCc zdt@@mAP`MP}cCq63LoIarUe&75#<3JPd(3{_|7LVl4^&ILOgqxSsujb17y&%BqV**hA;LdTxq;N^uv zmp84~`0n@hz$W`37yP^fenv=k(SAgV{F08XZ}8pJ?DyuI244Brbe=02_ax)^zpLS0 zpFG3cd*Pw>-g}sDwf92suT-Ak&|ui+?|vFweOzve2A#8)yXi+Xc+jRn=ULOgcr+NU zKTd;v@G|KG_S&t;OUuh{&NRZ)2lR$9-Tq1CTUPee&XDDk)$C#McFAm$b(PGnhM&ws zW^)g9g>(r2vx7X7ED!Ka7&-3ppdqO|i2EnTDNfYWol1U2EI0C5vhD-qb1%4Doox($ zE6J+Y7$;R{^LR>#^E|Cb^k$<^dC<<<+Bp2z5iSlLy)FyfRiBm)cYW)9nZT}ja)(H7 zxWyB}nNx$4DkTPO8=S28J8|JjERQTxE*13i6{ z_boBwo?bMk;tPq@=-u+`d3v{O1y2vO?HPDLI{BmA1V10X7hRyq)dj?d9X;FOw?kNa z>7Z}HgLNKW&K_>EbpUG8paawZKYlNR9=n_FIzB^Gf6+sp1}91e0%}V{yTj9 z|GaQ&e0;6T$7e83UwnKOv1eLOc*&6JF`3044S4ut-#TkB*;#| zNZ!j=@845nHNN{^dLpq16*=s=jp&KPT|E)IvWK68yIY6on_aG+XwNqdJdU1-%}t*n z#_@mW>52Y1csOchPXXf_g|41>WI*3~qGCxLS(p?F9!E5sV?!UuIR_e$IAna2KXRIV*CF6fm# z_|mRS{GhyRr2gSS&avA02{7J^Zz!(M&6|Bbk|=y}rJXZ-gK}m?Gp--eZ)a{szabuU zmd>-}H~bTI6(4{;PDEqvjt=ue;Vq7AdYx}<*^^^Te35V0;D>g`bL`qweQFu?Rh)Cl zaPzO-Z)3zMujSi)f9)-E)pu;DF|mR7@)wKG{*>>8hc?d0h*|D>`Kysc33ZAT+h*~8 zN^RtcgY?BY+zan~Wu$$F-6G=HYJ;OY48~6YV{!GO$)(>XhM6-e?(G^^77W$T15e)! 
zS~JVd;kHeFKXT3UW$XS}U<7vMDi3e|E-+{>)pB=X3(xiJw?Fm#*yM95PWA+Q6u0bUk??|+AT-czHRHOInCdv zji2xO^HvSs*|4UeL&b-P4htDexGkGjR4-r8MDSD$O;scRMN`|*nTzrHE7s=~8&3{R zUC!JN>Wr1|^UaYp`rE-*PV@aKF!cpP8@O7QW-n--P3ROl%kXHUi1F*TT~g$JBCmshp1k9@5kKK1_iL3`F%JkGnZ=OvRo`TAeP?VpG?+C4rVb@teY z{m3VAbu8&X2754_4y1d;LJ#-0`TgHYUqSC!;n^F1p>O&3?+2H4@RU6RsTo0?Qsq~0 z*7zp(tyHYV&%65^;!Ip#qch0GH^Oh$MM}!?{Yjs){gU|UZ^GWp%XnD&)ryVtK1=+R z*1*w^wzy-dpVfS?@9aK^d6`T*?vcT+%5DDOC&UTZc2#bDJ9d>~%oHE5cQ2&iqJXpT zktsY<^U)aJ_Kjhw)=yvi5&wi});l9u#TcE?k7zb8We+~XI!edO59Zdy7<>1hsZpzb zIsb|>?l)PdcGrGHckC!9R`oS(0mb4c&++AP=vPnCYK5QJha`LH{!`so3i9zj_JF}G<Bj2UcSiAocSiBYYdI^p<~N*wnTcc5bS@u22Lg|h!zB~r#C@)qQ8+O! ze3hBY8e*d5oLED7opaVEAHX&nH#%!m4ydEs4FXpEe+9USx;}s?wCcU@t=|Nj|7}Nd zRcMMawA9R)zlT0KOL@Wg?a26U_Kg2K#mJyTyosLlW)z*$|1Nbd=CzT;$za`^(e1Ms zbpm*+s5z@KHMZby2Unq6TI}-opqdAAE(>09sq(iuEUjj{0N=f!jQ^A_gP-ZaR$XWqXwfoH2hOb2x1 z-s!D*?R9zJz&c`p$&a(wFe5L%#a=_l7U4TPZnvEn(a^D~o#=2I>~@j7_#EPcJev2~ zc`&iooVoSvgSD{XS$h(A&Ob$X^0yRq0JlSX{|$I-y8f|K2j>HRe}Ogf_T4nj+?1>N z51yy;E;p9r1v~Ez|5F%WwsKGJ9puC>s@{Nn&sp@RzXfCCcPl%IY3!==>IM<(D4uEO zp2BC9N4n$3UJOW2JMjOn@n1|c{*Re|dLIflj`A3lBPSe$(Jw#i^-Fr5qLI zv~YiOT)e5u=1t+|_jngQ10M@9&pIzdPoE%rE|<74>8ZQFt+)p2X+fv!Z%D4%IEMSB zmFo_kqu?@JeQzgqv0r>RFLI;+op@CsBA;qk#H-EZ;$HVjOP|~S{^p8XS40WSlmJeR{-NXCs{~HxM4*%DA zot{(2cY|QkbMZv!Z+^bvk0FT-wa2{M6z76hD>C!qgVxQ+sB%Y2~ zZE|fM+Bt53vC)(1ne3B}_a6Egcf62$Gxd1xO}?2$-p$b`CXsjZ4tQz({rL=dM9=*>02s4&ev%@a?ftRBT0?HEVRTuO z9iN`~f@m&6-)}ze?e}gUO98b4^kdXHdO#|UZQf(B*UoeJ$6?lS%b>E(R{AU;pFw_2 z>SlG+G0q%hxYl_GHPSSXUooEOG6#FO@VOzz0_a7m+y+8o#-XqJlw18 zlG-=>Rfz%+lDc>{wZL53K*YE z17A;_LcYGbH>KnM@QYr3m~LG6)%L8LPQC5ppQ7K>PN84fcdo7SW++Yjl0U5vP!PF6hKvUBn(dbQ(5J5FLhGbz;Pv*hl56FRh!R_~1hW z$Unv=?m145Jx-H7PR{Xh0*tdc5a>|-oF%{}{X+L!8Q65K*dqJL=al^|-Lj1_oqTlX zzLWXQ*i^J%Bbh-C#XiO?Fs=HFp^shgCBY!N|01+0ytMH@bL@c5XMIsLwDilJ`@oey zcX+owj(Fmg;C3eV?+kK4ot&lqe4`ls6Wui?njLeQlcy@**(4)SziNpQe+HfJS>nXL zH>j@S7VL>!@U(2LlgG|I(6dZn+d^Jt%cjmESSuUkNTN&mk}AEPjtCf0Q!qRmS9kO&Wk1x^JRatIMey2z1SIZ zlkb~p?l*p%k;mKvXR@u2&jY;Ja8To1^C&vo>GKElnTq?xC+Y`$+x?KYZLi^(PCrki z>F2JLezpzdoCTP^o3hror?eB?kI?2;^a+i%7@b06MaMW}ebbKz&J|PP=kDPic)$-( zGCyU^@{~4yTrH(fjl0Ihl~v%3dsCV@PD;~Hd2RpeNA(+hp@)0bTT*{>ym_4A^!It+ zJjSM(2f9sVbN&q?-e}ORsV4Ib<2f=P=gY1H||`^pxxLO&zA=eFvfMRQ&uprEk&GzjK$AfA4YT z)@yvtXQzGVRJi#y-x#;LduTDXv*_ANCMR~4lB-+l==qX~jr8ZuSNTVFjHHi0;>+qW zo}Yd-2hm%h+kccZ4Ds274~D()|I~RdP3co>wa$gV!S`)y|LNfW27N6_!BI_0|AKiT zZH_YcnaGJ$91!cU@(tdnK3Bcvzwo>je$vjjQ`~Pa;rGY4=+gV z^Kzc2?%SpRnVdD0wwCk&`tz?3W2_{Wb-2d!KfjRYse9+vj^wNh(zHE?wz@McwU0ut z4@YY*j zRSbbs)5jQrzJU(y`>vx3deXPV@~vt@?+TtJog~MO=iOu5UfuezopY1opYd!a#mGqx zPDOqmWUi9`)#wCaXvMMNoPIq2%N%rN(Rq}8)XMpQ+Pp^`NwuqUsK%P;TzzSNo{i9| zIBfI_>_q#WgVSO5dBYaor5h_1Cp}_v@j2$&c@u#S=@hHcLCe{&aqj-L_ zVF~OV<|AB`6U(o*KUPc)K5YlKaK}{6r*yUw%Y3IH_o3YHtqkN+H&Aic@*n3mFA-j7 zZ!s77DDZ0$>xNAYp39G6Q{xXTKxg%TBYG8#S_?mp@1igD@AbDakXwJ4e09|-3vm9Y zyqWHP!p*m);N6?w8opCI`E0T8SL*Ln`>LmHKmNwwY`1~_B*!dsSUrA%9=_r_o{Pr+rHMSvKHBtzPdd5@ z&WAM24nl^4j?Di0o!Lqx9u~k)4)bv%$fD! 
zb(}@=^TO58Ftr}qspHTDEeyv8GvaR#?9|zSgUcMd=Fvw1Gd?0{9;iZhXb*hlrC2gK zLH_$H*1dg6CAojHQ$ihkq!Y8gMGK>y`!XY&_Or)3E{v2YH*eoCW7GcAsJpP#DA_ZJ zyvwn&UxqdRf;#}TzjnX#t%OGdo1rNS8|H?iNzRLMn!gIp9QtzBZCdI!f(Tvz;tZUwczI(@N_l!x{BHhS7OIx*go91{<*JOz0R3SX~di_J_B8i zo5MY|g&Df{ShUsCFBSl%kGL=DUfOKzKQO-fbLRY~FI<(Uw%)$!LJxx0YYOqdr0YYz zSJjkAS4+a?2dCQ??17 zT9Txu0(gGEc#d=TVsaYKYXgZS>nr-w+Il{Q;q=*lKDl0uBN|x?%}g+@nX9Lo_8EyE zr`o$OfFtQKoxO1ROb=ZC$cM|_S)aG_zt->!{Jby!l4!1dTDMO}_asD+N0+ePZPWnE zH-_7`@3Z{h#GO0o=*C&2p0zu?njK%@yVU*ajSW9e>)RTov2EJv-p8r(!uDk~vS&EY zJji*3d;zM1`P<8kl`Wi2Mp-j!q+`p=S3!M-3BTr@fn4+P*B-vET<}8ec02DzvR>_F zoAb>^@%ffBjA5R9__6!04P2@1e*m*nzs1SBDW@-I4&AcaJ~QZX28W-IImlQ2bRTfz zmz?nUN#JfrpL~Wkf<^HDG6j~ij>FQ8&(t;X;8<`1IPN?N9KQpOdwg(+X8bq_rh!8^ zvG?;*ou38aKh%p|)Q>Yt)r`$RPjc$UR%TF(9sVL2lE0CB3jY4ek1|%`ujRbnu=S(a zpEFh#vOjfR)>MF=N&TDMS$CZ zuD)4o=K9fem#KWpX5&BVQUvyjO(5>bo+O6Up{mj1-y|6Yw9UlCW zg7MFMYttvW!);p^Pr=u-dJ~GgM|Q6s?!iZVNCQZabard+o9wj+A6ih$OCNj9S{wFu$8uZ`L)O%k%RioK^6w zm}mbm7dK>0G!ofmW+HPjy8R=D9s9bPb@0w1pF;=KIpk|U?0pW|L*G6Znll6S*11Mk zuJOW9#hGjj81)>UY(L1^tx=Gh$2ql*ikc|IDkjJl!G&-KjL zOn3gMyF_}-AAQd%cFsgZn~LKaVOZQDl55w%z7HK$G$lXsV~m>{%-!zb!mfK-(ZzhK z*f04R-1h*COZk5|^DN=_H<_pNtrJu7ifo8J=>I-5Lm?~Ph<&bfV7WT7=MJTyNP zv2)IC-sF?1e1}`0_jhgg)Ny|A)Qc?i!d zSmWr>vXZJn+;c=ubP>3T4t4lfYI|qCJ>wOUL1WX-AHWyTby|m@>+ilGZXMfYG47-m z{x>2Q#XVZ|Y~h_ezh}<{whkK-jy_3F*iFsWLgW1Sg1M%BkC5cP_^{-}waB)Dza|Ig z7X;s4^3>b*Z^co4q;a90kNMr#Sbm(?@R=9d@U?pIwM+oMZrN?a7P!!cZIAi~Hr;C= zc*=l9v@_O)iyD)ws&+YWy|SVkt{pC1Z+md1jA0Z|SLp{CVT<3Y-66d8*1?=Jw=ivD zKZ+y*@Q_k!^{<6i7teM4Z-OJt7(3BTVrTp7{ts)u1iZ#n$4z!1GTd7O&sNpgmZ^Oi ztUn(fCK&Vs&Dn8I@SZLcdMq&V-!YN9dgsUGartwCbJRK)4}Sc9+CJ_|>Er9*tv9aR zdKH=zaC`hF1}~5;m}v~F=WM&)G(-3ha(dY02i7_35c;xH^G)Y)m9kxkKl<;?d0Gm* zQ`5lf`KN}Zz}p182^-$94evbu4ghl{>mES%&9T>gn0-fue?6aM4*vh`bwuvycQTkk z>}VD?XEwIyKy<)C@G9)|t-<3qdaLBOSND49s7?zXTProJWrO8lgB?Py;|E_+utjHz z*kD=MTI;aEM%nt;8@3I`xg&AnUCB#%moyW!>8g{yrS%x78pqu8g1!}FPJ6J$a+%Zg zT+TkR(eshJRh(ZH4KXI}#xByHzJ~mWpL1`f@~xkl8!4%F{bcg#`h96pba=HNP3>l{ zjo)YN$+W}7%1btQemdoBE8jYaEo>q`pOC#*YT~nW?qL@1*~d3to89=M@QsDNIb|>> z+4|dLgZgS6mvK+0d;<=k_zc@!Du^S`!cV@AK}R!0?(z^6XC)|Gzp3 zj8CV)n2LY=(Y^4$jBot-FQ(1U!P}|A#DU=n@izY?FbQv|G;v!BOn$trL?lSKT&E9e@Qip*U1N)g&+Ske0s!d6)X1n zSlN@sUA0eKiu}mu&Y$J*pX%g8JEwEbu6R0=*f+(Ee-~UdQj5RsJu@L4N^>n155dj} z8EaH0qHFaTM%R7BbL!lu4%}cv3|Cu!yL|zt-Qj-5#W$J-tN}4 z47iH8PcJsc=$cKxJclJ&9W%kFcycB=#+t*7jj@L5;Cu!+zk&O%6nip`c{M5aEDt@$ zom+yr6_-tl?>omxC=N|%Bip9DMr|@^O_KnH|(rnq@v5 z=^V?mTa8fttw+DKvvvZt6QJeW3b!__HnR6G1~1Ov1NS#VGwQ1_3huJadSb4-#_(;W zaau+hW|C@I^unHL4)N+tl5EFoZFo^J})m8m@%~cbVBW zGjf(U?4s^%W?*?kJNbFL%z+*Fjb|+f-^=K?n0}Yhul84LDt?mk2G!)=#2I{76x=E% zx#`M(Qhm-%u`#mo22ZMHe;sz$w~2MNSXcD1#Lsyy{V0AqK1}GKmNQuC_z&^hrjvdl zn}>acb#K8R(e_6(p*?#Zb9{!mPwq-iFyQH@i!LrSx(wRn4`5$_Gs{?8vG3cF&T;M> z*MQGZ*qDr7ae#X&X5j-XK^Yvb{$hKQ6Nm8kYH+xL->R9uZ*0b$NAK!~9O{oe8h~7~ zkWbyVw3q8>>yzgjn}#=XHWV@9qd6lASHZtNq`n08JK8Ia#3trkGHp`4D8o4IAiC#n z^ic7yL!;1TxnE2*U4#eI_-Vync;P$bcHXDD{p>mJnV4`0zW_XO$KiDj|88fzL&$Hn ziGH0p)u6TgF#2vO{1d;_Eba{o+ux$2F8r#IXy@LDkNHtM@v-y11z$7CYdB!eKXb?p z4>Uex%*q3X2Lha1^IZKFtuU_3pJUFCjWA}_pkLQaA7r1u&Lh^uJAc&}t?0!~tZhB> zvBw+*4^4W- zc^%{)5@Nr*a!Mjyze0A5EF7OYbba5`)p(?vbSU0Z1Vr$?}=&M@Ok43&KCpDykj8;w&{W!SgbYVhGmYp zSmPFjeK@)56mW8-i<6n%V`{9gFqY^fk39QFefYVPeAsgK0eh~iEZ9;~^jp@h(3syu zeA^Z7y2K*gFw87HISk`I=fGeQpD4NJtxE@a81}kAYtLgBl?ORr-@${z zcgBx#_w!fZy4UO|2ocw zp{v$wd*XK)W5|vxa(Q3=8hBe7vXc2fg-#UX-9CQM8RGkSmTCN;B6DGJr(hA?g{F%~ z=U*6_jx4>7zmfN$>l}V9UQq)NsMXr-;@&(nQgUU&xNfzZYgT#8ty}f3Iln42^~loy zpw?}_NJ$OrTg&=d{BJR?)&jq`P1`oEeQWCRn(5@}m3DD3S 
zX!b|YSt)aq9bd}a=6uGOH4i*gf`{$w9dw8{^&uH;#_BqZ0m+jhxJ+tz_e}{>H?KjH|iof6ZlWz_Md7Ok}X; zdyD4Fd(mEq9Pd2ZEMvanO*Mm0&YNbCyDr{TJgDb<7r5j0#Ia;cZgZ)N+WugM3mGxOmM(3k!Hi=2C$!T&Dp zOoyh5!Cfgd)kIw9@wMP@twoTrZJhJ{qqJL1JMGt6*5grrr>;c}YjG>>v<_MiZ!LuP z>723F0(&O9SfvrH`3Ld_pJcc%TJrX%^m==L;%oBL60vXLU_o8QEBqF&zM%j4ZAm^6 z3n@OWaefTFZsMJPFQ40p9&?7Xm!D=YTj*m=0js&zZRgo+V`x|4wp-=4<1W``$*qrw zVOQLUZAU8@hj-&8y0xcfq}le4Wf_b>|H9G|?-*&}J)I+$kRC z?ftA%u*3G_uV@0+rktLzu0mdE9IdzP*ATjV=>8%b20gd3?V7=z#f)EUMt-LH^8T?q zF6tXko(amM!J>Br!yZfN)VH}${uXIfJ~ezcxivl-xcAAXqv6Bu1v z{yDzqEMY^{f0&5};N{w<{{C0;-Z=e4vSQtFS;0Q5ki0O?iX1Z^ZW>e&Hcl%zGyCht zS;n#Z(7(%l`&;z?RvE*<+9;XV=`6t5T4{bb(zi6$-fiJ<< z^m=k)%!O^P`z*Gz``NSZv(|s_euh0g+>`ZZGH&DO?s3(pXt7(?zgUrZza#6*JChTo z8`(J7+uh#?_gVW7yPuuQ{5;%w@QF`(daPxvuZY24KKf_n;`OFSPdD2R4=qRE%l{!h z1^$+f{W`yG8HS!+bQL@en4{D)u=fzKOLo_z1N6mjy*)IKezb>*u;B!wr*G!4?xL|m z>LWIAPO33Ei*;z@D0(9_=hc+r@N|~$Avhv`CUVrn>mU9Jzb5w-=8peiT2~LHf}8H@o=NGyNZhW-T9$s=eAay6swhYct+` z*2H>xYx4?g=%>ej0{*&z4qb@9Mze0W`9?ZfAYIn4zZNtnXKki7pnO>|a59`ayY1lF zBDP6m?1uk-Tm56-OUG3I@;BJ)$UHolL@ydI#y479gL{;$frKa&8u7LCQBEH=KUM$2 z7j#gR;nIPt!*!ejAAb#;+k6~&{B&RelOHEH31(_PczSU(I5#aX6^|4CLkk_7xfFbR zbRn1x=)&t`tUK?fmECKx_GtROqkDd})|zvwZ27ad_t$jy6Y|y>d%?r0>SCnBbGp8# zYYxWG?w0J@O8`smKn#cjJ^x~^nCbS%4n4u4xNPbU6wQ`4dW zliL4SmT1S1VE6T4a^l_ z*M{$BUgh}vLeZ!#mzx8b+l#r+BNQ#_)Vw{I>ZLC-hjji~Pmi0Sm|S3L#kTXqG&+Tz zUj07wB%dU-YwKpv?^~>oM=QVPeS!O4dqgpq!jt7&&sR^dp4vOddc&zwiy1k{iId7gznP($=uN{76n2yb< zcxS=J8Xc!y@dMGRgj|hH890HTd!upodfvPoF6s39YpIc+Z1(%YQbSdnZOWTi`)+ z_HZ>BTvgqIKK7=Wh<*h>9Pj2~&q+70$(4_Po$ceVh6fd(XBFcAGap`aT-Pc<*V3Fc zm+|RoGn#52Wh+2bK+qB;&!E0u$k5a)(Zv?3pq!-a@zrB*6 z)(P>3a!C-J-}|%AImsjhwBP4>Uf=KU_xt`auXARfeOY_$wbx#2?X}mIO%9qiwu05{ zIV zt~3VwWF>OUd*GpTXqvH4n*7L3W#eX=vpXGAkFWnPG`7j)j#s7-%Z>H}lY811-i3d` z@$cv#-QxosW_(D$+l3GL$&4km5Jt~=lKijWd(>YU4;Lv5cl)rl-RSg3HqK^m>U0D9 z5NoQ7yLs4Gj>n6Nm}@$_k~7JkYgwoEz%H1VNMD2Yr58k4dwSXX(rtU+UpS+E^VL@9 zV&I6Kk3~O%N1tNPhe-T4`<)M>qgdJreh@9f4|bo{H^HCmA(pYt12(Dd^~g3~q23aB zg65fIBNsnfPh6aql2b4wm%PFVdkoM zQ*$5QB;Fj0gORk?gzZ2)DdJt+FAfYXA`iKI!RSuR4Z}yV)p=MmT|O$8vjQ6lTSRd8ek}1 zh91Z>9(_&v8-6-P0r;OZc!~VWW#*+p@<+kY)N?M@8#BbHKK-y@y}8Y;2KV zN6lQG!W=NR>TR+y^e>mXve=pzqh{@>-5Tvh|3iUKaBu&ettW`T5J%ah)wcGec_f>X zkGp-0Os0GvIceM9=}E&!pjUqr=x+vVsQP;|a)I>gSpMPTdlR~*k!c*BCi_DH#tj6)e~rfd!Bzj*0(VzYPiQt^o6;ND$(BnKN_8lw{kej^7vbrRdK z$F=}JnoF56eQ@c+h5q_m>e>^i!`Ks`*L-IV$PSfHe>DfhZ!`x!O$lyp zh{3NvhIaMsL6;T|_l)r_XWacS_oPiC-vM-v3eGKRUCLsPwVsX z#aMljwdZE=5k#NvpLXnhDy==o+oRIh%BN;4_z;|;ukA5w!_k!|jzy|6wU4luFCCw8 zjKttbx?F!}0{p|#;gDTo&yq>!@Lx4E6qVk40($zok!4al_KG**cLne1p3BXN*0b_+ zC>>{o`mWJA;HiZ@s**jPP{=zfEZHGeZwMf_C$G4*v+NX&QBntrM^V{)`+cw8bL@B4 z*CJxknmpxtKh4Rn_e-7pdOy?2j~rZfg_B?J)13TzzuL*KcYHb2ZVvYUvc*n*y?@2Y zulH-5{Cc11AUA=Rnlmotq{-(` zb0}+Xbn1NGk1YKedbA5}W>y$p&nsV#O!LI#R%*-VmF{Y?7Titk4AXR}^UzBB2E;(68(tIJB1 zuX%DfN8jgK8`b|-&KT_`7Wz|j?EdP#?Q8H8WV-@<0W==|(jL9AZpr7%*5!+B*?P{R z9)7eDezXpM%bU^9;c2Gc+28D`cj7lbzutJ-mcH5EyWR)fdWTc*d%lDm7oYKE*X6Ru zlM^UAH8*Plej=WPge=CS47refWh1|L(AQhBciaM>$A|L)`Go!jzx6A8N%dFp{88h0 zouAYm-sT9gc$Qykh1$VkgrBJc+3hrd8xzWPl+)s|#yH-BwE`Tf+JPQT`1Gx^jv zcIRPi1+(#svGC*cb4HW%77zG6$j1Sz&VO!jv*k%0fjl;~4wyX|RfWHMn7Ny1O}PR) zhWy1!2L`){%f8F+8G~ML?fWtMQ#E;oPiSpjrO&E(8rTF=*2mGQ2Z8^)@D_iZm6$!A zb1mrj73smX7vQ(L8d{lCHZeCFTFLf|Nyvs)s5>zodz9q+GU%p;vgd%Kl?F#+J{L!y zf}^q%;Hb0@j+CFi7#vBrETf-&b&>0#7r(2EAXkZ3R5F%rz&KIwj7K%^8c*T(D(drr zlLlfg{tQ32R?^#`n>~D6@O@+^{A-mqkN0$IY{yp0){YD25i`lqeeBr#fmh@2;;w_( z{e5wl#xO2>BR8yuUjY^=$b zHPp#hbW(RdZ*0DWCz*WM6Ycu0>&e#%&33nYy`8Vr&F4S4yS@)%^94>e_0_UY8Tc}K z^1U6KFVoJ~%>4@{-^TRr`u-A|Z&SLdZ#(Ou$(Nhao$uAye0dorUxc{wCf}=>-T7XQ 
z&9^JldE($*nI2leE2=t?S5%|cYV#V`Mx~f)K^M8 zEY0b|?0LF;T6F5tfoSww&;|VDBY)Sg0I?`hPzZ z2rn!11Pm{mgMY&g^z|_Td)W|il12CoE_hl#lDMVD)|J9M_e~Tv2DhL z&tnW$`mDOr^kA+ZpQb6~Q~Fm(uf^BiL;g&Bi!<0a8bMmg(dZ_<2hmBIzek*4+7K?x z7_rv6>xk-@6jL|xiLgQUtj}MRZw$faVfY4X*?MdnF5f8NJsW;4TTT)>sccr;Z?9ZC zvD&J?;LcgE-|I>G;dP#qJFgoM*EyGb>7Muw`F^Bx23z|R*~$L8c=F99U(Nt;Cw!N< z&Hj!>gVb2ZEs%zhS``SPz{H_D@Sn~&(_*y84mCq^=B zeZ;zLy5&h4p_nH+U*)kLDBML}%1_b%Kl0vqcSc2%6aUK2b9dqPm^{CC@>Fr}gWy)1 zDr52A#bx~1;M$3tWhoUtIfr7)hL%~gUItEgY$D*a@7M@e@tkiAOqy9XJ2|0l2lG!f z<<7qgyXW7;cz6C)c;e?xM3&Gz<$gYMp21&j?MnBI?pVbBlfi$o@s(W=NVYa#1Khcs zaWd(QNB#h7Sp{o8W0)ve^v9mTthp0&bD#s{TQj$z%LRoNUovR(w{rLwniQ0@0qq1@ML8 z2SdOC=TZtM((b9WH|NgagT?rOFG0>beL&owJwKiQ@~Ri+ z{~kPT@eJ))&)o9d^Vt0H_dGtoWmVJsJ*yre|0DA=S3Sye{e0_%4fBUur<@FqJ4%pG z^9S7dF@8-&ANq!bC+)j)t@`ZYxAmmB`F9WT3|0R>NW63HH_E^5_1N&Gt9V+g9;5!p=YNPV&lbn@)c=0pP^U|`Vxlw5O_XF-v zd)myWA%T0+)W!;jgC;lVa$Hxwh}LZ#OoG zr$*^x6XjhR^3u`fuY7TCrIlFz7IAL8 zrHgZmEl>F#bos@WckaxigQjsdt$sIsT~A*> z{_gvDnW@sN$ zFwLgFwb*`kl$Yj~4YZ~hz8A9w1UNT&FS3O@PSV?sH%_i!P&~90VVwLi{)V;A*)-W~ ziT#;m?`3he7k`1iZI)p}UrL*?ecuS&EwX>(pWEz>JF~z20Jk6Q_zlS~Bxhl8GyWop z6Bk;WUn$SZ?c2_dSEBz*>l zCcJ+vF%Y27sbg*WoE_)TC-b{m^1SQ=4t*z($Cfhp$CUvBv1U3 z|5eU7xA3m@)RiNi=YL;X_!()U1^Jtb7Fa{NX@Sl7si#xk9rubaFz$1mai4vhaUVf_ z%dZHABGavU*75q%Q`pysFNAHaCz^7xWNc)|(P@MoH&qQo>8Sq7NIMgwKQMZki{>?D-~~fcCogiI5Go zPvnm|ud^DSqqE{=(3yD73*bv*5B_3* z>n%syzWm7kQ-7y2YUjhAc5aWU=Vam0sYmOr^0HQT@B7EXEZF+uOa3eWN|`y(lIv@{ z65MZsCfm>{r;)BZI=jc`AJHlAvd(OA{L4kFqwxP0P9vOG7c5($xewMN=gWTzJorBc zme@J&XO4T2N#$SuEOnSMr+(3jU~%Cn`%84nOTf|Je61n&4tH0W^VQ=h^A>W3VyJ4Y z{C{%&HtadvAwQhYk?&rCr%Arsj*o;(GqH1c0{y!c_>e36z@syNDdA$^QQKLZ3lXhO zarnnP&hmX;+d23os_m?pwqwV>jJ6~F(D7ZsBL1-YIQ(Jz?}3GJZvPzqFi*Peaq#2% zR^%Wf)Cxbx;Z>IcV_*7|PrDi06U^s%*gZbCZT4lL>wO@`AhdF8_GWB@{UZE(VWMR zPx|)1GJnM*`S0WFWdjqv$!>|f%l?=(vaf7*_}Z6@?P1!mg3j+jE4rgmwn)X~{%gIvuQMwA z2Jh(>el4^mn@+no)%5kZ;6~>V`r<}&_4~*Lsrb3ywwQA_#48H;>@q_%2kA$?a6(r^FQiS zd|nTFf)_mjU16K<5M}Sbs7FTjf5{59#mLAX6Wi)$?EkTT8`NXR&-Hn!ZvgcTP*LXb3JoF=eXNJ8m^Q!g0=_YLjX)X31 z%&S(`87A#!(pufLU)yO*Nn`IXyOK2dIGFl=Y^PmH8ouXtTGkYkZvkoe5ZGzNTvR_UAg$HjZ+X?aV5&(wpR_7B z?O8i*CTSUN+I7=RzH>=KPPFTL&rZuFjWh0cTGMos?<~?R(%vA3<)#}))SnBz`EN@Z zL2S5JvpuKQXD_mL<=j{Bg8V7YOPqcCdTc5sR&e@M_9)IIZ#jBICVL$Ru4E16H05r* zEXlrEJSD5l=Be{Fr99(#*7B6x(9Bc1!FHaqEk}6D@6=EHu5o_Sn4z96sEJAC><=>HIU*pK|yh}$^(lAb&>wSuP55&~F4c;mJdb>9m`UL-PyS#XbG$F#BHcv*K9m@EPDNQ4#0J@Shyn zfvjD${NvrZnj6W)W;>Lwy}}f8PFr^fyag<)NPh>q`!+rrvKg>{_1a;ct>7a9-ZYji z_~@_X>`t+7bjNMp?>BJw-ZG_a$-&l38<9u2TOlcRdH^{6{H~(TN}qRG6?rPTM=q1~ zXD@m7TB+083z2#8OXy-POUG9H3gr`l?+Eq-H%&fdtka3TV8t)uZTtt`;duqH3Pxf$ zEl`}N?a)NyAg}RxQM;YkZ)GQ%0UUx==fNaz>RsR9-9NCl7nUUl>-&J}#{9P#@!OU~PkkG$we>SpALm z#d`%iGH*BRz46c+_9pNkek(Zt;M6Nv*W=?;o8mS5T`J!Sec5*{LR&H8^#L&`G+vQU zW`>Agu{RTWuF^ZUqZQm8LzAZ5m~a>K;vL3nw^Q~e=7!-T4oy~b57Zd?5=)axi}(ui zws#Dg9+}TR9^dcar@n6G+g`rC^4_rN#qfmA>74)O%-pB!jUT{vz8pQZbS1iX0k-G2 zwCA2`{Bjg4UYZ}OH|s0b zltUBXli+A1=f4iAy$|kq-72u^KLn?T$lp4_>S{XM+Gy4$eBl=Jy)B)1czjnKvlah- zPyzmP!>6|agWYy&ciXMBEkFL7X7TMD<}U5CFjFE9cgRfY(aTkd7?Yi;MwVX9NdS&rlwH&m`@8;=hcwVxWn>4#WRk zJ}e$w#d)>9{MR~k*Xsx1!S@JnvlyRA*2YWm1@l{}b%9SSUymowUu0q+R10ja=fgAe z@fE;MCx3+2ONj9X9c57V(|0`~tuvBWrnpkaE!!|!p4Nnaa*Ag6hO*!%W)bJw8rZKMK>stTd57=c} z^0Yo^E`37&#c6gsx?B1p@*}@3`8+IpBtKnpJ#FAuP@IRPcb~7XSsMj z``7qf8=i0Yy?8tAX`W_%HfVZjpLv=IFV{RR|7_SatqCeqN12gY4}h<{;BSMMewDUOUyzl~f^SzIvj&{ii*L7duK~p6YpN>z z?tS(ekPrX+&#nQ2BSW@UU{c+;QFq@pKzS_s@I_&@*8m&VZ%`j&JXNp?AF*qI=1?Vd z%Qo-waQ`tpya&hZ->&ml+?;y^xuq0aEybPYti3Zrw*~yNGLDe4lhUqQ^MSYF@m?cw#pSqbw{ zc>1EeKHrznA>H73yk6hM>nA}IXTc+X#UA#9=!d`1UN&R$?!CeZvIS3lf8OkI!yoyt 
z#qqK&dKt8mviJ9wT}>a&u;AKOSGcXVCzR)*JXxP2G&*+qs*3VA<20J$-La3p-S_^M z#a5qH13MuOJD~l*7QC^#BDks_)&mXX3}~}oY169ya)WUim;bMEX;#@~z^BIHg$4z0 z#AzmYtdc(NFI)5{p5|voS4KW#TE+1X$9`H7<{ZN!rPs_yK6%007>CTg4N9Nx zQ>D|bXANp3_cz+^Te-Pdz(r_+zAyQ8oC&8lU3 z!EYXTWg+ESW$uM$t|aLf*TQ=GMf#yCPRApZE&B;$hpo&(FOYt--jxE=0F7Le(d(1% zT)UTCPFJVspyL*MR7(0qW4(`s(LpID$_{8*f70OA{qB%qP=~`58(fv7N_@BY= z%+Nqim9tR&O?^+Pls2VwelC7)eyRM1@*Bo)IKL5XIZ7|oM^Z*7WsGze=H$B3gH)T+ zv^M2mF)OE(GH>KJmtTv!Q0xy~+jcNtwfDFhyszleXV;9pS>NUjl_$H?%acc5RlaQG z&8fNV2b(uXdR&uLz~gAFaOJ96cd06LJ;3}qHYYvDt3_N7a8EyaG%eke?7k{zq{5f4 zLaKfDENp|P@{Y7$qsHfbO|>td67TDl`8g`V5K0sJE`rylUu*0@HcuTt#?SEZ^6z@t z<#q9l`$c!Ib+x-9WtkVIyCQs-%O288v|87arz)C~HX|3bbY082%006Epu61-zV3+r z^DK|_>!;uN4YoOY(o#pD7d5tI`laM1P;e#A4qg(MTjLqm8 zRuXaF*) zyqA7!Y&|;di~#wr0j0Kuoi5Ia@kaN?V%z_Y4c_V|zQTQxmf0rg89l{Q{m-*szJ~C<$s;E zYWcj#QH2gLW{5MYgI7#qKGCVT`p3(eKX9|9aA^0hh@slJ4Pve_7 z>2w6eUm^6JuhGBVkH5SVx;-~eRqnh?otOaL58tH%N z|JD-aANT&_Ydf9ef*&1luJbvIUw-&G6{v!S<5Xtz5quU$;l;-c_>{) zMFv$Ze{xpU@*nbRaCw>-&j)ayXz z?P{1+TcL$~&n5ePTUE6BW!^uR;`e>P{~r18^7}5=qSaSv{7?1!?&tqv`9BPJ{=Y2$ zM*xrP6Ti|-aO&ZiBhBy2c0{W#boBZbr2DIPm(_ZT)6#v{JEGO)j@}%nTI3tGDcg;# zuDjn6jXBh!^0ZCarRcJwHCI8{ap6_I-L6{S+EE6s$lKnkqoe%3#~i&@`>v4oEy(cu zo}|o`)N_>o&+>nR)S2n`{eu5N{y#Uy?;GxnR;N3AeG|s|eHZe7g0r`3_c&??1a zL9g-9i}z0gyPN-O`Twl5w{G_Zg}$Q~ShyInzwWQj=tC~Gt&z6r+J(ri(FKBYle86k zRrK=y`|N@D%TjL6*T!Dm_kP(h?nm|CYw78FZ5m~|%rYj-3Gn~*(Y><|)lYExEq=N< z1zVf>X0%I%nx2~zU@!je<;;t+T|101ADf->)|u~(25t=SWBD$d@5eEo z$KyLo@qZd0zaslT0k?eYTs>_z^Dn+uXLh7!=$eDgZCiV4hQHa<-1b3F8FO(d`cpo- zNf|m+(Rg)Y7vp=S!yB56p1kVvw#DVz(DGGRd3Rj)#+r(@d$%lJr41|RS;0Q1npV(T zeaM~Xy81>L{{`wqJKxr2s}mY?jg&E$GLGMey&+||y|wqP(PZQH%ZH~Y8`E*zq^4P_1*&o9U~o196D-pAiZ zneI)=`2O&jdMQud)m@@a6k{i^;{Pfwb>Hd<-opE_%i6Tz`+yG)V{X3TV%OE1uz`;= z_Rfr2ot;=-744WTZMz&8Y}g}XZRK6%jcmPt&B5Vo+IqI$|7OqSm%P_wmA!wg+9o(o z=9gHe&}cMe7WCF08hWQnUwP-)tBp2+&mEe&da!mOhbvdpjyBq{hI)TRy(6giSJZpO zlkfGssg2l2-N859YTLb29|z{v;jK|*fwAj37-ewZQsFq2(_!+cBH>O)=-9 zMzhVK_itELaRdCiNgGzvK#pU1@7E9AL0^^k&N+0uah-ceZ11`W>ckOvOTKO8D*Su= zhE)gW-nXUa_@w2)YNWh09rBQTw*YHCrujHlt3PUo1uGD_vW+brW>DI-dq5$X)@ zQU1~Bh^NMxI!4PqOMlv#Qh!b7iWRepk{l=YP){c?X53~>a3bv(`lfT;e~BJ_>Fj{g zT9@uAxB=|#iDH=!zXYSp5AbXgB&4Wnk)t9e67--wt({$PXo+iT&=-OBI@w}Ph zucH6frrR+AOr#`-XF!j5F!q z$Q9mDCjX6gfRBs^d3H5&%Z}IIp&x#hMNxR}uw%4=x13cecnQ2bgJ+Dk{1VtrIcl-^ zpGD`&$4>UaPr1Ne%r~#zdS~FUBh%3BU%{@<;@R5^lwY1n?(*Kd@E>2GM(vZee(-wP zOHCV!H24htelPqg&vIXSc7w%7|4x1CY;4+4TRDn$bWn~fUXF~JQtT@o`$=?(Ptgyg zK8e}1I&Y77n8Q8H;bLQnKC{`aDtnjS!My)1=KtH#18zef_$GS8Hv;GfhCf{BC+(ge ziB=lIOCL*LTl`}5|3|UPAJRs7 z76w1zMYI3M1TUo=;U|kP=6@Kgd|c!YxS#pnzd}b1dOD!16Z$5hS79s7*8F4J%W7*= zM$QnM2t9B{1Y61XrE5ct$!gkz>E7ro^xf6qA#vYN(7`^%hB}2j5Sb?LpH5cWeD{|` zONXfKOV(Z+S`v#*%?3Z;^(E2IoY@7BcP~ApYvY?;*yX|x?#sLZFFYZ(_k6BLkyY<9 z?&^DE>*bsAz&71srk!`ru~sI(kscFksJ@;BO}Wsbu(%Lq=TU5&ssE>MLcgCEQ; ziTXsYI0|Vb=>@Uv47^g=qc*SQChd~vlY!$;>U(b427F zcI3b8M=k^Z5$%s4!**i#?!@kmjZ>l6yQ(k-{qlwP=5QZ#1Y)BUXDzE2_2N{&4zA%; z6+Jl#SvXtyHS7ilKjKdnEAwLOrQC3untBS{K2F!0E16?Lol8fGEqOb(;W+lj{tL1< z!d#faI-PZI4)qTKPZv0LG3R7!sxZeny1RXZ$ly^%-*i1+V%S0T$S=l!VHp0Wt^P3Z zS>K2So?%ROu&-SHQs0ss*SN(BIB0!v??T^9&(i88_!Spow^zzH;s+?Ef5R(E7~AS~ zxi3TBOIs`(tn8xLdhz#|&oY7&Xxp_sle+$RCbnJh5&!vf@U0zh@m0w;#k4aOczc;b zpAVpq_3aAengy$X-9vp={rAAnMjN%vW@+QEc{jfz7O-&M1kM4O52))XW8?z97a8&g zWTMgF`9fZG0XA+X?-$@S6y(BCm6y-IxTr z&%d=FAE99f<2LhbjQTT;F>A;FSOP6iOpL8R=ljoMmBNqmUDqhaBJ?$69&OwRyx{rE zT(0m{SY)EoyS>ohU3j^5#kq4#9^<2T>DL&GH!vpWGB)QhM$4IF$^wj8V~+U|-zNI- zK1S{tcIQY(b7r1;OX7Iz7*E?v)LRb@QE%aF+ja=rG%`MAu52ps26A6ow?SuomLTU^ zkxlc!O=MaL^w^AV?S1O$?2S!T@Ia!UtImO(Vct)C9&qW*i%SsLmCttW$;xl-ax~k&! 
zzQX^;OQbEc59z&4EA>WjplX_yJ1rERzrUo z-krzzu=gc(8EqF?_%66e8)Y0u$&p$|S-wlI^@z+q%6HYnz0s++{5{J$IP8YJud;FxCuR^*-!Y=;*;mCeQL;YS_>`+EX3;$)&VQ_C&B2vxw)CvrvcG2^G0TP62jntDw_LXB;Ls0W=$TAjz~uLj z_h4V{)29?ij!#;3aMfjPJ;(RI(Q~|6@&bnL!=?!OG(Cv#FW3=@ZPU@C8i|vsmT?B( zAs^P6ywKnN{}XucHs}AjytizhjrV3d-a7r=BxFc3vSbJ{1v$4=#~0}Ei!S;pdaCH7 zr;y8HpW1!;G3K4+#8P5$pZ=*jRoKb+8>>b)zZUoD$6_iF8)5qN!zH{keEMU9pMgU@ zKK)Y@wLm9wM7|OINc7YnGB%3v=~qo)4}lf!vpQ3ep~y9vi(_?KAO`#==t)ObD8J|i z7Z3eB#EcGoetA%kbfWia#S zZ)l&C+1QH=X1)w@-^Ez}GuPN2jk)FASPH(@7cF0Fc(jTNulv-<8TqMdhK4U83~fiD z|9wpROThz}P$#q%U6J{2`Zlg&gN8>T+wh-tQLpEQ{AN$Xya?^RDLhJr3U-y%7Q{wR zFRi^GJ%5+0cIqv20-}cq>~!R!@vZ2wz^dpnH*T5-k#0oOp_zf22ie4GVZQ>xf?Tq7Q=tG7MGpbv3n3tumMnfy;Z?}_=@GjB6 zPTv`GO%>mK^7VgbteAObGA>lE)BpSRXV+IBg#M3vpC%VWw^z*YT~a)-Z|PTw zLrC9#5BfiHIANdthwZRa60|ne}gU> z^l?BZCo%xPl2c=THhn~5lYHsfv+Kpr(FlKyx*@mO$2BKUz0}UwDn*xTxgK8uW4LIs z>E9mfRsQYhj-B+o=}XQG{(`!SYNpN1bTXz>RcHx5nnrk6WJcp~<{ETrv5~)pj2lo7 ze33M^^j`&k-x_ZuCw||8JeK)V{0s7&@v=>DlINnUoAJwQY13@lRze$#>3cJmzcJmY z_ol{l6|!(2X~X@N)3NP;gvSQTU<(-I>DMM;JwlicFGu z-UnSTg)gdk_X%UGfHi#cpz&s64b_q}6Td&gz0~cbZoM`qUA{Z%)CyXVufosGi_WgE zdRqm=2DR+ID}?Uoi;3e)V=UO$j?i{vE+>K52SVMWk&d{69cn%t3Yz?hi57AL;Nkv|Fq{ zt~{2_%(|IuawDc5V~(B!pXFeaRcR|XOklp%WbgC#5&jV6ZgSwugO;t}Q3f9!gOA+M zKL`IuoA%U(7vSR;=&u@h{sm}k>VBiTtLU$0;ZOQw6aC|5>}~kwGP^YE!+0&Md{^>XN7&_GdKlcB(js2(k?2!M1$Ns7ZK6mUtW!L4u z$k<;Z<$eCxA5i8$8~fjW;ERv_x&QgUXYBtg<^F$h?9040m`)^qM*X5<(t91vAOSh8Q{I&_6-ex}}BL{6phSDx-*p0NCg4q^$H|IWGYByf))2SvAUWzO!% z(wn=Gx$We6?u+Mm=HhEOOrGaq^rn3ayoCk$8s1*6{GJA}weU%!H(O;iBC|(#Ej=Xq z;i={LpwTTl9HX0Eiw@6hB2QCkcSW{PZz=l7m$FRUGlPEw=Ak(5>$0+%*Mq;r_`df$XU0Iej$Msz&U4YtpE7iFrJ_~K z+h?izdU7~-ac$tbo9psfc0J63&j()Bx6jh*cg=Ft@18XQxIDhe<$I}9em7BXY)|rA zT@y!=@0x}lnvOm?3cWM~{WKGuJBxT?GIqD28}-S@))iwT({jCmlh3O_27c04D~dyn zmv{pnO(&RCERpYhPTo(|7oOQ5?u)K1Cl5C@60C8bWC>@IV8kA4S#^((fIC?JERWy+OTUS zmn2Lb`c1{5CgL@cn=Q7O16@LFG2#?ciD6cpWUj&Q)JsBu|4YOz!j}>wV9kl-3yVIN zecOacc%nCiJ)U-SB7Ow$_)}k;)e(=giZ9Bv(`WyObKOzkPk^&8n>J9!yWr9FM0?>e z{EVV66`?QPL*Du^p))b0>xl7M^p#kO*styAXCC-(Kjm~n#~hW_?WK<{;#up8+r*9= zI4^*2@6uc0n_J+YO8Dqz_^AS$_9pT&Z@dpaGh;Q!d)L1Sj_0@UOn+-ui#OoXvR*IJ zo4WU+Z$3mD-gab=e_Hai%bD3+(EG%pmE;8+`R69L#)G$+5}`j)pFe-l_t%#I!T0Q}{bk{)(OV(0@2H274~!(NDS0Kp!LK z4{zxAu{L`b>nCZyOV)4~4*}{F)oTivAv8K2ghD;{>0gk>*-d z?4lFm8&c@U@?79@8oX1)#*_bDhc{85t489$x@xawjrx=-;~oE#lcu!uA3Ng2QCk_E zL)8H_>q3*yp4#fa|A6+n%JH2S?lbeb$|YZ#`EaVd@A{dn-!tE@ zbQ$?x#`}EUOI!9ZCIsKW(lr%(S(_<1E9O%9cJHoDecyU65IRyW^Qckp;eY9S=O!*L z>jjU2bNPKaeiv{uMoYC#8_Jk7r5?i;6uJQ8Y0B}hyzI4d{Apurhi zTQhR{>G4h5|H7N3g^U}~M>Zo{8ptt~+-jG0!Bv6XtU*fFZVqFAcH+Z1%K8_{ zfx7Jtnw#VXVK#JlR6|(2BeZFENdxx5f9}`5baV)u$74eqD5( zqdKyjx?E|+h0HnAT*z^GUOZY=emh3&9{K7eM{(B^-nHV}l5x44e*8UikC9V|3@7hk zos^+n=8aBBDv8u*n(saY2LC}tokJBG`Kw&(Xk+V&JI;}RQeW@Va%flvElZJqUnS;m z`tlmTQXIlpKcx!3ka{wy=hY!4k?^J01;W470;j;szztmfv`2h~y9#BjbxoQbxag(z zMx6gT;{2B&1AdqEC8G_yz)SKTB)=(zG4zM+(2?)-w8->M@)>?WKUnp)FovXF2ldXT zeHT&A_v7U>ke|W&q=WAf^OACQQ_iD&`|OvrPzQB1enks4{`I$orOwEVP0N!CH~lrK z5Whf%ou11nV=1(<(@<^1~z9l%XWv(Ib zW7D(v;T*~Sa2+{i6Y*(+OZaJ3yUCGUxM_nU)aafaYGrIZySzB+LRT8S%(c@@Q*x?Vsx zvE^Y%n@?m=7VmwEeqb(*2>it2!VX0a=tSn@ON$G?@mg%Xllc=Cx#X05lK^5a*{&&2EpS~JfFYB(zrLV0*t}xGR z^WonKGk06Gm%hl&^PXOgj8BGsKcGG%Z)}jhfUfvNWo=4-yel&A*T@15nZR1ZrTDPn z57DKkA=5JWRieL0ozk~m=&RymbxHr;;Azg#Zi{ZBuQuJz_}f=(^7>I=zW}d)D^8b9 z`?Sy)ctqX98VR0>T}0d{zGl_Qy3{0o$^3@!v#nvhvHlvpKH0CgDl1Knp%ITCJcrby}Or z&u*TXuy$!3aF4#15_s(T{gS_-ptgDoq@-TRR#@v$+eNIVUTb{%!$JWfnHY*_Z8fHwEl7sXa_Tm}f6u$jc z^q=(o*jyFLdE6Pukv*^;SAp?5b~&`r=b)!($akqv^p-g)Iv;yZecc-!_uJTd{btc) zh@suAqVrPJqFu<8cZR5&j$JZ4Fb}(B1@c>R<-)%x2^8F<-a?NgCvxS6d8w@J0oFlG 
zC$G-S+R!mU){U)0|J|0&vrR+Plw#gz=)`m4Wi6oZZ@oo%DsRv{x2LI4=}pXE|}>gK=|)_HQD8ycIusW9s;3b@GWrv!I{wqUigL%q7)MedBa+e1_{( z@ca^ZP6f}awXygHB>pb^Ha576?>;{02%Nl7MQ74xY5!p3NA_2{V7J&-=Z-CF@3lh8 zx~8jl{or!hT)!)0O8Pmx#^jlk^uO?o@cJqAE#Vm>uiD_5tUjJ;jq}WF@Js|a;hESj z(}$Loz%ygunNArqEljk(}v!q=gMd4v%#L9s|WP&8>fB>zVLyyro%ozfB$T3w94en za_A##h?Qaw1jSwmBbj%=oBicN%rkv^%{9Wa`cxGXyIA&`%gId+~(*ex<7xu8AAW}=YUDPS>#9_Yf0xY?oRVRR-_6` zW$gfa7rl!g=r+b>HaV*LJmx*t!{<(PZOj+l279eZi%g#m|8S4Zl0D~Wk=P((g8#yr z+i($kE}`d3F0BEZ9GT(6uDm@~bx0p7aWCyE=Gu_j zj-7v)YjQH{HyMXVSqmn)VSj_B??Ka_o{g;^Y^+)H*ys7GWlk4*_OM1*XsHti6IzO% zl?kuu{IZ}Kc4k34x}@(e<{9)ep>4|q*G9dMo|$oajtZ8=>8Z1B=nv8t&|a@q>F6bM z^kr3tMD|G8N|XHv2gctmzxZ_eXf$<*9Z^M{pOD9vK|h%CC?ohg@D-d4e967)LVi9I z$FF6==#8I<&g{sXBk=h44qgOKAM#9SF<87g=bKt6r=9ua0KZ)Rhq-#W{+zkxN-dPZ z82SV4mbsO=d}lUN)s`Ddyp7Kl1yzCjV>8$2yKz21c?usKycjHT>eb~?yh91{d+*@@&x*cc7v&JH1Jd4VDvxZp;g0`m2sQ{KuTGf)eIM_&+M;mQ$|CqGRhfWlNh%;R&1#Gn~8|+Ag+~^8Xc@$~9AS zy)N=v-X-&{5qhu|z4>k4+x_WQ9yYpfNNZF9yKKq9cbj_~`YrocTJpu($1>+kN{;dM1`VtlW59$)Sw>()KnE-tx-m>w# zXN|V~yWp{yvUfZEVmH%{%CW$Ff-J0GGdI!*&8+96crNl)^2P*igxte(Qcu;bf(JMYPg?KVrQHjZ{{aKn zf%<^>o+U5K>Wl3H8}Ezuk7L_^27g?}FSeq8f5qa*t6tfOk364!j+iGm_#XGZa%3X; zI8Ip`mBc(bgn2S0_zF4N;t$|k!?ug<8Mr<~|7LMW=078c2>#g(Ezq^jS(hPse;v#- zt~<~#7>91;ox0e)QF2SMxzqDB-Ixa(lTDo>+#8!F^IvOkY=%xwxU6rHv5-%$s9sF& zZHX6q$7Sq|Xz51aX5g48H$?xjYe#O$TA3?o@B8t!GWBWJTA3SIEA#AzkB|xI+QwQL>)Q{V zBZJ2G^s}s$5xyg)=l?U~O!64pMP5Kh`+RAS+c!?9FOMjH<8&2m3>F7i3++b_SSxYY zNjxv-T1SjE=_lUPn#j(pn1k6PW-s)Ub?^F=CiKkZ6*KZWh!>0wN-kN1G0`;PdJl7L z&2H9}H)2z$+dL8DTiVcg6>}f!6v#=8>f@2&)F=7_{+MPLbv6yBAJ8kB8L!pgFLa#4 zy7q<48?3nry^ddNvXSRkR(@|h&+qFuUDo^o?}Zkf%r`>E!^mTqdxUOn&`t6Iw=%ZB zR(*fOqT3PVw|p!0e?2X=xtw2&_O?UM=8}@aWAJz#`gEDrv_W9dErK~0tAKl%7RXuV z2)K5!C)%*_>8@Sy==CLix;HvQ!Cr7L;JxH18anx?=Irv?3)4%<@yi$Z@{&RerkuR} zQkVJM$9Fny)`>q@@2zj7KhM{fo5)Apx~@w=UDsF1dqtN z2|JH;;F}d5>9`QREY2h7XU(m&i9A|kZD7lC)z;#uzs86I&<_#^X#7Qa09n*sPakID zvui~DPoVFfWj+{9{5OivE*~F+i~WWI_)EKp|B7Ci-_W?Bi@k^3>@yT|xVp2atnByK z5$hFyUCzI={+)3pydZh%V@Hf_&K{B7Jk9XeNi0{^@Q)6*0{7$ZxC0w*SJ6ipe-aON z^UTM1uVdZ~5V!CUw~!cC3(t^ogUrR}+7(t_Z$16%i1)9oDY%_}720j3Pcx`z%-^wt z_GsjK_wO+#Hj?^_*NA~x>oKf#7z!V7m@-68n)Wlcv(B2IcGeG}Gt5UHpntb_il25% zj^BYSEg{}REW^o=IlPe#Uwbr6TpsPo{%(sSYXDo?q?~K2ve+E34 zI^b3RpBFOkrg+;rQ?$0$yVYB)|Mcfk_D%3jBXpB^Pa|}b-0z*_s@FrmM(CGVPmA?F zp`NP0$`~^Dd$QUu>r_vF*NOl5+_kD7+x&nL7S^&>|Q9Naxnd}`WylThu#zAn@ng`zc*{q;&?oUiSAPu1 z9^dh~)U;cvSM&&78#1Q}Kfp0(azVMK1t+Z10^Lhg`n})rMu#zH4V}`qu~}20rhp2# z*bjXnI)&K1li)8OGHOnBb;QRS$}Bi=Fy?xm;6M^30;d~68s#lF6b zI)oNGky)ng?S9>QcM0#rM@bRdyLJ0FgDAvi$#Vao1!tkO_X0kr_c3Y z5tB;X&qUVglZS7t)xSu5@W>2%8J6#B`{l;GvFM&`Q{ISOB6&l1un&pMrFNOq&?j?P zgN4lb>8aTI=M8ydt~-$!e-h9C4!+W#Df|-a93eD!3cdqljh5anc1AvWnAjF}d*gX@ zfYOx64B>0brl1dq`{O^e*$uJ}Y72f&@$a;nV;hkkCNtxGBmiS*ABi0@X{eEu?ud3UN@m-z! 
zd~lg5XO^g-*oOLlfg`?Kkt1Rw9$~$&E5Bcs?8bg*f_^KZ=lOhT%$-EJQaSk`xBeuG^mc#K=LD9 z3G2!7h&c^L8*{#K&Z$@0RR%t@!H0Q9_}+DKaAPj>%rVvt7G%nrul8$li|c3ovpHw5 z`g2+1@hr6a@5=Iv11ON3ai$h`b3-O0Y zo80(9N;Pa3vZ>`kTckQU*S)G)_v*SE#9(HHnGuFD9&sKk{?^zf5WG3A0 zea`~cv(?}1dsb~d8}^O9XCCX>>RbAr;ZvxbGpw@jelPd3E|s-y75Zk!Tk*VxL+saF zG3S=;i-#^t9hchf4T=8qb;kQK=ZIjVHhfM;a%t*_Wo;X;sQ%uL4{1}3HZ>w^hXYfm z4Xb4P_^btbybP3WbVOkJin(nL~dlN>z@ASpO*`u|Hb5+YWOCL`d_7PYaYymZysh$ z$UNibxyS^K`9kIcnO}A=ryKK&!ABW2C$E0ycKiJEV#50__~%vTDibElDC3t2FfM^{ z4}|$y0*vs@tHTGvJd*%Z2H(7zJ`iSA0!%eL@M_jTm=y^ydxrv(JrL%x1eh-TK(Agn z5N1&Vj0+fI-DyjhyT?06&pA2dj#N9XzTFSM7(Z&|ZP1)G=;`C&>v1OSE6TW*^89+P zKCahtZRA?QwUw)vtHj(-5-;6E9>j_hu}X=p|JreGeU!CM`dj*sP219t1&ceO2f9m5 z82-Q2nK_63#-`j2Ux@Enc=cI$LF8)tgmD{1kFn!Ftsj46N=8j9bN0i;X2Z^D1=Y+; z=*l(oDNE)#83TtYdq4M^$?1@Cgr7u56yBG4PF?(O$X!cDWz^{CHaX7Bng;UhZ)Hq& zfV;@KFjw(c=!}<>=(D1?uzGEn#a~+WtmXU77~uO=3BF$+Hkw{* ze#@L%>0^#{(dVY$w_lgCa=*lH>y7(j)e+XiO26IkQSW+P=27Ci_}ZCAR6#v_V)kLC5#wzj zrfl``3)mSFLy~oyU)29>(R>=XWP!_YaM=t`o4Em_g3nT>p_`&>6QipTJY{{C%$w{T zm7SqwOu7#KA-}=EITxJmarC>~5e!3D5sH~SZ{oHI* zE@yAZeqf|vdIp~p?RNYGbn$IO(sdsN#&Us<(lI= z-&XFqGHHwdWX7lXJmJ5BmM5xS!S`8U=V?n0i+wlP+ETmEL--^a zTw2k2u-6MRkt3s#l`-}J#2zrmbl)78S<}SaF$J1Ur%%KV5#DS!@^`FxFtetbKFtTl zFH{-R&L z?IK6{FmxgwR%zjl|E+Sp?Hj3kw2c<#e2@`L2WOcNIc%;`P%df_FLe&{<+83Cqw&ldh9Vu@%;_v zHsRLv_{old!?Ptk{v)iKB$6|}V)dO8t zcnco{N0+85sh?jZrLt<+lDE~pOX=@S>O!S$FEosTe=YB}s0>0aUf3Ef8J7<=j ziA|L2xDh5ALNC6@IEa^GC}rko?5nmtw1nZqP6HzSxtdjx0Q(vy&N z_B=R|Y4$w01%flr2ICF!1B;B2_=oU__%S--d?7h!avyu1^(r|k{o!~&GsanOz#ePP zQ3c^t@t?bly|r`cFXoPIExaGBuRbNeQpTqbeP}PdCi9v2L#~7d^6g;Xgz+D*EGcH+ z&a9dvjFpwYz^9qXGvZ{;w-$%y;&+febsW5#o2L8=iE*#IZQ+jQ+L|5n$g{2T-~5;l z{$?$Nzbx5Yn^f*nH@V9e&YU+*ExLAW`C}5N&O_JqIb3rTcKcv>z>Cb~S$+PRVM0^< zHTo$t*8`h&z{%N(?0N{Z!HS zc<;l;vd)RJ_R6yF!Fc9-#2javYYioCQkLosttX$Zbc8olihgpMzHIrKtWA)3_qd?< zli2zye1Z}iMXnla6NzOQx@!P9&o$V3?AbLQ|Nc>j}fGz z{b@Gq;QRN2lzqeTufM3?FYc8+_%grg4ahki=u#Db;tZR+>G#uzl3u?3D?jV`^P6i9 z{`t+VT;JsPL66veVv9QDBF$R!1t?WfEcvLaY1OKFnpK8c_UtEn8dy8y^Y_0!2Scs8Dd2lL3n4tJRO-=f($8UAA;W* zYt4QIJ|bHN+xzttbCK{}C3B0MMIbha?kz@7%Bb1JT=X!qzlGcvFZoU1K+X}DEOcG# z-Qm_<%`#`zFi+G0n+xyFq`zcN5x7ocJ{q`psx@9^ZtIg(_~t58nWKdh{&~>y-iqQ-4f9)$%x&;e9lD}RPi^)v$Cc-CCIGbQ#-BS+a(94VsL9!b zB~Q;`y~$+GmtTFIcgF|LNx^b$c=JwtW#vu2wl|v-x~Ed&XJM8~Hz}STo>v z6X&;{%Nd6U;l)0=ZqRR3uu%35VV-RLBsNv($NP#f^fUE>!SNb62b%h=ef}#|*8Ddr z_z3fL|NO_kvGkWU|J?yT=gfbD#B(iMJ@9dSp2H@wct_-mSw_Df%!*?^13zUj|M{5z z!f_wNtIUBN$ab*@#4hclhp|3oRPZEw0E?c9t~AYvS;K=m{-h3B=SNu`$am#Z>2_Pv zs&6@SYLzkHWCs5S`bP_ODr}CyU`2mUjAdE2f}NJ4OOVrO=o0sIqfet3>hXBj1mvMz zPaI$mutr;KV4d6)jXmNz#J4y$kGy)Z2?oZU3_ZUy*S{IxYvMbxt>rjG_>Ci+wO3nCMpccF_Y>aM=F~ z{C3;t@LRs{o7==){Dgz?k)7v6=dFk5baHvar(@G3=YFs@TDcrDZp6NqSPp)JMRz0X zZ=ucrI*UGkqa}jS`0t z8si(7l1sUzhMq4mDbo)!it?o2B(_=2c@xP7&heJHyfe%A`Fsvn7RK>g?a}s+j zt=%Pa(F&PcMjNv2*az^ov$!yyypB%ht4?CWohilS397bT@j0jtnb^r3Wb$QZjpRjy zC!sfDLkXXTolV)Jv{9)-qduPgPqEdrf^S5HukV;yieFdSB(~}K<`2DNW;6Nk7Tt&YgU88*T&3yFtE;PL?nJNp z`PdsC>lFPE+m1C--OKqe=jJ-G<<7@*N5~0*f2K;GyOyv|FaLe7l6y>>%QVg1jO@8A z?}d$9ncof*J{E;Kwvp(PVv}{k>oV`;ki zz6}TPQui~@DNkdGH{x1WwN_u|U7Jh(-(cgwI?G}N<7pGQ)@vA#oMZ8rb&iEc8<|R6 z*=^BU&an`Bw|o+-lvtUZVG-nNm&r#`HILqiPSlnJ-DJKU;+LGX1w%NGAy=Vy_RDAD z>rvNJ?EQruM!bLRoCe3f#DS3~$+eaIKtuDEOQchl%5} zHI7d!x>2IvOz^SF{*=As{>YgGN1n%yUSYOnEB14zOU*cfED~RV;3;vdjo8d5zeQY% zz1ytk;%k$4>%^YEv^a2>vtsi&$1X@tWb;>79ejcFN18c*)7Z;ya^X<9+(bC$a52S-UJ}sSV~!ld^>-4X0w2ZgLa{qh)H^xwMpaTlC8#MWhi^vaOT`>CwtnMvLP&Mw)*x98yCq# zXDH!Uq#TCKXvJ?-%}?sd&#kW*HEmP&o8TgTs9zALkhu$9ls(2n`eZNhZg)+UR`ysG z{37^bYu0o_8Q~1ADd?d(=M@c@|@SmGc&ibLA#-oy6~IeqZJ{ 
zo8KIMH}m@zzq|R>@Ow~&$gS}+pH+;-PGFCW^w?6KuOKH7-$N05K9^}f-r&}Ll6{Q% zx)uD&k;|#b+RMx{nrPGTns)RTYh9S|^8LvEcI|d!&atltv%V3zEZ>QqEpv{{jm{!_Z z!*3OK-0Bk9+hb^ruB~Q`o;S3zLiCuS#N<|P@W}lHZ{a+2gky{&sXq*Fl#+h{A1yNa zFLP7&UG!fzG($dz+|a2DdQFB_-QaScwjui`?CHK5n%z$ueg(}A7<*)2DSKq2-=x*F z)Aq}t`y%GB7Ur;Fj$x@U0yB)g$o(_VK?4-faqMp5`nS+Ry!c+^_kH*K-)!tYlB}7}o&%+c!hEI_l_# zZu5B71>LHNp%n4V1K(c_-F&Qx90BbDlw;M?i7!KR*-h~FCdqk+rXA$qi~mddR@T2R z=l#&z&#rG>@vTS~u}oZ()i#(+YlyF9!Zv>~zLx1JV=WV9*w-?3+{RiaVt?oy(R#jV9BeI<^&bBY zHb>w3aH&J+)_UjJ0ef0lbBmlybG|caPC1iUzhK!;=H8`tIT*fF8T$m~u;+_n-PU8! zUua!OKj!iCzIt|jHo9Y}vv5aiF5~ECRr~4xR7M=ZT+h>gE)(kjD%Q}>BNLVXUD|Ki zh_XIF_V8nUz(Ljr9NhVutc8x#BB8C;*;Hx)zDaNq*(-H_!Z}k??$JHU-~0-88T-=Z zN-PB2T*yOR>)IhXQ4Q!&cG^`jk3R~pqF-(svn{rM7G;Jx1IpO{4!)hqy<*JCbBS@> zzcp^pGS;yHLTXHX#WdbY+`?GPR?l8A^$vdQPgc+VWRvT!5gVm!2zy?r_8wP@7AV!~ z50+}}J$uQmceQZNRSSEXt*Wn>NIB9TIbSqNdyM(v;z#btWqz>QCVn?r`}UU>vyGi5 zeI(C(k>E3*_Wd^8yWUM%GVTU@@1+e6-d7Ew?W}#Rr;G_tae~_xE$=liY)YF82%ALqqPt zrQ;*MVbLlsE+VA(NXp&}t z+%$<{k{EvP&p9(NBBp(Q-{+5coipd`v-jF-ueJ8tYpuO@^nCpXHf%ci%Ha^ue_*uq z|Ds^@Z2pf1>k4cOC)?tkSOWCVD7f9S0-n^EV;9>}jxDMjel5>3qUl5RIq`t0KXYd& zE=2R=Xy;kS*T7oiPNMGP$j5G%;&QAeWMjcKUh;VMfA1H{Bk~NsoL0^Vd<5O${aWXY zz)6J7&*5C68O-|?z@)s70qiaOhVnj6BJblNY~6>xO#W5!J}$z3_6oEiTAC8ehdJd{ z<7l01Wt>l+m=E*6o^oiZ0lkh~C9Hubb^MKI-L9fvydOJrP%vmOn$8NXqW@~^9N;-b zuE^V6?B7o4&+h2p?lXC3kU#QS>Jg73%l`ZMBR{(Br2LVRP4WDZ<+RCsQeYVl4l{{U zh|f_K@T&<5Czq;%>MzW?A*mP!MIeW{sC#M3Ll&DL{2N<8r_oyG& z{Rgzp8FqJ(mvg<7lQR=raRy_G?(tHksK2o044v&v{rU zqU)AnN9;sqdDA>i{gm+~o5W*#`@49TiA}*m=80}}rcFO%xp6{flg9CXs$CE5_R+4_ zZr5YCJ2Jkx&}AHb&TMY^qiIA}BUk^J%vdt7ga5GOt4rx`gZ@{u7JZ6bK+5qD1pb>C zv!}=j@!Z)_-U?X8n(>KuPLi$(7m^D&fV?%BXt=*!^cRMl; zHhx~#*N{mYDANF~sI2mitl|t{AKzmi@h)0=UekEmDKy9Tw}FdPaC0HJd7E(`q&?9T zeLvb`UiHz=^alHk-Ue)&DGA>?XY}$teo)~@<+bM1`Ah$!yukWIyzuI^1V3dq&V=4*!Q~@dq|7 zj)xZCg@)tb#c2`$yhDrhc{EzwdhP!#EjB`j&|G>WbO>FB8^QMoIt*u@;0!HO`x{S= z$&S-v4z@|rq2`=M@S=68=x_?a%epGUsT(plud#73vMmpZ%^e!gTw&OQGFif-vH%g%To+aFXgnauKzvr7wq3V zR{H&eTT4$Yd}%QZTVXWsM9$YE3vIeYW{|^Y zYuWFNJ=hCuS-YFFHQSe2Th%W1!wHh{%GujTTUE3b`2K;d0ouybd_r3)$NsskZ!(w2 zW&)3g_tMr*+S(XvD?XR5%(X-APUunPLx2z{-RY@a25 zvij@5f#NDQ@q90SCgnd9KdlC5vh}=crff*P*t4~bGOeajU6;U5;v-}`KGM>)vp5S) zay=DZvTZ*XhwN{}OZrZ7UG&mECezMYCVr9~B$4<3aLuQak0@pg8D0VXdYH>gu_0oQ zdvH^|?||mmtJsh5M>Dl)=M75^+&?$L)Mz9J?k{lJ(> zKG~{PGA82t2!2iUQlETh*{Ro7_cC8KFou$e4UD1xtYCn5c@4;k2Fmk%DS3Lr=v?es z=Zb9$T_{}mJu_68WS~Evy1%J=ZT8i&B}dRRA7b29$3JsUqI9Q*3FuDbr4xT%ca8G7 z8PVPF=gWO}z4VA{Qj^A5dg@j3m&D=^HRekwD;ZTsd%}AwJgWZ@c+`uYC7vwp4IZnD z>sr0F4|+1twd&Bdpn+ENa|5l(Ndw})xSrJ}Ju8E=u+YE2q0J8!=;f7p!2te^2Upd5 z54=UJ)A`Vk=wsszR~}eBH$^^+rj5B*9@v=5vjtZks9v3NK)$_I&=1#A;bT0q)65v( zbpHGxdqz^LgV2KZLsG9jk3*jmo;Jtj%|cIv9=)}UeVcjZ+UoRrdoF*Ax%_9?`$yKH zIlYekkfImSU?W$3n`<<++{-=ZFh|7;-PG%a7d+r50A8do%#?frFD02NO-t&H=C_eg zd%??I@DkTQC6{Ci(VmxH`o2hKh1vLVn`y^x24da*D}JnjEU+o@K5K#tMYTK;@sXcot5D!y4XOT zqyT#={fP7LczF3ab9~ctH(YgK{M>QQeeP8Uo=Y9q^xOiTt#+RMi7|M`9A}@)xKB3c zOn1}IFB1=^!NW_}_Am!AZ`rn>WLJ9L*&hwgNpq!p)d#~>vi6%uvIf(^W4@q<@b-x@h9s>&OD!~d1P}0p3)huA-*fbZm%<1+uhFM1z2CRxM%%@zM*1lnC&T0G#R(bbq$=`-eBV>OU!?R@!-Y1OZK4i3Ae}2OIgt1*1fB(M{-WMmd z|8DI2%M;!wjQzs+`>PV(C(y#acVgdPoA5q?HWtR;e?8&-%7pgcj(z{Fg!c)wvoQYt zI|=U-XldVDvF~qBc%ML93*+zqCE@+Lg!bQzeSh!pd(qw^p3DAz>jd;Ym%UE3dDb!> z8sb?G`hA{kFQO&%qJNr}m?P7nCFRf8SuADuNH2Hhz1y{BMJ8y@)A_^O$4#==tD}{T zAS2jA-;eKVx$Y%Pq{}a)?znz^CBDWT$m><~0X;o^6|`6iol93Y@V{L-Lcd;3x$n^L zz3|R+=9U)e+B@;d#`Wz#rlvK;^=)H>zFltX+a;&y+ka%ut8e1^b}+7QbIw&SayQXF z&iV4oyX-Y>W=sda5_!ECxm}8{5kKQXck^q@KApUg^Oc*Jk9_p6iT+gsx6WCvrhgXw z6JFFm;vvGTkfW`!^@H%Rn*O!Yzn$hoEvx9?MtrCDGY|dA^ljLYI=-oyZ#W~lTK!97 
z4$ncZWV4=FL;pBid2QN+jrQ5fZ?ga72h80&IFni5#LiZ({>=WN^PITr)IMpBZwuKu zdhi`!d&5q6z{eiLKw5K?VWeG2OklMqrK#M^ep&u5m1mxg2I{{~`~~MVr3VX6dA4+v zXE!*{48xe9^28$j+}h@EHf|^%(0}VnVj~Y5Yr28cS`hqUC>8%jQC4#f`*rl*03UX_ z3(ck3UrRc<+=|>&%RP|1GBq5^W@F1@&!*043lGjP3v^yv z0Q~nVo?_4)BJLuZ$=;0J#lGeccIaMWtRk~nKk%*eR2M(_gMM-Xd+GXxbT(Pm=Zi=e44>dt+04*pfE;|OED z5?JCkX~l{VTU#*rMMJremqi}sf2|cFPBPj~e6P2`C@w zCpVeI4!GcJH~gIhpAY9J^`pmI#~;}*&G1poK(e{o4Le0@PMJk{;+ z>>HZR{uAxry5%CH`6sirR$|{dWmi5^wnyhwyucp79L{vrvlYmaBed}>&xi+~tMQ7* zK1Gg$*CWK5qW2#F9`+^S$Irq(mDSH$ba@GM<>%+~6cwb@2WwWHnKs)gUm^0$ZJ=NBKlTG#N-p?Yz&4kYhu62dM?psW6PU=Kxf(?d<42uj$`@u zbMcwnKz*XS@{f#y?}1C&k6wcBt(U%e$k(raS^RX?Q6u}_6|1awjnGyfW6;Z)6HPO) zTg|>A@UZFgH7!~zC7z+3 z`L&U{O3;JQJC-#K2l`= zKcYp!DEQpeDZG7EHeK-Cj@_UE{93?b0f*#Z1pH=!U+srbzpngXaQ8v(g`YSak&nmb z_wXz+&U+DMy^j@z3b7ISXEn5tSG`I3Z~MscZ_f=m7ITBq{0!w3Px9A0kmtaZt+UrJ zM2=IokT}don4_7Gu~QDmh4+E4MbuZdanp-=&+a_dx8rxm4!rr+F&}Uf?|mSJdF-tv z_rJLF{2j+Ojr;AfxBmFnv3NXaKl4~&k#Tf7xF1=sbfCm`Jh?iL+k2q&Dcafi)G_aC ze>`^J**A|R#)3L~X@+6*8*Y2QxA;=^IoF&&C;HZV;m3*B<0}oy#^}oPwO`8lWmf-{ z;Hd)JSdp1>(2KoiVW>Eqlp&pqITU<9e}S>N!j<7O(8&bn`(J#<-j8YJz_S;!m(29| z)LsNUf=g&PSl|t^zcRxJ+d9s+q-HC{h8Z` zTV`7W&l~Jh%xuJOdg~-XCb}Md%GBE@NZtGwt{`c(fnrphK-|ypmW3JlMtnrx1H^{m<`Y(Rn zKc=&w@EVgiXJoH%xLE-dwJiCMc<@}!Ez7-m2nArN)TCP^| z)|N8++R~EE)kuCAyT0~*e)gQ3D32oKw(cd)&`b(O$NZ!?RAsn)#pYOF(&mLBZ!lQk zgNM$e?KgjB1n^xUM=kOzcse#WnVS=3g5eK_MgODcU`itX(>Xg)Ihi%5TMr{km*zGx zpX0wyRvg!hiZ#6H`cJpO$NB1q0S_eZzlrA)(wK)@(4o?;re1OmS%DyO!qZ;@{^51x zgy-NBEs6uGzo>z;Hk_R7 zsssEj+e95-aZmHb>v#sbQ@ig|zMJ)jbX39f9$srr= zsm7Y5sYPqI0b62OefyOd+?~KlEGv6)ptZ{K`PpmCH9>IDJEJ&6KBs=~tcQ@}UgWs< zV6S4B7CLc-YE$(1@omh#GuY!xe4O{fhL&8)x+p8YjMM8rVsq+eMh1v|JijddFI4} z(p@w!&S74R*PBPZj87BebP(HXsE_&k0D7JDr}gx~Bl<(8mxJd~+In$A!WdeNA-Wdh zJ!!FNW-RVTp1us-y?kT_I@hF**^j$72m1%-yu$j+uIu071;+Xr<^b@#ER(U$dEYq!eHNF>*PIEK}TJ`@&#BGI|lREIbC?$kQpCxY4mc z)ip2{{Ydj<`Kz~n72Ej}n%I$v{U z1^27W6RsS0xEFg?z{IAq*>uk4e%O@X?)lPsPqP=j%Z|@}o^wXNlv6o&HJ&j1QuA$ z)6*p%&E~g}&4=CNBp)Z7OqqY^4@N?I$`n=2^AHH+yMIa`jMNWl=eAOpUHA7~sm*)Y%g!Fe=tAbaIqqqT=K`W z{|t0MKXY4UaVoMA+$?j4vpK66oqIX^dhNB9Y-Qr>N8x45V?V1Yx$K^0$Ot=^oZ`Ff zHCVFNty8PzLnfz8bSi!R4YAgDT`;%#u1C!1?bB16l@qw;3DdTtDSr~Vee*`Puenh9 ze=W+M?t5h98WVUvWId^JQ#CL6j0f%-dvWttx1o62=xxx2@WGgeXMm4*f3yemV!qLO zvdo>?AEd7ZtidI3C#PpMeVMa|-p6L3ck0U=a+FS`tYX^=-5LGD!(qmuhrCMCVSd0E zpc~Cqz8m`Tg7oA9^kl*D7vNA`t>oVku76A(i5|W^Y=1koqmFOOu~X@MOFuAG97`#v zfLYl3Tnvv+!+t|fm-t%4M;n&Ucp>sL&QC|r_<(WJzM_hY7luV^0naqe|Iifr zHF+P9-PP>%&Qe_}|7B!8`V8?r)B3fJXa}Cj#+FqI@0HSi3HE>O)s{ZgfIbvrt;5+U2YaWR zA;GIQE!uQJTYurc(CzAXayFn^T*lA8DPi*sN=j|Eq|cK^@5EQeeRk7Cpc6&H-#82j6z39Q+gZ zaLywaZAS~4XU=1uA!Z95B;}xDWdn2XZ&||GZ>l>$-2r?3;p<;aow5tCKlaONp zWq@+f`wZ6WkFgff`pE_SkIqy+_zYi2GW&+vd*V4^;(0&g=X*ua`y=Az}*N$y-ax2df`wi}LhfeI{97XIyR%%SXXzv+tOTQD2jT;V= ztE+B77wb{rf8Jj24eRDFHUs}*t$B!fcg?D6`SW<=fz2*IdT>U^n>P_3=f$S>z2Fbw zr&(vEFZ!qp`NTRM`=GO?j^}BrfaZ__z`63#Psw|acGQ*&|6m1mbx(KTwrLu;bAdZD7%cGM zUo3kd*eR2=~DLCICIS*%8c|G#pAea{=Q85cHzr)(02tqY%HB4 zKDFsw^M-gd8CueubSrVR^~88pNJp3|J!_(~ch7C)$v%-ZCAjvn35!0;g>Rwp7u(ru ztmlW_4dgUAIsQ#+X2)K2^7E6{f?rxvP(jQjv6E5j>>%rAJHxx{~s+6RiYnFv_1)&TO1!+v&M1KNgLg^+_La9G_>r;=d=ObXdbIt zn%A@#-C+l`u+)XW7 zCkXa8fPE(VQYq&G3-7}BM&X_~j7_J(yY%82;JpzZ+yEXHfd?zbhkcJKhr_A(pOJw7 zIIM&FMw}xJ%`$d#L_41OocWRN!RCtG4Ac(UTor?Iwdpw+`QoVb``{hmkab?ObX?hY zJlK1@*n7x>+9dmrfxKnSxeMHIzM8{7z+vYU7|B=h=h{d4(YINbl(~@gZe)EDvYr@% zg(kAz$r++KFB+5a7r*m8a_3|tPi{V*R)8RCOazl&`OIJ24mJj|OG>jT?% z$UIe#Uad9#r1YsxTE`53Kg>sO^L-@7M;`d-0Q4dqLiP2dTVMN-bQ;gW$)@k%ZsNT} zN6ud6VVmMp%Rb%GImVbL`DM|TksJ(N#(FwPZnX;P_7Df)B@Q5uIDiew*UzhjHUc#@ 
z1BK9zMee~8XweE4h05fY&|F6Q;5sf}-pw81W9$Ov(R_@`Rfn(a|F=(Zd(5Y}5}MBe zCat#}nKhi-DV{?$zK7ED&-fPM51m1v{Eo-3b20YlJ$Etw*m_uJhJHz0*)6OUt>xkubO`27eD$8?+a6%{k5`lOh?~5k9T|@Zt81X))Wcd ztbKt!*r*FLjiU>}n}>G?{Ef@J+e0@qN0fTgg9Y2Ix@D{}O8KAHNL(6aYREFT%zp2| zXX8tFPYyf1@A&53L+?w?X}Zq~5uakeV+~0kt8-Wv>wi`3e;amci~sR8W)A;nkLX{+ z7$bio^5L*A7;2;6M)_IInq%e5`DCfHZ&hQyh`1KfnEL%Hc!)q(9&j08y->(HZySAe zXk$3{KjUl5fYUt}-b?1(Zrfq=YO3xmOPzXika{d^!sM8~Gn@a8ANtffbATyv%@uD8 zjE)t zw-HWGF*sMvXjaY#YrJb98(XX~&#}d3VT-k}!zw?S^{`>vVm*eZ*}@(x861q^q){?n|b2jdH$1ycz8eG&UY;x^JQd`dUKhkoRpV*`%cNV7T)qn9eiOg7;4KaJafGi@7b2{bJG zuWcjMT9NU-4g0U%eyq(p;#wA|Ufa$atM>wIU95vt?|NWgt$Hah+iMTJ(aZY)Hi;bU zymsA+O;g?2dF{GmeK6oxyY9bV#`-l@_YCTW&*F7otGX#aR5$NO)_sQRK9W4D-S{M% z@oehGE@$Hd9w=1ZF`KasH}CDb6YR#xsyk*kmK{?zzK6*@W%r|ap*?zT!8{}SDr50J zcEQb6ob~f<;(NSB1&!;1H6G^Dz3hM)YND+?*%dUyjVSDElku^Ck9(XDSAizM>=iTx;z39Q60^V3V4D>N?iTx21ej zl1nZZ?ti&HbATM7O_@uAq5Sbz6zu*}e(2CS*qB{EY4M~GL*%Vn#@QJywG)h^`=A+Q zVRSn>dnxk;wv*~k_Iap$ALW}ClD`w5R+HJ(!r5F+yBK5uk`paVdQR~X;S3O18 zL#0M=4AnDt zxSqd~JH(tdTu&?YCKY?l$oj_3H9~KzJ?sp7bB&|% zI;D%K{a*ZHmDH>IBg|EMi;SZ#aI60ozYg;6?#Biv|D4Hh#pkT6TwLA!6sPjr%8F0! z-su1Q-H%xp-1Au41y4MdbHTS$t*d^2#<^EL{#edUk8{2Iv9y~W;Cb5Tbf0t6-8{ST z?$@p=zWd#)jJsdID);WcUuD_*5{PMlk8yW zDX@BgD-KUPb`QZKn8xr+;y0FGGC$cMroLu=@%hT$OVaKy_)hv0)&&v& z*(#fJQyR}6r!Q~ZJ#f|CcOR!;N3Z(*-5=7&59r%5`ttr&fg`3N2a4yhg2G zfYtHKPqqE>FSCwOn_s{;ulg15E85Ip9aL9ryzt|7Wdo|~6TAM*4$)am<~{h~>QA@4 z^%dY!-IPnLJ6hS7293R8OPM>;a56n}&UDR893-NNmUi$-L z%iyo}_16zr$bfb?S9I0vO{^h#-~PATY+Vt1;GLm&fy?w%vHo4|zpt46MpkuXc0*Rg zZ7o*+M)s|UcG|mUAKVK}zX7ISG0yK_&0ZzO-*VMG7#V!$m~hxI3plVJ>MZQvyf=Bi zWX42eTGMaF8oLYWfA0)u?tHTqJC8Zh*AGrLZ?L!6hb?DuY1hWA?bMxDGjoq{nE@_) zui%?B&U@a5eHcBy6r6go(X*DYV`xH;=7-ddy)I^r`3°>r@A@_9yahrHNhIJ*e z`S??p*|ecCq@6FQ9mWj3a<0bcYQ{+Kd$Gw07Zt$1cQ8M+cm3o6+ZVZTI3Ll!{lv*f zB%J(a#=KD5`bh)ov$H>ATE}J3P%Sx$RNu#6pf2irX?o_?im!JTyieagI+uL{#z1~K zw7!kHRIY9P%XaRc_Vw2dRCuw|qT@(jXVl~m2%cT=fCv2`PJ4O8C|H%ZGTu(iN-1SZ zdA|sMk=pB~J$PwuY)x-zSBtE#G z_~3)Y2Seire8dL7No?>AVuLpl8~i4*!STGj!UG#I?D*i~NzRz5?QLOH=*j?3vVT0c#TniYRdf3uXy1#!cg=Rz@{LKxya>F=`hSQQ#ix>S#Jeuv`!!;2 z*x9hdwK<%3p|d5t=(nV=TF6XPJI~(XXNjL z`Lk)SojER^8|)R}=mm~$@`DM#p&@?sG#v(xBfMJ?PF_u_-=a}WD^`|-J@AS+s#Z=0BFQ&_v$ellX>Qe%FycKl7L_{lUc zRU1CvN;7q_@2uiz3i`#&^LA`qg)D7_&Q})%Lr*;2)Y613{R3AYvCApIu@N{nVh2AT zTIk0H{?;#zz;B5YbNo>r)}xAN_S*i4vbvLF=o0rPD2_qYu!Vo237rqU;Apj3-%<^}nJb7t^eygSEgPt;+B3tn7R!&LIK*N6IewhKGp6|Y zUmOqqX9F-6)!a6)Ay#)Ob(d1N;u>w=4P{jCz4(gm!&h{=`k72l>nA!L zuDV#S30Iu`coMGsHm?Ky6J1df~W9(ZUkr8)fck|#CHh#YkYcB7IQwnFOTiRpAOCygAm8R2mHIhyKH+6 z)Ga$gCbZv3-CpY6e~x!+9<*FWT{C9|Lo1%fjt?z2a^349Zxj0AHOzYk^SA6FDe!Bj z=nf-Q{M0?uR~ zK0g-c88GcQ&+6PXd?x9QEx+_hcW|*DusWny%Eo--mE?S2ADF?nUjo14M=L$k_L&TD z=0`dC3>&~fqR*ttbNxK=PXO91{O5coKOgBcxwUF^pUHaYs*gEGy4J|~#V6hCAMC?w z!jGwaShnr|ANWF~2Q@-Ve){Wzma6dA2JqG9abIS5`;iyRpJDwWn#tu?+hU(}zs9kN zpXzsw`z?=Ohff)Mi^f=Z9qqZ+Q=>iCH!uF!ZToq2AFm^JeNZxQ+@gDbPibM(-BazN!9bd+IMa^r;a#bgwyJtsve88&CW@Kk{DRaVC~MMjtXpn)AC|lTR9>W3(rj zls7FKn7SyVy(6mU!?D1CE%?yTxJ~M~oIX6n*lZz3o8Z$ooqQu47}t2V_F-4B$YWRm zE`aHUc%DFuJb~7UOJC|6`~6>-0nb)1_59Fla2DUNzb*0n?veEQ%kjqZw-8&V^Jguy z^F^(%Z9QuKT>NYk?U=g%{3iY!;BNnN`9OsECp*`8+C;{(#(Lh9UTWv4@}zsi=w^96 z_#~7g^shzOv8?--`LBHHX3HWU-HuJmtSLrK_I%ENMs7(yw5^>pK#ZCb`%J&pp6mnz z-}-cm4a0H=h9i{!^(Zjpo(2Zx=MW5G!LaE5W$has7#g0sf7znvcHV6L+}M+S3Hbs3 z0UR&K;kea-!$qABjRHqK^g9|Za$<070gi0^i0$aS#@wz2$nyoK>w7rf=EK8%zjKr} z|54x9s_zZ=FSC|7eP8tUre%@uaQ{#CeG72ZYcJX!2aZR;)wQF*aoFkm&qvZhXN(T6 zkHO&|@4(TH-$ZoK{ts|}2kmdB{mm~r?Tap^Q|DY@xE)&TPN2nYqrd>K=LKW*kSWP z_Jy41`49SC5byUm#ZEZEcU`RC(cq_B{2N98wEwQLM}5z}(kQ6> 
z2mPNC@BbwZ4C236Xn&*(h|%CFV;|?gztFzbuyI-YvrhlJ&)c-Ddnfn*Bn=)1=Qa&~ z)qz9&w|f*g>Q6HcasK-?aK!lU=SIP2^WR_MZPqz$itZmCrOnZBud$EQeT(|u4$p0J z`d%^apQHP5y#0rV`+Vmp?f;`buTAXpg3s>rPiV7ei)q`#x3L#5*YG#HdVXpDkH>=P z+<=lT?u^eo*L!U61?fB4-(xPhp%cHB(NVa`9OxZuMwtWHyXFebJp)~tI9%ln!XMt> zxlCtMnjMjSfo5+Pxq7yl&Ar=`Lf%|gv*)mjdw0lNl+5!n+`D)_mgg4tX2|=vr`c2F z<(~Gsyv@w%b0KdR_2#;{KQrX5cT<<{&!CMY z+F(E3%xNL7NqhCl+-LDE_eHclb85&744%WfpF(-ySTUCS$<)bxE-=lU1bo26e!AY7 znbhy4Jn+rTpibgF7Fx}dfz1PKy)!3z$cS9l!-_y))BjpL<~I zojIQNff3kxXO5$8-~zVZnW?}6T)@^lGllZp16%J*A9Vs3u=URLQl|xsg3H6Vf>Cf; zA;HJ`xjAy~*wEn%3{0=&d%f=R(C$afW{uDL#6h%O#=cJUt;jy*OhU(;MSjVBwFQ0; z=RnRxro?haMX;AM#8*A=hFFgZuYOV;wa z*!UlzFRwyxf6p^Eqr1(E?E74^)>2=>*5AYP!~AbyZ%7Z<8-IIZOS_BxAAh=#{@2=h zTYA95Vaiqhz}PH&#s5Ecg|XRBJ&}Ed1*&H`-!^;;U2{unc-JGrkYXROheWX>WVBXa zJpW?R>(=6^|6C*5{~_xg{QmxhJll6ofq#apAhNeO+PhEhJ)y`SxPOIv{K#*Bt59)au zXAmtU7so=Lr-vfGh0(t83wb^v6q(5L$$CC96q%-H8KKA*E)V;!Hp7t_ygPSc)c<5= zC^D1#FLM8DBl`Bd8$#`Zf1eTcuOHL=&L7C}v?4g*U11D#^SsaG-S(-W$knuSo$8zx z@}r~Yy=p}J_F^w4rYN?1Ei@r0Xy<54S%e9s3 zOk$ymJk9TJXZ&A$=8~Kj)?L!cUQc61-GUxnYw8vlV}d(f-p&Q4XW~xdA!ubEFn`gE zM*hI{YOV*lUPpb80;4Bb0DaNp|Ohu6*@MW33|k;MEZJNX0qPHfq{ zQhBwC*dvMTXPwr7-%mW+m`6UFr#IXA%of3;y|=ickvm+`!?w)fJmAZX%^%-V9R28y zV%whX?5WJ^czWfqUeX;~n`|TAix^?8E0$J}hnjf3yx_UMW~~W!{jX2AM46Y)$niCQ z^eX!^@t4Qzn)W02<}B815n_xoa*N@Q5xhE^xl6qI!vtPkU+VDcbw$zMujoCzTE_iN z+>2M65_t8NqG;b8*u2E6cTuLc2p)r1*Yo^7J%?ADc>V)DhgW~d^DTM~uWsdetDeKF zkMjIUJ%?Af^ZaLe4zK=F&*0VSg$}R&hIe~`L%h0=`!~23um1YVlX>;V)9`9P?YyHp z6L|G2qwwm?(o=c$0?8ON+B=)8cy%7vIImvu-{94+AoIklWs-edZ{#XoEgXect9d70 zy@@Nl+LOSmJ#k*$B7TEcw~Cft9)xwm=soY>m?9M;}ri`e@+rDN)i;#JODeCA|c z?fEQT^?~#8W{g+K2Y4C0>MM$VG`=YM0re`DEzV=VTk+|Z-K;}oONjHU@{TF4)aKW` zllirK!7#u6bp*d&@Q{1+RPihGa>c(5&e1%42XH@+uU%`W!LdQkY(6#4{ga!1Z|9~u z-JaR^@R7YwEUu<|6!`OO zx_rPp*k$_=q-$8r+MSY?FQ;m5DSE?asTF^AkM57%_V2z$SKc|jTeaQQ2$$Q@Kho|a3= z(^ABL{7c~(^Npy!%fp8-IF@^ zDA4&!hu3#qVk8?od)7a5iGiNgmrY#eROWkN^81Z&AvTUeVx$X+l`d>E3kuJ6_U$7_ zJ3LD+ztc-xdC>A{+^WOL0~Q!igD zJ7OVwO3s?PeC;-HGhF^mr#xkq*TiIfY1dmo?izCadNPCI67mqn%g>}=RmumMMn8yA zs7`OqDy^9^ftZfGc)Nv+!C~xq`(t$)RXt}LYbyp@x`>sRn#h`Ckp6k(II z`*@W83SMG+53Fp6l2d^H-z?nyig ze3Scxb~PS(;IT|)1p{yqm#|Cx)IpvG58v2zW*T98{24=oeBl2|f6HjI4!LDNuOVL} zb+Oi6Q^fTEHga?+dymCI_TUoB)l_=8?9Qfd{D8A*eVh?N8DN)9+s1jAm^kQav6+2Q z?bpPsSFf!nW=!u&ziBKj+pU;m#mtefG4}vVllm8Z#&5~3=IbMtb$QR)6wI9HP&}P{U zjD@Z%=od7oSSCFaJ?dGGu7BhTe#({tH)m@X-eayQ*R{x8lf(7E?ZNQ&ES-1ct1g|@ zl+}@Cgdb&WOKPS~5S#&M%>&H#7=s7#-v;o&*shU1KlB~q_MSi%m?z^v{qBeMhVdDb zCz4GeM=p>2wj|l%AIT;4`xWR~cs_WQ;>mr!Qk|&*jtgI>FT5{YL0?33Zy9S=2o~{i zs=@wxBclYLfp}4MbrQ>~Gr{cld_MsHP5E{(`Y<^4%y4privMh$O^2qTk^S&%z|5GC z?IsuE)Zp5~jK9iy$J2Lo^|Pj4xAs!c*VpFoyn}gL_}B+sco=&hV?PrfUOcvZ-eQmO zg7Pz@AfK=Ne#+MK&6K96R(y5-&Z4j0`A5nuNv_Bz&}&XA?@P~4-CBzLW)D<<9Wt${ z%Y32suyJ$-c&z3eq*V{z{o;8gzc{w>to_HXe2_EMzyIvkYWAz^oD+S&iSGkXlV^yt zPTpZ2e0S4V@0^8CJIyuiqFMajMTX2u9($+G6fo&;uRFCv`HMQmGt3dy=Cq4uUS`~> z_7&5sehGHHowMK#bg@nMoUun`0{lFK$i#$wk5y)Qe&gE93j%!C z_()N>gF02W#=9zzvUPFM*Y;Eri&0W^=AOXVX%i06=dy3Yli;FkE_p1uc5p63A9)u< z>!R_F&#~tUyy-asL31Pn7pEIbYom?~R35~Qxd&TPJMUlCYNqzpQO9=5`iM0tD z|6?*R%V%s{)^D`k41ZT`gN`I4q(7DNeKkICpAoU;qvpfCw2Q+$!wa34UgOBqSUt#F z%H}+y7|jirk}vbDsb61P$GH-P0b~)^yWuavZ^NC?k3x9mHR9Fw!^3(nIT3Qfqs%ky zV+{0SjiF+z$)AnB`Z{C#Sejvo1MU$ zt^RQBr9a4gJC>%8J~7Ts0p|M+%=gHX>Mr=b7q~sNp=)87xl7-1pU0eEMt{y?&M)Fw zA@@P9O&!Gn%}Yo4_8{|=?j>VIW7*7=oyZHDH@*pPpdW}YnxO0R&<#@14Tu}w@D2K= z`R^cjTJ^fIv^Az9C=PE&?z1iDqa&n(2l0{SoLY1Q%`Mv*(;bYda-yG?v}m4SS|OO= z?OJ5LoBNIQS#q~}R_fMjcbQB)!WiQE#^!I(0yU3AJAiB@vJy=>Sa z+B@mojq7zT#i@B7f!&S)@__5>lko4qS~lOauk21+PfoJtH5zI8?a)#74aimI`UB*& 
z(t6Ki9`ByCsGwWt4BksT0poTf^L0P6W)(b9fedP8zRv~5e%479gM+)j0Bvcl-sv(n zcVoA%ycqsG%=x_J>{DNR-5av}rsX59q`HTF^vd}X=K;+@?eKIv^l!n_+3<85`kY`> zpR?g>E2rD$X=2g5@ZH)j^tU#6t|uy$-DupIZ|`xvhB_{V z<{6`Ibi3Ns@FqFC+St>wgYhpr(`iHP8$8=Yy_`+7W(9EUq;Gp^!{!~@mQ2{mp2khc zg)uT)}4;*dUjoLK`T1+3S`$t>S>MAF87;&WA8nty+@^%J=u~8 z8qadRjj!qNM;;vF+dAGomseD9741C7dgwWH#@e|~j)c~uPT$k%^Io2b_V$}O`8(7W zV^PlkR?&>E^r5JqvnuI>_;Ck)SjAXwqz@bW2IsK0%imA^p;-Om%@Fk{M&G7&waIsl zJQLmbM8Gfo=+)Y+#<2m|?~o(gD5pImjhs=WHADk-+3&*CLI2Cnc3=?RP5w91*Iv$y zlI&*fHiz|GxPA3fe7&cA2{;NN3erNAoZHe5xa_ za^?_YEuLs}k$+Ze=}N|hxv7$R^sV+!RDYBHix2p2rkS!>^{P%}ZLH3XS>%=QY0pkO z{St5b;7!S~YFAEv2lyxxZs}tO_$Z6vw*&lEfnTjNxz6dh+}45S9B2s^h*$K@;lBan z-v;M~_}TIU`YVU#Boj^Gv}u=TZQzKr#Mo=Xyn`;PGb;knXd!j1045K#UkUA7^fCK< zX!!r?OdVo0fh{|i?}5z%W{<0$xyoOqv9;yKUkBOyli9I?JO+;-dtMz=w+!8Jv+-B# zZ1Ae)$aDCweBtfX<@u(uv~ahvMz$b}{NH)ZLtf?~c-@|3*~7YA{qVki@>;c;_3U}f zLn+Kd=;M9$>f>$9fm{!QZy#g5iZR*Ax^)vcdx~c}85hn{TueSV-(KcL=xlkbIl0>Z zzXz9ZhhA1q(mtw`>PP>Kjs{G~KwBrQ1g`knxsbK9@)*b0&7!XWu;c-Y7g(6L`%LCr z!O{uehmZx0V~kyX;Srk75uBUgPp*0BUS8Q1(9=qhePzhLdhnPbI_JCnZw~IV$K(Vu z6n(I_cW~|s*}%beHdD{^_yg8ZLohOzK8#EY(?HPp_{4XX{3){TYtkg z!&pm(1eo_k(|Ycki$0`s(->RR(;uY0Ezn(rHK68T(R-YJ&j&Yk`^L_5@vX`XW;yaH zvEL!n@66xSDcVeY=W+Q@!dal}TzfxL9evNGkJI_xvwv_)w&jdxB7Qv+a^|&DF8e8%nEquqJKyYwM$lK+bfb?M+LHxMN?z1~+X`~5sx7URZ1~Q1+49S7 z|4jB)QYQN{mrb9E{V55bJ5M?T>#sQt>tedNCpi4=*PMCMu2XH%Pw51*r~U?ez5Sd% zbrBb;w#aR^lyjKZ96^tH2%jNwK;-_IPy9h<$4c-zo3U%#J$7CL^FTIvU1Z~G$FBf= zhI{XBTDJeK$8MIaFXH=0sh=`757~J50RF}P(~caDyg#_hLJqgTKR7p$A3RAH&TF8Z z6!73du6vP@TI1xscW%CC-`Do+e|m7vkKo}(cw__p)w?S5!g=<5c$E5)9fh0dGw1)= ze3J{mKttxw2bXV<-t>^m)*HJ2rOTd&fRe}U$;?3&Nr zto&o&jpIvwLVt_*N%L1bw6o~#!MVRz8=21B6UWhE#;(E9&%&W+H!W-2xYNc_`F3Q1 z+RJt5KM@a&9pmR!6r;~EhpO$RwB7xyV8JJ<{~Dh4#b}E@=Y|HCU#;ixWX@~!a|~m{ zf8n`3;^4W0y;=>(#O@~tcX`e-mP#&(7Au~_{!x$J>B%eR*>e@RYI|aE`8>YA_B6HL65I)!H4)(<`Y|hJfZv)n_g~q zv!45|VaGI05C6ix$_F#R`gkJssou$4FQh$hId)+7v|Y*EnZmUJn@1zCNgr(->z`Lb zUNDO}u^(HLt>4u+GQ5@fKs>pc`QUtXz}3tLJDCsmG9O6q@jy$fk)3;y`JMc?@8Rjk zre44HF?NjL!j6OQCq1@Lz&<;sVw88D9C*Ki^)R$qPn*iMsnP#~^UuSEb|rY&$U4F1=_T+odcZs2 zw$JPeWE*4XU%Zs9qPgxqempbpG&JWHomhNTldK7$o zFvq!n%}?Ip9NE0cV`Pl!H@JTs9Yy8;PWeMqj5W#$#=bOc07gdlnT#H&z-@rKi8Uey1LH+rbAyv zHbUDu>w+~#5_TLbXzNf7$g|2{nB;9hx8&?;$@kOMQ%60?U!wot!&Zp=y^k~W-Y_zn zAL9JR;}zeWzuP;hsm+!Bvh@B}8B2Ry;aSb8jnF}ev1?4QNpqUd;F`w z{+hAoTxj9-oZq!@eQ|7B=drQDo@0+2V>dkZFlY6kBb}XtOo4W~fFYByt33-nk-T8F zXCQaEo{cQuyAhtJ{&RunN&0mz{d&U)77V`68i0NsqF)iNYU_=h-)CiFYa28&tAacm zg1_sA)5Fi07XeEH@LR~MBxF?Uo$x&0N7&1Gx_%F`S2l+;W$$HfxCwb=!%ja0yZU3n zJF@Tfpwnd|^V+!Xo#W7w&hWL+FEkgPuKq)dI2VmM-%r1++}~#_cC~Mo5+h23+h7sTzD#Q99iS>cXtl;_H zTbVP_VY=B<=~s@O_rVLawReuAPklpp;a+-309@@Sug&Lw91L^b>6}0b_7%^#ra)KO z9&*yzw$Te2&j#pf5xi|7i{my;;YRdT$#}{iVd0N>82nYBZ=Vhq_km;aeo+r|H2WfD zw=wD(vPP3JhyERTian+q-*&@^!MS^=I}ZB_#<@~54ZTURRqWLaSCaFtsg$*)2U}+h ze&rC}@$nDl`M>Ar3laCGEcW0n4%A@_P@X%Fv9#43cQg4BvrY%|H)y9diFuU%cH@Ka zlMmk3P0)o*;PBshc>!ZKM{p>D9&#Y zXl?8mc-wsb*jVF;UB7rT;!Zm)-<nGrsdAvgF{=XoPu)9Zm*b|Z2y4vyzzBo*M_J5VD zfu1D4u+`*9ey!sFE6#VeZd>W-*}Y@o|9jAhM1L<+KBRez^9R6zr@ok+uig&5GY$_f z_t7W2FX&0#*h}qtsJ{&xht4chS+&!4XmEM!^^P2luR&k?9cRbVN98Ga7C0N3C&kOc zf%GU7Ta(RWmpObFaR;O4kh`Fca`8G|q7JQN52~Mxt9-raUbY`7lJtPRX3VCpPTt#f z#p<`$6v8p%V8Gk)`u2WCeSY@?^8|zD`d)DC`ODxg(X!Ta+sG@SX94Pxog^pzAG{Z? 
zR!jo|t8?;e3yyk7=nT4(^I*{D3?L^Rx&R*~kU*aJ*e1LU<{9ekn#rdg|?Rr@k znEZc}|FZc6xsU6?S|>@j^`paj)b|o&$WIo?oH!v+lQuzqvO4hZ5iTBg^yB#V9@Zq# zlAWhcXS~pUO=+oAI}VzraYnz_Ta z*Y={*#QQ&T|KiYo6ZS4zmJ#KwuU*jmWUqT@~0I3~~)+=h(T*Kgt0|7CNwbHu>9$ ziTd&EPq)0!c$QP{j~hncD;wWOY%#XkrmJ7L(zt0)vEZ9dpFGsbywk*5q{$3NAv6IoT>Qvf5MOU!p^5Xi&zlF?RgZ#0L>g`eIfeFi`m<>rpRM#WqUg- zP_$OMK-;&9qGloM9&mE|OM_eFKeHCwy4hU&-4~p9H}S3kJ0Uc*IG?fn5x#FH7SuTd zTycL%#|O7KeLdB0$GSe+#yJJ6e?<-f^o_)Ce}vr}--c~})|kA?I{xd}qbE~mV!2nb zRrhmduYoUoG`}=wAb;jL*oys(=Y+ike6@hDM4!0g4yUnZJWd?>%d9JbYgadNQ?gL} zq8x2gs)NxzJhzA~h(DLjae!~4Vg)%L&X^$o zv(Xq_D_LJLsW_a1&HPQD#=+TJIG6K4L<@C{O*1mK{PV@(252Gsb>4Upo`m? z^BV4eACl0Y)yEU)?9f96c3MvXw6ugZ6tvm|j8!rESxLLH%gR@z>ndoe6L%;3?i$TmUUw#@yR3Fv7>l)piB2 ztB&l|*l}pb&sYWAjykW7-Hu>*Ci5Aw zsJ6eTZ@dxin@tYRSvpS`S@=cGZ(dtJVh-|EWEuJARwmRm1 z)vr0`FwYuT8%U2;?u)8(ivk+=0J@giMaT2$%oLy24FTU)PuJ8v6R5Y>_)>mlHhraS z@LF9#eHQsi)b_QEiQQ&$tevQ%vxrXp^j&lsF{b1%5?&ZH=~0RAJjBph#LxMC2Dmb6 zCNYoLa6`AkvF5inuHD#A_pftsS!tB#+w`S19%Gw*6#AlG+4kfArQgYam^cQ0+Vayz zyiA?y6@Bu3J@JM0UEO!~aUXI)m-3E|pGl>aQF1=df0^Pt_NnBtxae*_53v41Ta4~U# zf$tyOIx`j*=;6CYaw3!x7x)&o+PC@kPs9a&$qd%K13jd<&ba7Lrcv`H;sSTZ;sWC_ zfll1`85iZ*F@a8=&)Z`>;rRer0Ezi zwjaJNjM4o8=>8ydZ~Ka&`whs|Qs}-AnvIa3PBN+#ADPd>r^Pza$2yT1pgx0lYAcVn zWG7MliSX>uc(zUBqJ8m<>?h(MTZW$v-=XJap9vo`j@{VRDu{9JX3j#tvE!s9&pXjM zWe=2Xe}%rC&RQN>)`x9%W{meM7NpHyWPWb8_^F%tmWwE#7kjseciqWT@&nRuD;>E| ziLYJwlJBz=+AX7Q(W~YJ&lIFtN<+sW^-n0jh!oOP=mZ%loPClj4-qufXt)$U5Kw!UXg zv+D8D`kXV%|IsV;{txcS>COJA>q)cH+r;u(4$?0nc0u z84-!`7avMnZ*u0cDML4+K8L%wrR18 zn2|TA<8*sAWOJ8pA-ns-qBGV%iTgDsqc=53PD6K^!xH_=k_#G3*?%oJHjj(&ExL@k z709y4MMiiJyx5B@)Oy6f#LksgJp&x+T^?(>ym`i4+2i%T_tU}M(zkSPA@e=!9hqNZ zEXwawz7^JmlT5d-{pQOHvgyBYZO`LJ3<{YOa=5FPxP~5F_&t$HW>wQ(`=Js{PcKx~3Z?BO<^jk0^*FD7j28coSAdl>F z@QW!ppa#|kbb-6bUzua7V*PSoKMx;lxPf(x;M#!gC16hCOen9f&x@W& ze|Df3D4$;~dO{U?LVT^&h}}FwENnY`Z%xgfAe|xz9EuH#(`@fw$(>RKoC#%CU=y-& z7UOMuJptV%5!d+tF?Z(iQI+T7KW8S(nF))Kgg}C3Q=9~q3J8osGf7-R;!*?Fx+XxE zNn*8tRwC6&NYx1e>?n$b{!D;dZzhUX1!b(aBv3U-wE|x4axD|kI+M7u&#*YZ?{m&f zGDNWT{&oL2pR>K|^FG`AywCf*Iph~!RSXTIE9o{(?~A~K_2F~jUk%`RJ?%8~_SBAF z5i_iaGnBW}FPWb>8Ea%Fx*liO-_-BNnJM~Sf!2mS(W3j4>GBENq(;A+u}#`6#1q0c zDPvl98aRt=3=ZlmzGw!_WnDmY$5h5UN^=%$1u_TI@a=5Iw^Qs9(l@=|^e?J^W?ItQ zDr0RD9cS>kd4XFm;}u}ML|zsN%>kS4#6?qexIy=Alq+=K#obC9gzo3@oI)E0)4kw^ ztW#Tsw?ePdCV7tSCuyV~Md*v2a>tH}u5YY;pf~7zkTEI1ceV)M*|p>qoDqBpS{0u~ z=V{8s*q95?u7a;Lz!QY8h>gN#8a-tx^xcAd^G1E>=FleTS10#p(JflfMxM||@uL%7 zZUNUtKMa6NL2$_pAL!TR@d1d!@>E@(HGaYQf2GTd@1yRwgT23*b~UqxYm3?Cs9WqY zjoig=ryP5Hyx&e2xOfqLyc&FK71@qWdl_wOMc?Y7FD_svet<35^6T&wu0z)v&)MVU zlwb8zL%))KH-JY~8<4;FKzgBjeT;eD3coBs{}cV_T=wMke8~Coz`FhAa=X~81LoG7 zxBeq}Y9G9}kT%F376)TO9^LNb7r3H}30}qf2hAZ(JPmCL88qZ2-2nb&==??J8y_3| zrCt1mhMs}H#El7azSE9LH8lF~j7csyr_Zlwr%o@>aict=3zk9OmY+bkKSJ+SwTkQu zjkU~yb{l*#3}3aw5B_RZHs*le+0nIB!5z_u1(!uWaJG1gllg2V<7gB939J?B%4S_b z@G<)m(t;b=*tq4rgml?IC5?Q;2d5Jr7_(D8-f7r9bND88#=buy-)+qCz=inl&GP*M z#s~beA!`btE%6&*Puo%*Nm6qGJ2ZY}Pb{pS^;vj&4)HzTfQv7rw#I zrUaw>Lgrm%rfE}T-EXiw^KQRx>NVg^0evo_kFr0j?5pIdlsB1k^XwXPg7|WX*M15O zHUr0a_!Y?_@aOH(GFX-~tathbBr z#4=?67TPa-Xc_(l!XLNXZ<_zhsDH5TU%~%I`A=F&W0o|ur4yQ$w&DM&`$$U}EzvT> zm;Hy)G9-=g1t~+)N*XCc`qKvervNvBq3lH~`VbzE&r#K3?xYvr_qE`p4p(rli#-Sh zc8mU4cN6sPByZ>xRdeIc@O<;$Ddl~_t2=B@1jep+#_Z)_Q)jSLnpIe zL1Vvyh5ZUv_A8`tuB^$kH$(C1cH1@RcQ$2m%$6HQ7Z%+^_0#sNRgQ}(Mg&_0WjJ+bIE=;wT8ww#|X`g10Cje5J!RVG_1h-*Jw z2_OG+Yh_k;+kz*SaIVBw;!7th;jx^RB=i0u%)f(_KS>?ivK{-kcHXmVMCVr;Kb)>M z{$skOu_|P0{GaK;@$7|qSlzG}->pchg3ssAcK1GG_|(l*v#jIQF%vo0O5cweyhXM1 zZ6tc#BF+*ViM%L`!Lg=E9f7a5)t0Um%G^rQ2FO>lmGyPTQP!?%_NgO8Hmj% 
zt_YbbdX}WC;O<4|6CFTsT)xfWu8A!bp6rH>g5#sMBi3ny!v>xM;}su{_WGE{WY3p> zFl3U0a$|M-X?vl}9Yw$6S=uRbUB*CchGJJJMz5;61iB|5Yj|=d#}u*c$g`xgnUtv_ zCj#&h!TT=S7`TPCd&a`Tyxyh`n<(}w;+z0iY=C^TsEQ#&fMXDt>h;n;Y%p?$mTxJ2 zi`gQB6R<(irea`A?Ej56!Bb#ej7-q+RN5x<2yn{+o@w|NxQG*@uQ$idZ?-48r=Hrb zvnGn3D&yVE9C<0YH}kw9YuJM|tzZUoY<1X3t$wJEE3q-pr5`#U{Vn`n#w|KN)jT)L zc$6x>OtUgo+PedI#c-r24o4LE4vuuo`~*GF%~*urwzN3dvNp59gukVn*=r*PL~gUP zAxCYPA#^XcQa5`?f0K$|F1{xc_cRZiTsJ(RAa}@`KFY)YzAw0{HM5uJY@W+#2e>p< z_sve`d~onlYcj8tx&=4=@WxrHwj=((!hb1K&geSI|C5U=;Faa@%$e}cGI;1!#Mr-* zILc;y4^{SNV~$pk+qv3`9m9$pBgXT4SVtB^Guw?`E7C!!3$#l@TpSd8#LC* z8ndL2(PT=LCWWqrhPvO6T%wOHviunKpBK@~PPZ->TU(O-mb!fqUhQCfg`dpf-VMxc z;8!>H9u1qC?5kQUumYytqW8XS=#%>MP55`PE}g)-wCGtf7DC5jFM5l#Rs6p^`j0WX zUdE`A^2JV?k?36$WItVRY%ixdXUg`utmlBEm!R9+!`;i=OXiU({KU&YSsNw}c%jVO zC$YxX&KlHquAsS~Q0RUKb_Y#WQe-_te03yFdkz1EE&{Yk*YDr1ii-=vzIzp$qVx+L zqmll;6x?PL>k*()!7JW;%sfHr#pZoe6qQFu1p;a+w68^5+>%?!?#93DO zJ4s%-zXtxuvmRRjnC7s*-H+ZTeo&EZa;|wI_>=@*C4*lYc*dFLi;>GsCQmFT$t1?m z35;Y-rk|dHmFNz-yoHu(8OOo$HdnsWXW_+PvMy&CR=h}PTE_nfu9A5Ic*>gTK)!19D%b1IoW{OQmXt;}cRVQO-GsBDF zyVHf|0^847Zx!9qjb0qDJ4#={y^Ufs+%7t^=#`0vKMcWrDlL>-BlyqQwgIbV+FEqB z%;9I=C-P|wX)fb(vad~OZarz*p#clLa1Z$Y&V%kuXD>2$xN^xN%34aBBBr5rEf@cq z^LZS;UqBz*slS)~TjjK&mgfrE;ELjWAKzt;QHMTK%lE42_gu;vLA@=7>`%o$iQU52 zO8q~?t|oGJ`H`#i^R1b)-1i^sYizKT+$UpxHDi4ZW4)ZQ-e6udMf?t>56yY>5j(_j z(sgN%B(ZdQky}gODzTSmVDD%Bq}-y;sv?b=A@)gl zH@qnqdJ|pR0>4_yded9j0^RVOz*%x`K`QI*!#eh}_LY1nvTFZuhwpQ6Q^ra76t*It zz_6V4I@&x^+H%Ui!Bwr9q6bU)`uNj+S<@&`M_BFXq5AkDb9Ej--UQ+VG-6Yb^as(E zPgErooIvg?dYmQV25xJbE-{v@o`y=785upIne!_wg^|n9Th}P{-oh&i0`o*xTI-f@ zKG1d(emCHAitvE*Qr4VF25Y1d8LXL!TSFf0yl2)7Ydm|i?DMUG-_}})!-Vc8+X65t&9N2&Ph?&P4SdXc|N2;t58woTc z`?>>Pu|ER6ul=itZo64bS<=>}v9_K>mj8{lmDA9A?N<5%+~%Mkm9y5=YPQz>rfF$r z#AK~Ix@2kQN%Vy*+E786Eek5_F7U|#?FwC9$v5GLqEo%a_zFH0u&yt@5u#Jcx@j5w z2YxnnEpsx_nM9x3*nmC-J_!%}DP_oWrfS_0pXM3Tv?H(Noc1H63FiAaGg*bVo8a+g zczpsqpEKSUCn5V(PsDuI&JeNEj^JM;F}mWhJZ%nRVJtOH-nB{5=Nh>H;;YrzWqN8qbLZ!`5Znz0jSvL-ArZp7x` zhIe=?Mc-F^qTA^D#UTDsH?G8o%ojbVPP;!PD~t6-=Avo)fQKQ|(uo0*#yWonF+fIl zd`BgI9_waXNhAIP8azEVXB3=xjqfLjA=f~B!-nL94XvOj7x^^VbT<3h$=;4HGZ%{(h4dWPwK z%No(IOgxLH{)Y z;vm5A57w)mhUCj>x3VEXA2Nw+cdUi@fsd#seND4A|5e(UZ15AK-pr0+v_Z;S$X)U% zjneka{;m;leRJ+ z>u{OJ)O|d)H8X^bB%k$$X5t`Aoz}I|Zs46{v^!hQ-i5ax&nopj3EZ;HS#^2z(S4EV zw^?;_;cKm;Ukm+V1CjGHg$9LIPJp+)=sxe#?p}Ck5FZa2_p$WvIQ|hm^g$!-T$i>+ z_nSpujn8+8v5Mu(BhOzLvsfNeT%M1SbFsQUiOcgba!=&gXnd=P?@QY`=ajYwX;YB8 zPr&nqUSyAf)n#4NNBiq&Lp5>Ca=A>Zx9Nd!e z$W}7@WPU2=t1lzZ$9zA^KGSvJZTwhpmQZC9;~@B#t&Hw-kf(w4k3(ZWhqipCiPq*}m z9$RhJQs7_2h`bV>b_Klca(G-RysiX2wwSZo-n>I(MkZ|#Kf%GgPu8~s;EbQUWk^XV z4VYQKQ(RDSUfPTZGY~mPO+(ka^S|#a5I?KWl9h1aGE+g| zG_e;obbFSWWy^K#Scs7%=&m{Mq{1>_JWE^D9p`EvJjRkq1-bMT+{1T7{ z_3?eBPx`#L1Rm*vpYQ3ADsW5#p6S4K6!6VJW{f80N|HzLAcVX)j=Ye$llbr=E8dO8UuidIhW6$w z;j0yg@4~AczKgyr_O@my`#;J^ERcO$-SF(0_%6#lQr4Zju`g=-tEOc)V?V;~^x0*K z?->3*PV}4B>y-UVCb?Jpk(Vm-r6b(47|+k)Q<^er2Y$5s6UG$3+C`u79o>c8-}NcH z zMsQ&GRrVC@04{VwnTKfbCHX%GStN55%kt8Y^)}-Svr8rZXd?4X;*X9d{wVW~O~;_U z5cM|?!$xCOMzoqOeL>>gteMnIo3nOco1)!X-&Fc~5%c>)e^!Yl{)-nQ*x^jymstJj zjM*6U=uBk(&<=rfaz*(7UJ5m$;gG7sH1-w|G;4z0*W z9?3ou;GPyJQ9NTgJ6d8eA7os{>o7^}aGQ%?Ji0!`>*q{mDXUA(YLU2SFY~|Etk|V2 zftmCrthXnvJt7*e*B(yExT6VoBzVi=3lSQ zFt1$QzVNTp0-QD4#yQ3N9x$&I+n(Tq@adzp-D%E98HL|oJNAPh@qRM%|=(S(_-lgmmo>aqJx_~)yAGS(Yfii!b z`b4JpO0nH4^OMyzneqJ%(cj?s3i|Y<-lr`o1qHzJSK5rTJVAJPFhZPuGrSFWuBX5L zEJs*$f{z)WP#*Cp;$cTQIh0cjYy`f0$uoVex-tN7(sC6~B$7<*c_rN5bkj7-oEo4{ z?g`4&2>$od&qLX#j`8`7v4geLc4UT${jBx)=FGugUEsbCIDP~^NSw9KmDGVQvR2v$ 
ze(acwf6l=VrnxPlsqF{)cpf`g4^2f6^#kV_FLTC$!1p_fj(=OsqeGqF0iW1&ApAq(&Gl3er-`wdNZLU5 zHhcSbLQ|HgN?IFd8czjZ1!nFeoX6EPJ2Z)R9d_9Gul=hLzuD4O>;=De6#Z4%ufZ$k zNIZueiRS=q5?iM6zm0UVP74kGt{``sCkGp|LcHM|?938xxL|)t$gOC@sz!4j|%-seLtoyX^-$Z$A#Fv@U=%a z+BN*s;Nc$aEbxu|a`vzECvrdgHYTyJhgx=(+*rifB9;a_zj;Hu8JMF zqT2)4@$^;F^}|)QpAoLIJ}Yq51g_K*a3y*-#^742dVc*ixEk^L4Y-c(I4cgW)%pJ# zu3mxb_^-p&Pu+uI?Pr_^e+ykj8^Osm_=L!nPGWD$m<_@#1W%2aj}4WtzGtM-1TQ$JPC>VB zJJ2~@+21f-r-l3DXyJS%9O0Z1ofh&{&jnu_vo-n|3o%+)PuVeA@FSPUeghrMknuYI z>*H{`v9Yj*Q}vuS6?ghjfn6`B@#drwD+@7}+a_(bl;%8{{>vXHfpF1^N( zwbk$)@A|vte40M%#SN=-RL(@j{$coPrt9cZ@^w8gc zX-xloWDf6!O-#|}BSp+d^7Q#ghCUw&(I)g^v5#a#b>`6>@6I%0NL3T#%X_TEr~Sh5n9Mi)W3U%sd$iR%Ln{3DNIveG_w*2N@B{w;UH%XCMt1Q3&s@8?J|Nv5((RLU z!@QBd^KB2`J|TUdq#y1Le!>5P^8YMvWFP-O;p*f1f^O`wRxNc2CVy1FZb1ztG>SBCE-s~$ulwC+n2#}KF=>I z;qkeNp2%?Wq?*Fj=v8<8fi*aEE7qC9kqo|Po5IQ*;6E2%oP6HTr|h{&p7C>%JawGs zRKa;py%S2peUnPUk@5VW!2gTJczZA7TM=!0F4Nmv%KaadcNzbSOyS;^%YA*5D(!V6 zlkK%zRbOphvS)m5vZoq5QQs`u#@Q})pUYYxb(WH+g6lf2S+u2|HXdVa@8jPse7lkQ z_L9Gc{NT-8&L=7vs%~(gC-yc`?>Q=G>2S~E-okwxFsdS76W7CBE#xn;cnUZ(Dre`` z>1uQ32J`>xoZ;ZysJX*ZH_Um0^P!$E2|LTNNxsDWTJEoKucVyk$^R19E2eN@;T67M zUwM7a&58BS4wFyL>T=#p+HKr#C+!x}Z8Pa-V>qEXp^fi(Pp&FMwnXxwL1<%q=_Z?vUx)>ht{#-|R6uL!x z`cd$NQ_4(do(OO1B3_)#F-2CroHqp8AJwtD?5kDYk);N$Cny)=H*!p&{_Sz}j;`Qw zUZAX;0-YZ*r$Fc5H_k?*KRx^_CyurFic7!cz33ukkvQh3w7 z@Qn!fzreSpJ+b!x4gMb~6~DM-jT-kKN9Wg~Ql^yU}BgUT5gkc{T9PC=Td$+bY>3RrG!@I#L#Tv&)=Ww|%TK`y=Ka zl2>of*+zSMNP7smB6gg8G6zTRm%O4I<&ftH{X2?mjg%_B0QHH_l=w7;$ai916mA6r zee(~&jw3J_8ZDo+CnP;{e6jV2ZT};5XsIJ$8kMqyy*W0sl9DC%QOOr;7q+LplN8^p zr8jSuc6~(uXzjcbU*`RmtiV;o{WfcL0)x5e8oEq9U-2ED1bn~+ zo2pC^-w(l4NmB+ri=S5PzcX51BIBmF?Yse44ZtWvhta8RmvteDNuQ!s~_-TIhh zF;3|nC+0bPx0=VMT*kNy>}2e*mCXJH?a{{%Jk`q_Gf=0D1vWi{E}}FbYfz%M>Em>Z z!#9Q)zA|2G7_YClDXSlz=-HD`&@QQ4@UPXJUMKOis$%1Cn(~Dwd{dcHUWE3=mpPZc zBm!%96qZRbSmqA&D;|~~(+BB`4li)|7-J}FBu9az{tX^*I@oPze7{CBz$Tt%1lMA? z8KW1WAN3Q7Gccwj@JVTbz(VYWLGS_lM%_GMB6jRiz&I#$#dmba5rHZN{x+tr3%=gO z|9HBKl>sjLV&#Z^H-?)v=FEz*^DS8#`J~=(zQZTHUY@0I@-A&SQcC^_U#}C|o!xD+ z6qY3`61z66T^NNWZQBc95I6`7rElnk%;`s`umCtBLs3t`iNmBf=E%ZR()w{#Q__eV zl37|ROzQ1e!iT1#52+5z@y|9c_8mC zJRc{G*hwPr8Tl@C9;V+q9B0IE-ryPf_~?FS3b0W-dVxb3a1i_`V0}w)MA}-0Z_i=i zcVmop;$)PH?9t1P(v!h=gmidz?TvKA)33yLy!;s_JkGX4u+;M=caSK(X--G4#+XA3xwr568%a@GNB z@n3K;hZEb2-p*XZA6;~t0tfLLXlEg9mO91H$X0yzW*wfz#`yP-MdHA_`y6E?>o|qV z+0`$dTzoaUSp_;;Il9_Rbha|)`d2a6H~esIoP)fHIXSxi8P8=n5_i^{%*)GI3oKwQ zFay{gEmC~nG*54k7{d6MSuE@a8OlEWG}iG~;7@|iEBj4o zNZU!fDDPKw4_@lIBl@puDCZ!-e@RpS?68L`IoqRr2tHib;5W5Q9a_$NVM;1~S(5MZ zOFdV|^2r_#c&+4HHuIO2__Ne?61!7RxAs!cvRFFeC$9%)nhO56EUmO&A+eLKbt>P= z#D{0orJi!q1~_xu#@^SN=RT;fy-LjG`=ATmR|j0LMwW=rj>I~2Updo`9b>Zi2CscL zvZ{@ED(%c|vhk-1;luPkuy?aIGp6TuyK+ZE!nA@&jbHqh&o@E6RFhS1lcWxtd&k?PuzVT18X;ade` zS@+Wc{Lt~~S_3Z>+N1xkztq#0p@j8+tg#EuG_udXI|(}%aEYB+wEttqg8d^}6fW(| zmjW4zPiQK!NAWx)?aq(3oA&Rc?H|oD+Am{?j9D4LMl9{$BJ(@^E56Ck?4UUr8I#1k zV!+od1dj+KcaFjDkHaGr_BgD**Ss>%%vnsS4)4dA4)2@H$=Xd0do%Zg$@sSwfBIvP()mG5>OW`ZvmXq-N+!;!=rU?j+(|$e83J*F|=IMBNVN z{^&r4JrTN)bKJcX#ZG8RDO593DlG43_L{8~wFRTHWS_U4wJQs9W(z)$?Ni*VS7JNd zB>QgI+b!$#@#X(X%EvyDN%>XDhP`r&DYI|$#MP{=$$3_zJ6>CW&5-zH+Di?YnksXg zjFdv^44OF$?7UG~;-78G zTTB^!FHc+*?fuTxl%)oHPr~R-voQ7^p-SwqnjFPgSt*DdFrK^M5O& z@4D50uX6IxWeH*1oO3*Z6eVnHJjWBcDj^&xO$Y~y62b>762ks@d3O5k>n=6?cw`

73|9FH+d$>+uMZC$tTI)-fi-(@ox6?UZ;e^Hzs;}*Q*|4 zi}7ss_L}*AOM-V1WmxdvY$g_hmA0jD$vW4v`g2CMUXLGBYU|BQ;R7NYXR^oOEokq8 zT=oZCqg=mbj52>KI53>_@~xHqPoE*fH{g?xY@2;2@pguxV@`C=(}#{4%exnv zUNP?N%;!!dPkDBRvyjhtt6Bx1HP`4fV<-DaP zgpMLQn;s)te69}8 zw8uvM{%s29EH{kYfse6#YhawUMAn?CCl@-EcFLMg0N*QtHFkoqAAWQLcGMi~P2zKu zZDAiP?Uwwa&-Ji}S=I~ny^mD`<3riA<$KR?hi^6c1>Xhlb$sWHsH4;=^%d0L8wyfi zA@%iA-%Q3r_Ssb9Ut7ffE!jg+fqqs2?pC1J)iD+>#-fY4X9?q~f!FSj*$WO%_x{8Y zmUh&E*Dmn754^5H*2tb8=SRemrN1e_{%q37{f@Qj^{sQ1`4^IxGx0Jx6R&b;PTK9STmx}zBQ?d6)s z^+&GXay`VA3~np{rfr9krv|}&!DWe+CU~5|9_<)y9TMGEXp1?2JZl2$=G*9IxYrT8-13Tyy&vw(e*W<5cCq`edrfD?{mIA z5D!7cZWiNxKN1`OH_t@dx5EpBM*Yx643D~i&0zZWl1}(#Ja3RWfdyC>K|>O!TIfu8 zh7*4egVwQ;vByQ~K4#7=EUdq8fd7%FdQW8b&FF#H>-vQM9HWm%Xs__L!K z&S`psa{<9)4ITJj;coSd_Vv<3j*!cg-gjuYBfOS5c~9x( zVL$p_>9Z$S9bCxx^kL)T41@swUb4TwTKF{j;&1Qn_u=?eybp))7xXiH2ChIyy&PS& z6dkSvU9K3r&Kr7+V$o5v)%3nLWc?g;Me*Mho$K{S)s=pH5!>*GsyU6mc!0J$`}M`t zJ}F1^MTvo}DZl~#C-$aTA4bOYL6b8DhGV;o?o(O0SKaT2rtUGNwX`qzaGG0u>kj-~ z_n#A3x0!AY4UsC#+yUQeA`cbI&7jaX=G3EfAKAge`lVX5%>69hgam} zsIJ_WsoclRF7_U}@8_XuB9nRzy~Itw-N+1`PFT}DvtE+c@#%ErHF8#g-<)ZF;|4zh zQ939DAI6iGGxRElAh$N4Z{CS7)jN%8x;*{@IT%3>et{eeM&+Q!{7~p)ICLv(19JZo zIhZ|Hng6rgcQemoKW4&e&i#fTpEcnsdp2j;sKE7m$i8=>_x-{@Id5hwc*EXPeCDwi zBje_fW*0D%Z+`e{_Gn}sa!~qGWIC&_ftWaw&cXdlWMMXCn5g50^WM$ei7XucUB_d8 zuD@$@tet^lkxepg5qQ3z`O*k*LFN%b;A;ccR^jo$(+`iY!nW4~t{0F-bgo98-(ub+ zI5*y+pP$>em$v9KV(6$ANmm#ob|mw}LV0FjH< zjG-Q9{nv)imlqj`4QPJx0VBqY*cv6iPR=)s$8>OAtxQ%v6 zJ1MUSS=$ACQ{n%Q;hW%O&HRjVmUO~zzDdrWTx_)E3EGbA`da&{8CP&(?Yh{c+lBYicineh{IBSP#3JdTZ57m+%^2rM969!z7W1tQ zAA4+Q8|Of0yIxgSIy8y<$oas$zr%ZHvhm)?o|CuafAn4D{~Pi@iF5Vfa{@ajdPp;V zV}gq|bblG&Gqp{NYuj4dajvxOG;P{I9c`>J#oFZMJtz7uZThYJXO1TSrA=$)KksgA z-1>O#ci7v}JsLQ7T=D`0iP>QUGhj$mS ziho~A-RR}li%-uw$}Lcys;t46$97(Krt^66RH#TlNZ zoXs}A#yPFrlG5T(v)>PZ6VBTeyR&0OrSpzw7qC9yb3VJQ(s}1|3!D>`&8~A4dpvK@ z;BAtgb*gu@8JBq!dH4454vFlD3vrhPc==IK1 z;sJ~e>2WaNd&-ce>8?B_e1zvcE0RLh=P-U(;`2GK0hn7iR4+-{;Cf35yWZ#9oeeXb zdo|ia8hksQPg^{$H!ODSP+=E7nujT)ZAHuU>ZeJEpK0}TQpPg;gqL%z;Cf_w+Y-}; zwk75b=yZ%9<=}I8_poF;I?qUF682*UxB`y24k(-XR_VM}vAZtiS_l4J%J&jA)Xevq z)rwv4IA?z8$?Co82}?EK7MK>f9yM)tz6`E`bIgV9?hBV!x-WWu0k&hGz*EP&53dSC zbK%G&aQ*30_9R&PL?7{+J7)x$tFd++Rvv7e?mS1)?O8o$#Z!;26R7GmeD0ZF{Pkz% zl~ZZQy?Ltr-Y2A8i6Ph4gplj!Np{x~lU>K#duZF%#1op8e*9SR6;pq1T9Lk9Fq{0Y zhfMZ{1Z8-2iS@+2FPiKwlgWPXF6$zJfyR8o`J&>xcbCa7bSZu5{&aa|*>le>pzXf) z3!tST8x{m6DVrnN^kdxBXYR*bB`kWV^h4G#uN>5ur)i5V3NIb!<`;QHFA*4N7V1tV zw%oTU|Dr+Vub}*&mn3@HJl^SKJ0NvZp4rM&D5hn+qZLv2OB&J+V(f; zJnBnLRdkviOxrq6)R&(04_8hi5A0ILHBv@Kw2V2FF}%Ux5$r4K=MOvK5AVVs?uOTL zuBLK7W#m&vzG^QM+-Ozp-B&zV$y#-Nq(Zd|j#n4lc`H7zCmBPJsfSo0+f>%{jkT(u zUy6;awaZ?1F8kghopv|(d-61o^s$@g>UD`89ak0A({UGgicdTESm|7?`dkZ=gjOq^ z_kri-=K7gD*Q`^0tZR&{onrFA7wp!YhRWI{W@3kO$3NSJ@454s>T_C??XF|ydiDnR zoTD_mYoum(W^!%ixx`|3-pu#=Og^uaInio&;on_zrNv`Y*%!Oi7$)C)*Yzo^TyYs2AAe{HSY& z+;eNXX0);QLI2iFS)oPltFboA^q5zz&IK8 z7(9eM#L&0&p&fay^N^=!ow*-Ro!XDH2i0*ub-1JD2W-uiWzRs{QJ${-dY~?Yr~mu) zm%D-Ztmgg|fQg<=)F4r+=eHgYf>d%#TWbNtN zpztq=a=&pcyeUQBi{_;L@oV7O=VEt~c^^8CAHIO!&nm%pC%TL5y&K%$wO3-d#mE1f z?6!xRu+>U=`2YXvr1+#8I*`F%p~o=$r^_kOwan#nVS@hztfwa6YF z9zRa9yO1BXN01>=xlzOans?BfkRJiFX7AmSXm{SRtTOPDiS{Jg1M?C+>2nlMZ+4Qs z*6FTf{k}f3Bhfypi&)7nz71JXX&u(KptnW!W#p=!;2yqd(QoTj-{?lZ6;Y>#U!Ik8 zzgK-@kfp&^zTNR)Wo;tgcB{V3Vq!8xka5ljiDlHbAke4!l=Ozm@6!j_JK|4PSp!hR zW7|xg6T{VT5A6#66dj^kVkxC}{FCSEZSu^XS9#87-Ilc~zdSd|^XQHb<$0exkLmcE zJeQhy&g}Sr=SY#oeuDBNsa#oH8C?4`dv{J_WpI6>$F_g?q5!&M;4^f`Stg&%!vfDF 
z)JJZy2;4jp#{^y)7FixFO0a9!^8X-X+Gn;0_L+4U23r&CmU4GxFdrCJB%g~|>UDTlCEB%eRU&jVz6nI=O}D`BhOLT{F>)5`J95+&_mhh==q5aL_x~h>_h)o?sDi{@5d+%c^FxK|9%`O;zo$Y(@7%H$zu-PgNc>A8YfSdvmk=j~I)cP| znYqGUS+L>R1qDA`^LT0EU9Y6XY4V2o7H$&%i)%xImuiv*7SJd8S`I_;u&YGwqf=JUQ}A`-Dc$ z=J^P9mVtADJyrupOB3rOEmlKMD`Fp9499;T#qq_7_0`N*1;4g}6&+afl)#H* zP3||p5qqcRS^3WR*DcsSYopLYgvbW0} z9u|;$)$4ZK5blvb+MRN5{iD5I?nQsJ2lVf6+HFJS`73{x66Ic8}@+QU2oWJ z!?q2#@%v9H`c~*FEkmvahS;$$aWC38@n1Z(7huvTlC(7IT@b9 z9g(xf*E<)*Y&?27*D_ySLA-A0um)M_JdkX^PUPx!*qwp1&Y#)eEo+C_@(!O4?Be}= zM>tE~gvb8tKPq`y+f38b0zYDr_sjoTO8pb@-Fa{ck0sQ9k@E> zz0?s-kavNvyidgrDeu5JaENzwgcEo0F7TFj_B({+owhjTowgj3cY(jWuXlvMly}A< zaFF*^#4VC{84r1HK{u6m#>FY`E=TwyJw4+i??+kV(9^$Z4;LTI6?2)c_y@>3;C~k}%K*wSHKgM@|Hfipb^bcS=!47f>@As_Zojld+RF7+kYS-s) z-HDz{slzGdGyl4qaTZ=S12p8 zErD2UXR%f_g7veJ#Gy>UU$WoEG@vszogEV0=C|lIQg$orm^s7CZ z-Nf0U_*P24m`et^2VLkre_CGIeD!k+g6R1(;qL{@pIs1q342Eoa~XKP?N7^hxvzfi zK;Xi+54eYXaX@?rBkL)X`~moT4gA16v~5A~GGehxnXSMRyGirglsQZB75sF0rNqsT z5W9X8{|^h_-ms!_`-K}81nU%E^Dy%KU8xtI9RN;&6z?d0BU&@RDtIElGIE zHhY=ep*^?Up}B6kLu*h$D;TFr?4&Y| zk8*$RRGahQHQY$4%!iEFvTeyTyhUfRzlpgb{N;DKXC-^uwkbQ8(5K5U{c4pT-}tqR zbx}@SnsbdbCfa-%X&kXMW8>148EM4tYa(gdv}ie_mHoNQx%9fGoAtW3lBR$(O3FZ* z{`NSHawJWTl*2w{y*)$X>iVUTW-e(m;@fj>T$+_enmpP(j5H0g_Qd!7H6zUu%1I=R zpBUBMJe7~_5|v=w{* zkFO%D%6S(aFW&^$5=j$jiSoHM=)MmA-3Z=EnpYHiWV}j@1Mp#zYCrriSCh%^z|QP1 zwVoh`f<3a^WIz0qB>Ul1%^u0pbX_7a-sFpHY`@KSqE2 z^y8(3MbdV?y+4t;r$IZ!jq@Eon9RJzpCB-C<}Ej$)5Ijp2=u?`L}EPQ}JytSHi2~+x~Q!*ZH*O zbze){iS5$g_QK8??p@^}qyCCecV%{{bIDWFAKx2o!`yGzhWq2&zh?2il<5J?xs$*S&x`7YwR%#kHZw*G22R z@7vW`8eivqO1LP#&Zn=XpToTF2dMLbfjS?@hJ5c5Z~?shInU0AzFnQ;;_JkooEKl` zxbM-=v%K!5)VVaS&bidNB3dWU&PTspotF4IA63GM@pV3ZJ^dWvbw5F!PsG)^l{!~P z>*U$_)VHhi)0#LM#I_mWeSijM(a%(`dmVMI8>rJ=UhE?tjqh>nS@(8C>*m?{+`p)M ztb{3{m?yZLU%lQ>@4qMC*XcbGdQY_9 zV@mQkZ-1jd?e9q2Fc&&}D~>nZo@8$|CE18=Z`@+(9IP>BiC2S$S(BtTo_r4$P zC(q8m{fmAM;&YG1_w#Q`_>uU2K7BLen(1}_gMR*Fpr7uo#lGf|SJ`W1ooK7-yN9*o zC%BvsFh`E^x%*Go?+23Yt$E4JXGqWG{Kr80`KL?&57IB8j=jmAX0BYdctgI$v4K5( z8&p-@@Hn=Ddx`s-A#}uZHSvGtoSbYWqpuU5uC%E;A3>(b`&xYRy5Ya9{~`ypjvM0p zNf}OJA-T!#T zgPwVob*674YbMf`ep$mB)J5@aqg;uvc(%(LYzNM4Tb`|8~$Ub@e=}8Vk;9JM|_Iq z{uigLpNj9**V7YQi~Z?le4P`}krL6BIP+j}GUxEAp49isyy9c+z?Vqoo=;x%NabkO z#bvEc%KLGfvNAi3bqV&aEX3v~Y4Nw4&Dk1u8*zhc@duK9XA(2Gi2c`d@b9moee%ur z!egs7_F`wNqwCh<%Ov|84x&HYK-rJ|vy3>!iYGnS*aLVtS;2>UOy64cVPLRJ=DVUF ziET-2Uw7BC?t>q=CVL8oD`BH8WBM9sH|w{az3bVV+ho{)#RttcSy?H*lHHt{E&7JS z-uKsmk?e0(8YEq6$1S`EG92Lux&!-4!XKF6W6V)#bGQs2Cv+*kamEtuevEcYdmdX? 
[Non-human-readable encoded patch data omitted: this span is a base85-style binary/delta blob (apparently part of the imported simde header payload of this patch series) and carries no recoverable text.]
zSKt4o6~B18{0paFcUFvL8EZ|vx|>hZ@PclQx1DE-mle-xMfNFv>x;zNmPTPS>lXHp zvt7WufZBBSk9~cT|B8LZ9?)#Xk)ArQgdHDy53r@Vte99kK6WK?<6^flo@jM4y zl3#*Lam1}oWGlXof#-j{{~{)Au=BsqTkAkLK91Xzu5xr)wP(tzPa;#+XnfFbBDA1< z5jta_+{EqRDtVB&$meNSagp$mePvz5QaKY(pqZaV`yPnb@?8_jgn5$roJ!j-#2oP+$sO_zFUV6-gh{rZ(_A(+4|i}>N~%G-|kcNJsOx!HtxnD zF@3XF8Eo9d^UQKPlHx1=`-&d?aa}nVK3P|}hn&>Jwv)$%GnwwL4055IEPis&bm^9f zp}WZkg>Nf2ygxEE#9ZvT)Ox-|&oA}nx(`RDzOLu_*7Hg|&-dnLk;mYlr=Sx%u2eA} zZs_)LVv4f;D`(4Q_6+pR8RWn8#hBv9vtvvfH~!CJOqtu0#)zIO#uQzCjdYJ1`a2np zA}$wa@P=Wx7>@nIjUB^-JtGdARy@9X?2-7NYS<6k=kPih*QO0dTX~$fNj`<~_w}yz zkM5pU1|Qbl53Bfa*X_oe;*5x%PG{75P`%oO<2(5~Ci-J51|~->~{pPBZ*sYm#^_ zn~e5v4dAB{-1rz{8ROjy?v|SP8`sr>JWw1%1MpXybC?(k2|u4lrVFpasa;Qax9gRI zBjv3;S=(we139h!Pt}gfoNB!Cd7Ug5F>A{7kBW|@3n{)w^eH=K8nIy=jBOx%1L@w* zAJ5p@)<7FQup+Mb?tH7!6*)9k!fCNUHuh9>*`zi3`FN@;~nVSrf=C$Ozb(Th*6d%AFt3_TYB_iE7|vCs$Jbl@Z7 zP7rWt`{d&Bjon+3f=`Yd*67V$Z%2J{A30xYdg`hD@`=e#dM7@yO?RZ!TkCy%=z01H z&_}0hYM)Q+$1ex!UVdqn=@XMrr|WHR)7k19I9kwy(_GvO^z$d{;^=eXH&g0;_?{Y= z*S!P!Or%fc;CWeoxbM)Xcrd&$*w^Ppp{A%GZYntd$G=!#w;4a2`rJ#Ov}^g&qGO1s zJ@Fm+rUI^{e(&OVm+XqeHy2Ngo&03tvE%WB#ewtq&@1r84|yh9J_`PQ__RDO%cr#y zo?+$OnLr)!^@Pv??6vYqeZ)HY9(-G|k4I@wKCyZGBb&bh9G3rdd}t5PR7dfEilI|% zp!j`k8%t?Je!0Wu`-IR@=1=V%yuGmLt$#%}D>l!9!Gc@u+2=z9$Eo)#76I8}S6;=6;{e*T0E*BULe@-FSJK=%pp@nTD!Nh}%u0`ttT zd%bc%DPJggv*ADY@LRV02I$q~f!Gz2ki$LTJOe(q!6`nL6dDS>$WD^=^4dBN?KvFY zjn)~|pI{qF>xPH6O?~p`6)gkS*SdW?Ub>3T6t7{{^xR2eonLbiCs^;-mWKJb~4`LbdBBB z;&krP9(a#)EVQgROwE7)b48L}O}}s^$bU32*O8SE#%Z3%Y(PiSnc#fuE1*Ed;;t+#gr(b2`jtZhlL@?m*@Vm)VB9*2d*5=Hom*nI7-P zB9F20YUX-!biQXY-=&J(WKWdMZ_R%`{W%!FbjYtU#zTznYQFv8xgENc&JiAK^xR3` z8oTtQ+kfF)^Ei9DcIdf5x*)XO$b9@S>iL-aGVGw`*g;ia_SjD99|C7id{XNBPVi~% zTM|MGX-n;BF4d0a=RDfcJ&3x)@nd*J7d+#kkvr>Vz8GKs5V;R#ki^2C#>eyscS1f)jv@8^3H@GhVO;$u&9cLDe}j2?H(cXGqn+RJuwSKrDD`tC*iuMl^s zY`-A4>{mP|N4m}{dBNS=JTH5UXXIG7pY7(^vPD<)EJFtiOe8O-;eX8S=nJ* zJ=jHDd7Z1CMlEJmcp z$TfzN|GJ1X`s|Nu>ONmv#+kQre9MuohVpZnc7v7R8k_m-Zq6HC1U;@t_Pg5$wHxfp zy6?(2J>NZUH^}lB)7N!hk8_3z|~+A=D+);H1HT*p0nuifFzt~f{U z$zKN_t8k^n=Qt+B&$z$r<;BYBmHn%FkHhG}rqQfAZgMpc+t<9Ax!H=nQEON_ZP?|` zagvk4l3n9MHMD(g6ZcF|rwcvH9;fX&QtU->jSnxUICgL7Ws z;S%Pfod4KQdx%Ngr2W?LW!B#5P1=^dRrSY4>)$~A*tKBiV2jPGuxBgz`S;TQR&Q?W z!N}BRV7un&&y!BJzc>Ib9fXz+K}(0BCB`9hA45@qVR)+|5ukia2sB4-WRC z7mml4Oz1Y(So!}7y3PBcoS|2*kE~txEk{oPdV7L7eVjS*Eq3&jZ-@_;f5&0M(=Z1b zzQfVG^&Df#>)^g@Q9M4^L~|bBYjm!0g6pZ+lXX{%^Uwot`o2@pQ?aM8`61>xog7Zu zBMf1_d^~e=Hp{oi*{plM!kI=eku`9LHK%bmEStNqJ>KZ`EgBj0J!|#d@-_MnIeK+2 zHUrwfmV0>Neei$IM$)HexPSII^tz;W_^WeV?Vre7_|_hB+@cGsoPGb;^UMSCqwlVU zk3NB(=+|L5YKfx>zIU0WKj^Lx--@_q-GyMT)6qpe+1TYI+Z?72I5rfi8P)GT1pB_n zZ_vl-?DOPVZj02M!Ti8$u&MT&AI>&JB5_UO8fS29h7mk4$LMJ+a%XIkqS^^De=&Y`d2jm-uHML%j(wt)i3hx^+bN^?A{X>92YNZc z9-xOc8=ha()A{n1&BjPit#ythDYOC{hCeTQQ_m-HKJUYqhc#EwD2JX_G?xx+tu0cQzy4@LMUqt{7} zgwo5cHDTt5$JV1UrfN;_F8uQ%;<-Idh2$$7?}BDs&K%KeGW5zBh1tKPoMevT^*o&K z4xhWg%(+ga-E$-6GAJN&xESBcAS*0Zc@vT7aZ6YWLLU}!jTdhfIMd!M~J`Yf`W zv#0P`#;ll4!E)s1;FC5!hL=h1N;eLJiw^4Vq1^l6R`NF;IjnNnpQm)w&v^Fu!bLJ` z<6dkTEzG6C82pjQrb1v{0qn927E@afcpr zEhcxVood#&#&xIbbM{TFC+d@9s0Uj+dE{v~(-AyxxAMqG?~k2)MN=2gGhVEX$t6EN zv>`&i?S=w!XLIfWop%O!L$2kz&p^h3Pxl$LM;m4?d2EcAGWPa!!38iPKPltwkscFf zkEcA*+FwN;;2Zi%LjI(sz{Ba+0bNcMPJnY@UHk!kcuR@FTLZp|D*C&Dae~9XyJ7DH zX2I0@S!9~*GwSak{jEhF9++e0#HnJxwI1F6adQ1@Z46Nk0oG0!JlcAiPh)`wp3}Sf z89g`iKYY`i=3?aUfnUz((S0a?LynCzR?e1Q6Xu!E>E7jDHSa;nj2C`^J=IW89qY-PWjbV<}gKJw|h=GQ`CO z*KB8vZ*Z`0vhwOkrh5e+eFpZh4~goV!hb`NtBtuL&aPyOV13zD(mfE?MxLDt*8Sh; z59`aW`Q42*^S2CyHB31hR+Z7&;aC`x1P^^_uVBMChG!k~SqISe&)y^NzI6`dN3rKX 
z@cmeM_iN5LZSGq8?snz8TC`HSl=ghG9XmtyH~-J?3HTX&!TnQs+Bb@tKSM^Sd^&a_gMG}mfl)SF_VvVWvd-K_kDL9d`(a1-R`z6B zndF)vwj<*v^5#On;<2fe(^+uA5(XAOdH<>lx92$uR^}Pp9VOqlsi)#g94x-&*zLb> zlwQ@Bzmaxy);6F0Rc!voJJ2(>(~q6M@jLwYfd}Pp{58)zGhgFOgtD+uU;< zKhsFeZwGvKikESK7s;aH^X4uLqYG(%+$BZL@TSU%_`!Y1p$g=h=1jVkn>z4)?t?XY zv)shGuy0dNcOPZUan5!rez%{#Bs*Us`ci{)Y%M%{&C-7Q67ikz`bFSA8pGO;X1B>U zYwAD8Z4Jk6C)Z5o@67Wj((AI|nQ`#Ncwo%Vjt6%RGY6aY`M_u9U{fy6I@Yhotyn7u z{dMwt8h$$M_oW}H42MX~_yV1ubZ|dU3uV)d^uOw3-#c|P`z75w^|xoSb+}w@vHmFZ zs~9|=)t#R}zCq|Q3w{gka>Fju7qv?#o{e0$!H+iU!JtWDOKGxRmu{x8SpYdf$@ zCVzfhPWZvYU~-}z8nokpN1HgjQF>%Q9Ii)B+O{?MY(#Idy#4duUVK1SJBm;F7h`UI zmN?+o9qer#_}1ZFo=sdXF~Np^WnT8h-sPU}8q3ofTwBt*joj?7@ZL*&?6-}c3COf{E^I*9-L&Sr zCL2wOtOe-=KZc&xXd)udfcCA z>O6Na4%t?v@BI;br|+C1E1y01tRVNxCFnf9)aV*TZt0nHo>X)m-?`s3%Q{tdX|!xt zv@HFi2L+}3Z^I6v{63~GG~C-HJ!tdK*niNjViU~g@J?GtdS35nM|;tGhcSjq_MFI` zlf~jN59XW=%(X21yTP$^sAa&d*aYDVSx9`Ir9=7Q5z?VJFSs@vTu6sfT+N;2)Wi?; zm~48R7jt(Jc9X4pjpj|n>~G}`@MYAI9mVZ9-0emm3Q%XD4dp||p>v33^i9lmpS--C zeys8IzXNeF_>HoJ;4JLmWp@?!`1XwLYvV@RD7%qo6OTW;PS198PxJOsedS8@Y&*{u z9e?(9Ju4g;j9-A=^8wyX9MM<4Snr%Xi!1Eg&*=<}#h(&tuLG7fQT}9>D|GkO%~f5U z?YiXnUNE-q%-Fgr=hXRC%Vw3My2vNrqT_g)#It4KRW^l9rcEIsluH|uwZ8$kUlfN4 zfAuz{C9o%RChSVP!q}I|de^*f6rH{0ZPJ}I%b+v!eSxtr3mu5FNpF&$d|w0n5qrwM z^?bYiR!&H_m%PfzJK3rm)E2r{9c}6Vdg@kepg&>=TBwulaPP`K(+FnoyB~X-r`E^4 zfo1gVWjv3ZWDJ9y+jC>rD#oU1()2gHO&<8cL$otvD6)p%anftug30JI;HTmZicu1- zmeS8b_WbW*BbO|H0zCV%Ed*pEVr?YB3zCsPhAXMILw=L!{F-GpSo6^W-`hqz((ea5 z^ES|iX_S2>xO6`BHSsIKCHfvvm}dXvt8UcvmEv1r)ulP+9c zHys}K1TX|*ZxxO*&kvSEw2;;lUc#K_z8|$EQO4~W)h7O7u&)vRKc%15C($GA^SBY0le0SxbU`O_ z+lAa7g4`a;Tp+XWaWVKX&^_2|oA^k* zKdF8^_$YIw)*i-xe|;1mDw7q1kGFtV_)tGb&<_I4(R+RSxN)I3c&~h@t8a8<_uhLI zEjI95F{25U+Z8j) zeENV#HkWZ#jF)o=vJw84ao9G(<1v*9q5Hv=&S@qPFZzn+n|f>6Lr6X=PAHCdM$NW7 zN744Y^@S^m<*&}$yvNizWP=GR{xF^Skk7tF_6_hLKa81^lKs!mY5(8gNbvT-g+1v# zXpwu>O6JmjGV^2iopOV_{%-m{8tvOl-v#vDLEr844d0Z$6J$;}r%CK4eU}1*`Ytg0 zj^=Tq??AL~#T_=%zv4yH(e0XOKTh~zeF!G~j;o3z=UQCt#e6(5W48O%Sk<>^#sJSp zZsbljCO&XA^DCTI!(*1QJ}tR@e7~Zci*{U3#O1&@+W&52$vkr% zf{U|(PwTRbK7HV=n6rfHSNDc=BI`{2_@N2!_Ph-CnCa{>jdtX32fA7ZcdkK`*^`W> ze2>u+hfEE99@#YFJa5w@$k6X#%lZ>I((mVyr|0wk7yQ4C-{ZBM6P+kRoL0p>B*ei4?hh5dR5xD!w6VXU&B=8G32 zzjU6!P2cl}69>ZF!S91wKSz;YW*P9Ke$J#H#S0Czkwl_45;uD>*+_mtAD@5+`Mt%< zBu_LaTBnV)PmZXb1L&mz;oFg1tGN~b8Z53rd)8CgiR`_Qo4EZuY44R$?%KDpBPL@< z#E1EqWEguQGj6M$eWdnzjZRODU2$z{TxM(CFSPE^XNi}6>{j+~*&|2PX0sQ(mFJ7$ zPgM?2SDoXbI?uKt#0{i&`IztV)lT!gh0irScrW>*l*6WxJ)zH4X!*4-qD|zk&Leu! 
zna>4(RW4_4Stiemjz7=fd6|nd8ZN7x@VpE=UI}s6)?TcqJA7>Lz1YeP>=(qVW-`az z|9Iy_aDe@j*lTO-h1S?bH%&R-rmW}~;~9teZWZy%?4e6iqGQQoEV<@blDo2ao)sNS zI%CO-8OuTX&1Wp>j3p~(ET8f`ow4Lc$Fi6IQ~HhN$VtcY^_a0pM_R^M9%n56s4cf) zR`1$%QGQ!B(`eQ{JAfRk$}x18OHU4bZPrk4aPbA$1-XMsJaGzpAjLo@W*BX6K^IBT zMK}D^NBKZM`|=a19oUzRA6WL~2kFb+Ps_e6JB|-K!C*S6ZOfD0L^{~1>?XF})wsTY ze*CcbJ^E4-yjFTx;D;p(byh|G<<00|re0b&%GARK>ZR<{`}ETDk-y55E?t5#_UU2C zbK&9ea;=9is+X=IujI+}(q+t*^f2`^SYG^Fc`rF&<;6eoOpd98K~GxWC+B&V4%PsV zvf^qTp)KUcm)_IPvw?Cd)<1g-u-m$qS&nsJ@5!$Kwt(;he7Xa#eLx=lNZ(na!DYGX z93scQ^5*6bI78&b|CAe}4^BfLOhz9xi0|{pz;s+6ObqoPQ*K~h?gFOAxrKEJJ;L98bGdldurjg`h6=heiv<5 z@P9JDcQWpI{NBMD(A}-A@L$={l?Oojr{K`o1jjUV!}o}>Xn}v(de12O(=+KkeP?A3 zN2aWzp7go|Vzj0*<_6B)xF~DeRpgh;#Yd_3)%GRiJ3ZAHwFhX4j&UEk@BVL%5xKBi z^IvPuU%VNAR>A)fz?4%pqQ3I}5%rQ=4cxD?zF>P^HTsqxeG5Iy)VC@b8+q=`IMegs zp$}hZG_6B_8p-eH*{gqzd3fP6Z_{YxY&-KXdWg}Z-|65z$p0@R`}Q#pKjJfj-;Yvv zD<9o|Be;aqPAB>Ix*}7A&ne(DF^bP$f(IL)^P=+=J5L?tZ?5P0K$}|@`4c5$=vDK!@6kPjqGldL(v)SsC8tRna?Tu=ghH1jbq1 zJ5U9W4KqKKYo5$Ux&O-fFI;9XxiS}BbV)mOLrm7}o#-H?l=t($G{sor<$DJ)Kb}d- zV<#K<*wCBkb&9<#C;nV?kxjq459P2cP-mCf87))5-IN3G3mZ(gj1*g=GA#ShN96W+vpWLMXwY919kn7_kVQUTtpD6jn|`p}eB z=meeE+z*hu0-h@0@$B#5I6~OHodHFp#{EkaBn&Uj)+{t&IHD|_|XI!+{F`oPS zQpe12cyjQmFAvvzv&rW7I~vfDOrL3Dm+0tH@lsc!?B?_3bQZPJ7T;GBKlg#WTn;Cv?}5=AZoz zeN^psG*`XhX!h-94V1E-@4l|;{tQ-?}iS|T$q01 z{!0G%x!G?T@hlF;<$;!VV0$vxI`>?OKjc5Hd!Vi{?}pTP|8^Gi$auqX#6f@)-5*$u z9YgQlh`w|4ZYq0}!yi64dkSNn#5hg-QFcAL@8UPOyB%3*((#|@n`f&38@s;S*ffRu zho~?6a>7VQuHL_5zaM`5E=eyuLGdl{w|%dp8+t~laV> zBK61h)n5#*XM*ce=C2x@nD9^HzCz?@1@E$XmoHw;JXA0b*>kz`7hW9?eildPaXa{p zW8Ts=9&n;>jd98dYiw1(COU%0Z5Ga&c|X`ZmZuvff93s)ykAUTvo-@8^WMdsVCT`- zI_7&jGGhdNWl=_b-U=P)`ATy5l(Kg^NW7b3rS3wW9r^f?*+)Kk?v91eKfF8d`RDiN zUGv8NUq8He|CS3L*}dTT=k`CD^XL7^&wsrCA0I!m`?^n`-=BN+8~dMnIK02^;lS=+ z-TK`AI_{Cbp7*bLjDO!-G$}ZdJ`LtJmOj&oXStbq5KQL-%T{1n1T5>IMK3w$_8yA# z-AU>se~<1UeM#eUc$=U5`H#mbj0W6V6$UryqUFBfDO&QCslJU9*l2fliHcd_;#JMGquuTjHv{!Qr6L$m9?ok z&?r26Up2Ba2O6Ep*h(Eql@9)AYu)ukESY}`Jc2uY^emfa8sB8<-kM^xt;;a>HIg%Z zGWE|t5;@~LmA78J(KD*H!IAcsa5ds+Fs7E@fwBx!$Fk?{W?xml#Lq{)oj><{C?sJ9rIkX$?rWpK1?> zuPkS))3~FPvQrY*r3OA9KHUfJkdCy9`7vX~z`53*WTE(11?$I+EKHLu6u(DyvL9`Z z8)n73c&cygM(%EMZ~Nw!tZJiq8Exw>IPK3G(LE|C?;h%|b>k<>e$fbS;(d{})UA2N(HCRg|_p6Q9N^+H2} z;hVsq?=#Le)(Eal<~AQV8h~R2?P2fZUPyFCXf&Sxt%8d=u;o?eu1HA%a7%YM$N6n!fW>?vdu%MYPd0k-3Ma z+F8p!#vZr+il)V^!;{@_to3cL+XiR@8}lX`?nU5Fyi$97ecLj3JI_mXh8$ax`hE2R z!@N7t!~rxAVlP1eZ_+RL)?Ak(yASjKbY~LKeg9&|XorbQl`ZeUqXpgU2{zezeSM5C z?GcAbcWH;1W7$(jLEG7mB|}nfT%oH(qjlEW&SWTjt#7 z9v{p5j3u1eKgQhq&pB3%xp4P0?(MwPJg+pnGs!yVumRcJIo#WGkz#L&F~{yanzKgQ z_w_W<_C@d&8^=2wjjlS~{z-q@yd!8l^lm7pUG011b-gx#a!;I0F@I5}EVfT2Wb^niV{-Mb?<>&i0 z`=S6ocHd}YQx|)KO5m)7ek!6o(GPvb!3#^LT6dtA-Ud%(tc!W(rOnditZUt2`VZ`L zd_Tl^O+9ClvE=fUt5$5IY<@Gcl5dp{^PbrHN_5>4e69(0+a~XceNQfc#k{X_82div zer&U^2jLl<5iR{4u;}|k3uoyu_12>&FQndiJX;(OztA{(EZ%0slq(0;*_6|>(#`Nl z>^vHqmv7C*C-nKS!0OIjJ@!>r*) zf_zvz7a;rJOR@5HjEeFFeSaC=9^tw3Y|hv^Ms^v@dzIE0-_;&+ZBCW{&mDSVcVumg zVNCNP7i_$(2fnj`TmOYSjjxopGN`LOd>M>qBA?A~a~>s%YlqH0QYZfw?ypT_d_mS2 z_vW@!NB5z6S$C@_D;+eOv4}5)SzoIdONQs#rp%vN@LZV+Uq3W_Iyf6+x1E73@SVrn z0hWC5<8iF68xrq5Dp`5}c%;i!Oq^rh-ReYN!>-+vcwP1uU!Ao^e04_i)?vnyTFU!I zU4D<-k@8iall>m>xGyp`*>~ZxX8PvH!j|;d`8@p&Di~S@%h_7a8 z&qy13=Q}qF-*V_jV}8W3q=WA`bRq=9rZ`KN$}K&_UBx{AGHWYNcx7L)o;Kskq3KD+ z=EcbWIL2eXyUx19w-kGZgZ)@MWhS3(Y*yRtnfP*t7#EgNSF~Qq|E=6%qPB>qGxZFu z?}mxMBADQJs;9o=fvHOS5Xz1Ku3ATYZDmL4Jynhgl?{}ewffo5@Es+!(F+cik~7zSwvcv0_eR$;ag9t7KfI z&BhY>K{%tBGb?jfU6o_}UI#GQyj{AMP|f?duy>0dvRa}z-Up7i z9>-T!%wBsN`s@j~=e~J+oWz}w8{BF;1DLnc_5tLoWLg$`fK+PI=02{9R7Gl%*w(%xD;(?E3!$&;u5yf$~fCI&O 
zZUYAz`&7oRw)XN)a8%*1YTsh%ap(Tnx#qXMmfj;=6i|M36c??p(jPRIhTJTFRpk>* z-u(VothhjxMR!~q)t6p~)t9!h9#vlc-*>~&<10T!D-XkGcxn*wp#r*L#rD=@luZv%ub4fA6I>_Ib{Tl_ zfoJJy>#0xNVC4q+U!(Yr<>w@D*%Wxm;_rc%jOKg6k;<=R&#UrNnV;D5gYo|o_(yCT zDl>T_bZyFrrbx9&$<0_nJnl?-IUa`);AV*tVXDwx#E% zvOC)6By2nOEFY=#Z_zXbjC$H4fpg_h3r$WA>XjW6Tg`E zS}XdVaSOKjTa4yJc;2eL-rSj!yt%cq;bYS;ODSIAU@jj*-lwtuXh5dtF#fn(T)DO8 zvv@OJX9PTN9kRy(pN^|=<;K(2ENsfRB6A;hocUG!Qp@%17wC@x>PZJv|I#Vrc~-?9 z$j=(Rjdr#oH$NS_?)t5?_Xhj&KRQOluLG9#+5@1I{4{0$3fbONcQUlFxZIUn$@=}@ zl)Za=RP~+zf6gSp%p^fT5J7P=Aay2DTa^&fuALdgHVM!c8gzBnPC~up0^N<8r*_m>2)wF|A;)+V908&I**Zmr$zl4#vch}c5C#n#U6`TCqWVTeKZ z`^P-aoO3?c_xOZCqs%?$W%m0Je$q}2_0(M(9^DUw8i?Cn`m2db>U!-|KJv(YK|) zWUmgdC$2#MZ*o5Ju+{?T-b!d~SZ&kJ5NnoU`Wntn%l0|YBJ+sQug>{X-S5MDaoQRo zAETK2ojK{YN}RR=*V^@s*lcRk2V94hw-Kg3e5bZ0!(82<=x4+-w*`p}|*`Fu~+pZ#ETbMj5# zn1-p=*(1Q(_h#xKM*3{i#yZ+aKOd2N0QY{)0~*Sw?YC$VfpR=OIKYTC$PoV4I zuhuYjNDFJJi0!ZIr~g@uv-1Y@)HJI|>wJCZM*E%N3-%COOL}gwdaf4yz^m3ovRU-M z0Gm#8xD=bEigv1@U*BWPOS;JM@dnFFysSgakGjbDwZmD~TKO16dmYf0%6m60@9Cud zPCvBEd?d$NV-^cnnLe&dv9r9TC~+z|xOg@g<_06pLXTOADaG z@I5m=&G*KTH`vxE*sFoBYvfM!ejdIqa%-eZjLrkDqL~rqE_=LyxK8P{Gp_N{MhF>k zjo_3q&YCF~7H7h1+f-}E3F?+ow~V?W>gtS^*7Muv`!-!Xi|@V`l+sRqwpC2*oA{Vj z+zmeCZO%H=z+*q(5Qe_9=Pln{1)N)mP1eq^io6S~^ZiqZX&`H^W3A}T!>7f12YabP z^wf#nB$_I})|w^$NU#r5c0)C^e;4BprQy)!w@W&=5c9Lo9{U46d`ir%6C7yVtO1tz zpgZYNpJZw}rsW5g-L%m~8zaC{Yp$5+40wj|O@@J`%cG+C0&rv249vyE>)@es?7MR1 zCS+Y$Tmub+CFALbb4`m6d#s{PkGEKT=JKqRXL>JN=LCI}(}#X5!E-ryt_9Bp%*AVa zi-&KI_H=(D+Orp0CdQTHwf+2Zazt}_=k}%VpmyF4&54(4STiDfFR(B0XW~-d8oWHu z+R^b3PE1oWx#1#*K1{uh&f1`}1$he$#K)bMRn&y-*L5enWP2~_q^{Z9@?7fRb&R2t zIcfik+PU?j*ybeg;oR5*zD4kr!6(a*Cx400Rq|gsu!FzBc03P$d5HBh|Go4bKt35+ z4_`*%rvP%N68lcrY+vfKi9|q z2y&s3D|udPMwD-Tl~uooc>1%9&Csjn4Ll@=b=6$GdbUyV)L2OlxLpZu3sWB~HnI|! ztRNOAxgk3tXPsj!gu$=sqFW9=04)`vuXn&RUfMmd+3Km{nR5CLF^?S{D@XX4Lq8oC z;6IigwdmK!StOLv-i8j~1Pl(S-a+cLV`qszyq4F$!{c2kJ@sCGdv7i@B^eCLC=-t0=JoEn-$k-0M1dnX};=C zmUp0YkJjCae8`Yn&82@?tUOocZTj(}S0l^8?HbWw6a6Yra%6h!Zt2Y2v_C(a*pH1r zz?*}B=PXa!aq;E!#{r}o9@#_O$${p{V||JdR-;=D7yy=lYUM}6nD_Tgt= zNqms_(0IJ=Vfu4p28z`tuXf^hijQ^hzZ~4wzDN10Wo~)@XeIwwE`S7K>>KMc z*8OF~&lLT>Gs&a$uO>;76@u&B0^* ztLzK*H?I8(&-<=)p4(Nt+eBUDn8uvV`%d0lAGVfuF^-YHNZYN9gKT{Rj}Y`JJF??) ztKwQ{rwh1i?1RiBw)(oU%qa-fvO|R?>DE?TkW4(ygy% zZQs?-*j~Lx=Yi#n4x)EP((g{D-@S4TWr*)vyxRv}x2O5+UUblQ@FJeO5BXwo|0w+& zeIa!~?YGnZ@3QXSrT4$P<}15WYX4V!_XHoV-i@Z;jb77=U(q+3KqmE~!z4qD&L$^C zGNck&@-N8CRX@Q#Ir!c@7eh{r^ZJLZiX*JaTz<>SJ6j$$viAC}+C6w<*C%>t`!VgK zunv5X`K1`M_|x^vOO6bMkUj8cigVQRuVBqeS=XPhvR49&3$1M5`AS3%|OLf|5pU4Jvupx-E$j2`|7~on-uEa`cJg4*wto9%y zCC`gR%hc^yOdYN^c810nl3{Hp!W4 z)<^-?{DbK4iF+@k$0LvN4!b_G0C~f@JivW|>kZ%{hV8pN5gohkL5J7R6Yo8nD#C9z z+nFaZ_kSn91AK@sE#_i$7XKx8HRiw^=I^E5t2}1Cei?Nmz@$oJ<({{@7B#n;`eBAh92eI=e!^Z&rCauXDnSUV9(E;%*&_%Nx znJ(ET7#Mv5tRnN^OUkaq{2A(PZOL95r7w z_cHi<8@N7zEzVWX6jRLSf1GE7#0f@`xef45j6NgR+V%a&5z`;fEMfx6=NjUffz=h| zh90x$UAOsMH0jpWnaq)ScKs^oM>I1C|A+??w7CtqZ+ob$1b<}_-*%t7pSimBOtOdd z{v%~2OMdWbg;6 ziWB%nMQ2fRHPo;C{cis@v5pS_?{D*5aa?8k=L4%b9cvAoe(8KC#K^X82Tq}1 zrw*M?HDA_1p>%hOGt89Yh zCD;IEv9T3-F=L0@oZkvP?%?}#OEq2}vNrr1Vy(zp`IGwa=SY7@7pOfSvNrr*sUk13 z_7TS3u#qy%so^2?2>MrioWK_A)LJZ$ZwCy5U)EuMJG$v<@=}$Pir=JU+jRM$a?49? 
zZ^6LvRMFG4mD>hCH#@l)SAf5L!KZg|J@$;%ce>rqJ7FJ&Uv9J3H1sfc#&WrpIF@Vu z^mOaur*;(%Oe@xUNn?zB0r+FfG&7&V?PVn^ZL28075#-gP>eT7+sYXUJ^SaK>=`zC zU1dYyO0q$^S~(64$mlBOCR!Ty@O%z)?E{vq*&DfT0q&ADX1#qF9|-LiP_~pcH`inS zi~CNuSc9hnw*Q3iU#+&mLq55PuS)OSf^24;D}6CF78{!aZnDuSSy#ozJlJZs&%x2J zl%Gpndx$z3i{k2uAHs`VV_bb#9VrnF)ji9a`4n=K{}%rP$nL7%R0Z*BQ_stIMK51P zZU*KfbD&YRmqK@0|Hl4fWJw?TR(#yZwSo3+ zj-GJn{!`GsZ2Tboi!QzBq`!wo3|-A3mlRm1>8huWeC)k>%FWFw_s%5F$NCrltwUQs zO)U}cz9)~5m6hBkS_a?e(l_mltpi*1S@83(j7?>wUql!FndtY`;2(K73T*cItgmi+ z?gMRk-%icAe^c$|sm)F{e&n(nmTB7fgZa}whU~RFD zwM9E^3a1-rPjl)N&8FKcMb~)2rD?DJvbNkOQ!_R)ez(0+-d&_|0Z-`<(S8JaPJ&;> z{f(W$JMmz{O~^uMD$aa^^kK^KtexsXizfAn)TId5EAf8TOx4UiT^Zzh-A0bzw z5V_ZYEQ(D%6T7>Lc^O-r_JxN+c%Xs#tc3OofusHpLT?T5fW}&P^~b{vc5YpaXQEf} zVJ`LLcgx@7$Vd5Jm6t8JiPvPGN0Yp3&aSt(mT$KeN4aY7viAA+b~trnTtmdH{)6%% z+VxOha1yT0MN8rZ>$}c4;t#SO_-C~R?LFk=O(;*%`KFD|qd9X1fzHoe?B`q1H$K6+ zAU5ZscsS3DJzW`R(cFN~wseK!BFJCn{#oW88jJOG7F$JF`n(_Dvstb}d`B&W*8{fG0tbIzaT-85sCVwQuf=R0}tl?<7O4B@^8xe!`RT=ODp zDYoM5Bk;b;#J3`c@MDS|!q<~;ag9|Z*c+b_u&rdB9Os(PRqOi3tFhOh)luvilYdBk z;WmKY?6`{S9LFEF7u^DWPbF!0FS?~pIal|vrokr?r%XRK<`MOe{79k;bFT8NX@Fnu z#rN5Y{Ha5(j*`=qy8s?w4u{ZN(x33)Y>hjRpdLERzyZ3BNuMz;FLjJf&9fwRs+p@P z&%J2el&7v}eBkzscsuVq@L3E17x@;GQIoUm!kqWA{e4+yu|$W0N2s-1vVLWfc2b04_CG z#rC7XQDtP$B!QnB+n-GtjiHpi>t_59MoWB(mB2UotWORz-de`XH_-}^wX-5NJbiu> zYyGpwsvN|Qi)CA!-<_P%xb_f!)T|l!FfX=DoLA30Jo_=fSv-%vK5gj>^evjpgO7Tl zK~ITevsKO)t{5lt5dCUCl2P)JXpBC1Snm_eLwt4uAGYktB=l!!9vnsaZmjkQ=zg#G zfcm%kDmEwZg~g{h`0q)-*L@%S;-6w{ANJS4m%i9$tv6KvDf-YF;a1kujf_Pwj1p(4 zqQ9rML^suuN6_Gl8vpN8>&b=DTI)7!cj&klm~$@J?7ckC;eG@6;_LQwdz#xQvQA@F zyF;|8`}TDX-)n4x*kQuG&AZxkOgPH3TH*!hT@w>NLY!OkY@l3>as!NOJih^*A)1_s zGXtx1yH7L6_Sca=tRtU>&sVVrY8$cU-04=))5O1?p}pufl>G~MFQ==RT@<`Hl+{8JmyxPi# zR;FVEAEjPCusFVvzTU*%qkcc|(!Lq>QN{oJ$*GkNY3#jeLwVeb1k!KX=VF4hBA((W_CXLfNtHr?8D zdU}4}>0W#KiF~aYn1kTBpZ4803~c5hBLpLOwNlp+V1=(%ZTCWl7I5n2ndqmNvi-tY zTeReU@*1>P%6pJFmJfIu-|yM-M{^N9NIs1qOY^6c`X|n-J&1k;=hJVCm9Rg0iTV%H zhhP+k9xT?(k|TZiM9i~##=`&Jt=K1AugtU72q(kLt@ji(#Wfc^k3VNldY;tWxN5Eq z>!2^-)qXv5XMJnYS7?f5?2o3GQrc7X)o@w3?TVLDGp|Th>PLH`2 z-7kCPDW7#=+QQ0c1}YbEKc8>f@A^*_^!@1T79Mr>+wJQ~)(>s*cPLwqd`4apy1bm{93 zolBPvu(mMIc;C<1^o%&66VnB5dY6Ojj=&q@&j#pBvexKfvZ5Id|RyP_W_TktU)$%-A}u{wAnYvda2owZ390`6^Y)G!2e#YRe(bkV-jqn zXYg3hiW9$h13r?i&RG&`y}+Xho(R#NXeB}UOgkfhEbjmgnefU1Ue<4>72n04LBXfV zod;#3z(~5AK=G-(ZEV@+5jGf)0N;<{|!!A|2Xk-?`V>D z&oCFa%>i`2+OcUzW4eg3bo`2ap7c5J?3r9i(NN}GH2=8mG0&PQ8)keXzs$(1ryuc} z@S^;$Yv?nQWc^0`%k;el`xO~88LtfBbC=v0crvv_cxxm+Bm1SAJtQmenRSrwecIEJfYk#eN#Ytc>jqqIty~L^BSV$74aX>8GsbsjG|o<>11^GFS&lM$}TL44gy+JL*U# z7}#)i<;9E0fANoaeSr4N_<@Pw9zZ5b7c0*BspqX7fr}j({9bfw*5{{~FZCi%I5xGx z5x8-9|Lk)~UHXqPci5E&{3}Un7_{igA&xNZG1s9bSTy$OSuBm{fTc-dR#)07> zI9Df|RqPF3hz1f$3clYjqa%B)q|)AB-Vg9J4BB4ygqLzUPab<`z?+>yyw zq~)Tq%_fJ9)0b~FhTS8X*C3tzd*V;Db07B_lN<9n1P{fTYX>x_c-H@At-pu0h2|1v zY<4qi`hGtSJRl!!bpbcYopCyZrqy>5@Gv+gUOvb(t(^y7g&%=IKd_Gj z1JO|(c&bC!)v-P*oC7ayIkVQvXI&t=lK*(0MFaF@8bE$f2R)qLAr>tgTzHP@a&#+U5okds1)b{2f7qjvKcyZVn}$JWx{H|S6Im9IIpAe?6QzXh2fKGu3A ziySiPn`!mPDEuzoDXY5t=#CSM$c5(HX42scXOf$VpWr7v@65%vcqxrD*3#l*$$x*E zhDGn{%at8DX}e)P^rAk+w}Hjje_VxA<)It@EOt^gJgOYI81quU_we0_j^opc)rb6R zk|A!HLE=V6R#HA`?4LQ^H@r>`9D8C%z=z|Z{!-GynNyNICG<&@@lh5HdKOrT+~q;jl{{-MmP5XwMkr1 z_xi@XatV^-b(R~O;9{P;W0cQbV@mufZOhQ^9lVc+p>yUOr+#ok{hu($z#3<*qrJXC z@dNV|T(yTZHO_iPF_#8LDo8x5} zhlQ=vaFc`6r^!W;JX4%Lz}h~^SPgtx-!B$C8Ak}%guzXi_*IUzZSAPlac9KNIgvc- zJXbEMZ$`&4AA9Npw(mrTV*AW<1wM$)j3bWiYavVb<1^W}CAw)JK9g;~EiR61hi_OP ziT5u5{vE}uO6)c5jCp`Lq~m<^HxfrD4pJNAEDYsD^pTIDIG5(B7=rRM6eH-PF+uqPHU3KeOU4g@#}?Tn$Yo^blDLolq5aI*UwDB1;>4~rCfScw_~RufwBJGJ 
z9cZm#cw!xR0an^aRryn3_YQqSuTKF_>1KmZjj@b6JdgW`p;3M>w$G?+VelH`-i(KM z?+N@cL11O%B>3oflzBb`&2a6$oftm2);nWQF_#YZ04jf?lB=HAF~){RpqYKZ1Nd}s z)i`8x4>E80(<|vuwzvGuXVK3|;Lzv9_bk?ikv8g6W+m{ehR&*cSw|<*@*LmCD#mGO zB#7@J4;cLj`9ZnRk62$LmpcA~@>*ZtX4f}9k*ZK!bt~WZu`KJHGFE+xw@cUI2iWh# zal4RZzP`TG#1M9s{$N^h99#|)N7K6Dx2#v=L!SB$!FaX9dkvy@+K6!dFU4uN8agS0 zzMlhLdQbVY_+b-#{4;0nq7O4KbdqqNBxV!`7eQjC;$!tCd!$-xI`mxiQDU3uvcCcz z4K)rwx%IVYLVJj8+S~@sZKF-eFN?e##l~%DA}ATif6cpd#JciZKi+Kob|4U}(Wj5%sob<^$EQATodaKPcU8upd^=1=$>2B5>6 zoON&jy*>nOXY#`B|4tknI}F+3$b0BcvaAET3jENatH6cO3jLUQ!&d`8Ni7lVES?GP zily`YJ4%$MH`6Kwu58*`L66SIR<%l z$LYMr-=0>U!4P!5VF5DgWyZ?9CYD`IK0;n}(3DMnHC1u>ZM1oiHW_PLp3!Cko8d`Q z20v6q{0q>PuBR9;*Z+oYI~h|u>uC%5Ss*&6KZ{tZa`Dt}-*(zwP1`S6^)E69+NRAt zjFbMe2T$)?{PeDEJ=2OC);T&#vR*I}U-Uf=t%GZ|)7a|B$O!Lko@*ani1q6VyL59d z{Z6+3!|eSK-o_kwTC_7U7O9+5o#m}{`J3r|A&NV?XS->>kvw=2JG>b^U5H&;!1`CV z`$pQ{hdt9ikJtzHCN{ybtHF=Tg|U|oWAD^b=3M>}oA`{MILto4Ug(~w(8^v$?Cd(>N3rPvlr5a+H%*L z9_B0iUvfRX9XtR3y52N7v*gY(ZoyD|--+%Dv2K5f_@?w~0k|)hO~83NIp}`!ELv8u zm&EWN{4D;9KvM~>vMGnz|H&AOg+Iw`t=U|eEVL1Z*kLPgbM$L7 za?;Sz0%QR6(fHBHVQ=J-d_a^NqJPo*WH>T?AZv!ewKZ?@GBwbE&Wh=P2DDGDl4~5j zUxi(LA@J6FjYIN25C2G{9(jxnB%9CD^(5`fpZpFo4PRCzHqB&Xar-fFqn|49Im~=J z!I>}U#3=-YJw*&L{l30{hYEJbbQN7k`HP z=2Pf3Slp7*u9zm@m8ig#t#>1+OMb zvk==^d4AM{Z9Q4v~&^g~o@_cXb zsd0JUfPRpjr#R9u^$U4lN1oc{b|blHhfb4Ya$*RYRt{PR*Xqx(uZcajvX>UZ7uC>& z_V)JSGqh$lPMS9^8>yeRwbqubtb-;bmbQE*p?Ur4c3+4BjNt1-Ef2W1Z^KUi=S zJO#(bbIecc83X5Yyf^C{;RD(ZBFlz3L!k^`ZVU7;A8^!eo^7bu+yA>G8etBN!5NF}Py=U^awYSWFUgO$UbU@%H{MvTQ z{7Be3C76V-BId^4;K{}$JWuxA$RltRqyHOT#CIIHwftmbuO#o87SWW$WzK2l#gOu?vOLxh>H}#l@%{)|ZJN4jmQ;$6Y zrd|M9q`C=m9NqE>`qB7nXg30VTfQ4MhoB26>-(bz=TT+GkUbla=Y;ohRmGDy zVE9FT#>S?<;hS0i9~{py+!bbizWgUoGme&QY)fQxMW^IlI^QYkTfJGbAoYf$*D}ZP z#*Hz)kM?AzaK0D2qHe3skvkI~P;7M-K2ohU6kCm9yZPx)KK6Xyb9gyN!osu@p3-s8m&S1&~EB&heyp^(V2pAA39O@T8F4#m5H8A zRivh3PwM(_z!bQMHq@u|N;NPV&>9B2t4UY&`MR?f@Lp6}vXg$Ru~P$K+Na+p$`8A1 z9-c`~c~d^VQOiCE%6b?1&40y{)L)YKf$JTAjrSkRO77*k{~>FQ@@cEFZ!}K$Wo;{G zCDgTr+n)a4v-7mQ=dm7Ij2@z_)`@x^+qUmIsE=tbLZ^W!lMOmMu=6g)eOKL6j`J43K7s>W?OiVfI zvj39s**&?kYpnle??_)< zLOWVxoGbs_^owsO@q%Mt>fEN8s)G*@Tgk*}JAaj0qIkF&KV@Ug+spV(>>hp+jcTnw z!nZb}&rd6s3>?0MF@uXA|B`d9mOB5pP_D+!5C7$~;(XDlF_+%$IS)x3aV%9CD zT$;B20KW{2hPFB~bQn7HB8%T-kCV!OpK)iF|6i3?KMPo2GDnr!N%_n&I@eueMZUdj ztTABSr26o#4bMtW7+M;!>Q{+AW?{=>TUYXb9&7&b_s~zrp>$njZj}Ent^S?p=O?Km zdS@eJ7lHh73s zaSz=!;cGsHuh;M#_!uNVOLYb3KGq&)?BJ%7|1PiOrg^3D$K+yypK4^C@FSe~J}+Cz zS0}vcj4l1I0xol@V;w?<0ndT{wA@v@{fr5^%jV3tbHT@jpGAyH_>hlGV!Z-+it?O*3^f;ox-P3Uh)uQGVwV0Hvik$G|W>nLwg}Da(_(u z^ghab;(HC(q6f&ee^l!i%E|^TyU8}T%pv&Qlz*bElb7to;9gGi~Q{<@<|se}>N~+D(jL7Ib+I9UACKui2ps8{do> z2lU?D$v)3!WIj48;Cqky&tNN%Ba)VjyL`~K&);#{hfM%ooan>2h{+Z{N|Zmy!uz+7wyu3vA7b;Wb7Q~AI=?&S)M z!68!_#%OEyvp&bI&;Pr?no6y1$+1_3NEGV<a{K$(9a5H}V?{V^d3p8Tv9YuOpMW@A`yuU+LbLJNH%Ot366PN3c6*n`k8hnF>=YVA=aC7&e=iisASPy+@Z)QY1 zxQ_E-;YU*jx((l(D)KY-2ry4FR`Yy+8L^U7k?dG={}}X}$e`~8aRS=hN59*`&VHUe z#@owSmox7?)~)hK1*UlF_emB2hd$;Pcn2CISLUbGRhjAd7LKb9@>u7VRBC-g8J+K- zI_R*9K$!J}U`c+6*3!z|(*H`vs&~SdJKrPVYcDh|xmfx4sftVKrv=!T^UbGn@Z{H3 zYYgfq$v<=J`M#7Ynnziq)9HIV^U@i&!}7J^dy(8IxMIp0$s?V|P{+5lh~@c-{YfntJJhZ-|m0$gZsY`Ix}-g>x=|ra${u1CwLopp`UY=5zOT`tfHP^ zp}vf74>;?Y_KX;RAi7n$(v0P;vXW}-Z?}9X*I8R<^Q{ln34LdINfYly^C}N)Ol-mR zNoox!y(~ZE;Gf^^uk=!{628k5J)loiPJT@nXQBZ!Ht?=;X1$m8S1N}?eV+UKv(xaw zP3?X?YYOrC?C!Vl$&DGGoYk#)yE)nNkGX4Sojml!`+vr2|aUVS;Y3#xp+)SXS~Opl{*41js1c#N_36L%I)1e*vvXK2j4n^@K}I+Rh?sh>64kDPGp8(sjmf}#6tJ@YyjooG(@ zb~w|~W9Eb(`J83@p(&?q)@VMsep2&i?$QgIyWVBaUH5+EYMl4-Aw)9fF4-c!@-ffM z=gz(7tT{Q)vPPBn`4qI3`mu#?|IFU`FgPffLqEZ+`clRyz7zb>&tI(@dbgZ%_tnu( 
zn_#Zo7b`ixEyP%cr&(h|i>$VHE;Rm^Q%+3b{DO&fLRWD;NXU|-ombF>P&lN_uxb<#(kNF12=R0Mz zk#>Lj|IAlY$61FOJhk*7#U0SMDPtBtxXPhM;e!oipWJIXRRm=Zt(rPGsQAm4`0DTk#dUU+stZ_Nvr(bfI4-E57<+2Q&>!)ljW3+ka^7$FO+3&(zCjTeL@q4D6!}n+I zU;AWl*K}y;IIAQ>Jidj+8Re%g zi*EXTVRX~)KFD4>#`X(z4v}87-|`pGj{ZL>n};>fY;;b$hx0q}B_1a)a1j65alUoX zVXnS9`8Atp<8WGUt!LblAF?ZTJ(AW}3!!=K#~Q>&_(#eQq;}iHqj@#$gxNHGv zUc4*WZSJ9W(fP}gqBz6tyeb6`oz2)zC8iN?)C;og{eBI0H5|lW8Nn?$Vjp zpCuc=oKb%wd|euQ5!?pssiT$1N8!Wm@0{hqoA5CRyoQPIhnTBN(}I=ernRQdKzHY8 zlkayJ*yvp$uxJ1lqJubepnVd`7ZB~Z?Wk{+6)j}e@qlC1v1o5t=lk2KpqlZsgJ!b_Kk$yH>+{UDX`$5y zUwqALkGwU^y1t3^yw~UWb#-RGd@`N{-yAwdohoZA@SxShnYH94#yH>TkfFV1{8&D| zMLpi`JA8+TaqC;5Z{Lwk+9xVe14TP53iTAknlM)-({II zk^JNvc^CV4dt)ZXiblSze)W9+@zo2K)!|{pkCd zwY0DN!+2W%ac1K|Vvnzl*A{=nnN+}YL8m)T>V=j04boKJ|dcd752nEQU>0MK|f-%9p&ebC5; zT+X9&&lMB?yzFl@xY_9B9te*i_*=1a;{OK?-@VK?aB_*`0vq2IQS+^sAbzgD)_5l1 zp+@6ntm}B!>_@h9?1Bt-0^B7A2nXfZqTc$?GzC`&l@DO55iZc&R zLfbA4d>*-MV$&a8u`KWyKJXy8b?JflLpFI*55QYh$jjb6$RDl=XpMZ~0|97O@d)M@M){eiz?51AM;r`CCYws_TT=Jm@=_q zJ$L)va-&tzP9C1tnKkrROZ_oD({$pdL- z9FheA+Nt7MDSPG=H`I5%z3lZi_6B%f$(S1`qq(h2%ZC+$17}S57^7^}d~74((tH!V z&G~jp7x^5KbiFR}IrRTq1=x4U7N;Kj&H3MZffGZKoM}c^6f!353zqCvo4r@s_28qT z_iOMR^X+{b`8^rE+JU$Jmhtyqo#uzDv&=qa@pSc`sUnk8DP2ZevMV&7d~9vArw=+-nFn4wQzk~4Kf>?Tdp=guNld|&tcPh~9&P20c6cMFb&JywZq zIjwQ4k6Gqy8osZ)bFy!FV!s*xna+yP_gX5^8<}_-0#?G4yL^q#(fxk7~So&>rY@O=TKg@fZCK?H@Zf!N6xzTLXRiwMn-lo% zk+aphf*+kHY;iV$a`E~&kLq6Shk~x`E3pe6a&m@sesSnZYiU1wVZOxs%aH-%^Zr*; z6~KS|Y#W_1aUu5wycccgySOFXYp<*P9~zH%dX#gZyo|4%=jC%+=C?n?_llVp_TNfk zlJzIAbM||TGH+v3twU$PyTqZ^ntR6gD&^aGzJJd4`TOnZD|?auoI!5(iH!2StKR=c zPs2OIS#t^`>$TroIul%;2ntR-(_Zhe-FM74!-?U`7LvWbm-7puk)i37qV4?%vpU80h@_>Tz&f50?YnZ^qQgLmj?a22e!nQbrNG4 z*mBD<(d{U)YY%IFcY9>64JsF0U#H-+^Q9MRJ+4=@Fd^-R?T}AuKyK&bdiN~F_$X;l30C+SZ zR}D>KJ6Jq3?L!-~<;G#K7&<2pQtx~-k=M{bC$v?|Sj)?b-nCY1nuQI?zQ`Xlo@Vws z7TD2Eqt|a`&7M=&_yKqV*_y++G^g2oTQYM_+Haxr({zr1CvkO~a-xZpa-_g}75z3s z&wY&B9iRF3Aoy9oP;wA@pUSsI!J`jb?Rdgde+6e;K1+Nu1#CV9+|OJ1?EJqaM)^B? 
zhX1$d`F`?!g_lNMi9_lvjsfbc9oP2Oy1Ib6>2>v5*B)L-eq?|?o=(GFFqGV6e8_kB z`oVN8miA@0JO#|3V(fL$MuY4v$~7>re%1$d$n-jLi0bmttMFF!uZS&BzVKJ+b7Leo zIKQWz@`>l!BZofT#`zz)e1Dwv|5AAU*eHFvaFINZ(Wfh)hoSZA3?2x<1Kz|f%Q#ygbrcVyhav9rGm9h%}<6S#IfAAf$A z^^LXU;+yZ3R+67tN-j=Qx?fBAQ|wtWFuahwM&RuY_RTK?r*0kVqtgAWx&ro`(T;1e z3!#OoYpnz8_(@OAS!|u9%fuJ`z^v2c((oTz=mM_;^gFVUGx4aSb#*!Th@+pZkAOd5 zZ$V47z_%Q|T3blW9N8Pq=N-1XrE3W`wywm=JGc^CpIyrt1LD6y(ZE%<*_W-o(v{qc z2fx{ft!ne_D)4O9KH#Un2fZg5N4YBg8(fW$69=wL&Z5pgpMbO3*fUM!<1|gTOFk^V zV7!B)|ipsVBzy zV;6kF8C@NxXV^bG4WG2YCvPWy4S%#@S93kF$nJWzI6M5x2kr2Mw0E4iefCAyZLXqC z@ttJb`I+>T$5_bK8(@5$%HN}o@{sEAhtx6O!r{~stye|&{r`)sL3Vkl@7lh7 zpTYaaPWG?kcUn#!S{S`2J5%3t(f6^h;JNJY;I~p0Yq?kXDCG-yPAqtCHT^2Dt?~)l zW4`%ZM|iIGWQ+GdoT^yC{q85N9o`F(%g7bkV%n=Ae{CRyUi<-mFlh4Elp~+K)yo>K z?B%U%1InL4hPN-YHvNKk(!FEM!Hc|m*111px0`c>az~?#yAC}9-yW!j2F0_|Bknkp z^xZ^#zTZCA3y(g*br{+816>zkZ{6b9$iw)hrFTNuX+GMCq}v(3j4}(LL1dcw4?n2Q z;T+K*u)yAEWKV;1@Da}YBd2a@7%(H`QCp;d4U*<5@*Mn&uTZ4Q7Cg}Ta z6C2m}=QH|N9&JB*MDIE|bNQ@y@Qv4Q-*p)@P|LUrIQzr!;23qli5Hq}dcu(@rCj~A z;pe)N_8Vq8dl*E|wLeVLI`&ov^UC}VYs(MrSn6R-UBEAHVNZg$7x}ii#o9W^?}vZy z*-`m#siO7F&(IYxI)R&89rQHq5`VferbD6y=G})Jkv?kVDxHv-=a4}6 zb|G&f$O!p_blyz%yqh=Y^FO4u=_%?X@8#nuK)#SibwKMscP-`W7OfM9u#d1ujx`{E zEbsEomd;e}!Jjwe)L+lr8a0uS7b|+Yd9S3I)V-1_O*v` zs_kONZ)gd8NnZqhhffB&k=>IAEF>=v{}TTjycA^ZA>V*0kGvegCWQ8SLUzbsIykLZ zyl>utM+@3E|Iz7cukmqs4qLxUz6o-{hrZ8y(V=kt{nSD2=k}G5xB48m5Nm$v+cNft za6RVF?l|rD44yW&Bx?oEIi6hz&$@7_!f#!}GtI|cXYT>Vdatu@597meb@?8x`6>5F z&6{y~fqh^eYvzAtZuDvRgmHR-g~p)!Qu6Zs);HIlDCoE|fbS&u-84VV!-gl{_r#(V z;lLVnJ^vNMien=QmLu@xD153l0(0$Y*4jGF=h^etZH~h0E^U4G+xX8}N5`-A)T^xO zX$%_E2KMA^yXix18y`LU+YhyKtTnxNBF{KGdlX#6kylp9^_x}4A}4ipALrhHw`$sP zWyJ7#mdSB6_vx|ZGY-*9l(SNV&*A6D3;Y`P@pGx7b?}Ga<!DR~JTs<@USyJW97AxApfuq7y0&DEl;Yn%T1pmSc#K5`=6cD%QM zoBnb8VON)R-!b1e`c8-W{)xM%FZdP8)2~z=RyhdB}(oo`v*XVRUo< zcJ?Hlhuljd6Zt=wHLv@EU-Wmq{V;nP`F*MUotR{d)aFYiixch@dofbX0> z*1W6N;~njX|NjFyr#w=9OJVPqJUgO%lY9>F{xI~U{hXq2?H$R7uaP5bO>Pk~KsKS) zca2;h{0D4-Tx+S`w*ZSW#ym_OZHRq*%I{Pe?Tft>TR6yCeccM3=U*@N@b~u-$>d!^pY7gN{xJ)O+glx&H-aLwA5X zjX4qBG-uw_PwQM)wb#KqT>4Y{#flj3-PT)6r7vFC?AfvIUhETcf#;%|)wa$ajFe$Z z>$;9{K_lCM#pSfy3NA*`bK96}J7p8h(b&P1vsl9^CgA^F>OkQ}WFhv+Fnr2-=x5<% zQQN9lQ!}opsT=y+Hcms{SEnug0&|Oi*Fos!QvOf;u7~1r)r?zws&iOu z)>Mkk#l})Q`Q>h>)14+f!X+jwO63TeGe{u)Rm191*SgqYeefk=CAM5g>By1 zbLwK;pQe8|7wL@~e{5oJse3)o{poMAyWYNI3URin#M!11XCub;B_HuZPn*V~Sla^n z)7p^zXzW?@)Q7e>xe3-_*d~ zeZ}My8)#sCguU|y@8qxaC9m&lhF)8y%C~~NgI))?Z{}V!R>i&G@&M?;DbE~OpP3krY)LnMd@1@p)^Nrr7yJ^o9Z=hO z?OBcQ2<>aVwAkS>t|Y7au~m`FAv{b8qRZo zYhiHC?gsoVwZuWo*}t=a_2o&%e{zwv>BL;C=atV|C8+?G!5=s=D&@ib3%pUBlPF!cTMeN)<&tvdEYl0AMMyEvgFZcCbYd2f_ExYmBYMy&(zuIs0)X=|o z@xk2}%)DgT_#Wk%QRBM~)4t+Pneoga;30X@1uo6Ef?hyZ^GvZf$?*%A$Mg6e?gnPE z?G8@0W(xLeQ|y_GSWDLf!#(BJfl}FWYqU$d z*S%o144I&@Xq?6mMI5J!|7!OjJf8_~jraX%iG+8@$IF=H14^K;#%Ubb456d`FKs+P z-{R}gm#l+R#N%4`zl@x!gtvI^txWS4{+MJOzX0pZx7qh6{NBX*s2x{YOGooc$T6oq=0uN9(7Jvh6>!Id%)O0h&=gWog!AztF_d-5Yo;;Th(eADGq4sQ*>SM3@>W#CZx9V*|Nu7@vVhxI9B zy!!tn?ZbbPZ()1>)skvz+Lf%Rbig!ix`I5=(ch0`> zW1F=$>h)Ns^evT2&QUjOLhz|G)qRQhvKr29@gX0>(Are){a=D_4IQ|; z_9jQp`>-v-JTr3QTIW2cBj_wYb1mRE%zx34=55pF*MZY;I`_D=l(~a5AG)p<`5Cd{ z3*zjb|@_iI>R@?W;CrDxF9h%47*7p*{+!7Gtd z$Q1o&T)q*jWC8I00{z}4Sq^Os+~D0kw?F=PYC8Tb)~hM@)edp3hOcG{H!eSlKCxK@ zpCTbU8ROl&x#lZT*l!Sd}l zpi8%ZAU$RVfLN}bD@BVd8ogwPDcnf9Vt@^`O z@z3!uj`xqvn?0{=b4Q8Oe{~7@hu~ed`CT#20pvW4xvazQW-r)x{=(27G`z&sVQ!8}IlK^uCN1BJ&3#uR zGmu9~&S4WAMX$}Sum<8M`9D<6kWr6a{#7QFMyY0ep7eofO?-0ijJUX4AyN$(8)Pg*6D(!3wvN+Gg2 z%?GaCQX#xgpd;U7$PT_YP|CPg^1kM1tj9OaTH;667oNK3=gQIA&NzBmBPxfl*JnM} 
zVz=zt7PQ*xg4U@ZvZ{r(4Q&%ojmeH$Q+>?t(RXmSthdH0DtRtiU(6m?`4hxzhtSLT z8k5)#vc2?N=YC0UxVdkVk2-5ba?fwye0QAx;W^FoBa|HiK0f%-$4!tBo;x=~sRR#f;?F zpGx`29LF}1Pjp-!yn~))ooRTMxRaq5WQ1_1G5nfblKj-P;y1x@7yb(AkIl3{0&nTR zqc4zS(0o5K{BZg?Yuedfgtkw7oX8yB`}V!arA8%*+0TNBTLc2PtJGtD*5SuDbEi}7g4s4`RM;I z;S1e2(#F@712ut;ywH)x@<)vY`t(9GUh>ThT#&JnWd?rK)wx~zwvE=ecR}-Bz8y52 zrC9t0-Tj;~zlGn!{O$vXQ#fz_f6%{ofQw(C&j#^p^f4CePpJb*-tXbLwGSTouQTRYyJHca53>~{0S#nk8154h5p-n&{evk$CAub{m$d(W!#U^p8g;9%rEeHtm5%; zAn$kL>mnX+zKL@dUeV`GirL8CJPAIha8?Cvn0stv(SHdtp){?ZweBh5x0Aj^_ix~b zl8pHtZMb6@K+fFHvm0kxMJ`O%@eMiIXEy#(@t!Lel>-<~^IsDGT$kX5%od-CCvBd& z?S`PmyJ=T^V00(%6*C@#4jyynAb5ceDD|6Qq_L~UdlnnH$ZbyzTc_6_p&7uwCU=F!;hwlTsbO! zu)(2cU3Bb`z34{4JuPGP&cnMTv{nwUu~s?z9d-2XI^IbZds(Z>E>iyaJ6|fp-%yI* z;p6xpK87FS`mR6y&5ADk5+-+~dIF#1u)cEFP#XIRU)g4@qxzB8uFW4`49?Mmvi)C! z&YOUlyAIFBf7(11Uc_f0JvdEqtTb(FEmj0SoeP&SXl^1dGwHPn+emBFkK~`Bn@;Aa zxf*%OTrLFPwO7Z&D;Hn5dn4=ePR^9v2p&EM552m`+BD3%{N#jnx$M!mGuGw9tjXhA zlP|^YN1m0@mc^Rf!g*2{5tBglK@ z_$Xgkv`l`W$&bk9?7#$k`4>EoL(3+Y2l!{!tzvAdJ40oVzbex~dqy79wrJI?%PFIM zh4CtEA)b{n&cncb8Q1f9t~Ig#>nz_(u@(DiV>jXW$2x* zbUTLU;NcU{ngI=WYdPR2Ws%Egu{Dxw@|kL9bL*d+>~4Cz8iRXTCv5u zE8yub(zj@Cfbs0g_^9m*p#{f%mnKAcCCL_@%#Gf7`vZ=la_YU5xLZemf1+FM*A8`y8G>z`90joiOv4-nW65yXKVLHZK2}m*A_|^kL?uz3ZZ3$@Rlu zwt6l|T=Lcscol8^@DJ~{%PNVJDW6KdN9h-xBczz8Lyxn&tB@-%($-7(8NQDEg@1|_ zFL0kf%)77hTy}x`d=F(mi(jOZ=j81!b;tTFV+>DmWM_i2JRU-J<^++)=z((VmA!mp zS2WSge6;Q?gr@w&IA0+KR}LH`52_er3wEIPk+nc4%6(8=+VC_yUZwwf4*e+CF9a;t z@lEE^ENd;_m0Dux1$o^Ez35CV^oh}lzSXRy-XRx?HA(?<8uz#K4V?C8|K8}uKJqr* zxd=YHnNKaWqw$ThkHW-4(qr7f7`2b0j$*&x z1&?RZw-1_9Y`qlv4mDfGcU!vE>baOXiN2%cm-)c6@1@j%8rqb-JqH@AgPvD1Uo#Hq zY>0UdGtNBfE#xdz#dk+`w$CRfJ6cMABY9SF)pCn-Ue52%LbmPvJN5w~>rSA*Hol#j z`zLhDa#PPcdV+e=$*NaApQCd1qWxXIBp9esOm=zHX4VOJcG-GZ$1{V{#^AZ?4^vEPz8AW6{V9w1 z9|ymK*ngqa@RU%3DoRsHQ%cbO|^>ezbiJD zXo-z&`Ip$(y+qdTdo(t-^}DgLFMlsK_K*KYTip}*+Ag_njPL0l_&mJXsPhlWhg%3f88`bEoU)|vbK z6u0{jz7O%8&1+qr~tMwhX#MtOLv4q`K0v97BL z*(tfb8ougB{ZbGrLBV#N8Y!I8$Ym;DstbB)KPeYJ0+7k#uZIrAF2MKJhZ&Ry5_Wv+rz zh&@}{Ps2XpwR(O${r)%U_g>~uVOvWx;rPq+yNPgA8LbD@&NFU(@~D0AyYU+^M)`k) zk2k^JmGGos)laNg`#P`SUa;sVpXM<4(!I}db@I=1%vl8bF53IWa1~=7_?UIzb@<7| zbLsyrY=8^NG5Ch;LFHqh_Zk_Csa~OYgnsbZ<-k5eryWG5Jx77 zAhA3I0^~#p@=*6`Er5Yc8K5LeR?-56oUn>Y56vp^AP+He{7kBT@!EgI@ zWcnM#z*Yd?9Qr)#IW~u}5%*&wLU)euZ;tW(2^Y7+XYgG2S$6ofP8?5ugo$=|@-HU0 z!-w!~$>)+l4}9w1tR4BRe`Fuiw&Mp6+VyWjXHBwKuf<2tSX|$M;GJZjsn*&B*iPuY z1Ma@glg1v&>)r=%wWsZojmYVZ@M$x1bN8_Hv;!@@op#Oew zEO-fS!b4$j@9w&`zh1WTI%~Tu-1H&^{q=grcc)J<=8=LcihLD zxY_tH4#Nj3BOkyCV0Y=v^37Ub=sQ1`5;NY+w?3M|?VCB|FhB=kV5jf#j8Wzy@RB;u z>XH8=fc+$YaS=K|>%VzCo9z7q_$rmZ&_|nI=s{~-(SgPxno!@e-M?th^#`DpB<}=A z$w$Rts)Nox;T+(&lzz0%?L@A~zY+$QW$Cz?XjO7sdy49K-U`e!c};OB@mmu3>)bbV zWQE{Bt}6Se_G%wUZ6CC9gmH<+#g|47u$Bt)-6+-1oCE7bqnjo^Cts<;du!T3TFGPKl69+uh4)>1ygC0a5*=hO+ z0J9KdyBPZL!t?G~_OcQ5oczrmwKIqvb}{#E8No?7@8FvFd}^os4T3=<{!f(`4CD*8 zn0r%}w_pHTu1@P-@wQ@X%7F>Jj=c%Z4bzTjZ8vxq464twOGI}UGLA85(Zo}ItU)Bh z(lT#7btF?{gBaO@{A+;LUIZ?2?2JL?E?OL-oceZUv0@|A<39ElOJ9DGcu*hbhi3Y; zh@~5!7U!8C7}!Eym~G9v5MS7ToS0U;9GfHr9}d8We}rC*3W(kf|rvYiG`&n72hy=0a>gVsSmtq{Uy0@0{u7&eJiiEMskCBy1pdsGm2l8 zRsRb8#1G@2c!4z+GAzpV=d2aoa_VP*wY1?i=%9-7O~{In`;4*3kI?S9U{pFTBz(ZH z`lhhrEb{+3xu?7bkD0lrhX0}4ToZ%w`4xldK$q46W1S}+AIhrlV=s|pY$MnG6UK#4 z*ThDIzln4q-&l=!2){xoVf7f^6t6+YDsS!|a`J06AL#a=e2?1OkU&nEGCUWZ8lT-0 z&OGHuH20^FXUsP;$MF?)iat!cEADQ_HyFAWzu*`0X8G*tSNrYru0C#CcY0^G%qK^f zYxDdjkF`lY!zO%&qTS!U`fi_@#aezVYh>j>^h1l|ebZ+n_c}W#z?>3{!^AdLN5{4> z-uF9GW+C>R&N0(Ik|g8a{|al1$BCgFNypG=Q@qx~{Ezgd4wNFB^lkb+a_^;=_1w?% zGH6VB>x!QTfoo}1hL5w^CKrCkF4=4$!BBc?4fFW_X*>7$D62dFKhI2nXC?#)5H10o 
z1hmd1TBU**mo_s2Y%WsE#d>X%XkBxGTC}zT>n4F-27=p;rlRO>60p0OD7LoPA8EHs zqTM!Vy@1=xufOe*fVK%yE8fVcaenX5^UMhb z=r(Zw>7L}Gx%AK8$F$WMgZQf*r%3x1*rX-9Wl*PrK4j7_+inKUZl_;vS(Q)^drC!}QdRahCz8AhGyTJbVG4f~)kw@zl@@T#Gzm~;c-@i0INKE}pJ0kI;?E7e9&a&Ze7h~UO zz+NHU`C@Drx;}?|0MfOy;U&c$bEq4=Uh6^S^OZjUdx*;CQ(o699~mvrdO)w2?*CEc zdytKt_9qTWPJSd+{(RbRg=dMENv{wu)4%(z&A?uJ#qzM-by8;{eQJjPiO;DH_;1(I ze+>Rt%y^DKFJ-F~oq27sC;ob|8Sf|8*b(o9&OakZ{RN)#7J0E|@~v#}L}PBkE+#)z zF*X;`h>1Ku0{%GfJwvACAKtf$zrpJ=dHV zd?S-F)G-H@sX0iOLDn({_Ixr2TFVA?Pr}~noT2#zcCF+T6HEnj!PH2>RQar&_Z}Oj zf_Ve)+j#G%-q7g#%pntfO)wX{1zX8xuH8|vP2VTh1CCnYNPzElldDAd{ZZwWkHjet zelr)uHhpAytsken*Pe@qDet2{AIDQR15@%)4&^Z)KlmH#NWKf0qA6RSugP#8pSUc~*f7XZQLiAPgot{g+bKaAi zmG|yks7zm~jB>WQ`54pfjdY68$dWWg6ey2Y_oge57G})?4na=?7 zbesBxOeg&#D%qIxECQTo(FE){+t7)X|B}&F^H0n7b#MmnRN$sO``Ed&Yw$PL%s@|p z4^;7Q8N^oPI5F($bUkbYepgHVV(elWzGZd{S5RZ~D7RV(_Vx%d70w=yw>9oX_%hac zTG&s~S(<5l&x$OLSrPKVtuf+OBmb6>2;_Nx+xFipZeTCeh}CVztc#3ZC?t<#WSS~TN9Cg7CG{xi*unB(0i z->CMK*FwBl_@Vcr3(}9YU#q~x-}QjWKQd^$vk$a}yc+e3x(`{Lp`1aScOLSoU2m>2 z)oGVFUt)j;z!~v+>80ws@Iy9aHxHn6kPK`v2JuC%OrSDu9N0U+OJz*t&LD7z3O=-_ zy|CJ686?-aLEFsdUg3WFx2jX$5|2~PJAFrb#8_oyqmewWGF8BDtiFgZ%YUSHU6>0_ z0qU91$}@dm`jX^VwVii{w!Nt`1~kxalkZf)?*|!E0ppdOt`hj^+p^c$IvC$QC)K9P z=)GWfx=k;SHTse5KwtircECaVJgGdJ4iZxb?5l>8Pu$HMxp>or-F!2@b9O-&fsM)p zQvC+co)C^eYtNi#nQ>N8PQ1z0@x{kn+I3z+h>5SS$mUO__yhBTPkK3JaJ=o|0$0%C|Q#=o!4^)B{8nRQ!%jh#E8nq zP-r&ihF26eyZ1rvOIHM&b-tzSnX-GTys;wGT#HS{wo?!{OHR4rR@xISEhko1@f{1Difd&>EDz-CEBlXNk8oXmVWS~40)w^0Wu@+1+R2J+;HVG z;?kDJUB9GlCs9Apr7FRXJYUJirv!I&AM!Ui`q!tijUtoOKF}{=% z_PXo)f9rm%9AeC5Puj|u4?rhEj9ED@tLS%}?(^Zx#m3>rx?0f3v1r&Ch-dp7|R-1Oei8YtcPjp=KCBCCM z)3s~}ndEe>CpNAD*xGb%oUszwqf5TOt+XK=o+_C$^?Wkd@)L{|n}R*>6g$D|b*b1C z)>naD@56S1zog3eu?yr==1gaDh<0|cm)Z8U(yv9p^b7oUK>P3H*TqlrcMkTrmhr|) zt*hn0SafLsyMjB%2Kb}-Z2}%MoU++(^Ih3f8(9YaNDO6r1Dg4f}sWeK+$kK>q1y8c&zqB3);8=4#8I;DGS0M)oKC zBk>QCi|qNLPOoAXc!q4X$i_|J0{)*;w;Mc>{Hb!a)IF{_rLNjIU0*u=-00vrt3%lD z>gOR394Ke+^tHi#y*q6hk=?t~=u(^uwn%Kp%kG5cf2!AGY*@`WYJTjLsrj)}2AdJ* zjjGJ7Pce=Sk_CP;i5&6t>vrhhPx!CF2P2#Um)5*)7nXR{}^g*iJ` z>uFJ6r0dGQP=K!>h`i$)sd_PU{aLH)wU5X2J$?S3ZylO=%P-!T82Qs1ul)SYH|p=2 zNbH2~>Ci7Ko(^#x`bC-jA2x6K7vq||NyeoYeR87liL+ZdKODHNso;F@Lg=OTPso>7 zjJ$Ib&kKD|@2vlU|G_N2xo1QD@CDF^HfV1i|E=uv_b_Jmb|1{4-b71w6Kg08KQJh7 zpg+r2g1j)4$9P*)ZQf0rk3Q~yPKu*-iAZ7T75#wbsPiP`GOyNQZ$N4~Mw zCK$v4gcFyZ8}52*cXs^!LbGe|ri@syotz}f)$*W`C`ETP{;%X!I;I2s5V6kLRU%lkX+S_yI9D5L-EqFcfclI8y?18^CyRMtSnVR0*`X=T; zxH^CzP`IZvVf)EdBm64||5WF%jN^UcZTf)e5&F~59+jVSeqG)L#@_dVS@L?Li|^wP zY&L2etCVF0qE+$>TuV1a$AZ& z_XCF`z~iVlxAP_J7B3UKx{-aM>GN?l^KpW?*m~6Ii~UXH%-<8+GkC<=XE}&GQ$QSO zs7X4+tOK`T48i%#Uz> z_TYrY%UM%yy}we&ZSSYp1(hqMkaD%y;0kUIcU72~ncF$vVLR8`%{iIS!uk;7+z$K; z(OID5>t%0mNXZPrapYeb5A1=PKMQ{%e1W4nRYq%nAED>ZdPg z$xR%du=ss^dJV|rec0lkPK{yUJb&Eof4e6(;Pu4^&ZE7%jQE?^Lm%hU{w@618zY1B zm&OMgj0AD{qhkbD5*>^qeT-#{v17|M!`QC60_c8{OFFi!9*%q0?DMl1LGcK_XDi7K zFxR#L_c6Y9yA9yO_+`^D#>0vGYa4hzwh3}EkXvEXU=DLYwTdy$P2CVi=*t-sbnOZ@Z= zz5_2#ya_J6_$l}dYkUA6hgoB>`553b^R^#WirM*Df#P(IDK zbr#6`w|Z%F?DIf;c@cX*c^h^?oqy=c7t`ryANSe^9Ao~!eq$t_JD+$|?+iO8c`k6+ z%G{if9NdZw+>Km*J$*Ri(z(9fsMS&M_<#of@FXYnJfi;r(hp3OdUSC3+;&0;bwaBBvEb>(51Mf)&q|esT4r%YrSUI%czZJU^ z_$T<0hq|JY?_^}RUN;Xq6g52cM=y@%wtnA<6A=AE<`_~Rtgl9jqR`TseB|9~_#$~Y zYM_ymfX88T){x}9aa$^fmxFgfmBB{QL_N2SZfq*bH|?c=@PI=XaBba>ehz)>qwZwR z@k~M&B$wYx{UqN&W?Tb)Eq3^Y^bPh6Q{KcsrasEY_6gegF8v(PH}wwKyzZUcIR$$C zI(gNykx&;DjfQ7q3`c!KvQcs~<)_ITQR$;u{WRR1_@N9D`_@l`V%yH@dS z*x=}&>MmxS>2-@gL9R7$_c*aZ#p6wT-Y-0hGPIGeZ%lFEv4FblYk%Sh-zu)6zwBF! 
zL1*v-Ki)=e{pU8UAO1ZseT_PY8BYYeoW8e%I=`ikVzeH*n0Td|j973tGS|Nu@htGK zl;5kn&G-V!2wo07G1mVT`RRw|q9dBPoN{&pFpPaSoKTi8*BcW z@4VYhZl2fqem3&Odi1kwaPJ^I-5odJ-f z!a;bm{23-NkX%LW%> z`_8JGiucqv8L{o;f@o*_m0og-GN-ea8;Z9--ovwpn3Mi3;kfjH*Stnz`QK}7;e^Jd zeygvSv0wjQ$|Zqu8!!_dzs{OS0_%S6b*(ja5wJ>MV_F9W`8lSr4g#!$HvCe__{sSh zIGo(;;)&+xtMH5|_k|P3zsR1ef5i?<4(NRPwIJ`{PW}%?JZ*27;qEtxnK+h5Ougvd zerQAfAJB2W7AZPVXe|5>c;(>~E*wtbg1biDb);_{1{WH^&DZGLVQ>Mh*`_1Htt50# zxODk%lMBm;MLr)KYK4Ync*@>2nU8wz8@M;gH=TL2xw7Fnai;KwPtu=f@}JrH7=8UD zcFIY{kvAI5NyoaeJ!CE+4q0*>G^zYt@&_=#zP3nL&}YpNE)0C9HTPn^HA!n-G1Pqj z@GppKKZE>DloqP|X`w`~$HP+drDaPLCfXU$zXdQTbn06ca-^iMB=g549Il41a#F-!q zuVNe)bM4N(y`Hgyx%X|1Pxi12`F7wgbN%OuNzKUq{KnUzUxAh8`q$0xb}V=b>N<)1w>(4NqcaLn3NKfDRKOElJEZZu!g83&=S z!K;wLkm38GAvbBi3Txdm=h`$^?MB!mWSvG24Pq+v?E&6z<(>YuKSMYZ04MW^sTJ<@ zk&jbnmkWOb;DP9ai~GXmI`kECG}kW!XYXeJfn)>0S$NV3|ESQ~Dg%GIXpbCYig#4r z{C37v3a%kv?)~!|qw8hn@+IiA%XsvCv-+?_V2o00Zt`eTnMD8OCycD02tfF$&%z02_2JiY< z^TJQbg~CVaXx8>{5&o4$4-rFH3(nbPkx$gVwT$l}w`r_(^s7Sa?QLWmVh$T7vUil6 z!o<0Eb<8pr{&|LR$xZAzbad2Q`aYxc7<1M3qvS#>!^jTOpRH=2zWQ1Fm8{oZVv8!s z1y!H;%!kfL_dgUaszm>3qRf5NwJwbmmC~0Q+HmosXAj>8Ceb~~MVXWd($6U>OTVE7 zrBxlr);F`hWNKWF?w15-;Wd98c#0tQBSgM0exY=SE89Z+p~Pl+rjI*kB5s^(674V zMiFZub~o?7!t-Zqjl>Vxr~lY;gR`~feCjcBzOCc=WAI{?dGspj*^@F~^JaDKv7cpS zK6|s#^(^?MeNbN+XW05j6F6VUJZkOqfV%~}$FI28_Pc@GRp?-%ox%fqUpe^S$hBf2 zUjios;IX|9z~g-Ue42M(OSmY$E4i=%-d?*)>#T9r3g#eAze3DqF=gA@lWRVyy8P>X zChrxWYJV3TY->Z7;lGOiI`;B(@DmT|X_dW|edFhbyAIz0FHr0T_ZE3*KZy@@(v*j9 z3y2pGzcB!;-z5IjdKh~McAVF_u6$PhX8Zs-_-x26EZpxHASZijJ&5mAwzD=L;@>3h zoc~X=jxBJ@dMLTDAN^B!x*Hv~f#Z<8 z-&tzJB=2=Uz%_8Ne|%GL&*#}^Xe$Z~YW|Ykn~AEyqd9Nu|5SYN<<@V=AZl0KpD_<-F3aLB%TY1+A zjk4DVZMM>;$^8`Op@VkCo6o2J$BU57E`iphXf|hP{V2btIOf@&oHGB;^>>z7-sa=r zzhcPj@>~noip}i$-fhV>vibB@L!$?sJ@w>Uck;eRmFL@U$22u=rN4Y@?bhVNR%n*m zGkEqIFiHZWD&!Aj!>IwrnHL4t{2Po{b&@||{d|XKzhM3FuW?ESKVbSYLq5iOlQUN7 z;^||JaIZ0Htdsb^g0@~@tO3T_Aza~pC3}b#{INLJ{>|i+c^kiaXTIgJ$EtWSja4z< zs;7Okhv{b?GLY)M!k*z)`q;k+P}qmv#q2}ASz_YU02VZ2o+n5LJqYmFeUZ`fR zcLJaC|9sNs=>zZ_o9~yQ>k;>|jx#g2`?8uF&A{(8H^a=$C+OQYdyb9u*W|S3&f~t| z!f?D^<6ypjgRSq&*;j1zWR`5Kpucx6xIb58ucsdk^rIKt(_UHZPNV&pg*>h?7f6;- zJHSISvAZ8sb3e$q6p!EoZV!X+4UE+@r(|OZ&#&P-1;GBX>+C<;0sSSx1^P z!A1R-pHJ6sdhX3t`5^Xo;bEBi!S$Ehe5#M~z4KUivuRg#X(Ph_H2Sag*Nc5*fWAfP zoAOf27gCJx<vZGg#&oTf{{dEkOk=%&3$dTfkLK)u z^DOE0lJmlcE!2-6G?Sbcp4eb&?|rGCb5mV^=a#dE4dmIkq3r|w`jL~5u=bCF^RhE< zdG3Q9lj+++t`!%C?-Sd z)EU`S!MMw@0hbXk=e6yY&)(>1k^OQkxV#B{5^Uarj-C!1jrmOQSpsa-?;8BzC*i_b z)9N>K_ZKg@=75VmwkFpvV&E54yi(Z+o{p{-=}+#R5L$*53qfY*BvwGmqaMyBFNxmyR1F&ocJEv+3=qzeF~dU@-gnnNFz9=rKkMimB8b8B$HLuQi%jtI+^I95Dg3p@ItEqFQ7$|)sm`PkX zJ|fF75>^;{HNMub;miE~B&+#%#Fx!9HsfQ&4~Z|M)stiI$Eo4{Ru8tdt>rHkZ-6c@ zxG#5b-hrLl&{N216RSUd*F9@XkoB@AHd{@V!)u`j_#m6Hvk%>GZr!;dU%oM8deMAO ztk^qiD99Y-ZMk+hc$0Y$o;(zWM|_2T%O3N!UD)7n<9ro-jdxrUPW*JL)vW$JQDVgQ zv+vS?o_2g99QX6xa=v>tZO`IcYX8JMPb}$~HPnHu*0=fE;ei`Zw*Q4x`)V6LJlb~K z_?`|Axgus}j_gqTP4uOK^T}%%Pn141e8x!BGX4VOn~HJaSlwfuxae3F{aA!-Xkjav zf!$a6@Mi(L1b%42s{?phz)O3Ttpz)B&H24M^S%=v*0$pO4a}Py8Xp&vibw&AZ_n1w5}z zJ#R#QQC=U;$*W=^2kP9Npbyy@nfON9v#57F{rEok+kwmw zDDtfL6W7{IepY+GBlNZyo~68m;>*YIpL77*8pdSzHN{)6*H|TQ(5_cG6tTBR9&V#7 zc)GWmxxO$?Jc!>|tY`V$7joa`bMBjnIXqAAQ}-tC=j**?EUtl0`p>;Kz75zvLLbj` zW^Dw!rEGT%#1Xa8*AU?@uuh7iD1u^ELNHFNyX{c)-?X#jf|&fK4`c< z|2q5og~0xd-^Z4j{{0SMB%CN2m_EC3&F7iqUCoOHYy-$2QTB$` z$R<*Kc@ceGXthV64;Eq!I`KGiZ?Hcnirjq1jYg~%d7~J;&q6lOh?Q+@M+S-_XGD=@ z>^2sabd_;FkB>G~r*10t9b|d$gG*iIOuDyVn?Xk$@eO*t=ZQ=tcF2*KJc~{eJs7N|B21{cdT8#TTA&Y z;?DMCcNH!)(^r+RxOUyXJv>)Cd93x*+xP+c)_>9l^Z4QR@vSKD^&{Y;aLmMBc{}%= 
z+zW@=*|&Ww_l4+-_Iu7XYUaLxd(r>)my&C~!+kY$U3-fL=Ns`O&=2`dhATZSgY(1j zI>z5cnfGt~8FbEs-g$th7r17yo{3vqYTE00+dqYeOBIj6+H5<=;CyB;HcU^abTaX_ zOIhn$?-N%d&GryB zu@&aZ{g2B==lB+$&3|_1V|k5-CKWB1C{GVC>94^YjMw#^k{^4Pw zlMN$`fp==Zk2V^v8n?cU|3`_PTnDY#4@`G~qu5&xeU^1)=cCa#8N2)zS)Zmq^wB_v zS}~Gbq;Gyo<*?(~efx!zTU6!NVYf{0lW2IC%F=%W{2Lt`yf3|eHMGI^_ouhli2eS%s(bA? z8&{bB>4L{suu~ldJ~u+U^d05@>W1#AT%+h8dfZyze;s|RAfHYve5#+ZXx{RHS9xZn zsBDW9TNWQTYw`((VgqLEOZ3IqS`}i}%5cGO^ucG7p1%qY(St`Av@P?-l>7#p2zy@L~%* z3i^$OyZx@cT=MA5&a;sVut|_VsWJBozIT<+SlEY+a!-(boA9RDtgSw53B#O&P{qCa zF-x|F$&n)8O6*+MIyNfuedZPzO{@G?+ih;{#|&S^#y(&oyMG)1z05JOH1=NFIl!+5 zziF>%HG1m+a#}AmUGE!N6Bh6F^D!Pj<0(L21cr;nJK~%VFnBRG7-X|2JclYh96`4${THZ6y4ErWh7g^ra#&mzRz{K}TyuB&H%F1)T39FZ&|xvLbsu=k>J z9+TFFbQ68s5bpAA?X7lUq-QP+Y<>xT<-r?|P^N}C@d1Zop7r39wEb($ynzL|o~OO1I6uLM-!rIKW!8N^u<-%=kiJ3Rt96fmbt`*f zeAqn;pt}|P7h??CsyXo5u=5X{KbtIShRhD&8 zuGj(M3OXq7r`=N8wGzmO|9Nt+Xd!rzY5d@g3i}*IXV0kUoaR_Er)0=-XmlksT613s zPPOqbo4#O~&vlTo1{iBM-*M$+@gmpmWzomVbt%6-{jl_-FY^C&<3o{N;O;xMm_~1#FrQFyaG6vH))Uml#9!Whq7^T;r`6v3>W<}y zU-C>JuKag!_EZ^KdhlEDSjBBAmMugdY}?<>*j6W>^!GJ_x0RxK!~$qtmb16IlD*Z6 zVJV<&#UQk3lCdz-!_PT8qm z0w&t)oel%_QQwQ(-vei;+W~9@!!wO50RFi9!`ZX6L;J&Xd6x8!BR+5vG;lJsa0>X9 z4Q-r?Og9Z3EhBaWTT;r#^unn9&6_(su-k~`qFW~$h%Ll7pFEEk-1+Q%$A>eJX(R^D z!(UH)eA|(V8~O^35o?Lo4K|G>M#AbSiIYzv7Q!|VGOcF{d0x)HWK- z|De9<;W~ec=Gp4J@=H$H-?Jw28h^EuZ9X8E{@WaS5^Qi=iLKzzKhlxOe!#eB>SFk9`&2a^cEg^ZDbzO`mb_%8|&6(sktj zJ_fyb&70l1lKR=;&PvK;kGKC5{1fXqdFWB*;B{iH!r7CW{m{vSiG~;R=|fMV;U}+A zpR2n2riScW|KldaweTBxz*uSvA2whD(2u0J2J(=~p;e~ql zU4Hw`$@O05G-+nlAH}AAC9ut-Ed#om#12-HdbW~&#VEg$el6hr0^rYeBH%ZUmr!4O zHyZWcG(roqr;NTo%-*Bdyi+>yv*3%F*!dfDzE;iw_|4sE)2QD9JOf5ny=(^gl+CyMWezAhgI2ta(;n4Lk=+!bSdwWDV}=^WY5Lg9r9KFdWR~eztSpk32niHuq;xN487<_3qeX>|bSKVbj800st-#B;`=e{^=adu}H>!mv} zVXAqk0jO!TVv)|KqALBa4_;er9J>xpY`0V$rDeM|W#~7bGwn4_DvCYz$ z7-xbpieBVEFM|9uwnfOK?%43fx3s}m+_C+m#>)Mf#wH(?@ZX-(?}mqKjwhdxy%rzs zBFUr!8O9R^`{^IL!P7Ex3OGy*{9MaOAd@GwH^73gMv+rFV^R0aZ+7oH&JOhd-F7&&K}*|_+ndyUx3&@$Ohwa>(+lh6_5$d+m5onyWifBYry zwKqWYE(%^k@4EcfcUN^@*mj#>>H7om6ZA!8bRG&mi>^}k=x8rU4s_w|3L}9{@pa?n z$VlIn#>g=A*_vEB9K_#fQD?AKz8hz+*dfCk&NR=EI!7Z{-n_>S!Qs=JGFr`zY<{;R#C*ge#qXs5;#W-M--Uf#*R zj~o~q&BKT7c^0r2?Dp_3xy2;!$eCBjHp`aD9?clp0Z=-7Q!?2R zaNz(lSrIasT^~F^U+xNeXEpaClLZ4tY!)(^=!*PA7Cxw6xbS-7JCI)&n`sA*!-&{#+Gj{$vJ8k&0I<>rtLeHOs*KPoKDdu&T!cgW}SC~ zi|Nm`ZjZlU`&hfsA2KxulHJX_s(#}55@&T@gzZMp?Dc@`HI_YsxwhA0cWN#6q}E~! 
zV-XA=1s^PnG4T@(ldXw8>|G&#y3g1B6z=)i?-iproqK-vd+vRIAhxZ>i3#(w7pIW> z5cjeJ6mqYanS5d@{bpcl;W%@3!?^PLcIMJ=o;|hD=ULt0E3ePj{VewK@l*L~6~XHG3V z*SEUiT)uDLPn%k}C~I}YBHn{XelvS&;io6BZum6s?fWTH3nNojH$-?3evvN`pJ$TUgw#vHV5bbY~7h?zzO22v{!r>I-pzu?tU)mZw5I3Ft}qFV3ya`gQC2Po^6aoORO00 zZ2hk4e#(zGN49P>iRGFYi~gf2p8JA$ZsTTir1dgy*Y|gsi3k4GOvL}`^5KpzAs4a- zN-&XsqU7=D&S*(iEdTNE@6`S~twZs@wG&5nwEoyk?D(mfxc_Hn;=cc&eb1!W)*Jaf z>3qBK-in8erp6`FCnv?Chn#0^E%fJi<_Ke(8vSubS9IRASoBMtMD#|_Nc0HTU*~!w z*Edd&87~>P8m6(w@HKwA=(5@(32Xi3r?hXCj_V3^coLeI&zxai1dp%5BcW}H|3dzYVn-;| z{};k>$*P^a!=8fd1x+*IckDyl?;)2IIQdgx+s3@Me-gPDnib$1Z^Qc!^FAM2kj9kD zvxccg7r#UU^IhXFVJ{(mcTXhYqhHolz6T{I4%qc_hPqh`Gmu3&?|B&8^j_b=e~1_G z?PmH>M?XSbLpKuSOBz=GDJYA6I>tp* z2qU|fA|JwIVx`z)G~Z`42Q|B(`Sc0?W6Sjx_9iC|;VgTPu<*bgSGmT+7-}g04aShm z7^*n?#{yoeCmWS)QzwoXvFnaHXY!7|&qBAC-HQ5AaOxifgY|P#_{9D$?_3+7a?|nY zPYyl}je$=|Pp*wm!{AdN_%w8ZU_P_67Fe8sFOv`ABtBgR&wX-PS1ESPx;K_}`GGle zb|_E#%ZT|f@$1H@r!wDoBcp2@d)o%+W9^qp5{22u!CL&e?4`-B{d!5_g0h0-d zGd9J)ttF?0z8`!wx#;8auqU@}1#j(+`XL41t z!RRVIB{xX!aK};epcO$syU<8`u!(>r>$FhDLDS&O=#*-vrq}=Tf1!b0z@xV{FyJ zW39PYrx`88#?0LQ<7HjZT_uS}J;RGd8@K*TNg@+De;snaDq5~zVYw$U-vb#A9FAGXe<`-85)P$8ZF9&cViBtpz%DNyO@0LS znTw{d$KzsS(Pr?bRPTNGYCgqScY(KxcMajgLk7Kt)~9S2#DVdRruMBj3EK zGm`i+=ce7vI{JHP=PjI@R>QezwNFP9b)1`4&$(%9|FI;o?mtQrw|-EP_$ue7t>@gd ze?m?Saekrgly(qdf->{lX*zcTPc962{$6Z_p z?91SxmFAow__gBQ?A+4j?csPP<7i+UAv3yD@B5hFQsk{n{8EZ5=x4qvsJoW?R@Or` z_jV2^>>Z*z_3++$GrZvy`rBYmtgph3QOj6E)B!%n&*98Q;m?)y*H0Vr3tGg0o?gae z+?P{E{>0PY8Pe-im+wml^0TIx$KyBZn(;p}&xx7n<6iqkwVvH)VV+&2Z`03m`Z+{D zrK{--1GkI`Oyq-XpiS8>&!@g(pRQo8UtzE2JnBD!|1Eu-uRuFgW+HObp3B2sDz_he z>SwHl^tFM$M!3fB-c?9{)wbXjq937DKZ@x`9{nS}t_ymeDcU)-7@DN>G8$<+yEWI# zx9#untpWJGGG7hzY8j5qSB1;j^W;Iyy}q<4M

(;JDqg^CtAqDD zfm5mCN3=GuXLOu*n{5+4BD^AB&t=%b$k}4^ZgRKn6I?CeYPYr5$tx#W&gRYUCl|ev z;WRfc@i_8H~cw?o-O^C-JT`|G(@E*5y|m~|SNKUe3;qFr6*tDopU?sI%7 zT$9IxXReKxvD)w%OaE7Fv&p!F#C}-#(QG{wpD%O#xMVTA?+b{5$T8Nx!&;Kif?wv||GjapX8Zd{$@um4}QtbT*MmyU!paWL*@FKZf4@UDow$$lAJo-ZT=Q z;~w9l2rzv~G zz!~ff_`k6?5EsVW?1RV3-taAC1@+$xF1&5)Wi#!ySwnlozux1^rVr@atsY{*nD?hW z#wz*2wn78tHwz&L`(_(mRlq?u0pV{CHa*!UqU?jH_Bgf)&p6+MyU$}jc@B;F|6;RP zWshZg=L=Vz!Dhj_4cu%b#)?Ogt?o>FJY=Jn&%}+1)!CDmVl$iNC4K@taB=Qh>ECIz zAxw@O7v}@WOj^74d&Sax3z;38uUoe~y)OJ+@|BC%n^_Z&XBjP8v!m^?4oz&mYOfBt zwQL13=ipZ+bm>fS<>qKyJLi!Q-&tP7eg*7zL+m|&lzOeSSez z&2#r06rC%X&l%$rS)W(L;WTxfpcHyoz?72jq5`Bx$Gg=hOsOL@0Q{_b3P0>Q}lcv8CXYb;K z#>74LqnYV4Na&92sUdT!ZO>6VYWG*1JLg`zH1rzzAyPOn8(QqX|2bv%Kj*|V7YCrv zL$Yf)?Wpce?zN}kPxx4J%_Xc+w``nt7E}Jp)Ogg7hk5q3!Q?`FOm}088gz0j z=-p%F2wz57W4_T+!ha^Ttburg2=VM%7i+l}e&jJ`#jlrhU&(vvs`@7W))wuRN0weS zk@9cBZ`|)h*_-S3tA+2l*XSoL1JEF3gQ;WfS=2gKzW?`c^}rX$&iViH1cUr^8FsFY z;gl~_Huos=7lKzeTx6^-B2Tz&2QDisLSI=JMK6~uu@$?RA9`$&quYvn=~DW+Fi8Ig zrWsvR8N;Z~ii~R8U%-3mt-=M#-RXLC*$0ysTgy)BK=MZ?X-E6(bjBk40u$uJ-|$`~ z{&{9gL|eA#SsZJVv^cr}tyiEVW8!33W)FWQS$ zO#OE5x5BqWcc9xaR(A~IAFl438pEOW@Eh`|&3s>c{N~GxqSV`}xMPQ>XN#wQRQ+n| zF98-+wBeQul26yhQ3s~>SwL;%C2`*q9M|GryEJV+^Co(;6MIW3x`YLsqz7p2oC1q< zSb-N2{$w_#pm3PbagD>ZRcfylT@lDCNgTv?&jJX(`3$thEDV~ox zKeXR|hW7DOblH7pz8qin9J~Jm6Ik2enLA(NSMK=dHM3Nq){E z>7O@yTX&Mf)V{u)>z8hF_q)!vb9TO;!pZlD`>@w5v`h3!b8Fk(zzJ+nW1ji({;eg% zlQ{cbJCq~sE{C54mOJ(e#lTnjBOCR71G!y&vj33@$>RiVvpdA_G>Ms*%%MJ>(It1`?f~wrP%?sUL>oCesb2*!odmbzfjB*@jKAzgzRI# z0=^%|3;LNmSLc}p&d3KpK`C0acuTdw8tt|vEbL|aP;0Sc; zi2-a5KEpsq*Pclyr?ATdUm%u2bx#Z#j?H1RV{@3b`on&v>DV0X`8OP!1N<`)MNg2; zA&NdAd+;|Umt%7f9BXLTLKivR<{&?l*2W(U`UYM{ZM%abV?0WKNR2OPlvU`(y$dqMPP&Ffe?iRQw#-+x4zrV^J1II(TRE3u#0_evyiOwTWN)GYEo3zKN4!$QDR%_Wx*`a0EP#@of zCw(0_*g7@qJcspfvHsuLed>B!0)AN7_-3Tp_-3-+2ADVCYWsEjSZ@QV^)_Iyx17$7 zPdMuh9Qerfb_ad1<>8d9FPhf_-jPqRrJ8q(zKl%Z%4H$O&AZd%9^AMj<^1^LF!3s5 z)T;*8Rm^YIIYtX`Uue$>HXC2QvEfYZeViXZ!5O3P5a041@hxwqWbL<+wco|gqBTDY z{4;)(@>Qnj`b6a98te#}K4?1q6#bl=Mf^$1RvX$NJ!zV)C%t_h`zHiHjX!0VX=e>< z&7{jPwb%fMkoVuZ*b`Db1^GS>l|f%>@a6cjea!`nzq{(#uYPh{CjR^@W>0V4?pxE` zY-Tpso71132>x*%MDzC9nLMv(ZZR{Twj$U#&|xHd>RU6A9fULe#JZwi?MR=Ge&$2t zux*9NZPnbnYv6N~70fjsHPGKic&7XAvuZ~;MB9PKGuGEKE_8xJ?}4ks=maO=Svs$v z^*!{-^lx0{*f!OUbkToD9{cyD&~;#1i*2S3+l%^+zID&c>}`K^zqf?_B`O2W?;7-2 z@(ZtDoZb3I_2K*U>l8bWZTIuWbD>F+O;3O`1B|`&w7C#Hb?4$3@39$WN~T0xOuW2X zGz*x)tB+vwc5=Ya>AVLTkxma9$$6+a!T{rJXPma&jvmvE5ZEd!P^xhJ;=8&y*Zp97UT)?R(45#4nsGh$Pm`LrM8SEd%t9xGGI_c zPI%P`Nq0atvSkm>X3)5+kX>anwNi48t?%(|yUg09MNxEY!Pn>TRogZw{n2NBXY7JdM2OkWG%*;JME67+q)Ia`FNPV$3J-V&BiAIpBi& zBzpf<^~+t8lrintD(4;N`Hb?Yv2-SMhnAVbbGJUSeG~KMwtcPI1}|rWmx`xq;`?*3 zpEgV|9?#?2#sStC^1wmK0-CG32J8sX7+(%K>A}msr;HfqGT8n2_&5{UIWn+wjKdEw z9)G|D`~nuXeDdxr^{~HxJh6r2uybS(Tc|yUCvBLvZ3Dg%$aiw7X-tv_3gGRP@OIhz z($8CLI@?&Y$&*D~@3hYF{?3SJeJ}C>`Rs^Y_r$i5^Q&+}&7I}G%;qq5 zgBoOG`SmSwg$Pcvos}XVXf8f#eB;q!+`8g>O|mJACZgNIKNsaOryo`Sn^Rc-?8lFs zOTAPd>~>OgN$u2-$5?&(sCHhThJ2uNR+;N6?74xh&?5Hue_TEE_37!~wdu6m|1?_v z8|IpKqxjj&{V0CHGcwo%DPI{f(<$u`-%Z9ZTS=X*vGZ+oxzzz$R7i?9nGPwnn!Fpwiq zFS3yDD)d13uOEF-c4Ffi_&j)G`yHW4cJ3h24&SwLZ}xTZ&pg9k9O$tNi$qF(aOJJ` zG}`>kPfXHU=v^~5l?L+r z`xZHPlS?07K&Ny2bnRTDszOmv*o)D&;Vl02L2u!&YP(`>esT4$h9Q9_W(IC?DbAxn#d=iXQe!| zaT7etn0?j89{eCrLi_h)FYg29*5qY(=1j&`ohh3T@x7Twk>xcu2v%R5WweA}pRicC znr=^*PfWbUh3Bua0ouGgE!M;Jf%U+*_NDQf3$$Kb{Slu*HSO4I;nh?>$*bH>4t2Nf z{Pgn7eGh&nm5&ZNV|8T*FVE9uherHNig_OxOg^zPZSJ#ZuS$52EMdbP+`9=l{B+hGSh=-$!S|cIl4yL$cB*SSQHV#3Buak0jTe zdmp*rr-OgX@v(4k<5kb3*nxiV3R%g;KlS5JIq2a16E4}nJdJ*vakU*jIS>2O$MN<_ 
z8NnS#NjB$yAwN9;Ew$G}7xt9@aq7Q3gZBR?ZGEM9O4XmBduh1wuPIn)yryqid|XyZ zJY)9Kc+KJD4&%R*3-zr-wDl*xqwAl~F6LKBlMRMKyj#^+j}Kg#3@` zb6x#A=bic@+5S)D1;>9mN{8xF{neVSV@;GpBZUKs2dYfb#8Y(5F?_by(uO++l0_8v zGIS!j1OMer+0T?KPk!if@Q3)0y_wKT*~t_;Ct7OGH%`*Cl)ngHROcv-OZhbLpPhV` ze=@GW3Lla5DD7|i^lWnUJU8JIFEFY+i?z);8}e=E^Y8ZYg46mdKk6_fh9u0@j&k`JgpC9;D3U8lw>?C%6Lui23gu$~y z#!?Tg8z>{cS7iv;z}pLWUe5DM#Raq8La90t%9PNrT6l_Zl)AOt3nljdJi1v!18@4@@^`id!Xu~&;IBx1om#BtrR}c>|^mKCXcD}ite%wvH z$IyokGj6T%#}+x)4=erwf1wlqkU11N;P|)Q*ktch>_Mk&vEW77#wVeF$TS_#C7;Nq zf9k7nbPYHvSz7T>b=Zp~LzH>9?UWuL!X|m3JGt-#d$p4@jN^mnwdMxRpg-8{@BkB= zfY#Pc@YBZq$%V=fFpy~^25!Q(f)4rjbJ^3mzq_cIe8PRZIh%~<=8v}S3?A#Pw;PcgqS-i<74c{{r%i*UwKkS}sgoTf}pm&$Vc#O;fbi7@sZ2v}6B(Ue|#;ia{u6 zZYzM@nc`9MiIed|zipX^>(l%4Rr+GfX4sWGGL1zVmuN$0#@M>Rnp7YE4|^cIsq1}Q zf0S$~TX(H+mvtonfpDqg-{7mrF69r5j}7fkPPt-3?VYZFUhw#+_FAozeC`fpiHDgF z(WrDeU9|CZU$7!8M}4E}(s1xt`nyGr9Ow3rec&hS zfxA`zoLr+g1IaX}`^jv1YTf0w{ZPJJwecnF^?P4W+In-;V-#(st{V%JuUHx73S*oG z*Xpn2zysitZwy)R65gM_pI^QYTdrII4}o9FzCIn>dQ|oX+n?^_BvKCl(|L(4%Tli& zJ;?QwH*#(J)0wle%Dd(IrEf5gW0g^Tt-sUX%|N!Y`vu>UFI+hpI|;UcrWoy(DJSTd%gyKR(6B_Yws(&;S$DV0q^*hHTx8+IjlS$ zDLqg=;=WHoQ#I$ZMML8RSN0&<^0nlPJb0*&^2KMF>tAJVgEw=g#Bp-g&+FaU3B4=e zoG?GM&-#@CcOPZ+(Y z2mP-ApHMkIp%6J8eLfTap0QycZ3#xc3yg)G)Gz0Zo^tA!Q@2v}=D`<`QEa<{WFuf9 zd<}7~SVnf)M$7o-DxVQu6(k1iUC}AQ8N3zzeZ-s;Bb#V%TMe*ANBdr=JtHJX0)v;$eMU@a+2JWn5Uw`T5umNXdm~-)?+ol-Zl6EU_iy1-&Q-0bAl96I z_wC?0<8kBMm-D>uR-Q3#_gR={?-YjK0#r1utewr_`_(yDZe9QEGIrI%*%PQfb@NyI%k>M)O zMJ`2_bajqOV809-UlTa#2iMB6?NqWxiAi6RIq&J66OHd3dS!gu8x!+%p5Me{)%dJ# zrhk&Pd(b1KXH+bR#LE{EPmau!h5WjQcJ|M9{PoJYsq;kjKZ)nhBc~>Lw+nn${>?sU zzv9##Cmqvv}^x!Z%`Xl#B-qzhs^US(~08^HxE*D=61Yxq8ZJPf#i4B&TVLb6u|Dv`-eq-Pbe16B;MCW*)MPD@6m6RJI2H}-V;$7I2gl*#z zJ(Jx_{VfJJ`k{Hf_>Bz8OJ0-g6K8Lu@)i&LzBTs%yhD2+e+teWrhFAPrYyzRaKE4X zHf&K7xz`z*t=#va_hfRf*hTFRZG)fnqYG%y%FSiOzksh5yu(*Z{z7A62RJg4c!ejpN;#N0S(EXmka1Rrv=iQvvw)sBq&^D`WnN8cOzK#9o#*ERkLZ;KdhFuv&RbS`%5%1{pNj$Im9M5)nMxXh3 zrn4dTqFYDPXj=#Ol3&teU81A3&XM0FFV!-Z)5~eU`QZG_bsx3o{B(In{=Zo_hr2GF zZY-2v(2bV~B0J26zUH%EMY`k+=2x*VNzU+YVP z&l|A?^AzJbqw}w4v3Crew$eL&NNbni)Jd909L}>05-csh>w5PVTr4 zJIp5T)z>EEm_Du*FVnsYxgUBX9ddy4jQi&EoOfj%=%T6X_xE6PxP{m-<`4cuTv@Q7 zh_lq)v4@ezHFn~;?@?Yi?X?Lq&h&5Q0UyC32RMZ3Upw>uJbi2jKf-+PXY}z2p4~z0 zRmWxr2irDdPbfKwgSXS>{>>RhZMX6K$&)y^j^}OVJllK{2Lpg7!R%S`Q@6MM7HiJevd=6BF<=-&iz%nfI;qQ=m2u8+OnZE$|YI(N_8YxMI> z`*&R2maZt>eFHjRaDI40PG;k(FYsH;Zx(syX04uV`(0eyZ8yBg)hTXFs}~U6BsK+~ zJ?B*5D`1X$@wxcfFJj?yG356KZ|wc-@a2AdEW)AS3T)-zNdfp)i9MwoUK_&ZbPV5R zA^njptQxpnTN_SnV9!kG{p1<<%v7iC<>`mFR8Z#;@+l6ohqeyehU5XYRmk&t+UiXt z_g280#g~=4EdBh7M)-?sGi%~nWBw8LiC;4BDz^elf;0u8FxbQd|cm&Z?7onqu zQvQk%{)(X2*dQI-M;~N!6Hk`>KjREOQrQFUQM{8|2VJ5E-;3&u6~pg-e>41PE%6u+ z(nme3#1?&j>RN5yr8eC;$U>ggc^7=c)|2^0JA6?&2t(wN4{mYx#M=9dSc|T|q>*{O zlrrQsTHlRLLgQUH#(2NMcSpa4TvE;X4R${8Zy|ef?e_WL+i7+L`p|^FpB|fnKkB5N zLGSUs%;Ni_Z8N7$sl7eo{a)-}*QdrT+$P4_f#X=dA8#iZ995sAmQE4_PzEBdp1SEPJ1O*U&3>uNr#w(c3E7W8Q>(>jw@Ml(p^F zH}gH@PuX$w%-+wRLw%lUKmS_d3DI#79=nY*Fv{z*!$o^0Hm<7Yn)t$kO!mdQ zwn*FFO@FjrXEUxD^vRC5rN3@pP1-F(zB+)6ExQvjksoQpj3Af0Hq0>BMYQYiH9Nj+ z)h1|i4Y2x#;`t7fON@BF+RNB4jn1;svy}5da_eh>j~$PVj~)5lj>jINPJLGHDY4i! 
[base85-encoded binary patch data omitted — not human-readable]
z!aucR1A%c1%Y#u5o`JiH*nkV8=7F#IS@LlYjzXuug7;ibzQvdcrPB2dIQChk6GxLyWzS=HezSXyyt@ZJb#Q18#O@s0 zTe3IZy(QSDBd%bKlO@>V-L2|7bcHf?F6=- z_RtdtF1BJp>TB5}KI5qeQ7)|)M(NLjvm7`B>W}b>Y01zcb8cpl-zW{eZdyHYV?O%L zAyd`AD@Y%sy=(hcSBy3;Dj-Los($*_QGAJ)PRuVZ&oCycPt6JJlZ0>a@`)ow_k(a-_4m*QFQ<4AAVLI z4 z)6`i(nZ82yE;MjPjB(oxTW-F}m>TC8d2G5rG12x9rux&}{Gfm0e%%+R z|FQ1T93|)7n0~L*$Ke5Io{7&4%`;41Z@PI_k=Ofa#&vm46>E?VPl@C7%5nHiJ$_d& zuFy;8-r?|>xAPJ0+HCWgdUW-V^4`s?1BKb^Ry-Wx{$)w8L1FPv)M z_YCiCaO@ld{G`P%{T)WaPxrgP&*vQcM5)OUj%rhCa%!Q`C^b36w~)t^t$d|_y?3zH zrPEAkm3sAJ&WEO#=!fBxAA(;lhHqBGKNr#OlwRE%>|Wj4#}n8y?%MC|#a4u#-1Wc1 z*&~uSF6~I(c;^|lYB_!FN2fWpoVQ~mc=l0(J(7+#3doC3Jzwb}XYMYpzvIB;;ycW~ zbNB841Sa_t(!ta~zF#U#V^d&iNP(&Qvws_y-pwBR(*r|$NayCM=Ync4UwDa{*Mm;Y zD|uPHe~Oexpp7JJlVn|bCKGFY6(t?%y6Tez@TL*)8^Yg?3%} z0`in}UX{)eikyYNpkj}GUKVFzH2|Bgz4Y&ct<%7hi=QJsX?mWqueKt4wjz6!J0Vyn z@=VO(v+9GkmA$kTnWUa);v)~eL;L0v4vu2*k*)BNCisZftit@^YwkEvdWoTb4P=wwt3{95mj2fX?MSamTpQ`UBob~u0pIUA zj{S)4D7{wpoAlSzYu1?5<2J%;y<_(By!6T)jO*%^s-f#AZ|eAqM*AthzeVp_gPs__ z+4rn!9uu5d7ArLdRzEb?(qBXLi>W!W?i&sMd*@FZ2VL?0dB#1@Mg0^{&^68i-o zovG{P+UPF4Yg`=&+uhZH)DOj-+g+Zyx%YOHqmrs4#fZ5*;P$(C%p9g8y_>lvpu+)Z zP`Y6c`&2ffWdDl?)bC=U)$gK#x%J+%u)Go+Y*oJt-q(O{CPM8?3>gT$Te5!xu@Z0J z1;4(h>ARr3oEY|RnA|GiQaBZE8{|J{e@Kr|j8S=~qPr@5$S0n*;ygR|V&@l``4{gc zFIPOQ37RhAjIb5hhWND(%jSIyI}c}R_Jz#hhs@8rZr*pm50(a4>$v~=e*B?-DT24k zS1cQI+Z1A2pO|t<%vW>2VdkOx-Uwg50seeFeEK^0^~ceDuWg5aTfLbojLC<$!3T1A zZ}JBCttJ1}qdT|b?^x5s4XJkM*UIv(_n?FFz9jGJAs4>@`ASOKto;EuBz-#{0Z$ ze!XA@zQBJc=O*B-lI^dvCoSEC`rCV1OFK_M{?(}+d$BXue}x?EyEvzWvA#q9h#sEn zEKt};Dp=Nb8P8?;y9uOZV{ z7i6E={?6h3Q{3Juacu8)M`ms#uCUzklfHn=RBmYE-l4peIM?>0Gw(-d)*5v=dm$hE zrM*8TU$EPfX5PtKu2AM$mijLGD#tQ|Hbm(5+Ont}_;U{YFbu7PPR#vH9S| zz2A+LKfQtd#{B4Gyru_PVC^^jObL8-@5bhnUvyv3VC!P$8MXYip^KyN@dDsat*LO= zGF(LsfoV(%DsO{Y)*!hWfr+E84v?Q=^~Cd!L4TeEKb{O<&W0{?pvzq7GLN%>Mz^?e zy6v~;mrW_~wG3?VwS?(Gt9fr(VC$Z$>HBG9two;}Tx+IJ5?uA<+ep{hwqa_&Vkgz> zsBuC+H1OFM;p0D!ebvsjV(Oo&*QsW8+KT1)4V$X%dfA&68cQD{{-9hNjkg#7tZeHF z`5mF>3iNdK0#4Q;FLv1ax#}XG)%w&y!yB&aFEsF>qFi&MGSScv(pTSmRuO2zUqJ>sH?-?Qvo|OK00>gbEkW z_=3pzz~L;e6@r(x9pF%uAD*5u9xZQHrN_B9WLyU2;Tjtqxa`vP-?tRXh zs^_pt4)5-TQ+I8DOlJ(}+zZ?9egp5abo_Q>WgE44$|0}(JMg_=fu&~!m`9NKPt&=9 z(g5-Js&ivc2=*xQLGZ^uRaGh&+ZSH9%OnPH+J_B%)_Z63UO(@JN9(V&KYT7 zT#gNs4#tbQKSmGU0J_;epMwL>-1ZQgNcY@qcW$pk+uZZiyLdLoeU`YFp8b2^6)tp+ zQTT%wmaf+xWUVzP?T-u5r)sW1Cq&ll03SAvCtJOdbVjOhRB)X!cgHO*{&g+TaUuQW zdO}9YC28=n2YWT$yk7mCvu3YhQ*+OiJR64A;;e5E^x>}OHgbEP;1yHd;h(RQE=AqF=h!r0adgb%Bg9&}Q+nrp{>!%BkhE+#yC=m9jf1UPd&xVmto6dGUSf6h zV#EJY@)_Vgu!DR>o^kin7YEsA--y>+{9im8TJK{I#U$7HzMMHU!LyZHF*_JpTL7)Qyt{Vr zEQ@!C2fy-!;_?;Xz~CN}XHwtyZ}iY@JC#v#7WXBnr>weRC`Z2ypV3>b+aSG@x=v=4 z{6E^>JTC6)YW)9vW9ZrLFTo!|R>mM{!MqCWloF|V2V zeC~4Yx#ygF&bjBFJDV{zF_xV+_00WAQ_VHo@;wL~QtsJ&^f!GyGGyf%Vq%`BuG`cj z@BFKAezb}`b}28qOFi#5EiulFrp+Rj8M&O!+Eh1lC^mHid@k~(8(Fp$TCwK3bfz)a zjpqB*p!Xf-`xnjkrPI$YbGl1M%AD^2^BB9)e`?n7{`1WCH<|5kzH0dU4$2E{?BspC z;1Sqm{TE#Xo-o!RbPz4(a{b=$&aKL9v7<#VGR}G>M2T*s{@l=wD#J&vmo@YmYF)V< zxc@}II*kRcucMT|f%1Q$ytOX6#--^n`C3B5)cukH6Z|`(kJA4L4bx2e_@59kb&mqm zbjtsg@}X!;;(ebW*NF-qw~>SRqw}~cf$v_vgU7si6nMtd{@bD88Ku0P^7|;CsZ}rd zV~BpbN2z!0FGjs!xJpMEcMs+NHw0XpzZx2^f2aH*DKGJXs&U7nK9A^nX$!LT_=g_> zuh1fU57^CSY=WuRb&S&P?`iii?OOHR*P%1fru#<|GK1UL{O!=NZKI6}_-8qDL!A1E zxf!kRrciwo!%|dj#IQK)IWrMF4&mQ|k$Um7v=s>q$I0iu1Q;xO5?!eWI%<~K8Dwn% z`|g4AYZGTR$|(OH_76GyhvOjtmaeBp!lEt-4a@z&BJyNC_{gQrcsn)%-@5*Ut`#WX zHs4~%579GZjkI8sybv;meCizswoCY4&VBMT_|D{iI^QBk(~zT@TKr5Tu{gy&=q2dB z?lDRW&&l0CAu~`fv~>DdVt0~##u_B=o2TEOF1Z_3V(Tm$GD_lmR!o_gOq+R|hQq94 zZwS4749ph1q6ZvAKc2xJrL334@5T<(WuZfOBTd{u5dW)pwWKo}3|SvYR%+sCYEs 
z-4_JId%*icV9;enH|x;#X<30Vh+P_bUX1qIg4$b8d#gtof6JGE_oqSaXVU)t0r2_? zzZkr0gW5X|{oF-+Rh)OJ;=GF+pP1z8TgtiEKwoyvLSwD_fu-*wW6s!SqL+ZDK)iKa zY~bxGWM@2mTuUE<$6+=da=L30b~SOCYSQrf-9I+!&kCyNfRA%S^tTziT-wi|KU41O ze4mRwlX?-l<3&NV^);jX@=?aqm;hYQQ!2Y^fmtlZIlnB3rl9}@|y99PR~67+X8V?L#`{SGk~l_ zPtGYz*0O{ywjob)MIJxP`()oQnu6Na;i?-+tAXt$7oj@OUCjkV>u3QCs(Q!dEYbpPeh-wHlmYYg99+8TUv*Ri+plrmfD3t!7QxnRC7z1HCCuK;gg zKc$x%{j8v$<zwy~*3+s z?ynR$2Ry4KM#NVC>F>xJN1JEtrE|#hdGsgitgL78%!j_#GPZaJ`=@+|T8F+GP)EvV zBJUidl$9}B?MvBhl>Ha$o6zrie(UsG1@5bc%Pa9K$-LJyPoZF#64dT-<_!P4B@1)3 zJGHaYan0h9bga=P2}Xv^dkyxbz)>W0!My(ueI&Ocx6pY4b*W%JN&WiB_CNby_J2kh zPY>?`@g%xpi;PG32p->zOdmF{{1><`2d+@~dDm<&P-g8!=SB7zZTJwdL>To$AQT5@eL* zYVNpXIDWdFM*T6sl-$i4bUx)oetyN|d9l5|5?aT;iWs6~H}YY|@HX@AGTJ;rn%o>`jZgK%i zxfe$0?uL$-8PxWAcvBsPPG$s^&4kB}2hBP6_Ubb8@yDwdeE1$RkpElX7%t~h(F+77 z5)_4`$LOGbwgJ=M=qD8Io)=X1R$vmjVCq>p+LX|9yn{UinPU~Y2;^1u1_QQl(MI6& zQgdDJ2aj&kUslifN^G3(Vk|CvW)1kuM9=u`D6syO@_}+Ao^kzTl(K&eD!YwwZ5*ZS z8$o54vmShZl(N4JD*GPoK1o^8PijZ0^GZ;i&()yiVE12yj|ZQ|G0u5pj{@IKUjn}T0Qh7LzZF^# zdmt3=WPOSDGlJUBr2S*eh46DzpnioM3br@Yc!RgL!b4JCbo#(`B$Aj5@jo0LWp1N_ z`d$x?-y5Z@VwMeDQwyMl*GG|qN8dGgGq8LubX!e%D?aL3%A7fyihQ%;qqKmtsS=lA zm9GmZpH@3mzA$X)yxmmFH=X6Yopt^+I4@1`oz%y_YwQ6@?38sE=`AabJs^n%uz2xn z>~RQ=H-fi+N56jx(QlU7uQkV3-{NPI`@=)g;yM%mn~@t5ZxO7cb#TXgpzly@neS4^ zGs=8co9zY8=QQS1Vl6}U|6qvnODVq(AEU_hn?|9t|1|4;5gBqvh`x5x*WpoMyD>!h zY0z@9Y1gI!TcFL;cOx`qkJZ7=}6o)u4F*~z~m9zlGpd8>!F+hMl*678CHV@_IK z)-W5m1D}zMbsuFy(b-?#G33lJdSQQ2>ZFWP=dBQRWdB|2kmJ^ZrP853uIKkEc{d1d zQ||t1l=iop?OSv99&%OWUMTpRgUa5@JUwK=fKKgxJ3+VQww>`y$9oQ#cJ#LdujpK* ze;f{9;JOg1&ov>&e=a(#HtPiPzsUbdqm1jpFVV-{A^H%$T+hC<$PCW)gv8+;U!t!Y zLiA;)FTb3d35XxF{9w)yeUvQrFA7z5wprGi|K-fT(3Y&vVpELfcMXgocAGl!oBIs? zXF0l$;6QZZMw>EmaOG{p<=skr-YvxGEh1iTA@S1-$Uk$Qakp)VF^ms_OJtmz{716x z%l$N=#$pRG?sL(~F<=(Gd>E}DkL5g?ZpRiFcC6SmuTr*=|J6GN8w8GRqrlN+wkdqR z|2$~^9KM}*0?HGAY2;lxN1fk?fLmyy1elKB_8ai65(jA`K36#t2kGb}x6wDq<-y*Q z#LKJ5LPx1mADPGbns4D(MW$}rt+cLQ;vASqj7=o(>V6jGjf!U9pWl*;^(DDukGYkY z3b`8~je9TL)R~J<(gWTy(_h*saTadi@0>`!6^C&i$)YJYajnEcwpxDu)$FIo;Zxhf zeHpT!UPIifM*9-GD(BcN+M33*S&UiYB6iVk88o&#-dV{#X&K9iuk1X}*;+DHNf$bH zLwC88@DDJ)T>2Mzxr{gwOBa8@t*ldMZ*XNkc*=t=cQ#2p>Epy8TKY=hJLO}4!b8M^ z3Jv0OnUxZvZpIX4eldM=c4bkJ`8QmHxo6-D>^nVHjg|XX z;G;#k_-RwQud0r{-VEgJGU7q?c#Kn@Zqx9$ivKBUl8zSb6oL7lmlXLcSdDnI$Ie!^Pxmm6an%m4fS^I+PN_nf?E40%#o0ucwYrlQkKVf{?z1e?o zjQ6w?>k16>d$>R57V(J#p9@-)+*n>{%*r>h3>u4v#&$qsr9xxGeOoj(j=rm?vqYc! z7~(Z!Iqxw<7d0OZ&r7*>FENtPpcfjf1Fzf-TFJgah8r4ef(EOh!A@vUa*rq>=;Y{4 z{|#Tln+3v`O0&zsxxS8sdM%H;78L5F5c8R@o;lx9{3QNj?8m{CdC=;Prc+Nq4-@`< z`R#swl>FJ`qz}wRZ^?Gy35|GuE6$wD!$!G>NNtYc{NV%VDO<(9kU9>|{`5c->CBI# z=E_#>#&l1NdIx9y$;GyqdLlh-&~9< z)crao`Qi1F+p+ZV@OrT;57iO_Po0mE)uZK_IYu55Ise&jp8s4w-#Dx12CkSJQ#>xd zlT(#fQ-H}uOtuF~{c`I7vc_X~S`P#h~^4vX9mk0CR?4?#OQR+LcQ|gyMn}^B! z%=yaZB&SlpjX2{j&IERor(1INodCDR<(b({^pi<0s7i3+9%ssxahw^@W62VlUtVy! zVHixRawu$4;V(BM2L&Gd9QuEOCzU=8S`M9?CDa{PX%{-v#aD76=UhvO!H}4Kp|M!# zjku4k@LWh*i-p!UL2H|s6Sqlg>koWBTAOOp+NJ{@bQW6YWW8wy? 
zZ4sQmh|$m9C2$^GXxa^|b-*17Z_G}OzR?)bHmGk5twTt4)z3pGStg3@mcS zM(Um5ES)RgXsZNTmGjo!7~9;)7@Gse*vJ^01IF0K7~2?QvpL4C<{0Ik5*ee_rZvXk zb97BXxyT+c$0+sQdn{q2HBS9r7=4^^&ATb{pgB&BvAnZD+bDVd{W~Q$k^}ub5M*SiDHpw}k#?}%4gAP1J6QK~Ra@T{9nXd?A$scy=p)yj*w@Z;^j9zWR6O0_M&#@dm2cFx?*Sc>V3u8%U1TT@Ek18 zE??QVVp()#l;_CM@3NIWD^hp{EoC(&p>wYEtbm55`^0xJy3M#a*Vlb36yRu*+<44G4Y;@Z%b>rmPyDk^oBge)ztQk~f!^Qw!||;57vEeEgy$vn zZRPHeIT}x0nTH2>-c-A|zb@!Za4NJixbkc8!;SF84e-Z2_+&12Y>^kf(e2ou(}urR z?x_jmT{vaVqih6qGyyK`=>-$BnelM(F^y{wm zm%l)!psTeooA(sY%f=V3V83c0`5L7)S+nJ&J7Ol5*_Fyj^#1gxj67f&+?^P8HNHVL zYFQ)jJ}c)PIp^f&+`0Q7zs+gFS8e488I}u#`w~*2r_=WH&^f}n!!4{iL%}BOz;+E+ z7PXSQ#K6_0<}OuP=d$Hhy2%-qOC9$p`9(xtJv&McmZqS5{nvpvk;|KO|Fw}9kY`*a^bOVcvE8KyRm6q=JRCEn+eZaV<(nGAA8fO zh^!?SkY6{+$eYy78C#*DmEp>|&L#QaS;=(qzHB1=i=CZ<49npCj0x3yXG~~6Jcc=Y z>JoMSCg8Xs_00`WsZpz+;GS>MnOMK|9ERP7iesFo5t}U17MYO|uVfvwM{^(e`OPQ6&wzbGvzxeAjWetIeUxLAtj+_RHH}Kk zDmUT%lyjw@QC7~Q>>20GDOZ%NJ>YXsjFMT-`0ru-<=lZ&#rXRe>oW4A^>SXwfO+zj z`=Fsc$nq*R>3(_Nr$%LXIFD1#m|lv`&MH?YU0Ke!_DDI}jfytjRfWgjzj=bP=pOpG zmp=46b;yfWuEy!*+bG}68KIBB(dsGAY{>)NflMCHvohLFSEpPlyeRnUv5#qfo$`W% zNX`%$uq9mive{?(*r7gEa_R;4`4o9S%E@CWFm9pzcyJ`J?}^fYJz0PL0?&IHcSe+w zwVO4vH&V;0x)Z-PXGyF6PHrN6_u?BUu2Uu&4lIrE9+=Oa9&1^$ssy1 zaDM<=Stj-0P$~wn2Wo_Opx5F*<6Dq-I(;Obp^rnz@?)%l%V|@}%YOqWNdY*C(#zaS znHk~G&l}48O;fnn{fK{26g*q9yYh08d2b!|PZ;MKqN5oO=8}HZ!~R7vj6)w6{v0`z zvkVzDSB<@r+z*u&jV_Y*rOqzo7Hf{^8~T4}wVS()qVR=C&ao)`WMVHxu|IJSbM_H@ z&^6V z)v07>RnblrJe%%(VFCOa=>kTbkCW7#Kj2pklnu^5vo>ByzSbnpl5iJX7xZt$WXCsu z=PhEnmAEK3a@-Xl$0ZMzCC6nvfijpJOZC1g@|FhVPz#jxOm{~!BeG)&a$kd2u#;Ms#4GxG zCVE~wvBy4q6GD5|dL?nmQOJom7jsW+_!RxFxGL5+g>_b7khACtW#*3Mjtkb3M)t}? zUdq{AStD!Mr}_i!6=}daDS2WpxyP&FquT1$Xja z0fu>uOU6>dSjwZPOsoPn8CS22i#9~RczBC}YpWfVXV1}&=u>op89bNux~x^1pZj;ELhx9W zx@!Y{y~gju$gkBGqEA%QhUhYdj9ccg#?<42@8^N0>f?zMX?jNRtmL$jE3S>R{B7il zYa&;i$P6nMA=v&5&V{|N%8(g>u?Sbf3nKI4!0AH*5AX^+4?$BROCKQ6Vxy;!kDop%6V(=3xfoE00%m zgzjX$?gM_|p>pt21zl9Lw|x(D`_|%nR^yVJ-K}3(S%e!OuOyDb(u)?|T*x*R7?D;70PYo&Zn9*xN$WMc^Wr zyVS9(7Zm|>F5}7LcNyz>-6SPz>0g{)Yb})dka(8g?lI;=?%7BqM`kDXOegkCXW~%) z7SUa;vDz6oaqjCLW{fWJ53=7?7C+$^4=~r3ENLPpM+=uZDSbFjoS<%yALA^#S%mcA@((MP><2<#85Q#vpXI)*M5V z$ScNR$*iV;c%VR@>*SuVD;d)PU|2oqpJnl`%)5-Qgzdkrz_d(U-^_ zS>{#Pv&ry?T{6SW7fF7poThOZ9+3+U-dnLv7u=6LfnHc|uN2)Xlswb-F4h|IOz!m7 z?4lPb-hS+zG}HFd!#biuowIrmqf3qp5(Xin;+^r8z&F1Ma9 zrVAD@#z)AJcyg7x&dKi*?iR>4hY$9bIHuZ=O zQ;!gte-Jq1b{lJ#54kV8gpakx$J&)PekgBJCH0RZ*O#+i2`;R0=ds^4+L%~NhmEPo z)NMcSnl9_V=<+fq*=LmX+8C#?rtb!hzMwU|STS&d-2=R`raJ=G^!Pq~9W>YU4s%UU z$4?k)P5+-=q1SX5KFa;*@RE})PnRp4g_bT-R1M|kb}u4ty^-CJ=Fz| z{a5h%b1k;mHP~bY_$Bl4N2$oprF8I$dWv>HT}6)1bz zyU=B?#`hPo!)1SLGWgxdSlOQ#B@RON>GIE66OpAhbwsvGlxXvf>QM%s99SoMIJZq1^jp}z&{XWQ-z=8vH}cKVBdtH)aT@Kea%kLcrR_M0dRa>D&B}q$bR+wxvlVBn71N;G zEFt1&g8gV;gl|7yX~>K~zWsz}UzTsXemIhEt0x$ID|^e9u2cq2MOSj6CyTwL^D90E zjXNFV;8$6HL&ePa3vArE8HPL!N3NcSe8tbTl6^FM6%qsDXw3IUI-RZa9LoIia8Jye z$(6eP;Q4d;_A&TTtJE>&=|Use+D`U@B@gfn?`3)!;X!nA>^QBj8o&QnfNRqa@O!bA zuX!wT<2{uxuV~5pisuCLCis!K1&JN+gf59+8d%T%eGPKfca_pAXFkdpTdsDMJTyWKG_WBTq5z+KT}U(J19+G@$m?wf3kbr)q~soRUcvyib#Zf1qC zr3H;`4P&$FXwp|ic^TiVv&{Q-=DbU}~ysd(to=2D~Xh2}mpEIZO{GdF)D&RRjY}89r^zRH~zk4jt#_?`EWhPKI8eW}9%t%=MY4(x_*zcA-Y&Z8` z1m0nwHNq!jRDHj@WYc9YDAm<-WL-%9O>%bki{akz>f{w|X=ifHZD>nj?(Ez>r{Ht9 zd@syXBD}o4Hle>z?!klp37QyC;eGK{NgnZajOTgu@6Lcdl)~_#J(NwXVVkgnNrWcCcEwJm^`Ka zb$g;dCg<~)eWS59@$@akKpaU>1{Bu#ws_^xPHg)|%679~BC!y)%-2Vi&~EMR+h4ce z-tU#zAIeMv4!5a8ZbF9)gtG~qZS5%fqRSRH?B4QkGmpXh>u=r5-K_N6Gn4ffdHf;% zUkzHb`mcs=Qja~huOw3QtfT(c+QjWg=HX9^A*TBZBgbR%CHN-6v*hYfS(mG)>Gtq| zR&!Us#vG|N&elEbT`;GJ>FSAT7j-82?o(PNCPe0^5+9q-ziEN>OxEfL<=J4{MC+O8 
zP}q5~zVXnsS+|7q-eq%Vd(x`NXFWa4%U(pi1KHFIP0r)H@29Fa86RE#6-6g~)7cy6 zKDATG(-Sik?y;mT#;p64824%R+fLnG;5{+JIk3ISp~5u?3ZW} z*kn2Ce4zuWmzLVOA)UQ^2YdPG4y(iPO{XA_xmU3A;5_HRcc6{$PAdQ|Wnl9(+uQF0 zpX4e(!2ak5;OGT>cf{If`Y4M$XzjTV-b2^$avnJlwzbf0rT7cE3)AvZHsSNE!al0P zKB@|^-#ZhP*>5wp{sLv-cqI43*|VD+_5_c_)@uDBl49nCW* zaepwdT5!5YxeKj{yU=bAnuDLpfBKK$JJ6?aAv7w_PfQeFGix6Ez&*ru_ku6(A}uds z4Clg^=*gk*1(@+=-$cm-L}L%g~(9ih7Uas5B9RZ{Sk7`&D{#cYJ6W2G~?(< z-rh@nAN4h!=Yji~jJYQ=(ldkq4ruI9Tb|c%8{hW{bhP7&c_%-D_x!f#zR#d3+m-W9 ze$0P~t++&u^!ROK`)o7ko$Te=YT^_6xI66z;x+cG^86eVd+(XRGTs|}S zB(!O%7vR&HmqJ4D1z2o>qaR<>y_HoT0v+t~we=N#=f66|_XSQ+lTYj3p zE>q3EklC-4BVYeeL(C>^b-T;~n#y#@O1 zi8-E~tIt98g1h-HlzV%uI{1>T{kmaw@FmIKipD*tR$2Eb?KjbWo|IiHdYh6XdkKMU zX~Z+fnfnH@CV#Fwpd4}nf0HeC7=J#s!jKcAwI#eBsx9I5;I=Bwwqy@N;!(se^`~-U zO~2?Y@OFNIK1wWj?bqf=KV{5WE^EDhKV`viKR~3NJHBqR`~U_#Ehap!%dr1uC^>b| zJh4OrbCG=#W0M|Djz^E8+azLJ`Jo@Z59Z&A9ExTfvL;zN@RT>uk!KK-DsTvmP)1`s zK98)QYK05Bk~Uo5$=~O)D_hr7zjY$-p~1+G;_X&BA8}Xgz0Mnh&E&agk8XGM**zOd z*k?wk9`I1Fr@*l3 zc1iH_Ec(Cr5OiFz_H^#Th5{$#@*P%QaO4KhKKjOXC2zjiAKX1h8w0;R)zAc=s%6Ww z<=HxLePkuR#H*YGe)_UtFXC>QF6y(J(em!Y*bP#~9II|u^z)uN&VGN(TfA=<8Gycp zo|97uFST8u+%J0Yv+zFmI}M0mtCLs`%db@k&xZ1AfwS2cnDdgMvL_1+q2{HBKE*FN zn*U39>~&Lb4&+ziF;@`3Hqw@?>uvfzN1V=M{$=d32>!sY(4maE7yQk=qI$zz6&=W~ z49rt2pAq^@iOPHAn-1s6Bk1|5|K)xnVv)Zect24dbT4;!4X#{(tu-H;>t<}Po3O#YMm+3|#3P64wwZ_eDN4#a zr|#jhmwRTIbW6Q3q=`lDY5p}J7)@fhrJ|r zN4oL^cj10^_%wYB9ln76unFDUIayh>8r^#y@k-_4>iiozh!spHCLw+CBL`V;=3S)B zuZrWGCBNs^aL#g2sfbM74elMQuR#y*z~8o;_N31yIYaQSTJgTJ_+$z=v)~e&0)NQ@ z3%0NDJBm62e@To{t}fNsk4i&tll`bhaw#D%7Ub~pVP`tvv(@~T_6x~P3w>5feHq_J zS1GNHjQ!S0O3R7M(3dF3n!ezCJ|AK4d>U)`Y>rP;^!w7&uU2Bxqa!n*hgR?q{ZC>f zESz>Sj!@VJGDhKVL$=FzdO{-7-6V zvQ_6V+=GwvQefbWSi&a@nFsvIKKg8&W`A4u(+;4kNc{IfK1YcKf1l3@``G3W_G}#yR7xMi@eD5Mp=g`-$m#gcf9cfS66WcY0iO97CKvX{5lN$ z{)}-hk5*a=gU3mI8KaEP8e0})(`ZZX1+d2VY>4ss%Z87Sd|&#QG`?j_wX7|}?m?6P z_;Pi>r<{Yt@2RMh2ENIdj^}YF3$Y-><{$)Y6GFiD?GUj2DG0W%uNtuVW-D8dLK{Md zLW5tU@6+g@!f%b}DQ@^}7SHAEg~XDVlJj(fJ)$p#?}4RZ^&cjLdq2jf;)X{x_^Jz@ zmHPwbEQ9O*(+zTGVAqApB5At>dP<%%;eI#2g-6n&-8yf4m~bs^I0xp?xA@s)&Xy-A zTiKh9eV+Jki$Ao}m+C%R@$-$w6QUzpJW+iyv<@#gDvf)}o&2}xJ?dWuy$AA>=*EJF ze*(Yw0)Fzl!G?>WE1@It|NG!Q8KW1tq|6C;;w0nz!!#wU)uCjfpD*xMS1;I=!AvXD;1~a;oCzdXg9}AbSNA3y@3+` zCqHUKe(?PJ%KT*Rr|@MNad6^e=&^^rhQHx3zJcBpo)19#K75Hs&ZVJgd^-w$a=44d%t|$&yu4sgg%9-PPkgG2%%Ivw(Q5mfLy8QkOxg&D+6TVO4H(4LW z-3iE`X6QzI{DskVN$0{x47f1l@+AFS=>5=Xu&n)?D@Yb;rYz!HmqqL^4wFUlKZGn2 z-}m|{N{i4^ComecBz_>)gb?!RBe^r~pmA?e*Vl!9@^e~&g}V>A8(OXGT8M5o8Jf9` z_L2-(4k8}{VOc8wL%;%lVujy?@6dr_b^e1Vli(|LOIFmURJ}$J!dNk_%g7-(K%bpGQUP=5#;Cjc{-XDLh_XFh85&T_3^Z#VNS{R>! 
zd$}tksW~PD?pFojzMuN-7Z~_=^DX#4NPfy;W0wCR#_R*eD)!S~_7*#-;(VP(2qX7TljcUzlIJ}g->%SvVVw1V)_oEuLaXc4Elm@t3jV9 ze>>Rl2;&kO4`mxj4oh_U`rw$DlXvG~8~je+dsm3rxbOdf8|CLCanqPG5;t|g2tIN` z;l@GkD2Iukv=jc>zk+{{u(lm9P+A)86SB2x)n6&Hk41Ez@1gIIH!Qn}xJ{qil$F?977RyjyVfglU+*(lN0BQZu!ep}U6Ct41P0X} zJFk(rjsv_u2#>63_cw5dQj`(XmV~@9o`09;a`q=~RmtnGFMQCOTM*vgRuG-tW{=6< zYM%hFOvrv9nY-a^)zA3+pB~->Efvloe-C$_??_P=9c{DcbS<^l$8l#li5IqxVVuW+ z)5pge%NFqPCgVA>QTzdUt*&fk*2&52A52pQ@M$b4i`P72qigshla(Kc; zjI^Dy@-6XiJAVdycHbWf|Ow`s&&Z+)4dR z{;Tc{g0JU*TiSNxCvb4~XiptBf)5~XNM-E=IuTJ&^@j=U34;y}P;Z05Q zf!d?9W2uwICr*vs)3#VS1n)08R&39~wyqa_Gc63CBkvORJLVIc*MKA08`bQ2uN`}F z4ENr$&v%{S-%n)?6FXn_2xFi{mpyjRZ_g& z);Ru?aR}~ZoINvc<~=y#ec)K{q>P+r>b?vg&J5X~k~{Vintg4?{$`@0L6^`yz8dRY zH}5nhF)9(dblru#Ghq;1UIi|10gt}%_)1gpm8zW0fL^zo^h(~zGxXX99dx3rw?VHq z^!a0LVbE(>eXJ7adDfYfPn6!K2^8n7XeMKb>)AO6ApT1`jxW*6G~ zvwX!nbKLU1)3^`2>tXQq`+}TR|B1fyF#66x^qu=y`+dZfJ%GQ2Z&~~8?5hmPb)~QN zpZpK8_GgQ%sh!^+4s6m_=V^at*HY|I`A?t3CCwIl-R~dkmHs9(UW;GRt!BmctlTGj zEqt`@aph1uzO`UK)+hHH>$V#_#p!ex*Ze8ZnrwNm9i2AL+jXBZAaX)-kjOl*q;Jl@ zRrz^}8&)UIa(eug#eyIC>Er zt+iElA!8JDFyAJ0!Z<@edU4p=r}Rye{~>U+#T@fSzW;+wi6H}fty(>S|vk8Or$HEVjh!_afe#?8>4A z@lC~J<4$7TLC=5kwyb9Q5q=VxB5PF%b0u*^tAIn+{lXUitVZTvbOMciFyZya*U8hu z`;FASlUqNo@MldQ6SlT-W`u?k`!*9`g)* zm2(|Y9;cf8-rd+fP1KLUf9mHbQzJ6grx3@$`NiZ8_D=XO{!hJJWP6TAxtOsr8B$jE zUINRCzx1cbF7 z_&IOyFL^-056B%dd<#vycz>W?k8G3wA!OScd?e3Tur7Yv%#l;Md_~=$_z4`#8dLEL znfiCPSF^*Jpz)9%WXT!}gP2}9|{WG(M=4jW<;d2x$=X_U$ zmUAgrjFfZp%sDy&{=df9WAF{#!1o>aD$fSj|Cn&So;?It(d@Is<+>y^T#a8D3D-p? zTx-Q=2|Nq=zL;;}2eE~PA7oEM@}XFK<0VIh_}f#_XEl@VCEs!|-8TZeJjZB@eVukE z(C$pW$MO9D-(#$PIJar`bEp^DT*`S+tFI>d68~&RN5J!brbKAmjjou( z9_1MycnE%2L7h@!^HLwZuwLlw7U)X8uX|;%;RrF(?-Lsq7&qMYUtd0MIOXb*>r(sA zjCHA{HnHE$+7o!jO5}w+6T7;O^~jA~t;cS$CJ9fv^)<)f$wYWkcu>Q(7JF#8Z5lkc!k+J#gNoV5K9j~TNF7ebhQ~DB71qU1U?Pz1B95L3D*2PXw1*Y{FnLp z$IF8a(*L8tC-d}(#PFR(Ui>Qpza6%ktY_@0lLMoA!7AEX!rbT6HnxYp*M8(abZAZd z;q6yJ=?r9I)4xHviGtAZ{DkQ8{hB=~G=H)JVx z!94y;xt|03*ML{xzkzST#|PMuPVU-xo;%U}CT#*pQ%&{TUzu{ zLke_TSm|TmQ@NrP8nxue^W^?b3tHbl@Hp{z46X6s3c~kq!Gqv_Ci9(fi3N|-dpG## zGWFdl0vmeojeHZ+h0Pt&j1I4#EqVi5{IWW5`=pVwro?0Ez=>ykc)C2%{ZzM)@ZptR zTKQke7reRlaDq`Qbu@&SB!cS3t2Pu^)8Y#>l~ojkjp zXL6=c=+MpHqBd5UrR!VJXdh*BqLr<3r-H~$DI;?(I%E%W7vJ(k&O%_9I3mT@FaC>8 z+iL#TC5gV-X>%t%!0$TRE#j^=*6K`)KJS=5QvVb;U8A#yAZ*Z^$h|-q<(c4E#vt?2 zNp1lja>hmcfW|wS6Kj41$4Z!?({~d0YvHyP+VBPOzMQWe!maF|h|D!`yA<5&Z3f_X ziizLLP5cJ0vla}Kh?@}jh-seX30E=&<`3}I;QyN~a-(xJ`(5Ty-~SwfOJH&Wi|7%v zpt}>me8%>GF4M!oy_uhQgzZ5-u25(5tJwz(wmqKxwqbh+d`;+dKJib7fJeq5FikdL zij5f3XT9gho799qArw5~PY8ym!h}ckHi?_Ea4R$*v@q$If2-tKlf8g@DEnp4fi-?| z$%uUtXURx?u8uyT@tjcl+|XXY;%s8oi6t@gxgV7dHp~LXK;1L%{{XAAXC$m$i-%x6 zj-BcrMgNRM|6Gm!=|TUjLigMQ4VL2%d5WABb&N~HAL7>a(+p3Pshc(h=%y8nMeN1@ zi+;NDzXuym-D~9ozZrkRP52eQhJWEk{0uj+zcY_>IY#W4oHv|w#Gi21*st0##C~zU zO;i3D7~_S%t>q*!VCiAR*@qMFkBk!f>CBJJ*~fX-g^y$7vL?1Ke)t~~5=%)RyDyfQ(x`F=@^RNx zrFHjW^0%cDL&cea3n$(9NINlN5>piXT;X{!dFC6DqcYCd!?4R$hFS@VkzL&mICur8b9?!0|yet(C)-g&XJbr*O&hVCc4AvkZ>^XpVE zNKfTV<^sbn^%Q=odx=qd$}!ecjbBO&I{P4dBEz1YC8mBm{$Av2^KSaqR@oOWT%}Fk zwWy6REgh>={taI?3ilYi6nb#k`H3tJ?c8Ed?WAc;N=um z^OV}OJ$HbUedHAI9kkaclq&Tns?gV=^{)JfUto``UODu=Is2wMTi4?ItvXNRUV8k4 zl$CQ~R{M|AzOUARXEW_<`WUPC3VhOEPt{O=SJU5-T7@_X*0qb(f&JG-=D4ezhb2C@ zF`s?BBZ_x2@lyU`rS+Yuj{44gN4<|&ET7$8uK^!8mDt2A&Lt&qE=lLl z_b+*&E2jGDqgmwnOfX^%k7hXs0?VyrOuq8#y;XK{QeBPz0oZ)EVV}m>$U}i`S*~r| zG0y4je$iI{evI6Sdw%nOY-fKchC6(7l-Vb5a}E?0qXG( z-$6#K8>h6OpR{tub-*n=Q;p6G&*=Gf)uA(nW9e7$?zhGC)gb5Y6*vSw#wPHk0>>>p za|fLF6L|D|j(!bUujp}(z5=!K%bo+ghw+PC*@Rr_$jBRzF?UU~)qnjr>;v*`(D0<@ 
z+xYJ*W=wW1Lt~%MM_kWLU_bIJ_FJXgPFwwB|AYqcjau&m-#skvYR&kdn;7GF^K)J; zW-o{_kb}Om>v|{j=o~0PzDjI<8sECll(`+UJMfvx{~)_V!S09yPdF!9IvK+*#-QiZ zHu1Pic$;UMI?2-sJ^XdGtySWX9PDB5dfL{C-O$D5B2gqE9DMa?4iBbZ1p{~Glnx0 zedL$D_Y!Az+h*#}hlhNMeY~41_QSo89`3?^&?tuvQttxBWXjddM4uO1gmUSsw#Q+2 zc=+ynk-M_97_Y>>Y3w!U+GF~98aQM9tb0QSd(B0vqt7>ejJM<;#Pq{6yfL79Y7xU!#}xprEk{E>J3JYz4M{zB-1vCZJsUX9m!$#3-J0O^k)Z_gTS&& za)O1+eL!j3WQ@5gxx$g%%M{}6IHM?ML^BveaR)KxjPc@h9KD>sS1Va_21NcH1PAX!|8h^p0q8B`DE8v%h|H=;?zO?zubRld3iIe@ zJ?YuWohqE`a@&*vf#vVAW@9(X93(T&YQ|%o#VjR;KqKxrOtq8SNa9%x{QZ-BM2!Rf z*^)mZ%s%{G7w@|I{j=WS-4npw3EbyZOj#(fzX9wP%oeN?lk)Wee}j}GE~%m&+$Ki@ zFFYpj${92Z*4Jp$8bija2@7{~_5xk1vWR&~@E1KX*y%d@a9zb%yMZ@q z0{(_LW#$dw`zhd=cPKeqTXo(-sq7-uK+FJ=3VIK55qfi<5g=UmaF4%cL33_3cc)8k)rSwK zDUC*q)$2;@uDSWGyU9G+B$TH`l?nl!F)l_5pWX(*9)n_X+)lz$=02BkNTU( zTSx5bi1{Fv$*T7Z^+wafWo|<0;bN-?>*3#7Hl*)MOkWvtHt@Vp4RKGF&)#1xj^6?6`IkvWb~b1-YdNEn*N48ia% z!R0WzRV})$)OaWKyo~okhiQ(^B;@7%LS%_mhx{e;lEcF)4q_V$jbjJ2SoArSbDqn7 zGI&mU(&-eMy56Lz?@%WVx@=-Sk#okQ;iyv!jU&$ZkHArsg`)sHFA(1@;&DUeb!cRL z3dVPTnL#(B!KHCFHB?)oLkG9@!8zIzdJff=&~tEGd(E~)&y;(FWUuLpJJB(#kT>Lk zby6oCJcOe4RAlwpXk2oRyi5BjoNF6#m)cj5X$rA{=kp!LcM9Jc--+mdDQdlTn!FGt zwthz_63j-kGU@ zx9!l~3)tD4vdFOkZrWK(jCi($=5gS#lmF;2o=(=9=b`&Mh^^|M<{a1xzpP*8^sXca zM6!Ky-)Vf*LPH9^)PD`0B8IPe@1yp_dCAK8<&Mi=NQw}hkTpwkBC$pb$Yvcj2>A4OgcwR{U2glRofWSv9{t_#GO7hio>;6~K{UiofVrydb_}Bg9>XW|r z)3?>9X7;%YUXA2Dq4Xs%NM9xBL~{Pu>PyD|dHVS$df9L3OMKq__`&5qNlIzP;FqPr1mB4d?c*f^gziaF<^CWKA^20x})!&dr%y75ue4dMr zYWTq)Vy>ns^<&=mH!QIy?2)k!jV+-$im_n<%{~n+ZGwJE z@QGD3fA=weF8le=jRxJglicPUo4J^1al$xbIffWY}gHcWj?#u ziyT!3>~xH}&I$j$LmQ#)ixYc<7^?qYbE$2hZ{3a#h}GN3voGsIubws1FP-v}2lZIJ ze@ia4+OywD^iOEw1Tketky-u7p<~Fb0c4ioa$AhE^`tF)-b(z*5{oP`@Wfl|{QZv> zBNjM~{0*7pTu9rGADFu-a%;nKa`%VjNL+NgFHZMV}hA#tN$R%$; z?nYZ{T9lE4c8-}#Autzmm*`B~73QeepUvGRnTmQ>KMTJ;2f2Fw0}C#RUyf_u4qS=E zDEruJQPl)^L3}L6yH`z^C4O0QLBo>m*r_u$I2-(wWGu1Rd5!`5U?kXT6T zE>PivO_9nWSr=TKLoEA)k)yDS*rC8OvW^7)mwNz@-^LvwH`$3t$lKA9{6|J;`hW{EAgGgg5w`V-yU|pz6RSk@a{wKrJtwQi1GIov)2=Zle!cBt#WQ5 zcno%OLB0-bUOu5@m6ZcIPH_(0LR_&6nJ%=`6BJh*$6b~ZkKMMITu|m+mfH?cmb?4T zmBaHs`W9K$79gwsi)Y8cQL^&I&qbw-A1SNas|{K85d6}_{#g^}qQtg!urCH(j>!AS zov8zcJ~c8gXed0%eQ6dS$bMDBglh(VLB3GYVd|<=I9E|UQTB-X#^v|-phKNtZ@(P= z>_Z3IWQ*!sQa|^Rspu237vIY}dA1opnfw>N#Exi5z_#d_D6lF6M{P>$v586!KKWK( z27Y(WBS;=O@rUn{&qE;_56b8$zWZqR<@^olPp zo%<U<_cjoLG&MydZ8zvI)$ z(C^h0_di{8LI3x{?ftJ%Ui?rqz(o|}EKH5ssHNi9Q6u+6QZDjIwIAK=&}Pb{fq!j+ zGLSMMY(Qj?BZa*+_6bYrPh^ce^MM!nt!-w{But5RJejvb%1C{Aue38}^dpVB#2M+a z=nD5orlMB}98zX*WgD`IoRfu8=7Rg3@Uw=_%o+Xpk?43De50|iC-p^_7+l$5oNuFF z>$iWWN*uPz{-}+8Qak&lVaPplU##TroLZ54hJG$|CF{5QC+#Efnhsu9Pi3!BjnwaU z%p;eP*flre&u|NUgRhpU-i>bVc9cAorTF(Yw8tOm(U9w$v>-VU%~&6zlUNv95LOHJs0+Jsd}8-3(Lm;DtN`f57Qh5pico-t6huNd2*u*SWi_lWXZb92{W!B^rH z&u{35)|1({_0hH#k?2vN`J4VzT_?IVG_u!cJAdP3`}rG_R{(-oVNDo2hY0Q!6 zITn38q1}urihdXTQu#gG_-*I6(0@BPb-;VX`Lv{uC5Gu=?V6sKR)Qzs>c+3gUDY#2a?IR$a<^iVVJ5^ z`j)E3zUF24CRs;)JJlSKDFEgWde8IobO3zpHK1! 
zD<Pb}2HQpJ1xpi)1n>w60Zl*aoLX;WmsoZmXd-lUuvT+ew&jWt;IE&a7h&X!A^ z123{)+fBXqYb72vsrjN;WzUFvEr`jH_8hcvl;`fJ^H*%hXYUZ)HFAH{F77TZny$+m z{f+?N^y|Dfu}{fG?<4lHhyJqBH}srCJeU1Zc(gLPNU0E-t((?~|8mkYC$#7iP|Na>fm~C4M&c=ak*wo+EK; zJu~vXeLSCKp0oQp<$9)F=bfrf_DBx!9YTlTU-sF=&OH20_7i712UfC&*bCg^Bd(gp zc{t>p#Gl>GJKSizS&bQZ#-n=Xk2NJjgmG~_^+W~ z2^^!r@NxhQ=)A*VsGx13tA~N(p1zgOcsQqXBY1lyPpSWey|I!ft6ph*GWV788m0af zK7Rl|vYu4g$Mng%SQWAAa2NQz_sA+}a;(Q=8|!)6HirGMF&_52Jge2Qd)l9Lo=$mE zIX#zq)MdS@vXAYn0S*nkdMK+=wvOD3%gBou8K-1dAuFYv%w-yLEi@-I(TOeO%k9W% z>fuZuxEcfObAemz3kSdDU8m2VDf*c;4>AY$nB%{Z=e{G?dIy#|y$*GV_fpY~>mro- ztjF^si^{TPudI>&efIPWw_S@$BWCVb#1jF`{~$cF6=Wseg*ri3j3@I z+p7xO%Z2TgMm|i0aja{tVXswTuLZtu5`Xq+wwZ0XZMGNqWlUA3ZKnJGZpS|le$K}p zI1PTF)q2S(Qj87d=1h~&Q4zAkQ8q{CUE%2ig2!ae;zn}Tn)f~O9$T{a#cREV#A`U% zThVDXn!NDLDZHZF2vSzVcP05+<@pkDTEN)OJO^L0&OXj_IX^1*leU5{UzGULoULNd zrjaWm&9rF+XO>MHXs=p03$$?q-wV!WeNLP`Y2wVVXD8`C)Aw&1)ctgFrcvh?bTl7u zTeKqM5_;jk&a=X+_`y9oo*YU}PuLTJuS&s@jr~%=r`*xWdEbEx$(ZsHmZhu-n(irsGG&N=q`P0wnwHo>tx$FA2z`s89nnLuR2K170Tf&}l z$}Q*pZuTNX{~$*?dP#z=mxzv3i4K&$YSP9T=qFX^Bg<-5Z|Flmc}dq#qWW&X!o8s% zT|Jrj7w!o>qn}I%)``%@yXeEB_j@1HHl~*V*?8?oxO+38{r$bH{& z%|+CE-4^3ni;jf@SJ!W({(ClUW1?N#Xtk5R>d!&-jl1b`1n=-ul`TQPM^4Vf?|&@U zyX@(^`gcElLH{!JoeVX(e;GPjMon_Rto7_abFQ8{)zB^0qJyrVs(7E{nZ!ReGGB6r zU33(=kA4#|4GPbyX|LMvZ%Lu851Aw3SF0Xp5zw#1-+c}9T)1{$!`xpWM#(`AVyX4zESWP1FjNO1*6HuW47MeH;1>eu$k4 zx;W?Hr5yKqNjdF&>@RF#v6IUfZzOn(zbJBJDm?n`<9BTr-EGyD!G?s>CHv&J;J=dp z8vl3gxNAFp@p;6CM3oQ&7|Ny+`-B{cA>WYL`Jz&Psz9mtg3GRpTQ}D6 zeVlIxdhPD1Mto!``HlYwm`mxYjfJl(+_Yp2zd!eqzusP-1%Ujf>ti6`iVQlQU-yxdh~{ z*G+b%qKGp^qSKX%u7JNy%B8%zVS~tMd9S{@al?CRcGj)XM<@9hdgwQU^>{|CGrL%g zD(}Z03?uiV#(I+nJ{-`$+=nRfB3rOWehsbS|EQ31Vuz5obHNL&*T(#~ zy!pwL6L^>Vb2GbuV>175KOB+OM2-mZ_eQ5s&o4IIG476_PE?q4Uj?5GIqWVq$xT4tVTRvO0uVxGPX#&#)mp|q`;~Af_hRKCUw7y}k>j`hhTkf(7I|AMvewv>l>3T0pDya( zMNSmKl?pF*GN0cCxMhxVeX6jWLaRcid@=FY*$n_cq!~V|*+4o(k+Pbe>CA8!S1yf;pnwT^k#VVH!DP3&stgF|sgO}3%fj5x~2 z0(%@bhXFHrt&-a!bNRTpl^Xhs z#2W~Ffpaaky3DnlPnA8-#@9I0JPq4}kKk2olY({FCRL^l|58v+AnL58j=b0N0kEgE zf&P34S;LSq*8c9mqT&0y?RG`qCvx*G`$Tqh*T8+ET4ZRbeImI}NPJo{j-$OR_mL~e zqcJ~n7jYBwqnWwQJF!Wu+~$%8`sk~J4G#hravXUua(@YaG4?Rl>T9W<$K1FR6~8EZ zTbbbDIQwzQ*o4X0gqpb@H_5bLlFZ!Ziw_#PiPG#z!}jBDG3P+?gzGdCm*6?uJbV?L zo(P(U?zO@5&~QdZ$lP5)9esWUAA*;f;_VLh@p`6lK99V+M*M+Vf7{>i$uM8O!~V>5 ztQ|{r9^Q=3|tkIOeymD z0Y#5PN%=W`x!R}aBuCf|?F5D{-WzL4qVBILEm7C~iShZSf6Z&eK}-J6#OCh`jXo^$ zCDG%1nEl;-WAwIrmfGum(<8md7Djn})A+qq8Q3sYsduN~3!JG|$~l~_gQ}bZUr^#Z z-QYyOYQvUsKW6YpU9;1hg&wBCQ@6rXP3Vxx+!66OI^gCd22WY{ZvF?bd7)(|^y#|~ zzU(Nf>^UzbON-`=9C9_V&usedKd4mpujNyqRCWWyfdaMu;ENJ(VYK__gXCT_;cUXr z@%=Qy8(2rjO;EizsnZ?xHE$PVeV;vpe(+yC9{(_WoSe>GJk0Y7J@DaUr*zj@n;p6(`96ql7+_kZp z_~>-{lsr?tE1d(AIHysJum8X>x--sF>@3E234KY``_JTk&j5x#)6dp}EgbA;YoZPDCpQJrWvWS+qA$pN zyUh8v<_S7n~>+Qt1voOv4-V~O8D_Vfl<-U;u110Mc5yu28mz60OJ?f5n#botq2 z#_hFnA5)Xu>FJN~9=l#K;u)r?m2KR~)5cv=P243V{-0l%YgnQwKNAlc^3!Ka940@* zflF*~$u%FyYs1C_ZGZOvSX<$xqwof2L3BP{`wVh(Q-`iw^~|^y-D=&C+@#Jy>d1S2 z{hHw%=w+!>1pPy5-koAjk_!&c2E5m{T^6(pZ zo;%sQw&p-R>R;5zIN#^J#32liLC|I6ABSY)1^t@r@nKge4q#cT+m;J1L^gec{8F-C z&}37xy095V-*Q10jt=gWU|n`#hov8T>>2i01|s8}_(CH4I%Ex2^K!hLvpw@3S%fU( zz1G*knj8=9`Do8i{DUuoPXV%Ln&|IJ{V$(8)zF39U@c(pnR@7HikkbLz~Lh{L*S5l z!S7{FZmPX&1kH*5ZrK4=jKL3eyvRP#cxZ0|G#CvnPJ||7SWnmsB8Eo%B&;Drcw*m2 zyU^*?9QaFsISYKnYslCF+uL!-jIH(r<36$pU*SHogwrRGU&toxDQq(PZT)HU(>$@I z>>1VGwjFypx~Vo{`w8~K8=!%n9YvL1|Mbir(?00E)ahOHYQqMx4_06wcz52mU3?%r zvDw6q`u}Kq_xLEQYvKQSX1L5G+#y^7l?1d*CV;JkOBBn5peEs>2E3qFg4W|C0V;Tn zs0q;OI4BFvq05aNtYc41NbJ=*g*1`Ffzy^UKwe3s$ux zICYTk;x(MtzoL|UbT2YvD)FnfK<_7z8QCH;#`>2WzgUx*O^mi8wrN!`K= 
zJm9)uN|rB)eI)$g*|Z~5y*_YNNP9l(gvV^`N6Ps>NPBYcT>616szln`%@~i<-U-@! zkM^#jodWp08+qt6>f2LYKHt>2zK^K?S@kZLdf;008r6;u#Ym*4c?j}J<8ivo{0N%! zSyJXCe4X<`Y$S=$o$S?$Uar`&>PyNQu|Fj+=aQBK@3UhcOUT=r?`&_}fV|4Hebx1P z+svx`ipKg4yba33M(lJGm;Mp@+Cn>%)ue9L`wG_jidNDV@LPEPeodZ>59lOK*SMa! zl=8drbpefQdkr}o8Uh~w055)+G1O`B#`MASY4FBq@S^$wzIK$d{HugED8I>o=cB&B z>mhv~@SyQ$ZD*!>>zETaIIUw&WWN4y=H<|_|Jl6!TI$8j%O5_$CnR_C`^ya#y!CEe z!TMazI?ZRj-psna37K&tZ?ScIxX=VTWgLD|@!QhEns|}1{yY8gy_owivX6^f`{<9N zGfqTjykO{zWnb5I#uKEk?2XhE3{dvT{RYowVzAh;m(d^LUBuH1`;!+nb#cDp8?-Q- z_~fE@im%AY<388T#cD!A%FXGO!{Bq1)nVN(Y~!MnT*pQt>&d zE-ZKLl+{o-h4d+;4^kW4loy`nzIs5Dhjzy$3zWjUH6@aP*yZfPJ?auWlUmWxZLD zV;=fb&Kd}!qX*E@Mc?=HyK4`>rzQGM(9ib{$lJ6e>=>i{|B3Enj%h#khaUpVZRjqg z=q_$Ucli*$<^TuWlP)QZi6!|j@9k<*h87`n;g zcz?U}`B(U1lzdG$xziOs0IdGH$&{Ppb(78^x^7a(SN7I>uzSSrt*6TGOS~I3@sWkD zcL(^0aCQ>moJ4SJ1IK~e*yk8DSc^U9>{LVc)6Z+^S2HvqHlnMgkGDYs+-LAu%9`w7 ze|Uu4p_p3Ye*L)AG3G$-2+ zE2GoMThK`04MQ)y(iQ%Olv}l5?(ohDP)(BUoDP22DKLVrb6^T_wlT7-V=-7$-81_8@#S(>jcN51o(8vK-9 zyXzChhtKP9+2;N8%fY8TiS$jI=I?sS;@dS$372#4!9D?;14EOO_gFa(jknzw+%we6 zITClWL^mAMT}Qv9JsW4S;>)gu-#^MxwhQiLY@5`|9lz)Q=PdSva-Zx6XK1{v6g(B5O$gmowvC!~qt%GR}*&^Eo+7OCM_E{zPK; zG3e7tEY``KU%!dYc6EuP$H5yAp$+>yUCyQb`C-zOrr>4v%FNXRy^hscO=L`7z70#LEUO?TJJ7q5u_s%$#x3l{F z^H0lW)BYv?s5Luo@3cNUt{WSHyLaxKI`$Wi-pK5lDafUZk$rA(&)Il)8D~y)PwDn! zANFIpLn-!SIh(l-9i;5S;5l{lQSSY|PJgxgsOP4?P`hQ@`~Y@dP0u~RJtks(ds5I< z-%(4>-Vfc#|69<1(TRDlYEG!$CI7fsBfLlKN4pa@Xf|)58?hn% zumIij-fYoLH=Kj6JXU3H9r|4PFrCh4qt9vj6n2FFBlFCh`_eb~g6Ro9v1wL(#F@eU z$W8Pr-A1H(TiGY-I-uM~kysu{e$fL1q)VCK$(KGJ=38sc#}+ZZyOw*I_pu&!GXIzH zbxh*ED|g!5@P)KQy)PBw-@DK~&-Cw&3~Xwxs?HD2kUhOU&;AwH>&~jJ`E3Im^gTa& z9QOFLzs5Qdd6~9$Ye%sCC!zAU-U_u2JQb2NjZSRDl2<;Xy2Cf4AymHZa9^I-k0q~g z>(-9)>Gh%9(}()l0YjqT60rE;~?{pRI)rZQ5wfDg+ zgD*;+?{IZTaHiCY!j$(+ayH)wcS(IHc&a^A-u}-JV{fykxH1#)e=D|ne&gRyc{K%` z$JJpBx&!Jyo!E+gFU-Hd22`Ri&R^dSw(uQMakn}CI-Cyr8*E4V9w~K5k z@5pXy#fDb&)S`D5U$H#TiQc+cO-x8iO02Ah#&>bwYZJU+Y!dfa;fHzf`@GfJnO#;J za#3{tvB=VR**=3lh;9pCSkVX%E>?$X`}Z}(l^9aErsotfs0HpZ&Tyrj{;(5N+uR_1 zPomAUv=Q5G(Z%6&Y@~~QNreXpKPZPc2u?O}&!~X=K_(vBJ1^EO6x-MNh{ntB`vE-M z7_$%A`gvoPdsfn~e>cVg%6*YB%09y!&u78>ta~EnUc+`WD{K#Bh!@%NfV`9OGW0c-PPWM+ zICbsB{hkMnq+vInFr0bfjYbN2>NBn$(WB(OoY?7ry9xIssrS1GC^~HqEyBRrsag?(LPxdF2D%cqrk5P*(5$&~?4aw{{OJ2&uj9^P(>; z(s2a6_Qz2YV{H6i;7IBVj*<)_!(p1=UaXF z854(}Ab1ho_8Go`bEQA86a0JqEyF+h^@7>#(0HAG43YM7;vm=XB=v-`yeY;mx(! zC&J8++ml7WMCEef;fY zgnMM%6{h_Fa09d-koE`n%#CawkQu>;lEn3mJsba!@o4Ac-0}KY_IS`+fcr)L&&OZe zjGX)e@0hOzSLXS+-d9~dO&6T@yNmVw;(m9r{zYuAap0S;$fqxQmhE8dKdkGmxs0~2 z;QxSOKaXHPm+=L}hR*oT#f&e44L!p7f515ZKWmJy|M`DQ|MP!id=YHuGH2hf`20CL zO}}FC(8X`TLx5O{2k0wjPe+1Nhpqkmb7YYV`n`2z$FqHVOY4N5)XwoEroJ9}*G`90VslF>7*{^s%C@)yT@gYPSxi`yzEBA3MXx7?`_e(ITW zeN*|DuWu4N+GhBf9r#y^4VM1eIX}fFJkqWzP46yLz32Gbn5~|vMP|v|=<-JVNsRX2 z{c)tGjJw%_lk)4q3vCF@H__=dyDGRS2QLZWBSp@vT=J$D**rk$O~XbLz;8-9e6%nR zTRi_CV?4U7BJLhMLTn8Gyp=s6F#brM*cZ-=%%Q&mQ*3YxxPPU1Z#`e9*%*@jFJf<# zesoekV6h)WMux=(^8)EP)C=;bav$kWj(nUcjC#b0_-J8y0=; zUKw_0PaSeo=5D+(cjJw@bN8d^n~b@81=$|3s7Ffs=5Dm#N1n``w0)HKiEmxiyHm147qdylkVlhKzxe+^Duw$?m@m9Yri)zHpK+FbwIS!N*~A-UU5Nh>SsTp*)9>mWuJq0-815~d zLl%a(=B=-Htb{ky#O6_a=sau!hL}$q;tNB4^~ZI!vAd3yZAZYI8{AU0PVGK&gsD} zru#Wg?%vJ*tA+DVEscGcyy2ET*@*?_dDR&7{jvV9_e5$;IC5V?_}no1D&vtkH^(uK zarCdd%&2>AimJUqImY-z=W3mxZ2ypPNnh)LA$UMP*4F)Y%8E|lws5C-{of7$E_e0p zXRN~SrCzCJyg?76{S%ot<4h@z4pKEbSi<}*W*+mH&s^p;hjaKFIES}tX92$jX8$`! 
zaeQyfWj%qZigyfi`_W*1Zfnmv@8T|Vuh?{^aF#CTNFuu@b43k(Y{fZXmR<($Vvl9q z@rmo#rSACrm^zp8b7A@-5M8|&pJV3!*VMaG486+(-%Ua{7>n*D^C0JOALD=IU1;PS z=OLZw(=Dty-q~*(TpqQZDn|Mnl-YvbB>TTK?qrrLBj)(f(d7Tu-f?s6YEoV~ZuX2t;_m*Z{d`1- zA$jqe3CvL z%hw3 z;~agTHO8OGUPA7xl#?C+4|CEImisK+TN|Zh;J7z@5x6^t6Jr>BCa+e!kKQxV`xrP% zDS7kS&cS8Zie6A^oz$H&0{-xwRXq}S_&jIsorCceZvEDL-nam+EPpY3!{fjxp}pg@ zhkrKz3Gk%%!vbF$pxIz07Sm6Mv*&>(3X2xu3z~jV-aT_+I6*rLUa6f0<2ROha8?ds z9(KVeJK?8tW@GMo6=wnO&7dDzZk>*}^M>cb~O;8N!4_#UCfasDCm zIkTaUNzAX~KyC^5w6Ag;u$Rc3R?sh@k07!!_ADuO&Sbut=~vub8|Sa@)2~|f#(z8C z&9VL3BK;cg-+8sNeFA;b=2E$Rj`Yzn>$;{EWQnYU$x3o}0ru)9=KK4M$ual3rX1`A zHs&F%W?{I3uM<5=>S$*=vj#Sq^;W}k#D}loPE{&*{2%@|X-#yMls#j-sVByGMIMUY z^U@0R9^eT7IXBT24#RhaeuQ4+tN>$ESfdqap)fg~rx#&Z=o=e@pX4mjo> zAGq%*f8M}7=XH`Vxc6LT;(oj)tCq-`rT!S~7DepmH99-a-X7e>(wUrDCUIuj06!Nx zGwDC}%(4s}r<5_*s&Z!eT-=$ZS@%+BmfBs21Dy3k|H2#UjC&Ab1M;!Q%z$?4jC&Au z+=DQAkI33d^xwFH7bj!)Q&;$3@5UREAJ@9G@P zT|jlsP2~-+imo#n9wK~bS>=VA$O3$a;R{LpZ_EBbVctx8JaDu=+NLiI+uhhi(H))e zq{UZc_1XEOHaNBW7`i|$HmLsfkAc%#WSRT8#cP9R(b-hfVZyhDa4YHvKJ!z?7S0BN%4v9*<`yAnaeqH82iLF{XL<6c3}4PxTZUf z_9r}tTwQ=}iN8qUb0(kcgw{XXHpm#@2QsEhjYDjOmrDOUzpJM0ebbxjJ~162u+3Ge(x#teZH?jQxkxBK9M)ql<%T^T_f@dn5*zN zv%*UJG9|H_b*sIZ%)m!Y9rQkie7WoTATpj<0B6LWCBDYYeWutn-OHg<$_j4Tdxh(M zrS6ly`B>Kj8>SGR|D%sdr3 z$M3n+KUaA;bQ`|~5C3_^LpO4s3BB2^ALZhLY2JXS@ZL( zp7!dk9W$yLLo-gbhuYVk4ppf`wX?%qHPJ6;i1>K%=ln$7cNhI|a|R7RZcA9o`QNF= zP**W;gWC1;zar+N7#uAIN5$YMed$Sw0nt``m8+zxp(?*v9TEq(ZjZW8hc~XRZDw_c zv#KFfG2`t}*W%OKd7{AETUFiBy|zA-F7TFoK0L>69bOl>axxEHd)q_)dO5R9Xv?eT zEfYBDT6!py*Ko2A{=rjK9o>nIq3#*&p@Y@n>JPKLW}oYqZ2eT9Z9i1^sk}ExEgJzX z>2|l1(30y1YQ_`;KYi_|w};w*Dee20Z0%^{9JQz(d=(q*!*45RfR}ZR8>CGg*9rc> zj_5cNW>AMYZ#$)rqheS!<7nVL*1?c-je#fN6wj>A?>^KJ2dD0PCVnTiIor2(IDu7h z>fhH3Jg@E(dUJroK=I*_XXeR1dTXt&>PTDC7%D$`kTc|y8od?Ke){{>`Fpp3Kgw0 za36B@fcp+`4}Ah3II(l-sWRuFU;j)RkoqU9I}&I9^wRY!3>+RB*EjZ?7<+TMnz5O_ zX?P4o@s&$nr$^0LYvz4#WM937l<^JuPJX%ehQgjFW%3Sd_XqNzVQ1i5dYQEL>iqH<4I4<)%CyD6 zY1yKe$!)9xPGeu0SU7c?^)iLfb`aV&;rOef{qbyymdX8Tw9j4b(K7Bbqs-Q=`Ds3?6GScjK z1!FZ~wQN)~(i*Ds?WY?y2tJ)>(kYja2hS^6Oh2I^+DveI9@pWu!Hb;0Gs_&TuIek} zF44;*G*Yih@T9{@XM84{oX6A*&&==S+nBQ=+RIaIdBwCf9XQZEK3Ec+&PUY@$=iFX zI=`}BTgzG*gLi9X5+2dZAZOzG%D9|q533o-`=+jRPkzxF#%}auZ=?2JG*8OdA2P}u z^yKrlw69Dtc$U6OncxPaOm}5|TZ8^?NAOZv9fi}nK3c}v(9j1b{iHr-f#Z81S|*Qi zoBc_zioz-Xrd|eFs?lgH-PEl!_|E0gHMBmT^|jVWe8nVuEjHHTkD}=-1%8o{{*|OYILigTnZ6{NzEt2B8t|K=={E>`w~_vvD1XQj z`1hOTqdesrfp4b&Et)=C;QxnFz9*V~rNF<>NFNjBC20cx8%DY(nm$$Fo9RD_rhiG` z-wSOfD#Clh!Sr(NK2&fz_n~H-46%0F3P-_Tpk3xqlaYbd@Kh;_j0!I0KGxbppi6o(|ud`cPU!dnm0+zw;!0 zD7yaXzOpS}|NOFTPe;qT?~3-f6&)uy<4~x4=D+&-n>Xm0KDf3!qh-_RZxH=T!)~)PUcJHB(Z>Z=SI?gI$aF%m>zp@7ph0@U5%(70_go%g1t^LaG<=)ziXxXmz zXj#uK{mLR6bC*T?;G7KzL%QoCuaSVo?@n- zjHcg5dchK-{6nMr;H)COcCnEjh^DU;_<2Tp!I-}C%LTrf{$4bFsld-Q%4fu^e}SK4 z!ilD53H%$3^v9z5)HMR%O#eo5U%Rsf{`E%rXQSy?3jFKTjO_`=x~;6}urF;4`KCj! 
zC%bjJ%9neBRUKWG=&P(zCu_-^hn8%89^_6XGMRahHEhs1^@>iS!>?%!6*Z!-Z`E}z zXw`397@a@gke`Oi$!}e2&;|AUhpIb@X3}1Jd#J0syRW?*SF~Phoj=rC*b*vl3HJ4~ z$m8i~OREd{eJyeIh&`q8n(`pL=3rgO*VY;;^mO&rYb*40R5*R1uGZGLdM)##?P=>+ z)&=^yw%!CUOR74Gwl;?HW}-`0#fRNI*4xc%h{pzv4 zeTR)TOTD&w_8)7}|7U_1)|@#Hp1J+%$(n44)+;($-Qgd~UaH|Rdl_A)mU(EI-LKxk z!=Yexv|ih3`Z+^yuc9GZ&z8}zp755~`8#aP-y!yHRndAmS4Hc!o@@x^u48=DqwN*3 zzjL;{LhpAjITVT=V_DLPUX-4@KQ9WU< z!2hzEQ6f0d_Wa3hZKoJfJd7>jbO_zBO<+!6T9sdg9^4;hPO1*GZ|{AHy?op_l2|Eaz<4pnc>uV3=%emgEx_f6AjqNMd?bw2ArbjHlQVcLCb zZI594k}CV7650Eh`4zGGlZ$bawBWdrVp~cXui2KG3a3})7orbKTG==wZ3r@wF-uzK*!Xg3_UU?k zBi(n1{b6H+UO!1MYqq7;AET$0pQ_GpJ*BtfGSXU4(@wRXRy$fRmwS@^U9}ERG1AJ1 zQEsi?pMp^aJjRTzOKZ1zq|t71g|?^D#zz`^+n=S5iOSz^-w1t=FzCBEs%MvyUOQY% ze>Sx$O7|tCcMdbsGoteGLDEyq^shwa;97w{RLy9!X*drTvKCsak!Q#sU*pLT{pv0l zqV+Y*UbiDSy+PAYd`nJid1Z;wyh7+bcP4rc^e**k6ZAZJ2T(W@y%C=1gC|N}vt7>v zpB?2h>d_~81JI!7b=vg2V7iAqyrg1I+8CeJs5cZF z<`QrQKMXF_^O`MsUO71P!CT5_pjR09>{N}snZRtUM>jZ8N<@UA)kTk+KYOg&_po3J`|i89eeYI*u0;z*I<0r#{8Yv^MVJNzf<50yrjML zFAblHnI|%7X+3W;-U{Uo`fn{SC2Z7_ymlEwG|$x&%@e&hw~@Y78S{58ny1M*_+ilK zOW9fOz{{E%64il!wha5z$4bWjzn5yZZPO0wCtV?3D}N|j{zs&{J~GpFAC7Xj@`t3C zeQ2boMAzo`1b)a!UldK>Ch*Pl-$&D{1pXPL{Jf}c{ItOTK*>n`d#QHrVfM33;J>eA z?EYKnSHw1Nravz5N#Fms(g$MG9}@VdKMiM{z&}O1{ouO={z;=hcSZYikH9z6%c45@ zT>`%wAM*X++$!+9l#Hx@#P|OufghxNXFUG%1U~7%7;KyuTYcfb+dLw-Xeee*pmG2f8kFE9iUIPq9ck9NnTqU zbf34iG2~nt)q#qDC$e=h>uB%EgQ3dPx@`yq|9&*xW(R&46cjDHL?imf({11orF46UizW)6~*{9L) zV^KTBhfCrA|4=e!fLoERxl52m(+zv$C|71tj0_L_jXMjJVcqAqBE#(H3E0RpJ;-sf zyU_<{>t8uvr%YQ_b$-{3`VFLMWpcAEgG&utBQRohiMl`QWsnc~X{QYP zC}rAW`r`SMUM48=6d9}O6qNA~iRujQKN@8gd-8)Za4KTpw8xZ*8CTu! zl#ETU$FH4D%wy-ul2D1(^n}=bFM2OyY%9FV)lcwu>uE|le zpML6fo!ZNO@@@75(R#N3iqh}P3&P%(0JdK523 zrz<;(8td8f#_tO~`}@`FZr9F1&3;O~F63lh1Nf|p;gT<5$tTzQZEd+e5K++h%ZX3r{lZn)TejjMmEq z2WeHJUm5#J=FmT#deDs6JSt|q-`Cf=*Q50cPx6j*X3Rd6dR^_D2`+(m&)^Mbb##n@ z*P`{(s_5q_Y%Hgu?N!Xs_q|>1@q1>+tI>MWo^R-3O(!+yqN}00BVjB1-XWav#_YA- zuSDymwWBBPMZdZ-+Fskq$__tzmvb#Nx-Qz!x|d_?q3<*@zQfVEXsh1Z;b(kpvX<(j zG#dD+QSTsn=NfcpXghaZv>sQz+hGJ`Sr9gWJqj)6M=M^DVh)~NHQt9Wl* zzcs(2x^V+@rtRZe>UI7U-0R7=f5+f+l&hFop5GQz&a>Aj2k-4WQ@TXC9KXR6vFW1o z_O&Z#Vg5t9{Gg9co6mrk?$P#sq~*a^UI+L2&b85UEidWid<|Rk%V$R6IhWuYtsz>@ z^Ao+CoxN9DdvqLah7UD!yg5JC%eBsgXLAm0wp-E2xmA^ZKBM*TM@IkFRpr}}duBN& z^0ccm27fOS9}GF-I90zvojw9negj{Wp zjw7em=%0+Y+K?B5=b_BovS_=uU3$5~lOnsL{VQU=6Bw5{-j*L2<%U#i`(|?-dBdKG z8;9owW4xSY6;?&zRRFIj1}`}~m5kLr@p|XGi_!mfAY*#sW!UdXZ~i_q>6Q5MZKNxt zZ@TWGIKB2aq`SVS$*^Zr-BG>y*8;x=eRoqloYw{Z^YmwPeEQD?KIzEFXH!3j>b3O( z|2ZRlL{vupMBr~X(%*}wWB1fDL*PsLx8mE~F7UT8t}XHDTLr!sJ^0)4 zGT0;VpQU_xeEH1+pY&tp@v`{|f&U#P$N_qXbg+Roi>iZI1RU zxU^nB=R0V~1 zZGko1R!G?jU5C`ldY;nD!Vfyo*|wOm)jb+5TRsE))`N#r#=KKj?$en% zNX{eCvYLK|jBag+;=vy?7lDWRl|6_(VMcV`;_&-mzq06IY4y>vvGeBG5G^aVvU29F z?|fG95Yva22cl&~X9|8!S@$=UeY@M^>G;t+^!s)2-u8H2v7YqKweZGUvVpcTV>b;7P>O`Ld&ss0LN

{Y&)LY^zqtQ|8lpqWoC4|xm&^eOz_T~ z!E)+rXGEM}Yf8Bx&?@O%mxW5}kW4(ve<+&yKFvKQSy5q@9%@!%t|bCO^gDQkXze7c+T<|WMC!uUD5hjfMX9}2$2);#??9j)NqHzP*R;5}*4baX>q*VOrc zp3cj#hja)p)AoOr29KnC*V-ym$JWXRa`f^d1I#^mxw!|Yyff&@@9N&WA#cJYZy$fZ z!Du7)oZH>S+Ce79)G^DqTo-LalLw38#jIJgy#HjSsefyI%30K}JZm#eTbnw3a}0TZ zzNW8Fbqn=6#4gkN&{yV5}DYd%z85zj<@4+fhV5_SbbNpU1wY zZLPLn(faNAT1@%1=wih&@`>^?PVOm0;a?jq-^yGSqTk4whKU!M+luNK_}4&JJLBo( zPdU)f)$q_A@pRQex9I^4`j{d42$w&Ip-)py%$@_AKWDdn*_^|e`_Pq#LVni1CR62H9eKTXT|I9M z+Cz)Mh45v8SC72z56?3-ik}2{VG%N@jXnr|D&Ti{^tX#NKWQfXgN>X8aGqAYjyD0P zbiV&t{q>}n{f(E0|5JFj%ZO#ac58=cS!1Yf9cLG(bsFe~pCr=%V)}Ye>uX|LCH-Ga z|GP;mK55_q{1o$UV-5INiry}7Ef|--h&LAUwNF&`$=i-Jby8b;`__(hU{)f3W9z%A zFXLT8eZi;Hv+-5~+lSva!5nYD`{vxOGVxcu4!Ec4LkDLZ`uFoQR@wIvZ*XF12)jzC zZOvisuNZSa#Z}VXuHC!Uc&6wTEo01nB)7@BY~#*V8+WcMkVARb;un(nEs}bz<#9Tk zZFH0#obX%oeTmRR(N=8Ljo9Vjsm`Uwcz{va5Z`uSL=?v7(Nqz0V)i2uyIP$7FwE?K zJl%X=9mmiZ{=hm9G(`D>eJf|jRqP2*qD#$uKcwkW^gWR~;j!)VhEY2kvq4{u_QN3u zI5yw<_^^YTciwX0JthojFAhe)ro*T^B|0qcHyTd%!3c8hl{-_n1IUQkelZ^Jqlb9hd}>V<*N3{>(an$(IuFR|DK>OZ^KFrbm|*hOB&V@H)KZVE zp-<*`Ti3!<(fe|z%N@9={#lGozw$Kes* zullCAe5Dq<7L&opS~TB>y~vxl*mq`!uj8E=@=TBCqrda{_eDS5Rg6~+q4^eK6e94!${QLNd&zB7H z#gD~yev5yqk9hx?!1wojZIVyA_#aupS9}_b;VV83_K`1V{dZglm7R#Xn5keW$O0xH4w^!&Ss;*-zV>X;1pN40_}(@a%`4Qo{xG-Rw7d z%68tY%6rQ2!&4SDNt}p6c}G8%chUIE%)4Sm+}nQfv9ODnSLWMh@t0uwV=m)8arA9- zndav`PaW3xwqNRMe%_@%ed~MA{UUzMrz+kz)9}N5-ot;u9OKWRLj|XiE#jNTMmam>6R4L+ef-jF9LjuKi2o?|pZP-yq&0%X{5!@$-ql z2$}b|H@k6uK*wrtb`$XfDr+M#3Ez|amEzB@7C7#a#7?KZhoFxS6O?`8yIt1O_9?_5 z!smz!*dAy|{F2BU_ngSMIpQPtokyXEPX;7wIZNrCORT&)d?fe`e}zuXU*XD2_$$Q!0_~b_l0P-RIDdr``rahV_+tGPrt;oB zFl|Ynzrx^^u5hOKi@MkFzjLqnE0lFMUh`L&(Q>gS6}?1!`t`6D>tx*cT@;^MW}H#s z6F2AacKQIZB*!Up!}xnB!C&F8Xj}6E#n^@K$XoSqEK;{iTk=+2{KeT)l_TZ&#dm_s zJp8t-QZL((Hn4QtRq&Ukl$Rn!gm{S%|+B{Cm7AJXicX{6YFBaW2GP z3T2P*I}Gm`FZDFIBED6KGa+@~GV983%D>5P8$K~whUUx(faf6N?gGXg z_6B*Zh@E~`V%Kq+nLHvaXKgYLq2XV*pg_Z=C_^W_ltQZP^dSR+^c4!{#TuWq_Mv|(pw|GRyvta@tq_* zUF5AcPez)|(;i@mel zT8RTfy0WQk{?S23%us&oJZG%t2Vh0Ey8T_~ATVvEq3cc5e1AO-o&s;?J0(&kI#$Nn6_-*uUe zOl9r|;y>^g`0<#6AK!fZ8{r!!)`!R%@R7**)(fe7cUI%q?G07szO}N#^c5w3jl@4x z6}nCpWzBM;>quD_<3G#z#V={?mRoVZGq!tonzCXkGF#Hl;;U^T|BGH&CcbU9sNN2A zp$oGZ{{U#n@N?wG&rz}+e0+}XXF~@RA9csTBSGHL{d8O(hCbjEvv;EZm1+8)o?e%( z>wgo`|8}7Nm81XJ!KVlPPuh>A3)4rFNf!n9Qt3|@%2i*SE(*|ngf0@m?RLyZaBa%R&fv7~x0ghgD@`>E)$N_= za3%1q@%Q4p37*nP+@j7cf4ul z1HUl%6yu5IM*(6AcQUSHjO#f3=mh-ey*-Nezy9lDjik%^yBdA1mOZJqHmM(&mP>r$ zu|DDP_L1l?)Y0l^tKJmi4t36j_lxagxvQyVh%#IA_4)P3w*8g(!FSHA5B+Duq0j~F zWI^UJXyuNQ*f^2XtdniGuHy~-6U53OmSY0`btdB{Zz1%)%0WD0^vodZ-C6DGk@PIm z9g0PZO`N7qY_k!I82;QQb}u)3dy^*!FZlHhIxp}HQ059B5MJ>!eBlZWJj+<|leAA{ z@OgNN4O!TUyb+lq`5yAkyiFFy1)g+XBk`n@HM#Km(Haj>I+zlS+6F+?RCtaCNa3JZc7^|%L7ST`0 zOOZYZZt>MD`}Q!t;v$hjzRXBupJOUMRMm?;E%>!|z{jM%j75XZ*8<(he)}WpJcK^t zVgF$V?>05q6Hs(Mtk=OOyYz>?ew?LlzsMLotUcjl0VQRQXEgLce}taxYD! 
z*4-p6w#NjkqScdlXcEgqbeIja-+54ekJEmyW7LjVgeUE#QS-+ z1{*PvKPy(X&_^EhCALGM5oqYlarOfeA7|OOA`<@vUaI&5(4|FHMx9{pWc&P^E;%jLIZf}?;{^2Ad5^)jZHw?r_B}l|Ky3dd z{z$A}GWJDZaIYQp|J8r5UCa3EQuP?2=LYXe{swgS{FG4=zdhso=v0wCD!$KkKWfKm zZ!Yt=LYF(HzqF~q zb7=n3#%uo4(jJWQmqz>~$5h>4ngf2S`%ANH{?h7*-6{Uk#J-VQiGF@Si&x$8wI_sc zW@QHO2Pb`#{{IOdp|(!lM_Zjuc{P_dk0WD*kG-~u@_Vu}-Mx{F$N?o2pS774I~HmF z`aIX;7n1!=tiL|fFQmk3T|j)0X7rK%J_;2_tdGL@Sgqo7RO;&f`lt{7O`F#7d!l5*3ai%f>&Z)Ew+^oT5f;9#Ot4S*ubkTMaQei0Kuy= zRmZEyLEHXa$(!+Sd;(vEAEW!6#Rl*Pc)O2#vO^8riSBT}fxDbf@bOQ7``396nSm_m z_=aMy3}Sn@k1u+T^W!Z8z1rT9@0Y2URvxK27J=XJdkVi3w<5>JDNPN~U8cheuK^Bw90MiZxPt+KuaA9s$a zEy=_?C{oaW&t)U?#$`1H@bTYc`1dE)XPu8gwdk1?%8 z8HFCXxXuC{UWIN1KY0!t=~(Pw`1Nf12s*CCCU_j$)by>P66ZztQ!jcB{-L#4nE~r^ zpBbm;2Ie4e(53ls{qSDw=fL`dc9ya^zX#haHb-Iw+YZ{w^WK?U!aDrwbIJRswCqrb z!C^uEDZvXh529aT<2|z*`SamM=;#9Q=-Wf1Y?WvKnB^-Nw7*WuejK<^Q)(I0EwL@6 zobY|gFSU&AE|B@By{Yu4lQzv5?USW{Vu#p2&TGp;KX?V*;2qjvUD~o#uZ*MYuf(RJ z++UEj$9L&G^u989=q`BfhmqMogPvsW#i#Kh%9#IGkpAWZ&Mj{IHFkA;cxDrO65UwN zF+ODPBQZ_{$K}j*9{h1Ju|C&ak(ZZqL6kb9-+ZB6v7EDomMGKY8*!Y&isP|RMh!5Y%gIt>b=A-mo zvD|q6Ffq2VLyv4wM%pXInF-c_rq#+h{iy&Ta!ve;CsY7@W1weQxt4Esnd)(_+SWmOYB}QQ8#Qz6#rm zXInOM{RS<@k)kwTYll|N7`l@$D)37sY%gG+uv*FVNICJZ>>S(_8~=JcusfFv?;73h zVjX?J@478_$v*FQakgyodNjWW=vk$bXQ7Oh`R1J0 zsxsetJnM7JxtuA<`L5VZpPUbhj4yAxiJBy%tJEF-RVAi*p@)_-o!#hOQm!|f&xw}Y5XQm#2BUbn^AJX9^#j@u;24+<_)>_TaR^{${2LJlzug7Qwp(@COU&+KVs^*wyY!gd#ByXU2#zn>*S3qfjct24F%zZj z0_=fbwCxmQent1FeRH@s0$&rErv1KWxc9Hv%ufraR-o7hVCV8O592t<8hyVavSsk{Y7JlP*+>)b(RrxB@U%B zs4V{)=$U)YT5Khut%6h2?qXllYh&$7eH*br>}gqP3)F<-&EPtL`wzFQOzW`|`%;Vb zWJjlFT}a$Uj}e#f9@;M_Z5B3e5BHumo=;rDOZ7!;LYo%q`kvT@9s^HONA7e?Ax{5irU?IIdo>= zOU|PMgR-y}kJIjRbTPlV#NuSE-DTPske%Q_`cueXIb(41$6mInkiYU_{N`^IXA;;p zwe-oVT5N^(Hu2j{8TmaG`=p!S3lv-NoUeG2pC6;l4q8`tf&NEM=ffx z(6bF1nm5C?(@mP4|8r99JC#($+cML>Gl_5A=2bnmvdKLXfABu=QA>>a-J9Q>Uw0Y0 z#v~=ImT@j$wrReSqI%gtaEAnapLp8X*R*~}TLFtLQ5y^Wnyij3-o*NQp0OQ;?#I(6 zXZV^9ZsGqCv>|-xdFlx+a=6!LTRDDbkoMb1Ji5Masg-l62snO#IP?h>Px#4=klm! 
zTz7%ChZ^heAn!Tm`+Ll3?EFm$5jVCEC!_s>lY`gjIC%s(GPgPKE8EI(|4(pY!qoJA z#wj|s>@y@jR409Hv5bq0`LxIwlhixMnB1%47s!8@aXo5|D~1-|?T%O=3izkIb=6 zRlEv(>J#Qj?t5r+$(Uo;oaa5}l(mWeu%ej0g0H@{cROoO#v#01&Lj1;m++~z_cZYT zZ`PjlL)r>h)I^WQOZ2rTcQhX1cL2K@I`7Y7bzT*I(7Q1QzIFrr?Rxm!b==(`R%x~# zt2A~UWL#n$5GPDq2Ux*Yc#L&WMjw7b-(?;BjxV;-xL6#i{bF#e-GGf|K}-w|Xiz~` zn)(CvHMt`8np*5Nu`x8nu8xlK?_+3iw<#`$M(-Dkp^+&zVu_)F{v~l9`j18K`vvIN zF=R+WnQjZVXD$q<6C`ryd41uOeG(}yNMK(CO#B>cG9Co)E1-bJp7Uh(wl z3&b;zj}wwGi8n*s3l!aT2|VDG)$VVEu8*>pu7!6VwJR_0RV#NiAQP%miGhKAC>`7G zgRC2|Qy_P5wxx*Njfb1a{H{_Hi>F}s$YF1Q%xIcb;_T^PPR6P#)jNKt;Z@>djvy}P zNa$}AbT}G%90Oe@A#d4RV^=ypS;3a3$HlaX?P+8-ya0WNxR`TNlC-#($;flLKPI|{ z=iAe?*qHNONnXcI#O8sAN?c62o4OsH%VX8!V*Uwwweuci!fKtjhryS`wv#!U%bh2O z_0HEl=*OdtxQY*M*6lF?%jh_}>G`X5nwL7Wwx~xOQ}wu*WvmUc!RNCUfl|a z$T59>M*F+Sm-&&lBNk#a(r+{NrTo_A-8g?I9}>k=Wv7JObB@efe$R!gpB*7S@54I+xZvXSHO9=O4-rFZi7`yr6}@ zLsnujPU@~5Dlz8u*s&5ne6H&nCUSxPOz^+JT_X*Cn7gj%@MFPe z8~M+*Wrv@qE$(%`nnt@9zldUTlKO4E4s=j7dX?4WjUAl;&!eFgq|-Ccis zxGZ1zYV%XmUg#WNcCGy1`R)VS9p4Y%d*CMdE$ii|7AtwIpZ#tAgc-dxI|p$te5=yC z>*q?-uDg}6=m*D`$BVVZFy}6br%`DN9>7m6@?^n4b$t%^-6}qbj3_2nQ=UDs(xpy* z_v0;UxN~^(wSj3iuX5%)^MkiZjNT;woPo+$HxE}<9DQ0@FZ-ZQ#wq(2({>t}$A0da zENF9V_a@fIH|bySx3rb$z}_>&yXF4gnu?!rCd~g`dpI{lFP;1(bbd_f31Zvqxe+>- zyU5mx`8{{4;q$B1Z~*=weGy$X^5MV|IUkep2>+}&$e2zn&TN?q?XIF-^FtRpOsfCHG4-pM3%D7(UN_$h}1JW6cvqr}8MN=)pQ zgzP2SlY4q?lX~$KXP_x=R9-5lXmU=F!KcVtq&qA0+tDJe0H8e z_tL(!-GY6;#b`e;?9%PqhAXo-(7uheAahjOPc7jtO<s5HB%)w`!W7naJg$-IvHfZq< zXt5KTaaiNyls{*{i#>b)D80Al1$feVWQ*`LbN*aHbCegp`21f6&ppYQ-25NpD|hUn z&HUwjXDQ90v*7q+aQ-~={~UbHHwxZ$g|a?pl(NF#tnl;OL=G6hq zL;dF%%Xb+|EZ_PuWn%f(Zut%0`pxdsk@qKelhsdubM&kbK&edp#DFS9$ zU`yHknek<>ld@k^yt1!BzPoh;<#RoK;Nq>j|JyP_!9}sbbAU3XMauf! z=#DiPBeNgjZwUEkSwpmcv)m^r{SRe5Yifn`Pi*?7>y-5c{1#XeJ2loe-m~gA+FgK% z)xvq^0Pryoyr3^`#I}7v^_uZI9X50``tTgON&mf7UBA86HQ-M8_O1l>h~UAdPP%IH zEzB=xG!NhH3KI-_-y=VFH66WMSw98boE3ZDRL-%Kv6ZhuoATBv5>b2+?q10`eGBtH zZv7M6WRKzmcEFPL^RrV`Unh1JsblJRuaOtAD4A~boK9?cor%~DhjWfbJaQ9W;2Ndr z*cRC%=w~XWynXO6rgH3M@=j9utaD}T81@Oscqu36%8X6k(mBwfWS`;YD?0mR=3d4; z`C8(tGqx1^FYm9;&Y|Cw3w)yQM?(CT@}pzQ%l<;rh8bxl-U`k}R%rJ+7?%hA#LZnY z;=eR`(BT5Ky9*s|7dqT7bhusUa2|9xn?;#Dk+!j&<@av9fjPaNxxEfr$DU`^_X{3F zzuSd=7h6|k>URy9nqbIqAAI2ApAva`1)<#XEo+`Fz@Y)6hI zDn5}|lu5!^-eWrfpLW;eCcNA6Pj<&0<*wxdN8a(*d$^op%6j^*{= zwb-a^L+zEacSA0&2yk{HbvG)m}KT7Osp$q9lN`d}% zal$~_XHQUj4>~t*Ib@Hk^3Fo)$+(jXu@4?SkcrN`KA67D*G>90FIVoceaW@$j+Z?< zUfVLFNA?e8>>pMQwwyUZpK57ae;=*i-+y*VWhKPl-cJV)b~PV zTgv9gwt0=wri`}--MrKK?Y^;gf>-_Rh(6YxF=Gw9v?y+@LRW!l*Ade-xiXMWyxdg9 zEAf2ar2i%yc~4OR40#6|M9=R^tk8T$x8Y|&{7({dw@31lse2w6zhd6XS&OZ_Ev;C^ zopkQ}RAL*D`6_z3V#g}ZGxv{$hOk*kTF%Qa?0~+S62V70_`p|0p6#oy*AL>0sZ#TG zo0unWeH=;3YzJ@yW0Y4Nz)b*dI&h)G_4j~>^snQGowVuDkEB&X6B1{5^=R%2NILD6 zl3oVx(sg=XznVKpo}_tmgV-|alDLnqx-!!hSEkrfcaKz7us`;u4&W?xpl*v5{Y(1L zGD7cz9k@>VumHF+|1#cO(qz1n_MnWHJ21!KIa+_|gY>O{KFIi`%$=?r=f4l zo0V+lIfeb{E3*HZi0u;o=J+o0l;Lk~_?z(8Qutdb{LK!`H|_E^VxoUG^=grQd8D}w zK2{1Jb6W;_0}+ewL(X6w&E%r|Mlif!ArsnmoE#PI#|~-zuW`%KKL|i->Tn-koVHZ24`jd-O*bUp&t?Y zAaUc_Z|)0lHyS>)PwqtCi+r%Vp3b+cc4Ad4-NFy-n>I8#kuM3Gu4)oKrKl6aR>t_x zZ}-;RM}PYFL*_P(e%uSayjQcxr%rG+{Wo^;X-^cqQVUN11^w_gZ+j;=H|-6t07rO+ zynPY==c3(4(u7B>vYPiill?x@Jn+-W@Y4bC*IMS;$3EL>@ZemHx7xe2{uo)|rH}W( zv)-nT%uDQi=ETe={Hkw0MQ8f|YwnWihdFn9_qa{+t28_U?@XIf8uFC!0KAsr^@7jLw zd|Qc|G~`Dc>|a%0_hBa}$4&s;Ch3z6I0End`-^<{@Lo~iy$HQ@#=uJh-d~yfz_eus z4;Op!ME{MPsT4aIxB`g{~Mj>HrAQES(bk0Yzy#N zk;)p0Y_WarTdQBvek5CbaSTR}x1r1}PRSKcPJuV5;Bygqhv9Q*)#TQjn- z$Rxrv;cY*8a;JgIf zPlE=E(dDM%w>I((+{!bN)p7VzQn8PxaX+2sGI)Lj?~Q~;t{|>Fj@Twc=FACLGG}@y 
z)rY=#9r`i*r0A47efoRo^T?xy%-*%^^RzXOHs{jz!}KwSF+7Am`QT&pUH1j@pGMGy z&v4E!8^4Ii`AO#@dY+$U=nGXhqqEV6y`*S#(6ieE?^|le1S7UrV3cnYY#7AL*|g-sD?7d*?LHjJ3xp;Y^pZ zxF}nhUB*7@bIuM4-}0??9w@|4*!-2Qdb#G}T7Y-7rn#rR_nmjsM zzx6tMpMTO!`pJYxe-c~-zbAm{a^SPmIWrXE?6+_A%#g@*E^IvF({hPicXw`WnmQ+( z=_I~rE_2ympv$Gn$Jo!=HcNzow;9a5*VZ8Cn0K=J}bI=pxgRQHlS^%N-P7o8wZ#LKDdS+h>0fIT(5uefRun`yr^YynS@*ps25LhQ z-Bw@5`dSE#4gEPiF#bTKz6Cv2;z~zioAPj9&da-Y5EtUAd>%RNgxFL2%G+k;?hw1k z%j9LW+C2=c70vKNKm&S=h^8ttkvrox7vp0Lt(=9GF=yk4c!NDmAM+QL#Vhfl%k?iX z<|=G0!q4Vovl5=UOZ*9QMU?ys-LUicP0H9yf%5P zoJm=L>^GfjZTyxv-!lA(5@&F0oU%dug!=yscN4krHXL>+6|yd(VN-lIZX&TaVfNo0 zGH#)pMLZ9VCdzz{ne(yWEV=f+a_trYe!Kv4F^P_)Ao`hUA@g~XnA?-}N zj@Yn zZu?-}!F8{^+PRLL9dGsju=BZ}JmY^$9klO5)wOT!%F@tB>((Cp!E@U>fAGp%o!1Zi zuyd_CWZyP*aNrenU|=A6H!!H3e&h?GS7>J~?X0Dp_Y=tXt`6LH*Zk5@4egYWm*Luh z&jGK2`^w1$g8eH{qbNHJm<`uqb`BX?=s|s`peboZSG~UOM4v-n(dHNB6+5+wPeAwZ z;jzDg4kQ+(V14z>qtLvZgBW*1m*j3-%)ZH5Ne^V(?p~Jze}Fa<2-Q$+wCh@9#OMD{B+G&5TTVikCjyiKRHc z$TK{hR`UBp_>jaB%5TnOY>?P^`CV?dyAnQr5A7c3x?MJX!L&zf z17gdiIjqlH%`rY;wsnNIZZ*#mOCQ@cBW6r~Z+0mg4w!x4ny74$I5D|Cm3MW1Gt7|5 zDuV85HKIeW)DT)5$4d2{Yn2A!wT|F`L*n=hd^317Na)&sw1@on&>jIqIm ziq4nD=xar(yNhp&M~Xd2#@vtgWZ#uMjhZ8CqS(h-Bb=qrnc9Rt zKf%M=E{AqUvrnilxBt+Ip7;IW0(*>#ybY@?A3|?=<_9LsSKr`&zQ=d0HR-ogDBfa$jHc3jW4lu8wqX44Qp) zv_Z2K%-f>7+oQ&r1MOOVDDi*A(nrDn1o4eWs>r`ET?vbB`dN&5RE#mNWX#rBUt9e^ z$me|UvgmJ!MSr{_vA!1lm2=&+Jw6I4uhCYs8U+jMXyY7?7jzOohE_uFE zcX}auDg3^=mbkmsYj5*qVK4ZaT6yNM%io4bot zL`=#8IU5KMvi8*Sqe|V_Mq&WSpIWTeG&tF7|Hz&wFBbM)^A?IdJGsLR?>H|o zFWsFN{0Lubkvm+^|K;%AAMzd9qysx)PhRF%P8;?a z;eF^-x;~Vqri^_FnGT=DcHz4ob?8Ll)i=T$&tl^WW|Dt(SZI>i!Ge5G8m?@&_HtiB z@g(udzjy^}VifohKQa6A5sxyPxKH#r_VmqP(QUh;S8U+il+eZ+#v#vI@dQEmpU`=H zBr80u{ATcW_&x%_;!5wcDzo76n(^JIhC1{ zPDduZ!EbAB#n=t3xryy8*4!-HnZW#Wz#QLLlFyuHGxN0fCG&jUv_Yks`ubT@UpLN7 z^~9Q=z0R0tvmOgetQWX@fO!heLm$OWT*o-u@C$vwbGrTXL+DoG7WhBu!^jS64mq|t z6l<~P;L(%`_t4&@@Dnn}ZI9iICCltJ$8En-?RigpA) zeT_|nXT=(un|(N1&e~#*t*^EOKe5`1HP3IxnCFMtU3xC1w~t1~i#-1;=BNSxR%q~( zVrOMvZZ+xVFm!X+7F#RhdiOHnUv7@;uq}qVm2o}z`?yJwA|>xgk;Jhi21>}Crc5<- zbB!7;hQ5;8t>Y!N_v5Svx*~F6d^NU9_(93AO&PhB$$AboT^BTY(aOn&?Dg0wcq;MV zOX0l@=FE3$R(xfs>_NhlG-8}G=isM6w;*OAsnszhPsS_u$$b2Va>is| zVNUJb{{8dfu^H_H7OnT>)Vlom4bX>VEv#owGLQS{)81w)wn+4^p=+M@JZ0%CGLIm+ zeFP^G=hF&aerw{y%bGY0UpNe3=mm4u#DBtP`Uz3DmtwNpIkLrhnvJr z#p=W3irt6TrA)ANjw`LUiR%?Rj_8O9Gwx`bPX1e0Hu(b_?(kjYsaZ>&nhJEYwd9Ez zM8269$86j9OYW6&&RtDcX8#b_%mt?QacJPeqQPSLSgiSaOdmjBG^m|4VsO(|jMR0R zv}5Fb=iF&9>deC4&l!Bn-oMwj1}+8fdoQsDw2xx+FEPeZ{rk=S`fpTd|(oT`qz&0G21w;wegEA_Cwn`tNBx8wjCc9?F-M1 zMGHB|-y@lGUt{yZ-!6jTk&AJ0r#ZGDzQS}p7B4+8m1{Z9f*+k7?eN1Z@H?d9D>QPN z9O7O+_HE%8Eer5%0yjAmvH9Wi^{uSky`wnqmYu$_uXwvw?qjzY);bD5G_sU-*w_=V zqTSQvhm!M>*VEo%&Ps|cr5RrHXU=)6#6oV7np+n19U zN_>A}cM*9<_?i#C=C#Q@#Y1DuJf5M-gy*0=;b{u?#Mk-VL*Ge(Ia%af&T{8zs&bQ- zMQo(Wi@uLP;&SKK!q}qc)|Pbnb8BHucArnx*IiQJX2umPVq7;crUwfB0`I?eOpWA^ zyIgq38q;9Ll&mRXS#KhjxBR?!++{1g*UMfbahI~*HnG=iVz1f6o@T{e%6j{(^z!m+ zd)t!4e&#dfidgdKc2lm1_KA9<;&W%2?Ka!w(H~`!V+h&{5+5zJ=RuzHnew*GwK;~o zZO!$w7}Iqz#k3OfLpA@`HKD9#DkoEhE zw%^b4TVk9qe8s`LPTIIQ)>*^XpIJ^mXL!Grw?pxZJf0f8lNr4eulNb}P&=m3j=OJb{0B0|2ycmC`cQH9P@Mjj|&lEV9 z;?FF_pIM5(vJ`)1G5$({b0Yl}oBqvG{G0asUiQJc_%lbYNY?lBZ?m7vKIn;xH~IBA zgPuwy-UPe7$X?@tvCLxxAMpu$oEf@)c+T07ao)KxL*{TCJE4rfTmR%d_&)5&`)%>; z%i@$_r-3oD$BZicM(5c>C7%6Zu1lV0YIBX}_l)=XEp7hVY*S!Y!#=x4_Fm-3qs}$- zS<_IQrp(a#dlNIWiJ^kGCS3cKuZqkm^5CNK9{cr?CgaS)jnEH#YJ$gM@VYt7McrxK z8_T^mU})USyh+Yn?0VfeKQWqn_@T{vSKc(8dn?U*Be^H%S2mjX@_;Yjdgu{+c})Lv z75J(${m&i~KULtzBXL5Ukv*S)t%NxSZzc{AoK}HT4>%Q^EeB_5CeB0#6P$USX}x^C z9usF(;7o834XY!-s?CY*SaK6Aq(+Uz+F*Y@@`yX{Csz_{L;n%_Zo}{1DDj74qviY4 
z#2*S>{FIthQy7aLv!}!OikoOBPjI=w#HHjcZ9zV<)3=X3?6acpXjploV0A?d`d$tU zWIS_Zt-;eii?Pg!fw!Efv78%w@mib1ScuJ_CYrISlkhn5AtE}m=bTaw9F3$Z#T-sQ-BQ(i-RP27qoLj=CXUoS- z{rFxvQ^`wzNKs?A#Tp{7N3x#lx#n@~LvuVKZOpC)4|&DCql}!y#a)pdGN!)TIGS&q z`CLfiV^~o9rBe!Yfej0U06MmVBwreEsWbfP>(!SIr z6WOY-TzN8=r>QL@`mMyB`p})l*HFzKpTpks-zm>bFxSK;UG6Z>ifp(79<>l#2Ks@> zu&LA{Xc@?PXLwmq8LIof#m^1jH~p|lhF!zlPv=uF=1JxjM6PWm-ew7V$#`_OrR*oo z=xwWzT~C(^pHk{ll;qlEY=lyiFamED-Wc3n6mI{Sk*{DhIBPq41+=QvefJ?~q?GeL zPGTA1odXzq3h(*Qe^PY)CnXTawN`BVr?)D8+7CDiy#TkaG&VJvv+E5Bf*s6VllLx`;$id;1e8ErVSS|T>}Bw}y-cgIzF zN}n;#@=*Wo*VIT7-x@Kvb*V+^<{{*$#(v7g z;3hLJsKOoI%s5YC-w)PuE{gemlArMfb6)aJRd+6~#K1>fmwHy2$QRQdex^J2_(-<+ z8D}$>SDL!ojveBOLi6+PH_7obC>t@ zGjZ65gl130_0a4BbgR?(#Oxk3X!bNY-26g&Hrd~&pL1~fPhX>iZw5an@xwHuhgi07 z!MA0fPBZP(mWtR`>HBj1or&grg%4SL0DEc^JTy;YqECK}-&Mp%Gp6DnROz-(4ZpAC^@C>X zB&V*xMf7YrPwfQ$o)s7BPeO<5fjfF_-)BX5@1E)l^;HA#`;gm0+8NBdLC)uN$HnPq zw>C6LJe=5@en-7A;YE5}wUat7Irs_on6h3fvfj_#d4+-tU;{6$6yHJ1AHhW;xZwNK z$a*>8t%Uw4OtN7NzwP{PKi71AH@cs{4OrOu-GjhX_??AAeXOj}oxIbwde3dul1ml; z80RK+e)ns9c1z*guaS$zCwX6pS#Lw8@W5Y@E5n`vN|(N_k!LL)P&0zOFNJc3EGbY( zyPhp-osYaP=dlMh7GBA9N8ML5lnrP3FLJ;W1C#^r_OG6K^Hs}q-9gqL_Po5{_3VY5 zTMAwe{|8r+L(Vm*usc>S8@OxziH#yw&w>9{UjtPa>UXTdr#XPU)7VPeM^E8=ht{!D zcse+gyjo)4dWYwvZtM$;FX9xwI2gV-1l} z@Py#a#-5mL#@;}?Cv=-Y)cM{2xW?G);|-`X$t zX7%is^V!R~md%33XF}^Up!xgQFYjf)yyr32g1%qcd3rB+!Z$CMe{VI%Y?mV)BYJE_ z*0??7BKstn;9J)GnXAlEMb_HO`u!)aOOEg8{N94A?|xv?B{Wrle5w16B-RSu#!g2b z8y)%Z8(DIiecrA3jl@=Af6q>Tl8ZXld&S6!pX0swpA_onIumB-{x;5=u4!0Bt@=gC zzokmNe&%vM@=pR{Z!ydlJvREzcoGmV;lxw`D)GY(l@}ezZ+!GIO`vgtAPVP!OHz z)5r-DgXpIH7Urdo!R-H(vqtxwN!(j9ZEivb+rqwns#^z3svVD=PWEC;_L8|KQENy0 z$?iO_BQ$ZC&>V7rrDtuU50N`gb?aG4X3mGCKpyxNxx?Ou$R5=Wclc%AYa@@WE_XUM)T=!%$r7>>}$l6N7I4C4%*j-2b*8_esBqW>$9YnMVCo0&%mo9t0hn1fXB`} zNS&j5$w$46c%~vpk9=`*g{gOq3_Zzw4f%pIf5;c7<4pPD{j27YC#>tpXhRRbz~1-- zaB3C38a#dtUa{jA%s_+X`;RC5c_pr`b2hp*|g2TOqcQ&=&d1sTf ziL6v{6x(;}l|^B>P8L(W{+VXx1J}p{o`b!kn7wUCy0W2_XJkKXEgaBvGLC&H5k5t` zSJSS?)Pt(faqa!edR%guK9J!ie5-U@1o6`Bbv`*txwuJRI?3v#~!yjm+Mkf zLiEYKe_GM4Pwrx`=vp=n92K)aPGw)ble2+_|KiZmw7MK*(nZj10?t5rURg-GB>NN66wd#$t zkHtqVTFD5#PmOvHwl(k>4%7bA1D`wCufh6%za(sX-FT3{=1&~$&aksC(dvEmJ{ z2Pc{MqXj;%)4$MmdzDc;CX;e_m3!tC)In-!Zl}SFSN4+_|jx%mNt8NqdSY)_Do>6(G zjhZRtTki4;ACep*J|tE%Z+ViiR;mN~UECwmSM@%{EoF7O`%ANddC?*lh4u-@c(UuT5$iH3dR%fLsj zXlmxpFdNZ>n#mEoMqSfDe&D(aU?(|( z_Yqr)KP~Jzs>}K_JV%wLqqMc?x9%p%^IsT>%$9ryi+-;(sg=1QiPN$3-kkqtpX)&8 zDlv(6*|H+7M^3TiUf159b)5~Jep1czVYC}@#^HZV!T*?wY@7xUNQW0>z^_N34-V+& z*Ev74)?j)l-)-d2()FOv8^48y`-7u1oMRB2N!@7;8jw9Xg=;zdPF6E-I?Nh+!{NoR zKrH>*yFy9TbBliXqJ{r~<%>d_ct`Y#5^!Eco@vPiB6di5UdE%S@wL<*2vmS?X)}j5i|@{< z6BPaC}C*ug{M&9oSxV{o}3Uri^_*e1oxuoX$G2lWoU-ZP|8@Vm~=eJvYhu zn2POnX@WB0^Ym$_pJE&RNZe>FSz7j$oJ+kowU76vDifv>BPuby_;e2HHlv%B@TFvC z_R`Ci8OyISXwgedozP|pZ;-vlh^aH|>i?F?`o?eW zBaf}zt9h-5UUz+2=yfi%_b@a#2U>gxntTvl?E(Bjak`!+b{{L}*4>Ol>;$K`_V>4q zQ^>j1W9R-2_ry+cZXERhq?Q}D72oQ#Q1IlGkajXIB((Q1xIB*Dbofqq66fF!U&+}B za9+z??3)skl4JU(^Xs8e+kKInZt3GZbaB*q=r>0jG^yvT zU@RgxjfU35rzyTo@fDMIvpa@Be47II-?SU++3weLosJCMXFH1~{!8&|&N*fHFC~s$ ze4P@bW%qST3_)-@`w4qdEcp3|1GV5M=S_4x5?heX*&E@FoL^`n2VM8|%^BwP=}Hs6 zqa?2hr%k}gi`^oKE%*ZKT7UNtc4uI>2|vEqJWIZ@57>F4V7JtS-6n~@fM?0sNx|pe zFF{Y+YUatf8GG*c3i5uLf}Td75&COmZ3%t0Xv%h3UtYe2e>#z;?fHXNQ!~I7>OT_0mkFUOcu*Fvx9}^u3zo4a z#HJy!8XjU*FF#hJzyG`q$5pmi4Ot%&tKlLJ=#%mVTo`4n4~f^X3Q{hy2n4||jNsI;!)L!z^Z zY^YxwZC*py7he1X{Z7M2LM<~rrwFmM( zkQm$`_KqO-4%t^C*gGQFJ7iz6!^*JzkUQ1JL;g}*AxW>gbr8|#PGKXEI-#=HuAB`Y zIlSA@@5Ys&-)-p04aJ^v0NRuLy8msKJNzzfMf06WQS@GYg~4;khp6-5J>-(H^WY-+ zAA<+~9vB2odWm3*IXA?BVK^229b82?mlZT~?wvC2G8ggBD1XE&=G-H?T_#nRFA^B9 
z`AyApHeg@0hy5B^KSqMbU6!??JA_|T>L-y?S?W~^eEV*?{mzbL<#Y+YwF%=KeT)r# zQ*VxT-s5jEeG|v}t?EhU&+!YUIX)*=WvRm2A49Ha+o4hQaaaE_ zYZ+r2zw zjZ@JlR$oJIcWNHuTP@4xcUM0CUl)5f@U!em?qn=vEtvq030&p=m+Jg6^qa5Z ziFc^Ov11rdH2mtDjUqGfTh#A3qP$%ROgSIlr|BhO9b3{NRlyI)Y=Zs4!u z1;+Y8a@t}@*CPMO|DYb zmdtLMZoO;Tcx&c}@gtsfJiNjfcbmZ%XB#v2GW0R--|F*D&v6h@I9n@W% zCw#)a6z=Un2Be>3?7xa`PYB)6U8CpHaJtQeZq|gAV@1w<8Qng9U?|;2or7+b(CvXd zNAoAQQ0D~vQs8yLr9@$@hY@`mm03*uc4m-=7E{Z~Ah1Q3Q_r$n{eK z58JMz76fCojPXG?S-Obvkvw*}v-VF=KK9sE8fd>d;){!$-leJnM=S+|g@B5ReIi~HDT+`aLO0*>+Tyb~%l8?zY{IdNv#i+0y$?`#=Az}KL*Qg6Bg>EZzNZ5 z&3ALMa>gl_eFEGyqbAl-Uxzv$lRdkul&8fG5xe6({-++#T6jgn-?Bof|HDewFzf)~ zu?Ffgb%Lkhn+9*pr2dz(@fY8W+nC%dwu11KoQIM#0}^jB_1~%CEOg`>g?GhAkOKW? zLcewR-JeDu{weyfwF2FrIvzEDdp)ZKxw0L7csq4Gwo_|%J9TqwsGEzuvS0&i)lc~b zer;~yyB2)kX1(2pQD?8WOJ0Qq*?d=H)STT;&D`jAOA?jKo-u0XPBv3=J5pD=I@gKHREQ{sBB=m*z+aP0@z zesB$gYYw&3@8X*l1LlLn>ijmK@4ail99^fpL&j}bo%AdNW~uQj<1Kc}&&p3xn@q2- zl@`S(J7+0N&u3iz4f^(1Te3FV9et0YyZ17Oskc_&vB^?@q}ozH{q--lhO4u)8i)Z3 zZxa1X+M+%e25V}*skF)QN!UraKJ8^7&d6s z4~?Q}DD9s(2aQ^W(deTe=;PY2wXfAIF20fWHCchqJ!0Bz4=dkLmj0*ox9rud&N;J8mwSvQt=Xf8bh|ocQq6Pd!}W|osF*%Gmt|*V zh>c5o+r^&cSq?U4JoapLu4U~;^u4LdhVe0DlySD)l?$hiR>m#b$$X|BklBudcEYqn zy_^Y0FA{rASu1UH#xC?&EH>(>S?Xi3X(eZP7tf`i66`8u*W1Y>@wo!)Ea0ETbJ1Hm zd0)<#Tlww6*O-lrbLFNjETgvAQRH+r_Mq5q-OwRo!0n~IEtdGY2ypNH|EPQS_^9eT z|NopBE@viy1PFnECV`@vU|TPc5Npi@yd=RJ7c1IU67Vu1+KTl8R!t(coe;H+qUl0+ z3EIodSn9R~HEp*9ZM#9XwqRF#+a-YAPKa6&(F_9feZJ0_Lna}?Y=8Ux?eF);JZ9#c z&-MLzf8O`c`?JzqS&hv71^|-!dJCOg&#UNbTO()vc-HM56YF9x2?hr~d2#ej z;x)s}L2z~9P(!^?RxUjbM zCSxTwfgIt!?l63V^7m{%-XdGC>H_!O*b{b-`&qI@Fy8(^)sOXj;&I&#=i$P`|Ek%N!eUue0PvBKET)`Yd-7P56F0%U-zxs&^~qjzBtR3FLHWX*NpFxd=c?Zn6qWNch}>{7e8Jz z!5)8S?rEQE^=2L2bDvr;i#6JxJi#=4G_e9?fs`Ax|e%Z=s< z=&&JesbG&gZ;$1vPaz~r@`ku&HD{Q&LGXvVUW^qr{d(8`4Z^<2b$Q^++ zxg)|m$sOvqbjRk+)2Cvx$uh|B#*#@tP zz-tQNkKD01VGDIy`tf4vPXqDdFn17SQzr+Up29e<=l5XzRdjf$bpBCrOZ+vR&cC(d zXp8b{A&NX6rImb@$V1d-=e`Cp6ovS`|ecx+iW_YW%H4Lr~OXZ6Csbn z;a^jxl{Ij2dI$d_Yp%C(qA{x%r^_ZAi@~ktQ{7kU-tnqE`t!ThjmY}#>^AqJ2VFnXSfFPGN0WVUayj<^impT%d#O3OJA~iJqQ6{z`}kdA(+}q^Tg~$; zsd-K|CpZ0qwx>}y&Nu_iIoF)no!9)CBXP#j$-dYN4OE?bsH1uP8|DAtv391vyIt?7 z&;7P`dlBT=F#gf4?1RXNtfp*Rj?IpFSf}@GIW{|X9>2Gx>KT$ljT^^@~l2`?9D$-wm55deNTTluVl4HH^uL3(evGHvBUBZk^`nZ&pfo z+Y9ZV%RA{~5%jkX^f&$A2Y-pA^tcFmTx+@>=M}uOY@M$)rNgb6>gaHhlXSSO*dJ@f zqnvwJH;pvb6*JFtJm|}lCU@Iwsxw3>p5$S!zlJ|-;k!SMe0Z0$7i=7%OmdCxp7+2T zyzq!&=!L`aDVQhp*9Q6zyL1)3DvW%3m-GP9t0&TE9Q0~H8kf2Q@PC4a+{AoDqta=} zFKUjq*zyE4{w`aePWR_bh%I3J5zoo~oc#-Kv2DuxY*|CTA6wp7cquZu{L(?j9k0Va zk8ZIQJ)%FY0d6+`v|+>k7N3>=t#)S7=WN8%_bYtW(fl zWQrlv-s}|IBA!c11iR?mf~K{MuW@yD<5`k$aLiK43)eI#QN+?xJ8~)0M%*iym|SgClfLY*y@( zFCEw2$!)TAgr^y+pSuzpG-Z9C##V8K;5a_!;~vhux0AE#9-LM(7!Fx~p3D8u$PdtF z*`4Hz<(}b$72r%ia(66|%^d!z#$3~6n=;T&tyur~X>H8&CFy~g7{6Ag5K z+t2Gqm-r@O3z%$_7`K-t>eU{!ZP@8Se&@cXc1~5!%*XbSDwmum+F?RFJkSp8USEcO zpbs4Kdhx&Z-kTi2&s^}a;Of%J*wLkzndcOvBP>|Tf88-!INSOCG42?Z9}yj~sAU9p z27IHrW_EWFncL#C1KF>X|NE%3eVKFwNA{Q9qvK9^OR9V;?{|1)BjJ8{c-kZ2$m`r~ z+rhIqexRpV=cm!F_u6}Z0=mQa*bmy#tED@D@?2ZHZHe-?Kz zPV||=rymGDrhAE{m>=7Eb}Eu68aGQd5>g{I#>>WadqNAPcMGskzE_|}B;={`e?_8Tv_^yGVjwMB8{1`&YldGU!WZN_0>2Htzdv2kshA z9s8@5yP9h=AGYtbt~5`}s5E^i#;!Z9HU1znntXODA2yrT4>WGcak@vk{>4enZ~W?L zOCvA|1D_SZWj=86Grsx63>I85vuO9m^WpVBa{Fn;{zu`1_LFfo`{WZo)@qb$amEz%z-Z&>5owY%!*3?ku?P(pP=AL%*&I^;k2uueZELk(DRk4rPxe zxA>6%^75h^plNO8@K*VFEbcSM76dIBxW7}4WnZb${Na|PE%8!p>_?Kb52V4W;qQc% z3+wId)q&`|3)3UF{1n=AGJQ?HG-{XYU&uwTj(q=?RP4u;9t$2(fG@W{j|k-qYYWM~ zi7i*Y>kx8iM~YXt??T8t@=2DnrtUM!%(i0)DuN|eK6IRX{007Cu`BB&hM^;%PfSM7 zPNekg&NCc6+m;bNjcmjouSb8+^VWYl3=J>HFdY8?`jO&4)RuwF^ggouVRR?S@R94S 
z4Za5MJx-8Yg8EqvH@vWQTu;&6$Wi}}3>7B6!L>hIrwwd>E+B61E0VoDqnc|xSCmws zYboAoFZAL`==eH#y=Mu_I~go;!T?yY<9{-UEyTo5)0C)}*)Yd$p+H#@mI57UPSLDC;rdjm|yf zM)R>djCGUd83(J_EAgd4$5sk|Dusr~RvJE@++xQ8!OQ~7q*uWEAltsUeZlukQ z{{dV4XIK--iF<7OgJX-|dh^kiP*(lIn&WfZlH-lG_;h1+m)B^kT4ptu{mYHE|M4|o z{UV>--N#!xksZjN)V5zR^g4KpoV@!DWI@>};mf<>%e!9=CtGCqvMhKq{@dyJpL5Bj zz&sb{PQZrDf9RPlGp5UnhiUgc)+XIYG?Q_EVB^)q*k}2@#l|byALa9z7;EEMFD_;C ztk(}De*NV0$ri=F#?QLgwsXBW!LePOuxTD6b_TvEn}uv!z@SaG4d;DqLvBOv3%SQG zipmbqMH$(}0_UP9pjWq!88*gfIOFIkG-iLTDIXDasg^`h;LgNX&!cWx|Zf)e;`1`~k%tF5r zz6sY{JI*&)lO7w_)NXcc+KDxgt)wqr8LWZgYT}2`3%$X_1JwN%@ROaQdhRG_dREMf zKDlS5u}-jwVQc9B?)Ry8Td60zQ_CoL31h6{|3ku$YySlqyf>!FibJQ7ANIlzWM}Yc^}#Ui9sMp)VU3**_1yd;hB)#E-oy@w-=+ z-1LKaZCZo$c-vLANn&98#`PxW4a4_78a-kRb2$wiX)HR@FmxnGUbgx77hL{5wMTlA z&!_VV_^YAm@t4fzENp0p@lmU6@OP(us}nxjx#nvZ?&F=qWAH_QL*F`@Y}pT76w?*L zRv$*sss~5+qHFx^x5rz=pAgFa&?cqg{+PVa*kI`%+s z?5Vf$tu&QIRp-yCI?n!^Xz#xpfM4gQkg?VqDyc+mi1)Q$LKXC>6)-YS&@(zFpf}LVo@w+XMPAMJIhYqRV!_XniptIV; z)>ke`_$JM7s~}c?{{0ufdh3kJ%9+U84gxe9Nj*A#8i*T^fg(0LTz#G<&r4c8BIt$Hss0Mb*Cty1R!wjXvYs3j>)O z*ZUL89>X^pv&e^GnbnW^gWucj_q4x@pMk3t|9PQhu3cZ4sCrE2`vPY5k5tz((ZxL2 zGmLnVVt~TP@eV(j5SyKZE~jJ@jp0#jTV2rSIDRb75*;jOuMnTJMLI$ka#+D=tI29u z+G8QVONUb2dkyh1%ITAL@cwtMBxl7J#JQKTpa}fF3DgWWG$lZXf zE?Ga!K2W@g)zmBRhtccy;7{9wKWz`Vwg-P&hvZT8mr0E23D#V&?s)gC=bvZo|0m?f zU)_GP{5a!A;uO@+l+Gf-pm9n;@$=|6l7T1QAAHP$KIKA_h$&fI(S7YXqTg#dKb3yo zr|c$2c9tJl@?0r6-8sgXwf)Av&r5ior{_1_Xt(2*KcDAS#L?v6?8vL`^N;g<&Nu28 z5=%2{(=AS0Wx%FvS>JZ2t3EORd#n4_Kb_~T+|RwKx^MYxo_BJ`Xj#AVnLN+qdH%A# z?PRDOp3_dBaS!LYJwD=Cs#9(0yzbu5_bvA!aIZ%Pl?{Cl`9#X#{S8ZRyjrky=lZV7 z5YM@NY2UHF$#eBlkI$-XY2SAL!aKz@Px5ZeNyoF_L!&$J*+#rQMLIXI_xiqVzD_-8 z`z&HZ77kP|ZuS%j2e&Wj+usiARUJ>xUiI~X>&f5i;&cY}HSPi1$Dfdn&mErO&E%Sm zzVGVP=Nqg0?TI1BD~)E1u8y<`Ja%=sqwUx~mIq5Zq zkJxhA9pliIT+R(Fhi>?SUtQ=U?gL$~d%t*e!yRqn5B`Sp+AQQQ!7=RA?jUR~)t}QUFnCPE1{P#tQq3jq|P@E2{j<5C73M&n+ z>2QJm&7MHt>99L(g2JZFto=6A?3Tm{FYAG>Xjb?Z9v8TEmIY@5geyXD(^3cHfxPoj?odkU}v))-@p%4Nrd*Twnv zolE|#;ly2|S8S(mJr5<5TeDRke0Kb>6Jo202auk}__lP=R&G9LPoYKVp}V04eae1; zvO)6Wi8l(zti=6~u?JRWn;X}sw|Qf#&H7`>t^M2hJnsx_Bi7ZNsp1Tm5yf=B8)<4Vd z{(6|JyI2o9_A%>WZ`%`Ft{p3yY@ca>zsnc>+avIO?2Tb?G&j>&*+J}FAekH+TM?QQ z@cEna%-q-N@t3LWg|8iNX+J))$Pc{gu}$jP@5aMZc&0Ypa_`3Rxg8r>B-vE9m3sD0 zNV(n6ovj({vx!c8jH?&rE$0kWs_b@TcgcgIQQ_yz#9i!(D&SQ2%cCvm)U#@)1+kG3 zFKCACd}ruT0dPDc>;9@AKk*YYQ73(-pH0j3zOy*@<0Dpz z2ioOF8|OCgyMQ@__l+uz|W3hf|x3fU)T&3q1&1(zk-e8rz$lK=jH;VfU!P9Xn zw~;n3rtDpkpP*S0=&JO+O&@c3ne1Sn|Ml^fZ62e$N`BwoHpiv6c`NTW(%!iPjHhwm zerIvBh$aHJD2rE`xVY{>?|Lj=1?nDzdq*J|46oz!iy#vs$H zXuq)N3&>53iP+RygM4A%oHwUQeJNJ8{)6OVXq`R2!Nzvv)`7>?o*LUT(0_M6n#@<-4EU%+l$ ze;&SY<}dhdA_nzt?WvS6Tz+urN&C^4-r)TV#^n0LU*8NJ#~&^{=;IGZK8--Tg>x>h z1%clWnPY@lG~s&V?we28j2n5g^C=s~N1YKk zhcQTYOpjk#%Q#PpUm4LiHs`BVM_b&p5FzsRxwa+Ay93#X3XpeY;|r(8-2W_w#yr$m zQEzI@VQfF5sg3>Oi5k(#!-^+DzIEbl9NWxwjCoet8kp}T7q>EZKkX%7GIt%h#REf)0<-Uhvgo zYgk+Z&9!Dczacl};|O8Tx8SYVKib@P#3%03TL0^@WQ(4ujBiHvh5)`t$)}}7p6WCiJ#?ZxlRxpn8{Ok>RoZSid(2IP|*{9#o{sng2r((^VdLvTxd}dCQ zVi5+)$qIaJd+I>>5)P&D4EbKm@9Wr5G0f9cOnhe&n$_dk_(#RlhUv%g*-VPP!0#W! 
zOQi2;9P$@TWE~y4E1%7HoA$YJBkq|rHy6S?{|ubGeb3Cf>%15HvxUxJz>D647tOsd z_}J6GH*ERxq15*q^Ni;I0!|-1b+jeC`{Fk6dsgGka}xzw*t0kd@hs?~!1SpBRFrrf~=I;CksVmD-o)+MEOGi*=fbO}TbPMP(&; zQi~oln)r<$!0SuRG2P;m2IXD;?Z!4pFWJwyWfzlg!jFEkPIz~w+p1)SGVG{5 z#6Csld3VX)i0lxJc*ooH*ZAe=&v7Q;Xb$-ICq7f*{im^~Uqw$2d8VA#Q8;Y1gKOEb zKjY`%*|;6Nb8~d)eOz)1{cMMRM`zwbymq!7ukAs8j(QjM#MUf&p<+hPN%7hdo{KI- z){K84UWpDdtf1%*@U}SeLcyoZwojE}hu`yy64BPuHN#3u_wex!FDc#NX)DMejvv}O zp0O4@F1~A&l(ra&^!H)#<@`1Hs_vhD` zRNpekH$BJuS5tEj!Q0E>%^_@Jq15@nI_$1>*j?+O@paf;<=cy+<7ph5XyYkh+4D5` z&1XNfq1|M>`ztwPW6ZMoZ)sV=dfL}48&N*`(n{)uQaq`S^9AYcLLX<{mExo6=L0*L zTmN%vH>6+}f+xvmTLVwB_iHM~bp`xrY1Bxp@C2H4#!9{l%LgxD9-%RTrU|^$o@9S6 zeD($>KgTtUE&kU^`;4jspJ3a+0~^Kg3NDe<`Hl$ZCbt2X{^vV7fQ$S&?|qW}jLkj% zDduiY*eToao*RsVhmcG5p?}3UBFD}%nzN0GO?Ugr8%mv1$rjJ0U@|`CANkUWGZ*9g z_(vwEV3ICdq{AeZf=R3&Od`M}asnn!EbxjHOghunPcl>$eRMLvIO}cYoA$cu`Sm<+ z1s~$zpVnH>uHl(0Z~4B~D{m?0Q+y-o9ev6(9h{4d5~o2vXYwKfqYlpaWUsuB*hpkS z;kT#kNAEvvMmv$I1xxuEBrA%JCCRxHx`B9Za>Z0ZZ+BC^bEI=-IX+%|-$>Z9N!tAK ziM89oe%MFZf%xXc&y^*ztI864JjAF%?+?RAT^@!#XJPwSgFTk~LGW*9ERIiTk};`J zbxiCb-!zS)Fm$ZkmIWNU$+tgDwm9~{lk*2TJmuu)|4;l|WsKc|m(+~7II$w9ysZ*D zU54#1Ew7vpZ+iqk=qO~&=Vp9=!z;i}{>>`%9>q;6_OKlt;~nVdX5tMuQ`Rl##2Z@O z2JaQa7sjsO_Tk50YSHFrW>_0K{>h08dKtbaonLW5A#5~vvDd=TVC}c?lfC<`JZ-;? z0}lo(*O4*szKmxWvsd)nW8l1PM?Vbi6fmzicI|EK1NW>}1X?synf_DYi|DxUbDQL2 z_|jnU&iUxP$d0>X&@C>=JK;w?0FKY+Oy{k@^>TR1{LvXrHOPP!=GiH_##u|s6jFvY z%A84=z-WI{A!9G3+zw#sp2K|k6J;mPRw~xY`svB`{5wA1*Pd^XV?%qlgFJfLzw_Xs z>Gu4Wu<;CJ&zGNOFxvIsf#HD~2Zn-Q82An5hpu+)SG%J&{QCN#pTP$mg04qAg-!E; zO9nK*0GjX6x5B1eo8~+8?L6i$-aP*&M_YE%cL(yfaz@!aaBVLSlngKs4=hOOSEu4- zcMjs$4)^nGKa-mKV7#nGJPX?GjNjo|9>zbIEx(a{hfUkwf8y`|zor!zIp?iko;EnG zhOhL6~Ocat@y#bKD6Rk+sU-zme2ivi&pFj4UJYb z{14HJvzWhVMG)Au(|5;t|9iBeH8uB-q7|}NIJOvvUcCR=Q`3uEQuIQxd4t8Fzn8Kb zC>BpT-e5F(sPe`0$QN%9LB4Ry^_MTOgPtT`T#I}m-*5yuqC>XImipefacKD-1gXopaBYcXA&Yd`@b8_Q1y?r{H7Tu{TQw+W{YYxnDfh&J-UD zliP47yzFw;@>Y0R0A98OUa(_KVJgbOpqj1u1^Ir!a`}>a- zn|sA)k7jF4b+=A`A0+1{+Tym|Etr@ zI~Y&Fv245Sp`*z@W%J0(Hqcyuac@CkhsVmmH#I+Z^k-^ocwT@`(TIG4eKNBVTc+k!kA8*UNcPPI;W3;E zV~z#)8Bua}^zk*xhtzRz?jqSsb?*BK!Gb#0@bZ}D>Ck`0f&?ahciLX7z% z!#zE(`0z`!hD*&1+t=?O>uc(!OvZ2%o1edXJMT1iKl)4MjB-21K9ruB+@9!NiQ);N{_Vi-0}*4aY0 ziZMRmx+}n(r8^Om+I%Z>uE-qKbOm;?x0%o1yrW`2V9q~a&VNhIxsLnW@Ch&0eNUsQVcF`pE&=@2QD>_S8bgB%9VdHx`b(OXVxgN7kz?*B0#Bqix?R_UQMoHSs<4 z=5b43!*cK^Zsa{z-zUx!T0v~RaLzJ2iWLu{d8N1S+MD~v4valtd#cgNO?IH3`GhCV zOAyy%+p@JLJD^EL!0$=mw}bVOUn37+#HU#QQ}xrhf}9TxMswo|_Sh%NqsVs6m9)b? z*m*h6Yx&;D{vkKw5%K2mde-s&k&AX)b~2raic@D8GZ8N$?c*?+;F6|4v&J z0MBZlQv5+BdBv^^^w@R5f$(|*U;4zR@TK7SRN~0#&z+BGO_AwuLeE|J@`Fb6+POwT zdq87)89(42Xv&_2M#-KrM&f1913w~p4>;$mZ9lABOO8!McAW41M0pVgf$dyRv+SS^ z4|9v>KyZPVjrz$u!d$MF;N_}bd8 z6g`fn=y5cS9!FC2I08M^o+~g6#dRkNpapfz(V@*J+j8P3=yP@qKlp!(K1b5%a~W%f zKQ!?i`11ku`9*SXxHS29b{-`8ZphyOoQf$M1;<+HPxP4_9tr*bzd@e`i&N3(R@VKb z7_k2aeQrI}mp*@!bwAnHWW-!Qov)xQpx6iT-DRw24davlErb0~F(cz7e}R02qI=j3 zQ+~u}HsGt;1Tr>=Mt9-=TgrDibiB(jimRc^k}YiiAGoSMca+6}(ZJM>lgWq&Ge$x^7P5b5rKg3z{_wctIw$I~p9?b|QvL{pj z*xa@_vT(=a*txJ_D<7Wn(k79Gli2nZ7ZOj|E=v71EhOk)3! 
zJ8{>udWI+u-^meRUk= ztQ36le0!bCO2S8zv*W$~q+dk<%;o%2IJgFSk` zxhMmgRf?}tx$y(&evxk`($PKkmF~%n1M9Ak>SZMf(oy?fObaYv*kLS`O z8a+mvPrl2PEQ!s?IX6!{hW3LG-Y=S(eokL!+C`rO$a-W@L==D?Z*dmVWEZvEA+XB*(x%&!vuhnz@{J?_}Ta zfo~h0tB$+^m&wDY88I_6MyJ(12;H{pW3LyVw`gN0->PSskFF=~FkA7gC+Defe3g+G zZ(3q8hB$ub(ox3BHS`IM9+FI)k4(&-Y_qW6--!%#7jq{5X=OxlYo>8t2)HV~t%i5+ zO*fhgk-L@x<4uYM^vizh{NKxe_3fo^Yjy58)gz9pu$8lF;=iwq{;;L$H6yy0HbgJv zJN~5L4nE~7e&uI3orqtNKP6qCa$|8hlUmrGYVRj%?~g`wEB_zF{-`$PqsU2Xv#YJQ z&1&$(mF>kZ`nUhR(H~}a!FSh9Bi`DcK!#;ZD2tXGrpll9n`l8e7jxy`KGM#l1e@Od+9yP2HK zUjT=H0}lTH9R7`Yn*Wh2KH2m`8;3Ky4dk=2;Gg_ArQmN6olt$}c`A>L9rwt3lN<$>ca#(B_Hkn0GJ|c~4&}~a zU)ebzo`V13%i66rsbj~c^w0aCvx>P+@;*eEzrh&N z`|95}apJogzRh`_LrdqBf0&&8Hh7w65BOCL4#{8nrS(TzSj$Y}P3@d5oi`BANEz0)uoYh1#ebc_ zHfH!Y$QBg<4*8}vKXBiOMF;L6$I5jzJpuAxRNz1Cx;b@T`JTchJCfagd=8B0%TP1tjUlW+)K3DF~4rECEH~0S5*Iw|dc+7|7gu9`~H(d5UBT)rhx2-t? zU$c=AePo`xON2X%?Q_$+zxCat_SskI0ZGc#gAdRc-2E}>_as?3sW~ln}L-%;-eZu!m?!V;mmN&qK>Ie9`heRv$ZLoW^On%;KX(Jz|Fc>~8iCcWqH(jJK1@5m<+ z@8q8N8c*|hY($3^p(FDy_^nZ&>%7)Drai6v!6e*nK0Mud*LgSZ&f?v7!1ezc`N=pr z+mp*ZCCMhEEtwn{Ro-)Od0~9KxjH8=*!IV2a+Cx6!@n`3oPSxU7zTLR!c0EOHL7zM z`aOwU$hqE}+V&fW*9Nx^5r<41!z_zDme$IjZm>9~lsnqV#T>e0$_8r&XVHOu7~HZr z?->S1CV%Ot8{(W77wxX0Od)mVb6&EJ^O8@q|K@X^TfZ+>PO#IFuc2MipA3G-$^RS2 z_FFSzeA8_Bf1@|6N%farL#`R*lp-(pvT)ymYzT8NL(f94FS@Sa1=W+@1{~V3ts;A6 zHC3wKb;e58DrHxj8qqZ($2njNu+^o%PrxHs>4759gCh8rxqx>l1$l*l^~vdl7o)$ER$lC62X@wb;iw z2kpbVDs;(7VIDo5;BKcyekLJ2#cN=q?HJUf64}jClH+X**_S>?}Jr z@72;_#<{J;^uJFm;bClSny1b|2Y~z9Ph1uVtTx6~@;Te|&nK@H_LGd-ymKq>ZOrG+ z#`A)0bIeiLvsnwu_$ZUhdxQ4{yr0awdT3HUaBTp-iYroV>3-r*-ykNgljre!@HKId zW7j>#>PB#?kuzNtdD1g{g>jyBO~1yrQ$1J;u28=VSR4Q@1LkVwxs4Og$VLNxu0DO7 z@Uxit_Yl+n4m9N;Hm7;4d3^cYlHB3BcW^G2b3Rixc$vdq_9OWgkjW(kw$c-xHHeDJ!E^Sr|0`Dr-ZfL>F9%-zLW>0E-Je%(DX z>aWS!8rK2fU730MIN@XkZ8!0$L1y>)Jm(jf{=)cjPswlTFAiT6ysDKCxXRd7&bNpB zV(uF5uTEP-2X+hM^sDs@A3Uqb$DB&doZ38c1Fsb=3H)?}O-tBMbIfn%tkL)=vy?K% zJITcrz&0S7!22b<|1$6AGyaTslUvrppDNhr(SylZm-Fo7?DMy=|Nn-4o-{MMMVnq> zpR?EPeXh25nxl4Br}kp!-;#^9XAWkgGY&I~u?3VM^v$Z&IsD;gj4R|fcDw~kO|y18 zuwBc(swCfICGA&w^NPAhn&)8;SgSLT!IH8gx}y-k&SI^70K1l-oWu6qhMOz5L;w8H z#RI^ieF}7%dAW21{9f$$84L8ToH{<}e!$E=?^W>rXQPcG#mBuvo+X_h4IDY%qP#{q zeCI)zW-~_NsN}Dg7^C{O`!#P^wUmB^6N1xx;8e?8-b9v@T|;`Q=DBqE8Fv``eji$) z`T6KSK+Z65-d?wBv3qr4^RmcK>e{>_(|T=QS>z%R&BF#<7(eTpfCt{7d$7=#Xp_8< z?DM_jomkq!D#v%B@kTGrYmyGC@v59bzp|-b4gT1B&YYMbywQ6vu!VwaaK6|8G}f@DhCvQN~Xjm8@X^`<`TO@`Mx)w1(dT zP9AK{x}RY$_MkOd*QK7o2k@dg=6Mi&d}rz0fMEGA1C({vI5T#qTb6ZG*~h?vts{-1 z0^KzNEV`i2qUjyjSJKyW7Vtj`t{kF2wWYBLCw0yRygiY#yj9Doc__3<-`Qab9)eNrQ$Uc$WQ)e2hWmA^D zU2s&s^KSS)a))hiKfrHf4BO@&usCy*Hilxx@PY7*`MIXRY2RE(eZ9Xx?WEbwwO{(%$qx+@1K2;eO^k85c5>MZWFPk}I@)qR z-yg6(!u6AI{7=cn{|*f8oZo44Jbr_NUz5q>{%zp2$Ry8u+WYrY?-l>Ql{p;#OrA~OeoA>F@jJuPvZVvz7^FQLf&OYN& zlih1iEMp&`-w^M8DgM?=@LgPt&3hiY&|J>gl|{Rz|Ld)?d#$1VK>F_c_%?Kw` zhu^jQ4j@Ye%#1?CNQ96VrR&yBBnLTq?`-g<(bkg<^k(P1^Iv+j^M4>cbD(*K%0*Al zxT6aDWM~rf2!6(W<+XdEukM_qT^z$ce(hcIDheb>EIE=)z5Q!KD)TSt-!)}0xz7(7;4U% zy5YbYqj@nj>^;sdsC?eD*L;q=oyW+Z(l*JQQk&pDLXG`}eMa+Tz}I4I=zrV?1QY8$_Cc4fc4Me*&EYE@8!RB?k$jh^eps9XBqH=p#x0a5G03NMFz4EcZBJ#u&xZ` za^7(kxHb=2`huILZs_`?L(`|x_JDPL&bdR?gFa@)XClkc4)SE?0mi?Swzd;rAzPVy z-cjuY=s(xY;VjbiN3exK_j~W0_~Yk|b-MfFdavkE@AKdBTy=9e$M_$-*VqIT@tkSs zpzipDzsf^tFh8H>H^3Mp3xjPt3NH&t$CN&rLHrcCMly&ILf5RgH-xUa6J6Acej2~m z7&nUd?z=GWaGPaxGM4SozR0q&5|7WbC=Sen&79Ja^Ec`1tMla%=A`ikSPPvmkM~>WD(3e%>o|@+C-qzF z>r!j2yN=dU=9K$xjm5qj7TZ6?;6Ak8Gd9{w9dF!d`|cv-H;@g?hh7uLf5f~>YRq$r zz>SsDfvr8x>evY#tbwuCaQ0uiZVl(oqx-$P&~=)-us287jJ5eyRr%;|wR)Q`1h(Wv z1@E8sGzZGe<}U7=)j369bWDP7n(KQQtT%2iTTzkQB_g<7Sz)f{jUV344 
z&DA~1C9ny6d6joty_r1`o^kJSB3b9LZL=QiuU_CH`Q?o|$uQ$%??#TdAV0+RE(#`E zDc_q@AgeH+^@`WcE$KMhn60+n2fr+HVw2j04ku1-Z*uHEX-{@g!`0!U~py1=XtK5#w^xb8(cM_Q5_%@XoB6zP9T8_);*H8~yj|BYvZ(e7chx-7b$k+L)eW=SFw^5t`RDaHG*Xv1vxxr#Lfb6?TH> z`+?OP$e3r+|JT1m?qihQ~Y(O)-7l@k7DY!+d4;_JIHSiVLGX`Pv@D z9>15EiHoV9Zj=AQKia(9-lw8HUEXtwRsjE{QAcMi1orwZeLoAGy%kv`@?h_Ij?f@y zy2704x&;{Jq7#_d#bjSgvOeM&@oR})xQB1}ZlBTA+0Jm9?4x)N$JXbp-CfKj?ky{cznh#b_!3Z*oh}18XUc{}56?{5*?)ZU znKD-{>K{km;prPk{$23jm5Vkt-LAOa<|oN_5nrB)U75-LzNM_hxSH4%`b*N6W!bpKIB_-pN{U99=q~1a=<{bD*@)3K9~My;f6ZyRcYgP<+<0d0tet%BzJIi38STzyjR#uaAp7PgyzlW~Bl#e>T6aW=)+0Ne zAO8THfS%j3479j&&95%3;@;Xg_o{~c2C+`idSZUW6NB_88efRrE;CA5Nzi)jfr;z^@+-Gp1O9~1bmWkj$C^*>qYQF~*0!Ekes4K) zMk6vv7SHBzrqM0u_d4Zry}nxG{PKG%C>N&O925UCeQVyL*>U)S*0hYXc^B|pW$gWn zT==KQj->1$S%r?Am=$}CXSqgpt>)!E`!qhXEb=S3|9_n-W1?60cqgUmPKy2ONp&a1 zK0)0{N2^|S>Q0LNJgqK%Zv1gh?rzDZ=``TBZ#nl0RRMp|UggM?-9)rnIPhccs@MKa zpQG0F64vy@`Qh*M$}Z9+gSzbe{wH+LytFywVM#Tjx{04BK88s(N2n8!~asR zQ7ikL`d7STnEuHfQt1N+)wdscxrzRS+@7PCIiA&bjsbYtQvBt% z4n7<^8F=rEUpZ2*s%0Nog`Utyeo zoCpi|<#6@!8Dh6>$95IY!%S ztkJ{tZ4w^@JiGI-*{iPu^feQh)Y3;CZ42I&=BUcr6bx3-RweBeK4iX7YEJE5PEN2D z@R^s8AJbtn1(>wcmm5Ri=8!ycOBwc*Uqxg~8OXlio>!O8{mIMcz7TF-`obP^s8@p@ zVQ6$1nIWA{*Nwx@fvv>0|8QS@2-#Wo%2NDdUBJA+%sta?K8`wS?n|Hikaav9o@a-OzTA4t?067wE7Gi1w|ofxn*YA?XH=2vF$_3HNCY} z?1e_g92jnFc@UqB*45|pZ76_#OD@Q_>4Xzc{swt8@-mI$Z1Cej?6NPCL$C~dNVDyh z#EiUR+4b!AwCy)CO`k%OMLR$wQr^x5aM+v<=VH?cMW*=);*nZS1n9*_o+k9XVa_`5M+DN^VX+{!#f8 z;@DJ{!dDKTA0}5e_~PcEA45($bIKj^JrKuV2!0p4xOp==fyNhN-HF{Vi5qi@2eT8o zb^7D;f5ns8zC0(FPc`EVf7B(pU%|zJnaB~Nz}Zm#_cz#O ze8fMP75BRLzZH~|C&94S=X{@Gmgqk=2xzGbYqLb{TJ@0H$f9xMZP)-c&o z%XA2znwXBo{)?>AR|%q%-nMcqQPz2s%$ zq~}vU!-qZo)5P_9uP$PrVPiM#JQkU(v(Gc9SnU-Qt399HUJYkheDtkyRp6=J7J5u2 zZTac5lD5>2KY4Aj#%|EJz7-Rx@AA`)ak-2I3|_DQr{dqqk-=Tbmqzar|JFF-%X@7D z?a(pzysGSWd-?x6!`Mnrn%#%8j4kEFbcMkG?=z1NUvg|~&t1=XKabJ${&a6N&NCl+ zefB1DPAwf4^=}&1d;mGyc*FQ&Uc=Wf?7T>KGCXu)=bEqb`Pzk5{I246mHj)+cO##* ze46+?%;yn4kMjBYg_F$T%{}|GqR58LRbKz*DsKj#QG7=88N=r^K4bZ0ZZ46H;&~&H z_>`GINKN>w{mlpFnTfoHuUwedu!hfrd>Ssy+3s&f&Pg1pH50zFjOMN{nu)44S(`sa z{SViei4VU<-TYC_2bxsZsS^NKyS_AHHMYmc4m@opl8d};$?utoshdVM*K_B@(Pw>$ z&cBZQVkdVfcVP!qT>U-R90gm!6x}v)nBU)&EebdawgtfT@e#&W`G_XfW82aPxJuW> z@4MwNcY*huX4rT0;d4pso$kP1ad{n&1y@-3m;ar-v#|Tdfm5zAwYv-%S~2tFUf8;k z9Njkj$ODyYOz93XpJvs&MgMc`|BvWDIf8Nx=T4hOyIslnlkE2o>A&r3A>R!38~E?a z!;P2s%EO9*l5D_!Xl6g0QYO|KF}HDo-J(Qt>&x(`Gii4ipTToL8?gy_!DyB_Vdpl# zad0^_rTv-Zw(aXPz)60hxqNqNyzsbfV~m6Ipd+oZ<+p2J-kmD5jPFj$w4+OH8|UOp zZH+o*o=lZ_knh(iQ$-H>Z4-@yijQxPIDI{rD)SWIuTUmP{y*8aYVfCq&J0Gi{)Vw} zeR`~V`dyA;&a1MP3Dp_KJqhic1y=mq-Jc0^zr)1tR^|oHl_>XVHNWq9vVz?H;Eh?j ztMr$pKeQN&VLZ6-a{A6YO>1AjJDW4N@;_HS=}Y*wgI~m_YR(ns-M9pP(-bI!j*K>Y zwAXz3Mv1RExGFGu!Z?2fJ1RaUU$e1l!gXdeD-&N5eci?Qu)pEovWVZ6-Z?R*vDJHq zHm_IMpZ0mU50YE%WL^=^gq`F_JGdYGY5k|=S8Hrqucxq=-OoH^w=pfD@K#?6>y zf6S9D0Xr)^#=g5We1Y?Bo5$*I1vY;8#(@;AS3k8QZd|fLG#@@wNc>R-{a_=F32qaZ zo4%cASuxqQ^zL%nco-e}Vfe+ZzEMpN!!JgeSId^l*(xKklyU4u?$Wt{Za#uhH)mtr zG0E>+&$)wb9=~lbEoZ!1&%NYM6Ku^E2bcEQbZ2~QvsdSE`rZ@eo}VaP!X0l{{hYj% zPJFv7J6qp?PGyZ)RAYEDN^8Qk0qIntRnRj>Mt5T9!)f%aK6MV)13h!gxUr!@Bloq? 
zG{;5}nud-X3!b2HQQ+F2zE#`uF*#EiA2M9Rz}6EIo&qDmql&TAfQLTi&2vZ>}-Soywmk{Mx}7!BhN@ zz4yJSuHv30I|!$xD|XO7^AS#;R91O+gv)sYyi2Yb&b>`zu@z)uD;S4=dOUvW39Nk< zXB&n$i+{z}n9Wlf;8$N95q*!l#B)ru`NP_gQON|I-eH@kRx5Yu)Y#E4`JrOXUwi1g| zx4>K}pO@?bYlvm_4{tB29BwXH+K3N;|4qMe&J;b&eJ!`GUa{(7-{_`#Gxs&iN3KP1 zM)s!5e8A-_@aH0OYEPF`J+P4xK{$!We8TK8FId&y*XN_b(sK3Bid=>Wdt1d9M z%p5(YiFlaABj}=kD+@-)dQRK9ZS*&~mz5Zce+vzGo0y!pR^Qh1$Y{~+vE6webLyVP zo;-W?7kb=!nbcGHZKIoQJHsAuV=lPW#(V^){YBEj+Dl4j5Zg^I&(aFMv1OHBhs~XL z>98SQW%t$0EjXLA$Ly8o;GG=<@Xq^L!_JEx-Z>H8DLJw=#XEOEvtEN|=E6HV;>(v< z@Xn4gW>1)yjE-qWi7g+(JALp@>6#sn8LO<7j{Ly=+s(Iua|OUA@&#~hSukpOv%5Rb z!S(}B53~_S@k?W+>_Zj#JJ$2P+dsDT-AbeLUHI@?osDn!BtB7mO5&HY8H#7t-|X_t zY@28PGvjYR%b2CSggQqW$L0~|3~2hV9=eDe?3B>0Uj z+QzA5(i8n5V|M@B@{8!bcot=KZ=c#c1di1g8pX0RPGtOg_BPKt1H?A*TRG2#msNpY zynK#3e&QL%x=hy6U8f{vTs$PkdmU}w;5_Ez0fw+*4&y@c`Ln zTd{-x!DH;q|FG^=?Dck@lQ%zOtUIE;0k224+Il_k>4eX1=6PqKu|@BcQ_`W4@=u9I zPLBP6_dlUdL2};Xf#lrBKXaPZRDPPTi97)b*#_B1%@N|Beh*C(ehQ}bDV(#|Bf0Dm z$*Kk9g5Z0%k3CYs9$`Lfhp|W6k*zeBz34{6MGwE`iT>rPVB3GqD=XQ%MzQ;5^Zuue z<~Q~`b1tUL?ZD(2_N3^`JZSA?bPg9M)z)g@V&m_4<(JNi<=b#J*2%8X4!v>rQ5C=4 zwKVzdu9fP9)JC?E2xhVe=zm{oZC+)cTuuM!{Ry{bq;RVpIo@5zgOt6bU)hDJu@M7z zq8ta6)+r}PdYk2`a>3EP<^D*ybNiLMC{@lt7k2x-mvU$ID>o}uuAMxxZn;Y+cY42a znhOpH2Y%?e<|VFt>+mP^^L3J~)5p4pcCfqHwByhGZXN0T-bPMztwSC;Al*Kf029&Z z*|ZTy7Z5K*9*D};9)P!XrSMz1n@iJhwjCSGkZ=$>t=N)!_OiXUS>{UlOSGo7@DuiL zpZ${0KJ33=%CNI`vN!(g8s;@_@V)U@-fM6C5&RSF>zRIO)VPW~%`f?}l})i@c&w{T z!NZYpvtut%K7M@Oj9}YeN4pJ}4wi#d`yz=Q(fXL9o8(QB+}=(eTyw$3 zzmQjfd&+C!)0=A++qR2Gkr?D+5`LIo;gjELmE*WJXQx9Q)nJ&6P8G*M%=o?|i^a@9HL-D}#5UH*l^9JAG!* zoPQ*Ahl8(Qywq6t545lN|NPa`B^=vC61$`9lAV;RFzZVKU+Q^YvTWf4%a>GE1ROia zonwn-t0<$4X!7^H>_6&DFBSaB*J|H89{jp!uyA#JY)gf)ZX4xn-Q`jHE}h-dUkEN( zQ1=JUlV5KvzWhc9K92yOM_1Rb+Uy(C)CzoT`N(Ikd=FVt{iz@E{Jb#zGe#@i^W4S5 z`i|j=)EM%bddKi{#_&&!p>8(u!z&%$>yEiGRlfap@$d1mhx8v<1o^Ky>|5^4W8GEQ zQ&{g#*1O(kZ2gX>Z@>4X+Nt_lZ@<%6!wI5MX)wCY9p6`b%S~W>clU$k@>IFbuk@Dl z(BAo!t##oEE?D49?k?6Fy*0VqC~?QD{rLlAI*qseLHQH7_uI8IrevmEbfZnQ5noOo zgh3u3B^g{ztAbW{+d+TDO-tJF9-IC(H%8WhZ2;oPZHt7{ITEeC0>Jy0`xG z4vsxk-V3X$`No!2(-uYXA9rtuw%+q3bk>`_Gen-AO-Jfpl}*54JUTaU1ADEC96t5; zJ2v|Fu=9>$4C0Sx@UHzn=Xox=Q7hwiVmTZ?`!wF)O{YU<}>KyIo!_rk!_&C_y zU!w1-l^@-AdHvA(4qtMOP5)jSLf_w}@A}(5y6@0^L+iU^qcd;cWTW}{A@u(h^Xq3_l!KDzJl z=Z4sKV%b<5_gMQEhS2{*>c3(L{Vy3>|K%I19Nfz{n!h=O{%=zMmkgo*#Y5}A;IGRZ z+`GhRUNwaNFH!$xL+JnNq4giRJGJ+rJxhkr{|xp2=^^xg`Ox|g{by?ZON{0@^zX{L zl5>O8u(@HYtn4xtEDadvzLbwFjLb9|IkOVK-+X-Xw;^wS|KP?sE6h<%mD`P7+pwz{ zdB{uN(Xk(71?Rl7-B_?EKn^zan-7ODzqb}JzoH?`?~Nht_}S zk14u8!)R_BLjPY?|Ja~Evi~_l>%Zf*6z)|R&Hpfj{%=NUpO$t@vtfbdhvS+1^-_f*oc>7y%b-;`YZS>btE zpvl$;K8-%`9qzz+gjhn_ckQl$s`huWK_94pShg)}BxS@h)1PQ@B!lr|N9e$(u6J9< zprZ`K-|G4s&|_RXDK-T3j+B0hP9{5$>Y&>!{1x{Ob$+ck$KXHCq8-Y(?F7?o=Zb$k z(GK-T5l^W&18f~z(sjJX`Mt2HTA$o{HgHqSiFEwrKP0yn@IOx6GBM3NMMEA`yS(@D ziGR)zK7PKyL5+H&F4nb`~P27bG6 za_8{oJZBDF*q7Y4&5IqGa)xjrrI+V#clO&kY0nzAIsC`9;cEYvZCiGZ@Id1mh*o`f zUT>RaKX>-fU}Y~$D_i|f4lNw4>}N;!w!8N#r>t_=x#QpT(^HSD=EB~x>f_6TD{$mv z!CAl^y*t2lxNTdJw;E=M5Dc0|%EaDX1uD zvaq{#7+2eS3V)})#zA~9+00RGYR@FN<2-%O{Pz65xKIy1pV)h8d&b2DwvnXQLkxlkJEs?W=P>M^!?AyIC+e3+MvuK@HoND){n*NE{>$FB<6CO* z%VLAhEx{IneV{zG-{r%?f8hARv6JoPcXOFR{YJAczZda1)6a(AVa$u8gGm(wDnmudB`+dTpNw$wkzBl|)(0AvhmR~YLcRrjNP%A8LbZMaAJL9d-boe-Fs{;HwIwokFv#1BeNkuOkm>^;MYBXoU(o5)q6=fqcRSxYWm8-~o+PajJ^?bs?^7;b@f zR??1Q3YS*&efK-+7xGSLDemOA+Fy%ZgmW^x?*=~Cbu6;w7U6HKXW5d^VyDf)9&7(U zwroiy{|~UY)PFteYWI(Cr3(L>#zsE>5^^T)h7NbvSY@279Q_hpUY!mrC4xeFc72JR3GuGX}Sf4n9w#k3Gm$LzSJ{w$U zKOO(=G}azIR|Sl!NSltj8(qd9hzQ%3{jJ3MrDh;Cd+}@ 
z*WpPnKYjuDi8t+Mj#eJ9x{3SBS+~REjjiG{%IWabW29lpkX(vI|b)IXukvvu*Se2T57o|LevBP3HY7 z(hc4ur#m>Mdn;r!-rkK2l%h*| zcD-nP>i=PUp*2q}ba=mTEZ^9rHmez*_R{bElHB?U%9l1nqeMqiG)!>F2ZuN)+2Zoz zD(IO@e@%Y7eYi0Do?<1~!=me=UGDQuspnPPgI5LJm`Sc%d;c#-R+#6=BKt3JU@hOT z;P&#B;0NV}&!yZA@4zh^xJ|O-m3H#}9^lpi+;#xB1oqT;%AQ(~g4=B1mcCzV@JnNd zw&SbQ_sC!}f0RB9%4?4dB=d(;HrB@1m)X7?I~KLyeo2o-%_0_6dq(~f(Hh5}a~XW- zQuxs&@TH64PxJ7@&5gpZY(L!LD~MIN<)^$eDdVB67oJ6~S7HI|yre1LPAh9C-%ecs zA1^pscdoe-db_x;8vID(m0A=0`t^_B)LlY1CVKI! zwy-RUpK9k~{MC06i!p|J>wufR7ZgiL41kN}#mu0UJu@dm46S7`Cd&9;r#?cPX z)mXRVXK3dPlKkYWiR%zQ%SRrqBCezO#2(D*u1@)>FQo5w=BEBjz0SFQXTLf3YJZyd zs~DH=XqNBbX<%j3E%gkRErCov0pH;|8I2k-BuPJbWEs;6|u4ju?yfCj+Z z4Dwf>d{;;g_51sO<@?RT_Zvy`!N~WkHIeTZd*C|un@uCK;G^Of^@_QJk45RL71&pK z+CMGXp<{C|{QJW_K+L$$UJqlHF|7X6{r$X$E+3o@oPh6*WzmVJnN7336L&@~GBR^+ zyYI1V#vW(v74Y-Y?WG5mzi8>dmL6Qr9D>l^+Zlg7xD!{5HRA`qiEQ%dmBOo}mnsfI z^SqgI!PGqa@N4qRbTLoG<7Xomsa|_(9UUH*6nN6yv(OHwO%{FyKG4PN9w8SEA7V`kK_ z>ObwDg#T`i(Kh}a<#z8I^X~4Q2btF?@hf!}A|1ah#@;#*er@l?FX2|f`ejRMw1$jV z`&)6H>HAxJ$l^@IiYboFt$kj;nzLfaO}??(4}D|5ul+m5`XAI8%2+$r4Q;FkCOZ1L zJy!fvgN^m{3K6H3$J`}T=>0%r8w{S?fT!9OJX`Jdow4bzt8U~v zot;p-|6SbO3=R#6etltRxEp)7+O{RyxLY$g?jEfiXrDQG7*E8VrDQ(w2FV!WYYmLo z<;kwx*P4<&cBJG!$vxr=;t~EYIkpQw{2>N^@WaFW@U`@KGk=Pw_2&n#4mid%*=PXX zb=?r*S$^Zt;ORe5eYNIc!*kQ%@QnN&WBtSs#~Qz3Xk$I_mulNyYLE4j!N+?4-!WG2 z5XTz6ZfIjI+?$4{*9<<^g@4Cbe@~sE?5TpqLmR9AKhoe@J@{D9qTf@UZ$H5+;0Ny6 z!z^QXv%OEQT()Eha#RWV17!D`0$qkq@1BC}sC;a;e7_xC0@?A)$d0Q{GkZepvDMR! zlA6bzGpMqY)ja-x*n9W*xT`w<|1(KjW|FoPnxq#lNeYxC1y-;vM8!_aEh(r_3zTb8 zTy@eSQsh=pn{siJURaBzKy|k%sGAAYYl~FfB?WYoRu_>~QCCUP)ua`)Tw5;Y`+S|x zIWwP`d}cBY`u%_s@|@xT|^i?mtalI+aJmUsp0mR$o@@tx#cGko8OoSie5b%Sf|Q`>l^`WN!6{>IN8 z-zNWt>Tu}Zv)n^=oBh%k**hkrA6q_+k|QP=axHkOB3HSN?^>~IZD3Em!`QXTjUK<5 z=d!`HVr$V}tcv%v7rV;NtDbF2?XzW{(LTGP>mxkBH(;N63;WC->@#NEbrX}paW5u+ z)B|SOMYJYl`(1){SH4Mnq29H7(TxAOWWM%l6;;$lAs^3U@0s{&JXxZe%*A8+r! 
zW9EYRfE0}D!eJa_{F)0dYc7(_%yt7VYMO=UmM-cDG#cZ2h`VllNTY$8J1KJZ~5eJa!w!8fU{+jw~uu&Ofq9Ymebm&LMR?kV_uDUwC1j z(YZ7D)R#w+yJcOYdB^Yp?THVvalROMqNC;#i{tCanun_za?6nND7E+KZ>0R)j%;YV zjdd%+xs$Q@Et0>DhqNx(iJiPM;w`(7I+2Cuv9^pKA=QY}5?q@1bN_NaD*82V; z_Anp7R?l8fYj-K}72<)Q&x%?ES|@+H4gcN-&)Zf*Tb)H-eINd-FneZyiLWDU$YD2L zCVX1EnB#kAyVU+cJYQRJ)gt8$*UUv0*ckF2e5E~-?E$-S@(aFh-lJG@{BgbHtlX>o zGsag7zqiTX zE&R^lUI#vI-EXE&K%ywNE>?xUM_Xz;LA`(kIr0f|^%Ns>3x1EERdJTa?}(jeO*~?^ zx9&#zNl<5?{|FzC3D1`yZ#;+}hg?e0?RxT66IS<0A6ezWeHn#D$Df%@SX{QyN8d4{lFIRtpJi*Y zx9ws2+>h>O-lxwk^pk??BWD0B@EsFBFok?XA12jXmoINh*ROe3HComFe&Cvko_Qbb zxjxn3tG_5t3LolYCjQu)@C7M1!}w#7G1MpB6)`!!Q_bAIOq;U*;u%bU-ny0h z-A8y^^=z(cydX`~3XRR54&t8YWOzlfUS_J^Ap* zBBPcu&V_5LIhVfL+gDA^r0BcS-|9Qrr1tWiY8|tu%`^zWu{#5h+Yt-)${T;coY5x}=j3-y%^VuO9 zqJGCzd_Ff9Pc?JZ#Qi=)pHsX3$<9}6u|Hoo*!gPUew_SZ&DV_)zhavt*LT;$Gt z z_+^7W#ZOvZ&*x9vOAa0~XNdkKpO)4I=TB^Vc5%v|*guRvQT3eX(QdXb00jkZYrFi)*;C1P&v0q#o!0QXx)l>2E z?6rhf;iwN=cLO+;-CMCSH_%oPN72^@XGzX2Va>$1X6nn#;X9rGF@Jf>{yoUe`6tGc zOL_L#_|{O@>vBXgAPmrq=WKE)Uk*O2dyeEvS|)$?w)jEGvD&tsi6_OZi=k3(k` z-v#|79`_92Jxv@7coKh#wu3PpbJ45g$WSMjo1v4Kcon@X!nO1uCogYcJ@3?dZhgvb zJ%3Q+wR*Gj`D?!{e{vGMYVRoBx##O?`50x-6ZFTrIyq=M^T*3V ze6s%fl(~m~;mq)u3)vz>!>p%1PS{&SjIEP=d+1pHCdniDrB1;AvYK4(O?O6??S*z% z3`CaoAh$$=i93m*ZzK;I+GW48U|_`pLj$W7!`PBY#W2q1*+m(2Jv6>TvSu^wAX|Jn z^VJ2uzm9n;i^wjd{E(sFEjh}Y*hHO}Qs#2WX!5eDDHErrOdIn>?B-8bv-Ti+%U&4W zdbeO!eY^R{&iRpXt0e~|d&NI-YBgxAn`mnWaI9s_kI~i{@YNRDee9!*6T12FnAW>> zX1Ts;1|Kz621K|<-ma_R&&WyOSFHVtaqWAj(Oz_1$?CGvxyK>ca*gp3@TVHzH}YRK z@i*{aa^fQXJA9@1d}U@X@E|l$g)TCL&*%DlE?OMeym-+)b|2R%Jrh~ni7YlYy1+iJ z6IuMocxrN@pU9Sb<%y~lgRO$Gi#J@ZwAQt-Tdc*D zR?b2{JQMx!4D`c!=!fh>ZjTtzU@`iphSQWSq4bLv)$5 zF*MI|HoppEWB3hYWsRR%NuP1}OYlU;!ONrR3qI0#zHnf0Yd?B@1NQ#}`uzMXzT^hne;-P&-9FyeTln$*_VLU63|)#T1!})fr~{+Kx-|~+RLnE zDcn(q-r#N=`+D>!U^RY@O7@L$;;W6U)}Ej19&6vIg-14l3x}hk88$|~LcR~q5~W_w zy*c(~?_x~NS#ii_OstUuj7f1#;-6&e;#7=hqiUfCVxm*_PwI_UTby3 zy{)Y4dgiwKg+UXW*jZGus~p{Y1GWS4y`EpqbM2=!HywW(+`5rHf%Zs?nUhxbxEH75 z9sL-q_W-;0MXh#^tGXHDqc_QcP|UKPH8KDGep`E7-v;m&&+q3O7nk`Sw`XGS(xCkk zd)!5=$8FX&QH9)eYoyL6)B4y3Jv)3~`r6>V;&q*EdXHN(vviT4(`Oa?ILRotW;#4; z&D>`3T=Q9#^jMvS4NVv_6=@R-lXtViB1sV%w%}X*xep{e$dc(&lu?XXWp_J zev3|OslBylOy5uEL`wFw?#3>KecjCydrlKK-&y34;Jm!#pKdFJuh6}NW~y_$mTbEjOS`#H0uL&)DejBJdgYN)`0VQ{!U^%Hv@mRemPu> z=kfH39+i`Jw}MZtm!ILigstz>a2Jy3S=Pr{2-){X)Nc*z%V_G$$2fmjx{&%s7Ygs!uh;)K>G$*To1-U|d^n?De3T>Y zSNL?e)|>|MB|Imjd=^UGi1($EBwV_FpsD$e5_$@$$#<3x#X1H zu3WfL=q{ttVaA}#90EN>v9ZUf$6K_n{PK8me^L3a%2IFNJm%8hcQo=&1Mi;0H)r$h zS=hzSMECLY6f~xeXPVE~S}8q6eva>p*SODKY$m$z0@Z}y#+of(rEC8fYKqO|*#O_v z;qUzJi`Pt08@t#qXb=OI9`@9Ux92`{-o)tYwdnGXt*l$RSNc18 zR8wT!(%oFI9o=1j63?FFH|KFLdP<*g1c z-0joeG|s$Gn`dzDRbPo;!~RUws=^P`H!!$$_Y?l}UzB?5&t;6A%yojfUV<-2br)v_ z@>rfHFI@VVY`uM-B}U%nXywdfxl;cBNtyK)8MokB8Dm}h-$C@z!tz*F(RcjT+<7dk zdG7L9svh-oa~*zu%d?L0-ptnTWb)&)-0v&*W>$SC^ZlQ4-+OIvmczwxbMn;^uWatN z=68ut9lrm_c`w-IW+u=XwQt6g=I5kBJDWMPOKo;3zn6LFK<9Du@FnJ90KG!{c(wP? 
zp^FzSX{__>xs91{>SINd_Im#_$a${hxEB%c$+_IM@OJIVkqhSBk6y>`x@hZ9>!RH+ z%|EVpW7n?^ncc;l%I>+RsAN|Y^iljPVsS6N_FH>i-(=poI1<0RZvS(4mr#3k5#Jo^0f1i4evHJ`fFOOQK1I=sZno!XS#nG=vZUmKq(cZS2|ONR{2 z`S~1rVc#LeNr$5^Ksz1asL=WOddw@^-09D;o1YcTPy8g+Q1k2OmRUL<2M^XT|BlW( zK3)Kwhxja;&+_N^!j}f`?f9?4>HH^gXzw3$)A`dpcXS>c|A&-Q2rb2TrD;iP&eP!P zLU1L#fAJ9Lop-wDdfU$D-W+_o5@%19Pji0LS*m$%Zky(CU%{sw%L<5 z_Y7{m{piR<&BOC%KQOUt{&;Ww>9n=*(C*axC)oG+s9<56}T-Vgk5r`WbypB&uUKp(}Q$?ShrF}^DM?Rfib)f4`=CHC8n z$Ng_dxo}2*|syf@|#=CGVOqW^J)PQ0G&V4>UnL@`2vO{%|?+ zPB?94&2u>I1HbC$kJP(u{*OW9tGbna>>}nawN7U3J5S>~&(@0yu9Lm2g(d9q_ptYL zYh~Np%_)CYXO_KIev05a*;C{lULGaqrKx=qLvCJsjpvTnuBXn*ifLnwKGMI(AJg^x?*4k_ zxFqjoNA3frDXjT(R^Wev24o|6m3iv?90wuVb^a>hM)H4)Pp?;6TvgfMm+Cis)A4W? z{X%n&cj}?vD=e-b7hJSg^^o7*eA`|}SKxbS-|(KL|4aD(1AN~A%|uI8n|Wf_`}po+ zVEhB)cXQJ`Cp|Yu1oBoL{oe@vyRnP@V;Va?gw|>P+#FAv&0qEBCY%1@9h*CzrvJO> zyZgQV+^k5&(<)Eg&s+WHz})O!o~i3Z|2TNBH_q2}J|mn^qoMTy@do&xz9Wf_w@Wk_ z25;YSKD?JWS=r&_X?fS$ed*lqmGJIsmiPXm`ZVCZc~h2*0@uRlmI!-SYJlka65x?N zweMdBw@SZF@oAP%<%1gkyNBwtN%1Lul^~x+|2NI2(P8*h`mFP(JOxau3*UObZ(~GH zSz4`YY(z=UsMGJw_YE$jp1aWrd(aECuf+bluo9cjj(Z0e{x|+0?bmMu-Zu97(S1#) zRVJ~UZt!afCBaR{248o7w%R*&GJ4J7*l`zMovAN<@eiDtc&InC`e@(xBV6Uvsb6JH zuQ|krOYh2-_i5%wdV+orO5cTdy6+y`y7J?heYMipt;DCaj`C(X8fadZw&_KQR#bB^JdqwHGrV0u0IJ-GOH=KE%FTLDf|{BvBIe^NCK6dM5_YW>)d zgMYfQHEKK;gU9AIKL320^2mj*Fx#a9-{QTEg*;=Z$Zu8f}<=|@$_}YPQ zkKE^VT61TEvmGX$Wb$5*{qIBI<8`;U!I$tD?Ks-#z&~K*+0TGM{?h;qPf)wT$d?s^ zTjhg}de|)izd$uC>$lul69Olh@g)z4wk7gl{Twg)i2|t** zfAY`tA^GxYhnKRhC5+pB<780~F2y=OYH_psBk6f~JL;`}nmy!moGH88ocWQT&#s2g zWJ9k5<~X$rgrl(f6Ek?OSQ{7rld3-fjn`Njp9PIO8e8Y)ZIrifIi756*Yzk4?;;<<=|BigyWzCV~lw*sMle)e7Pck+)XUYZF zIL{54vt!WXiJM*j5#D!mUiG4tSNNtq;8mTOl`r@~zd-#RCLv zF&#Fiyz}8S-rC5SaCVrM3h&*{kF*NEhhmmu;4g}f>9wwWUzlI~#rNfg+nvvug4^oF zLAaLzx3gQuk-;ZR@xj+`+2 z`TT!C1pT&0!8ug&j9$-xJ*J|-zx2ZW56BY*Z zF2+*|&JFAq=drTzIuV~|47qEvjDcoG;|m)F-4tUl^v9F0KEa`khczLC@2W!T6AT{h znRB5Z3* z_r>}c^Y3%lEVwY|d=htywS6Hc-~mTI11s7dv}5SvC8uILu;E;wJsjAypXDS5uhVi^P4fqvf zk&nO6Ef9XmAtzVsKA)ezXFxZOzYYDfCWXn=`+;BapV_sR$`;0n&n@5U5p#sxHnANm zFY{y2RuZES@u)jfL_Iv>Vrn^aq;@p+$=%c*<+sOry6WN!&X*irP2ALZ%Uf=CwcoTx zO1x7O+lRiU`U0k&n9bo=jF@b$+S^xQ^PhpubuT&mSG)QGS2Zs(XNZ!^zlL0X z*>4qZshVqwv2yhh+&QDFj}VN-+Rc7a^~@BlfN z%=<3_Ups9GADu<9UB=IIE_6`RU7t8NefD?HCC!V>Iot>0^4lu5Uai z^M3Ey+@IE6-+p%H{h01g@2>AX%fBBb2QfzT@vr_pNWAd>%gg9kq(BZ7k0Dl4d^zK6i7D zr0Xxc5BX;uO;-Qar@d)o$#KMf;O=jJ&jl$MIS;PunZUE%Z~AaeJI!17qrm;SZ#126 z=>7z6-46rzcOS6#XL{@K^ZBu4Gey_T{WsCCxz>1;C&jtbrej@rs3sajWkOz-9bh{(iZd_c{Xb?>K&e zKduShy3YshM_#eGG*8#OH_g)}C-^+Q zb4Dib&Y$7)bV=6zRdqg3&#xQG)5%(&r(0?>d32!0=jmik=KWgTho=XQ&%EDqywB6M zS@)CExnI{^-!eV({ek1Se^PgS^0>_VwYq-_?H`+YzvWo&&t^T#x}Q9T`|qLsV=~_# zn8yA0)Bd!~`?b11kM^rG@3&NQ|7_aNx}PMUu6;i3S7p9GP|5ud(tc&;{aW2$K>J5$ z-fuaY`xnuE*8SvB+`p9ekIH<1;7IOYN&81;-mlgDt7!j-%=;}zaQ|bppLIVumHSI* ze`@CY15>zv4ed|KykD#P*U|pu%=;~qxql<=XWdUu;{GkPKPmJ5feP+_mi8+$@7L=7 zZM0vWdB3Hc`=6tId*7`Ion@>EtI}&i75-eeChWkU+jdv}HQ`tF$i+#+uLDEk&u zvy3(2R(w+#e0dh<-4ipQwKh5-&6kOz(|kGqP@gYL4$b7tRpWiWoIgJE{?2heU#=QA zlrIOyf)Cc(wfYO?!%WYhh)CbzTNO;a!ls^THS{)2dEL10sm<3!EJVkp}qUvxdo}vleNqYMpY>t`>VrcjFp_l8v z^z=sgz=-+T%^3r3|1gJV&HSFhGo2?WU98dO!|7RT1?LI#tmzW}S$dB^o8|V|iJ8wd zrf%d>9q@LsCz;QfjNZ*y=1-V#s%T65iM4^buB5LjamxOGJ0f)|`th2anKhU(Ojx$>315e%P38z+w*7=($c&v_^OP3Zc5xX-i zettg@qcQwK2QcWZ9UNTbg9A&|&uq zHDdzf%kuFB$5(NF_?Qlso>dE5```W<9`Q@;5#Uj?-(*~ib?x?>#lEayZCf9POJ7C} z7%R2D&%=rpBJ98AZ`BnP%;uq1Mu`L=OEWe*qJ!Nb|$}vgRU$HjUsF6#zE!YdJ zjs{)6t(l`8ex66Soui(Yn^J3cXP*6>>Iql2%Ep++i^ie-P7u~hRd4yPzVB49U!-#a z_$^rV`x}bmd)xO3ru@iRuzw%eyQv#48Q^T%4tCbYRIEqmt7+JKhJn2zhpj6JyUsN5 
z$ft35);XQ6`+eAR;dvdfvVIy^gLroM?d@+$+1#Vxo?Ms%!{NC`vLFYppPxNU?7PLK zZ0#C{Z0!kPj9VBTzqVRiyW`hpYlm+5V;ZN<7vy-g5&LJXN(ElcuRIVG|?Dd%q%a$EKFx0?b@jIN^R?1$w%3* zUPqSbcOSMtXR}hioMcER?>>tEEgG!FX8CR$xc00c!1*;gz_t7t+P}xL=pl;DuqMm* z)`-1du}Z@8L}X*d<>7WtkMV`^<0YXql2wZ`P-9thBRcsl-}PnDvS3$EEcj2=45;}< znnv2N-^P&bqKhy;nEYSui~T)&-Eris*7&g;@?B)-OU$7E!7~)sUQBHIC}Pw{6RSQ3 zeT?&6ZY8#zdisKO+iqkC`Cg8WPS;rk{C>#LP-)i+%^Pxe2|9^6x7gB9i=AiDP&fLF zXh?JNzV9|IayrPWQh&|g2@S~>Djmep&^MtWedE?x^R8WE!)QqBm%eRA&yg*(@2N=g zAzN#IKCwJ|i1qjmzJ0+^^2?@07qN$W6W%Xl{>$h~>uC#k{}i(NMsiSYCI7_b-%&%X zPPunCQ(NS_#0n@Tz8U&#z>eBUY{rMNKPJJ4=^y>Z=x3~hij_YNJUnW6X|lmnteCwF zyrexvdnso}q4P8rPcU&WQyBlZq4h+dCQ1YG9io>7%$HkZCQ?_?uor zjfr21biJFlx=UPsoI9&mv4%RUcXHQEo|6NRswtxUxH@XU82ofal0p1bzdpFooPokz zRl&!KdEX8FxH(mBgYdYSd#cTd9%z1d;74$>VXm&NY%sNYs84byzHIrIl~b3(;Yz=@ zxm%|rz}af$MG%L3XUt8;=0?o?i$2=_y=hT1bU>W1*=P6T-^ryfst!#QKI=z5l~xrY zKZ=uOUZh^RZobTDfTzxZx6X#g&Vtv@BoE{a>ZAC1AdRzzS!1j|7+hnX;+v|lH3k}X zYfQrWsapOcy(adO_Z^fSCD1-G{B_Q6o-HG$jR3aJgMh95ku+?LS#oVS*xuyav%Gm} z0h#O8=9R$K9fmh#?hw2^l!h%^<_2L)&h~A{b+!5B?Nh_S8#2!q0_4O>_XYPM5Wau%48ypx_dW1DgW)w7E16 zkMmiROU#-UjNi!AbykGJ8K|cmg1^zg*qR4_E)VfC-b>g##M(|D&XMLJUN{0cf6X^l zdEnF@FU8MaPUEp1yd4ZbpF}@{PefSrBgmm50te#gjeC z7s3v68?sn-7!P?p4!PDb+u2r#t3p1H7cI!P61Xp$OH}u5?A~phr6>8^N}agt>%5t{ z_Vf1vqu$YeB+TBW`kkVu+VvI>oVnFVzI-fKemS*Qwf~-t|DqG0B=d~V4m-qn_TG`! z`W|opchp|X4Nd;Jd#uNa@%~wsJEr(PobF?Ny;*)7txfSNeT&bBy!zdhDWA`RqyB`h zgf0+2#-2lI_rCx9f{p1#|9lzzp)(jbv`6ct55-|hM!EJo&@Ej1bGg=g5%4*h_M53E zARbKZku2T2HLgMU%c?ohEWHw$A-%H4;;&_O8h^EU@K;8DzU&>s(WEY(77v^i^+j`sEtf9XP>z?QhqS#DF*%&|J`9~6HV-T3_P@=S88mj zcso0`e$FbA4s;k}>#(&4g7J3j(I<&s3|)LYJ&*nSCe3y@?x41T+MbDBTDB#<8@;V* zkkN=z2yFUR``omg>qu8*ha0~=e;KK`3N z?nQ^a4;{Ac?sOk*=&)_fU$AZV;HkOTOkL>)=JXZjGm3sCUJ?Gj!1$S`ymZ$EOx?Vb zYuw7|?5FQW+IRSGTjt~6(UZgX`vUm(*t4z&zu7o^lwM2)m&%e<~(5S2EKee`4M15F64w!xH0tD;_vl(n7bb6uZQ{TvGeEb0zJ&( ze_s?~3+IC5iA3FV0i(&YaHw&4J=y!)knI9az)ox-qF~i;3Yv{ zYFq6l$W=G+fd4twr!D;*@#JB!wN}IrXwHh5E6vFde>r&XBM9uk)v(8XOm^||!hVdaan;dWcz9AM0S$d?YQ zhu6%_vL{(xJQz>Clf9VJ$2}YS8iucpz#p~;lP)uo^8*%G{Ix2s5B#}#3!Wb^g?IGK zogdJ8cqZ*oHyNK}dhhV=Uk*;X*5dhk@SF=?FPd&g@$|<*gZIF5syptn{um%PD zyH?L#@%_EjaVU{L*vjs9^v-B|_&V}g3w!f)>h2={94$7U^G}jPH|w5!GCj~hiOvKL zwz)RE%@*5c_c57m>giW=HU{NuVK4Uv zy5+M)-l@d9tV(r4vVY}2gs+XU=6HS0D8r)#%gLD* zrYg}xK<{vJ(yZ0+Sc0{xWJ;Etyg!^bDs7vs>`jAhN=~+a4fz=JmbX8=W$AqZ^=C>U(|X zcu{M%%;UW;538y2w%2sV zlcHPc0QqQrriCxgI_Bm!gVyaFzoT=ze3~1gFXw}6yoC!}btqi57Os)3VXuQz@%R6$ z^!${AZ-n%`?Pq^KdhWEajg+2$M2%?C^G{9;U&FHZceiOQ*jJ>Z%dXNcxt&8lUuj_@ z&dtz_qhqZ*MrNa*$9|9gkA5y%kJ6@iFnf>p(SzvoA8nt|Q|>;`4CxbjCtYY&w>bJIaFlD$UU4jTyHW82v+OyA(pd{Rr|>ijZTz7|h5(2nLfImX``DV}&vq}0g72KJ6EA$#>dGk(bj@;6PpL)L#Qdt|!{+6I2{ z-_Ys^WZg5izt$YKGbihUwjJ@%ffIaQdi#XGJ)L~b!qms!GKkwCo!pQ`CoPPh1Dyo*9Y-q%q4Nw`f7r|B)B&$?;}6h)bQ(7o_dGDq*vD2fw(NOv z_M7Z?yNN9-)Ry)^*MA;cT6qfp;kLA8mOgHPKEh%p-FZ63zsDI6ii0#d7&>ddxX3DO zg4uDA^KZX06&JZW5ErQ&--C^ZRF1FoMA;Nvj_+U*_3+URjSrgtE*`3oSjor1gO?{( zvSq`S8L_h_Rx)3l?|BCwD>)t*!en{s+{u-G%~o{iR6Ol^zKL7gc1AocK3e3NiGN=~ zt(ty(I`W|n+?O6lGGBbe35u^y+leJh{y@F(yzp6nbP&F)fe%_RzP$|iR;J-gu-_4E z?W@wT_2z>uIy7GMu@S&_7O=JEfz8Fge3ADOF8<~7K8z!cf4K)ZhdYx{v5SkT9j`O` zbcWsa7ulR2Voa`L?o|U!=Zw95YyPvvnk%oIAiTO<9?b=H0cLjC8e#7K$GG23ZQJ_y>mv za>cbg4_^9C^64ZjuH`=K1FpH_Na2$QF4jo71m4R_kD^~Kw_~IFl&d+{;HcE)OEjcilk=L?n|HZR4C&rh^ z7&ZPK$nkpveJSkR| z+f_&VXO7QF)iKmrkG`*{nddqevhA<)zVOTOlUMg}Zk{X z(lw*dXoT~IG*7p2J%fIHnPdFv-$XW=_@&NK=#!i)qIq=n9aq9@&FJhmrRvj_?bUf1 z!b=71=zYPwqPWcD`bZ{-cb_NjcoNT5pYFfFV-4pcsxGTyRaEEiw%_>iR;rsqOvl!n zh|P{hCYjpT$|)QV+&YIWTJ1e8`4fEpJH>#!<>xuN^Jgy-EW{S5{eA3<1XC5h`J2G8 z>SyWxUVFcpJ%t_y8=8NMJ6a_U4DXAR|8LZZGjqi 
zH@n;o8;={MUBz|TtOR!>JBaQ(z?UILFx|Q2!(C<7~ff% zBdb5SShfxuSI~>xQ{Gh@x=pil5TXf$US}Xj0=0W+q>oq3rYlpG*ZJ3TcWj}qewuNffzK#2F`|;qOv7d3O zZ5P8}_ly^`Wce+V@JcTG{2{g<#oVc_>^-B~cWq$L*rRwR@^a>~*LxjV)!)^fvWpJn z+5f!=oW@3o52K%XRv(H*YGcnYKYBOofobon-AQM6iPE0-ev^4m|7rg~(%K5zr_D39 zk7ti=-)O$)U4JiFZsMsL(J6ApQ|+>_#Mz4qmLNXY^Gx@I&wl#5XPSxI*tKcAw{O(C z3*Yb#B7WnWwjVFcmgV>7ugN#_jz52H&p9+t^O+joF7b<+p>~Pr3FQ2I`a?z+-E-@Q zj1D`4eB<-*jq8l}w#e#@ob$eMBxl~QwD9-k*)ul`fj^TEzieUe1a{T-dgVjp=ifSt zoUn+=pNgK2OpaiSxc%t;QEYHJ(+``+x;dQZQA`fH;#5jmJC?Zo?vg7PttRJb207kK ziLc(v-griN>w@`_($cY`b(UiM?mv0^XUCwK(y5vE*YMp5YjHMzbo^Vw%h;p3>)XILWhrNu*mkm?>HFGP)a0hM z&}XgfGX`#vzopT`N=y6yfit)16FT@-jGE4>hgQ0;^0dZRD^HW0sKn=@9Pwi~Q+sel zBsn;dIJDCEzkA)&j_xb=-hO=T6^8En&Y%_?=fmxV=2#17mVG+@g+lE^Kja(9r|vLb zrQPSz71#1kkXJizzjD8^6HI5Vf`(nLne#ol`fsE7?)l?=9{$7eIs-_201wb z=^tJwtr|0sji+!7c*0Z_j zyQhv|E)OqYE`P~zzVJ^esifg{iepmPhFYVfUTo2fyeon zMbmqZ^J)9Vrx)&BiwK0yppOGzc!J3 z{~K!O&9wbm@L&(Sx9Uy5u1YR@<{ZAOBHlI17@W@{XwMX%CU;%2=-ce?l>=|iBt|~z zELUe^R$t0VJb@bg9k1u_?-=@Xd3(9~Yw5}CFG_z2@}Cke|Ea_1o~91N_1M5R!^1&X zs(~d2Eb*7qHD$&;O%0TnvTDj$AC9v_y(JpRldtE_XYPCwIcITElMgm>vu??PZ7#4q zPksL87pbX_O?>%PbCcI&f4mWU`8}L#{C(`@Zz7Lwc@R7K(nvD#U}W7JAvW?&*u>{* zFN=+QGd6PPr&JDDF*sJeciG500yf1bH9l;LdbSDwVTZ9PPBM1V=ds!L@cbse>BPSL z%J?H!_gnjNw0Md+Z#jWHYoF%ZncLv`8?kvR2k4J{r}`4IVRn-niefY;OCD;7vCFJ&gv#!vlVny!3{xuUn zkJe#7uBOb`XtXZmve8^+`@%<^(O1xRQp-ER^{dc$nsaU2&~`@K1K>M5o+jI`do6gB zyubd~@E8DhhJCcBdUeH=Gqz*fJS;Y)c>BclnYCCPUHn@pZb-TC_ZY+?Rhy&)c7D zH@Zjz_mcePj4{p%VIB@AZxvkt`ES$GJ^hVv8h(mmv~t8Q_s6W{_U4fggI~dQd<919<{Tlyms+CYTfvG z$X`FkTfde+@#4B;hU%b3rX3fMXVPf4ldTJvMdKMU?b1a@8qcn=UKBD`$#8G1=!QRgmbGFqqk;3ttEGeMIb(6LUfrHI zI|#qdB)^?KiRN3fG^M-eh!;G-c*RTEx^gU3jt0&?=ew}j7>#43arkDuj8C}$8edRu z$H@f{?hXNW!{q_YBIbKe74hEMzZ5_}k6Ij5W#xN_erm`2y1JvEcEwRz`l+Dp8=)V? z=||c7WaoS7naQi-`5-z>AF}QZ*8+6Bq6U%oE3%zM=E+lfBE-GE;!C=Y`9Jnhr|{``02w4t)bgU5rsXZWXp z`084VkG?EDM6}Z@KbP_!e{koJ`060fp2wJ7|Dt_I>)g*7>uhNIr~rTeOSo)|SooT$ z>8UwzvRv>bKR~|G3FI4{>^(Yg$Jzz84{x~+8M)oe6>Up4ZkT=s=gn0F^giD=CmPA6 z_x+ynNT%Qq%;+=dqkD*VOh2qA>Zee>@5i?7YYLEqxB)5+~N61<<6%Z{M3~Z{CVXLaw9uu@RHsv4DR-`SE`{FD>ElW^-G!;i|QA?H>4^{gR12QC;AmEKO>^l|z=E z@7lLKqCJ*$F2zGgr!wu>eN6-HNS~a-c3Ki;?qQW-I4CPz|zG9;Y&=GI~1?ZC#~4^8Sj>ja%6@QFjy8Zw#s@93dyNdZ;;(4=HF~@t5k&5lgsClw3 zJm>y!3s)aGgTui!5}dvP4;VgWZxC!hh|?XsGZLKs&bBR|V6g2loVJ0VHc#=?-n!#G zk0Ud@GVw!@Mt|@_XmkX5go0mV>_%1yj^NrP9&>9`_I}jasQ&k8Urr1h?aPUwG6H+Y z`Q)qE*waq-2%YGn>PK=RDgB0ZEhrZ@<0I(+mW{yH0SvOmccP~_*z^n?HZ32}U3%f8 zfit}Nfr)c(uC-@Gt5);C#K_IYzWnFQO}^FuvC^`$UQZi+$9U`H8>|JV{d^a!gV0ZX zYU5wE@5rY0DZUvAjuRHgZcTLK5dTY;4#Tm-?^=i7W4`x^d@{IZEHM&)JmizuXx=_` z6gFGrZYS-#S}%gXki5qmkB)DDZj5(wANJ)(qTbBsO1x#aQHE4x0c+mzj^4BUDo%%Y{_mTndfN-p&-$>*nDH4Oc)FI&FYzTBIa`2jqR7Go{b#Jh zVe(3A1vu}KZ-964UbB^1*|>7FYx2xTNNc-o zAMt~wHTHMI@y1AKZQqeTt-X5W@U}m#F|VRG3@VyOQZ?tau4X^aYonr#k=m%Pb?_Bpqe5@XwozU5WF8xpg-bT7PHcg)(^L{~CLYgK zXYM=;k94{)yHqE5Q=Pf&HO$ovyVlac_0T}Bb*v_^-z!x1Ot*0L{Xb@(@hn_bIo2qr zpIP1Spz^0U@72hVxTTG384|N=-FoPvEv2u8*98+FnRm=uk$z_+I_VP@u2yR!$cD>} zIotj*aD*=(UN|D0HuBkMK0%*_+GrB@T%J1nZHSGAJ$bAuw~eM77<18WIsS^Eyed?m zFuFqldalN3^fj9==l0>fcMgs9d`J41*4PsRYwVVAn{A_or`hU{4j$<;jn$dH{}$@( zwU77xku8#^v@bi|J$3Vvj}?`ebCMn^;+#!#jl4UNA~CXc8Y`F@}^>;6{iuyhM1~XijA{orQi1?t4GGjebw496I!XV_mW|@ zPHZhfTc^jk3XR`-2|CkUw`Q%~K|9-a-U-^-3bncX+P3ZXjBeg~1AZ>&&&lRdk8iaX z83m8(+}O-BV{QHS0)792-_Wzgzk@^clRPw&etLypeE+6~(s4fRw6O25we@`(YqjUM zb(E6fb(H?a_SZa&ylg9A&mX%sbI3mRVx`0Q=H&_ZJDd$gf7IFGess$opw`Plv$tco20 z@BUdehj9iga;uU$5!KvN{NNkU_;nbnus7a3?ZYOnaWOWQ<_*|$sEKe>(P$ILTc;8*_o+c9zhg7Fbp37y zrkhN^lXfL+zs6S%Uizpf@om~QHAXgM##3I;{XXm|4xSG4ej1*ed440`bP`XQB%bnF 
z;wgKGr%e2-cOZ(bEwRx%P-fyO%R=KRdpSQOdu?M1Eiz4lC| zoi&#)YHY0a&q$0?cZb>|53*0_d;T($JMHoyuF)J(_s2_Kn2~3Z(3r`a5R4JV-Gg0J zxQ$C+w>a(N{2_v z;FX=Xt169y*na;!qu~0uERbs;8S24b(ZIP@$)hh%oom%k%^%^S6hFC%9Y#KZG%;C@+HQ71xULM>Q_P*dz-vg7<<4PRx@p6(KSM!0vZK`V&_WcW!hw@u$jT)hi zxlmntBRDm>M^>C|Se@xY_D^B|rIV*HRDQ zv6Zz;_u?nqa+G)YoXEJPySZLFy1V|aG2Qjg@p~-4UxJ=X{^++`i9e+ZdM;7E2xs@~ zP>q?<@%mL`;`OYjOXu>tow&*tbkqnJ5~Ia?BcBJ3y1|@)=5p2AU_@Z)Zo1xtfgk0tP7nX>%#XLo8+PRD%aS0j~F_( zBQ)QEIzln}iaVU<=xZ~5^`Up?>#KHRW?ui-ke(S!`eOPFmIXSs$=6g zuh{cW^%!^9`(>kLzb@123#%t**Fciu4)ZvFDhSMf~?$$tW=#G$*hL*v<;v? zi#}8*CyIa1(8sY6@(zdA6#0)ydE%?^iKKMQ9C0W+zi%8}v3m!!^FkCsn8oSYq>KQE6YCr`j0PhC05 zR<+&CI%UrNj!`F1*YM*K@T4^i`d&C6IX!>CyP&DoJNXIrBFc$B0em-76GyR$SM%=b zeVb0JiDaWPQAsTpUx&j&^THj@9sfKk{WV-`q?a zgGH0~&gH)LNUx)xMjgmM*ov zWXqC9*_af|dDmSF+Ryv9j9AVkxBF|a?1GKh|JL7j*|nmN)ziIo508&;f8mkyjE-}c z=S}!}%hjj-SM1Q$-I2KREBb#m7lCWAtwE@lkRtdd;}__SIiI?|k8B4fyy(OX9ROv2m+g z!ABkM_EGO=H#pJS5L=!ed+l<6?A46D#*6LR$yh5HYxLw*8>`5J)B7zq`|nqSk6iEQ zy<&Jhj8?>lh0@BSj8ihKJIschhgL)*X1zgg7Ol(HKFjm=R>4cM514%bxOMAZu6&96 zT>n|M;e&BlO<8k=jm+dxKB&5PvX$QuSYKZaUtd3K`)Li!1!?i|T@~p*-HWuIUiEbM z`@E-H-)HRPw5_oczqY-JScAokZ^n<)w()k!oexaNh;eeZ@x`{!zL2#q_HmyMT%WbO ziK*P}`FY2e_-?GZK69)#W~&SAE2%?TDrAfqQ*kUj=Q?fVSsTcPtKV$FK7F zA@VM7ooKM?Dj#=|nW^7~x7}~gJwMHVu5T_Z&rP>(O>=pGW`4^9^P3}neT>C{7c##o znKxt2g2od9HbdIg-0D0~#X@_`ZK3{wJ>X4z)0#E6LI1!vdB>MAeoSvklJ|u!oCTd^Tt0$mH9@MOm-3R?W0)mGmlqts1_j!wbMJp6?6QEBtzxTX`qQ z^V$44!OkHEf9G7gN9@NQCid2?OW(8O5Df^{?D&9~YU<)&Eo9#{61&?|6T|Ip+|P}Z zzq38^MmM*G?B$RH!|k<4f|vFjc;S91Uh>6b#28Pj<#A`raz2hEJnqgrXyUzuJ?|jd z=JWYT&pWsT_=oa#R&C{gJQD9o9{rZ@;ucnCH|nr8MC3b=%}Dre;ZO2t$sL!a&r+wB zNKRQ~Y)5IEuXK-hg~Dk4vVUh9`we{?`SJfDjs4|tUq5)|aIdW9Nc8B@-pQ;-3u`9F zx5w~PYyUpL?-;nKTArY$ZM;5){b)|4XxU=!%id6gy+*dLDEnvg&9TL``Yyq~+G9T> zJgXK%-|?HK>bjtQYOvjo_6}D(Z>k^cj>GrkE$ibf^@96RPS}YhAI|6}HnM)=S$Jy+ z;!SuO7v8RFlmy`&wjJz_VR$&`c5Hrq_Pn`v1k*Lp#X;4WC}5qgv~=HX^Nihoygd-_ z@_!?*`)z37=;WUq8Totp-rPPfXuEt0_;b+PI4-6*|(C@hBN zQcEMjob-cd0}gEHkK$cda|EN7FVxqOP!lN$BxOx=sEsVJWcH% zE;TkRXM-P@v35bP?1C}B7DhSk>r7Vd>;7JQwn+}YA$(cbp6%^N3){1G!k2}_Sij61 zNKUXWhR0YBvrn63=kN&Tu*$B7?tfTZlz4%??pCd>vU|AP661FYtiQYQ*_n3&HIQ1V zfu!>VwBA0!zP-@B(|7F{JF;|7U~_&kts~Oca>Z0VHGwlS=53(6RsW3Rop>Kc8QzLfuvU0-CbrT5Tv z$ikIvqv#3Hej{|!lj9uwnHIL@3U8}$r0+U`qXyim)`)1*BNsz;2%34G{fw9}pN`Qr z938j4C%!#V?9Dc`Nt|9vUge7abND&j$4^jR$#0?fzt}&nSupTp-S#$jQvdQ z1p90M?xpVv_SZf>q`#Mle?Ui$Gl!}nxPzELgIDXXY{7<^tER}m+rC?ggHYc(i_v^L z&c{=&?0z}U(*2tKwgX#>zCFawf9y%Wu5bf961DfVuJpQGIz6}b|B`&?YW=OB8b7cQ z`P#<4d_HT*yj=07`ZiO>4E3o=Z~pe7zJBzbL;v>rQ4sg-+8^bp=W=BL_p-UTd5?{v zK2}zp=#ki?&vxzSu}6>N9raD^=7Xv`^$%|Dv+Q2AR-T$Rk+BK%glv1)LW}!F;9fP; z-}+>Py3$3+{$hNlqwt+_hQzI7l5gK0S!d!OiLX+uPcyh=e(NiHBg-OOlNi75dieWG z(3zg^fG%Dk7PbPKJJQ~3rS_ls<`T{!uG@{1EhQo}ZZ2X0dllPf@y_wS1E9 z_w%%)c zd8)SX&GcP$jDPN2`aC|JJ11M`q^{*W*ST}Q4#ZvNilO|d=z#S&3NFK9DDSaxL2a*G zYI1`*iJ|OgUOS1Q>?8&uSjVN47|OQVMF#h=YUJ?rNb-8d?dI%vjJ^3DUzR#tchY}f zsUJtydYGkyDa2>%8rn)gTf$p6-^@sg?o zJ?Uzj7Y ztEe^7z!`UqoN;$ObDpqs-e&7b-ydjmB|h?KWQw6r#X~5TdpGB}scw3LHqrlQUdpw9 zcA$U%D(-i19;oQ!ZT2qR_|FvM;A&6ru;&tPr1tb&b8cV7-UxeK#i##eJoLdDEm*~0 zto__Sf<9GWcWT!@p5F?6{Dd*BCq7R6tM{s@9Wn!)1aZ6MN5mLQb`gtQ8Hw#}Mz3=8 z8{t}eShE+Vja;#bKcc^mVdD4N|NGKK+n_bYCW^+Q#MI@Yv8}+q1lT=j_U&8!GX!3{ z_)KKi8OX4C$+tc^H|fq4;QYS@HT+4Jdk_B`^qcq@&AD*9o7j@d9fS9(u6IVA?X0r^ zzsMN0J`dp&&H`*lzP36(`LWL@im&V^ZYPRA#^usQBa=^2>|8%*@yjD4M`OtC&~Gvd<%t1?vDn0ymSBgDuCCy_8);`h--wdag*`BBjl?`Xc9 zUER$WHY$_nkVLj>&PsYO^=<0pIxGRs5^^0%$aRo^uaOP*9*wrg-YXG1Z*cp|+n zPJ@pOI<)Sqpdmz(1$66}??JxmI=aJ#X8!^ew-#?k(k> zauURI+3{Ub+LG_#F=(&Z%8X!q*DQPHX78hy8sBG9vSJ7EIK;(?w{yvi?<|NUAHB3F 
z`61w_VJsTsz$tAT+lpeVD4Q;yE*w4u4H zMPDP&Ze5gf$V9^}&|V!hTmlU@K#Ox|S2QyN`dteBuCVcOeDk#Q%mL1dt>azA(v=p+ zn3L$<*WcZ~asRtlZFK#`;V0qjXmHktO+fNCB?oLwoWt35;7scZM_nlaSniBm9C{)8M$=+?y+G$rbt&lU)b5e+nNP$m@n*0yobCtGOY0%{=ARW z7q(#pa2{D8oWj3@GxpFsgY)VU!1)s26oSu&0Gz#Fd1r9W8v&d>d{dQY-aC-x3HFeZ ze~owh@Woh9llT;4tf$%Mw#ICHL-ro$h)~#u_j`5(@aW`onBQ{+aTtob5S%;Pt(*qV zo%wv9HVz{j*1wc`>ynnfYEqh4ox$bs{k ztyd+$Z&0stIb71M%08W$m*B=Kc)mmZ->TOJQ@U)NSYorD3wGARpf0QTD+T{6`OoGl z7yQ)H5&Vu;f@`^azzN_?Wd*lC|^nY z;as}M0CZMm_1G|YU5;h!PRn=b9Rtu(EJu!I`2~?=KWnesA9Qe5y>g+N!0B9Fvpy=u zT(-q-XsJ}PJzXMhMgIjkOb8e>n=94+T zIg)XP$6}Go88|1a2sqTY^7?{(?{9jtGx5E_N%87To55ScAF)s!Rhhi@8u!s3XL?nc z`&;p~hb`frO&1#*;v&;^O$LYrXpH zbzw;;%mKa1>VP%SvE-L@=fChxN7(vm^*|?6JAt!BcHX&uK42$w5R|Kq4)%=kZ77nj zv6@J-p_=#EyJo{WX9TeRpg>p~E-MVyoe`h^Qn1#B!&*kaVLm?-pU?PGzK|SeR{ezU zn~`0z%OnDJo2uD{G*p*cfI%$Hst+Fz7{t)OTcR_!P*@7DpIvM@Po93 z_=q2|{j}OxO8LyBk2f=KEwmT2wqE&vdzU4Nmz@NbRW7yd@n=gTU4j@`ld!uQ&5+bGy{iCO={~>F&l`ZnIxAFY%?O*@(A^sZt+#!D_|9Sy(_Hz~|@|^_rr1n6J zsj;)epCj?FU*p=(<6ob}JL>yj{p%OowmY)ch~5y`vvuK5Tii45&>R8j>LZQ)EOd=K z-ohWx!hP5pw}D){gFRF0P}hDa?&WJN@A^4-@Nw^-skNRss6ygPr>i}Pc^xzP&(F?z;dgP!1Gq=Uy2}$FiGu1F`Cj7}2eTx^8gC4eSnYlw>*7}Lg zdr<4vKiR$%n-t`Itj9KLRu{*FuccQ7$rOKpGHXNL6GvL<}p3icP&^VW>Xvp*~# zR<6OplEoV)ws(ymD>tyKZ9)4BfxK6X=k@I6hFc#*cR_fE@`isdrugC##v&TJA_on{ z$nUEe#d?KZtAsQ56pJhUxt18VXpS@dCb+q^b;K*7Tb*m^zHOI%FGp-&#D3eIm3JWA z$kwC3-a9VItf@jidDPN{U=(~c#5-sYYUt^t_65CbzviDCcy8N*T3vH)pxFc3a|2s~ zdvEsmsl7LQ{IEUXFTk64Dbz*H4pI zKm1bri$8v;@tVEH2500vZ5q6?FMS1_Za5kHPI3F64!$h?F&wvPT}Hh7r%^r+|7BDd z4-ap%mS4p2aXy=MiccO%^U2OEoAvxi#VIC-JcLIc2To_3asN~Li_MQ4%IAJwXBB>! z7&50EDJDZcQBprulL_NC~|OYtqu$5*481f}?DQnd*}=;{Re zH8mg$)ZxSAwD}&~INM;Wa;1PHc3kws?dUplh}TyPv#|xC>$|)N_IszgccY&v`7ry_ z>pb2gzSz_aHt(bFyZ2kOY9l0b(5>iA{34m<{01>>q2<<|uWQAl6)L;GZrf_kqTMil zc*kpr%|&R}*mC3|thVsM{gng`uAezD(=mH!w!Y_~pf6vfY=B701p^ z)}WIg?4G>N!kE1#2J1HMqkjXdt+6FLm8b=Z3-#bk)pqVtT`C^`!@Yab|_v(_;)tv zFn%}v7s@YMLwGk^)}`PbA;0{Sg|~`0&J1`%_~mtcm;GPuE@*vRX4`FM|JV=i9UhE~ zfe&?0HpPB$l^xR|zDxC0fK0o{_T^zi3-)F9oU#KV(`whQF}YNxU)of^k6=%mN#25o z%#j{aLaeA;_jA=;d#{^c_$$~~ns>{(ig?HLLwo9{P(9~Z+cr9FM%%Jwew$eD^NYgv zV&MY+H?mIc8d*Z(~*7hDd@AxoN`c_!~9n8Dl4V!cGzM1op!TV7Quk4T+@LD+( zgg5bti;RCO_KAy(U+uBa#$TEpY1>!>&dWy=FD@LhZsy`;hoc7Drw5${`%Lq4dDo%5 zWBR5&;cCToA@M)Z)JW;$eWDL^L*;DO*tH-y51lDG&{>{79cUiJt6}qC-cRw~!P5Q@x20S$)kdx8B6n>4>wMEgu3;~^h6gyuqn~p; z1b+=<>M!*UyqjFZ{(p;;uNd7m=QwZQF>eemtbKd#7itglp03^ZnY*JcE&Erf9?F5i zh2{(l}v{G2bTi}G?KTTTby?ik~6gD?;+CrUaj$80!IDs0JonX?fqo}?|Ma* zKR4IN_0+Xu6x@CE*3>nAs?;_9+SK*Yf$M33>*;~(y1?~Gf$LKO*RuoH?+IMLKX5%S zaD8^*dVb*ggMsS>f$NI`*OvyauMAvY6}bLb;Ce~m`kKJ?b%E;}1J}0%u0NZ){>g;f z0@t4lT(1gT-xavNJ8-=raJ?~b{jY)RuLiEW0@vRPTt5=H{%+vn?# z`lo^Gp9ijg6}WyraQ%mQj%I!`VNc-t#lZDTf$P@-*KY)_{~EY1IwRfYn80;O;Cg)E zx*~8rHE?}&;Cfo%dV1ixE^vKP;QExn_3Xg)dji+*4_wa+T%R4do*%gWVBmT|;QFG# z^`(L9D+AY81+G69xLy*tz9w+}f5^Ms+>j9=myzsxayrDObB$M}to@mn3^cR0rH zc8ovh7+>cYf6Oty-Z8$(G5)k;e2ZiJCCB({j`24gswM%a@5&8-Kdr=8@J5*o4st=e3h0r zWbU#@Uf8jRR}`qjRK0pwy{S%e?R2?29M|D7b=KqOJpTCO=i|TmyRh9tm#CE7$z3M* zlg*L+5VBuFwqH>9vdONu$Gs9X%cIFY|0jto74||I2@}M;$m)u^gRvjz9vj8U2P3l-;E~f`_W$Cz>p%Q;X!jRm{>Aba$DizT8@Xs(c^Y1bX)90IhjY&`Bcsh2ngb@R18amU zHP#qOidWtGbd6IJk6~<>87Q-?1Ku%>%T#Fts!w|Vef8YSZ_1e@tC~Mw#pPE%@`#G- z#jUzZy{uw8CZ}iT7ngPJ+SA>C@bFRN4nK0raVN~20iIn|SnDkwNsc5!h6f74GYDapvlIA!r- z(ogy9%1Up2j~=~y_v<%k(A+?1*s#Tm7xY)L3Hoh2RdnywX8=E)mXMN>Q&3V~*`s&A zLBkFkTRrLMsmISabxv)ApN%-P?fi=_U2*kg|8}v^f-}xO@4{s&qfxu`U-8ZA5jUM2r5*P*$qv z*Meof^H}5(y(*vYG*S7gv`Srh&eDsPU9Q|Yg=G~zR91Y-f3>auRGsp3Idh>pi#@nRtx(sho7Ela9`&GlM6Fkw*|S$D=igIF 
zuJ~@W^YNJw07PhEab?l_ll;uN@6Fo%py(tyFjMF}8=;nkUsJ^^AH!y~_5y zt3FhpsvX4tF7=Drrw*uCSE4J;b&M<9Rp2Uhb#Zlf^>+1l4RMWhjdk@SA@Jx<=iqRw;GN^ck~i>izTP?>(b!=_Qw6eZ#GH-t)k^$2UB+<>jsKeDKK^Uw!|R z{k}0OA)}zYN55ezCMG^5MoMcrakEg+mC*siVpPoNm};KoZ*ok`)R+@vPK~LFX^3f- zkZ=r%JtlF<5?;Dw2`^7yqH?=1)^{*ZOGYjk$FnSP#1b`S$@C>Nm&{(`TQXOcoVR3w z8d@=Q=up*jD3knv+|a&5v((W1p}PJ!bZE-Zp{4Sxa_#<%kS-JuF+>e9B!6AvRaZrf zW$UR<%q2Y^T!fe*v3dXDFFl(J+^V}?nd7OjoHxV3P8n)KtK* z2%JQEC})_gR!$xnO1UH3n~D_horo$4Qs*X&lS-D53?6RJHhmSe@sN+tWrBVv|j+k&t-JRyhG94kGmU0A9!Fpjj=~fiXPU|IRGen2V>lmNzfk94DVF1MT!E`_5AMSnti!{2 z6i;FUp2PEa5ijFaY&C6X_&Ii9H-5tbxHvguK>AWCNJj>;(F0ZJhXELf!5EIiPz^pu zt|s9qOu-2_38!KfW`o~@RdrZ^b8#M);u2hj6}S@D;0D}?n@zVeybX8aZ@35d;Xyo! zO?Vnx@B&`KtJrFKm!a5!eUJ<(1!>4Y7J8rx!*Lj@F%gq68B=fqPQ(nHj9I2R485pB z01I#_F2f33iF$}15*zR=w%`T4gjewnw&5dujP3XwJMaT`gYTKqnQICeY#K6< zg=+AfK8o*`QIl~jj)xa@I2-5SJS@evxDjjdF#e8>rp*ku;3d=R4By5!(|Zg*!4CX@ zAMrC>WZ)w(2B+X0oQv~K7cjg8m*P&`Wm?5>BR1n%Y%#sS@KtQZ`}h!_KnjT?a3uVg zhwE?yHeeH8!E5*clF2`Z=S{CN6k9QgLTfUnU@A_;4AaRBXJHO5!X;Ra6c4+g*NyV|W%@ zz=cJ%4gbVP_yIrSXY9eBkcvVQGSD4;F&izIhcGU~3fzp9kV1Vcc0qMkY8h6b8x^y$ zn2b3H^`h*+HMkBx_2w9(tDE;SxfzOns7C-TI2+euCGN&Oco1vw7@oi;Y{nM6fY-1U z+wdMf#&*o=L;T_@eYo^* zgkuY~qSxKT1Ac`x5MIJoyn(k&?=XBHKce?NyglURjaEqEFGp?>4M+sE>FA8J4EgKXrZ6hkl!hhem7EW>e_fND&{(|8M)|3P_$=kdB} z7sJ9o*?BL-q9F2f4kjFqOX4F5nIUp#a(R$?o* zV?R`kOI5&)$(V}8Xu~bI4bR{OyaSrS>K3fTy;y^{@E-PqP72i>{oq9ai*Y6{!KJto zSK}I7hud){*5YA2iN9k5RJ=F82paRTO`9&K2P3$X_4upWw=C*zP{N@6$|Lop18 znZ__Y0Vm;BtinUsjvw(ee!+eeC%RN8496H#8^g!30k7jt#PGqIIK(3b*~mpc3NaXb zS)&?(F_?s@n1MOC4V$q8-(U|^GOxpMoPZe!Vj-@;4Y(Qi;vuLM){8c*!)E-5edwOb zalvrRNpq>Q&<4KKQmw`VScA1t=`4?an28o#j5T-}+pz~KgY=1MXvNvsfK7NBTd@O3a4**2YwW^rP}v+8 z#32Rg$UruFpbGsk00S`?!!Z)oI0BP!6sBOR=_H0?7QCo~A1w$&<+#)-m<12o@I1ES zeVNYXMjSkWkD&5+KTO3;%z+Oh6`>Tp&<_)#3fN|>#E&?D_(GRTG^H@i zMlSMDfWa7wVHk;tn2MQLiB-4{@8Sb|0aZktU@Gpw8tjEC;haMfhF};D$5i;yg3ECw zR^eVe235*-A_0j=#t@7!9ma4J#^4A{#Z1hB5A|rk>9`Cla0RZy^;n5}vAvA##2P*@ zEjD8}4&a6gwxiOen$dzV+AyIj>+4H=LiKa0K7$xm6HgNvHn5MUlP}>k%t0fXu?d^; zBDUf??7}ZlbBPyB#Y~(654>1_CAb`8n_cQgr~vsgj>Uzz9JgURzQ8W*hH7!CUoT{M z5&86`43`t9*!7T0{e)kzAAjIas5LItWi8u*X{bd#TCjf|^EVKuXu+9i!$nwu8*npj zgL<8^1N|@&Q}GPcR^Aip4azn2L=b1-Yp6Gg2W-brP;a@^iI{_hP;YY+E>6VFxD~hI z4%1x>@5a5j9}nV5{N1#X;WKy+aqm!0A_=L;K^{s_hT#~A(HM(rOf*eqI2F@y*EW~B z2QRTi%gsC_VbKryeiTrgBWdOd#F8mJlt4oE@hWD@? 
z4p}K?1UQA>L)L^Fj6e9>p)YREPhXvVe17^#D}E>I-&=3)7h0kn9ue$CoSkES|7bM;W3`Z?8uehZ)*9T+OImt@jq|Bp=zcmJP|N*| z3gh3owKC}htCQH% zl}fAtoz6ER4W~otDSlp33>>y`5zC0_B++tP%v+`t8jB;UEK9p}mDN z^>YV3a~Z%F-uW5!aDyEkeaix`U-n_-i`XXi9K!{;`p))M|GYz%kSHw~h62;9UG7w1E62OM8hRJ6Y{w8|gRr~hD4r2fjEhc)? z!udsR4#7DJQb<gEBk;cqp4mM7=tWHa}qk7-8hp@OsbSR ziUxTa6x0)HX5wh(B$z?n7mk1v>okw8oo->{W>^sB9vrQhz~X2zC1rJjrT4t48Fsq) zfzt_if-Q+ByR(E{2_00cGu_MO{z8n+XMTI!=7+WZ=@85WprszNacXUWLn<&T!zbZH z*zs8uB>RHEneWb8M_Zl|_!^=lLf{(*D)#BXY-jcdN zNfO_RzjNw&utrr2q=_gdkfjo?gn0Swf7@kn#qFDq%*C(K*inyOCI?%U4n%4zI(_a> z3qCYKqT~XZZ#1TIwj_85^INub^jv$AQy3N!``{pg4CjG0(qjaF=eP$Y%y@l4^WrGa3dFiS6R-IGD6+RJ}9aCfR@YyIqyurd$CY`RlTXkj+ zxTuc>T$1TlAjN#lRCGJLa@+1|{fqS{^FBJ;umCPYQbxasa<;Y1hiJUKc@~1|@)XhW z)hn!uy!dnS?HQUO5{QgGUf!@7Kn0&>%XsRL6pe395&)K%2Xp**?{KmoMz5@gPC*e1 zbpmOp09P5E#sj_TruB^XaovwEL`B|ti5vM-pfr<3Q0gs-fJ(k?U!J{XTa&d(Vhrkj zu=?1z^BzUtihAumqdrL}xXJXreaDSLooXhZ5;8~^t5klIc~yX}#yfkv)yh(;EpXs7 zXbWI9zARQ^i}$+#ThQ9=o9ZZjBn_^!669p!l;(AZ z<=rA%nw0dOiA&vi^V3U|1l0MLX23L z`e}W==Y(at!ee9)K~u^sZPq!>+U)+sfyx=f594}ELpIY-a}z`Rxfbh2+b8Z#H|Js_ zZlmR|k5B&Uj93Y?x=aG7!S?-nH$N+q|8LE+%c|q2QQ%SKH)N%>qmHODseRRo~xGl{tJf_BSO8-stb9j})v(?R#~{V}W6ccFo>u z^M#^~oeaL-3s@K?P{#pk7qjp5W3AO(o$9RER9=d()aU)tkDa%+_&|Z(OhOar*N?z; zbO7!l0kG@y&Cx0L;uFjZx5n}S;d=Nj3;Nr?4@U~6UOhA}5)@)(R`g)pkZNfLKnvB< zU6^dfy42E%%xDF}*0^yAZF_U?$haVwf>LqR-doF|7`U=_xYS-O5|~ zPt}Xg-m~x6{R$9BAJ)*!lEY3qjYj^S^^7{zuLiER-qBKX#$Wz2CW$Y8`OBPDpK2)E z9c@QnBe%Y-K3ky_EMsr#PyLxNwkK zz4oY+(`T(UKY!0w>oZHP&!a0;z1(ro!Z=RBpeG+CQt6=$9W3CTEFWU|6_NP`GAh^N-5MrMT~8+ zmN4Ub%3@^*5!=o4p9^IY8Fyn1023w{5`5=K@Q5q=Gop~2tFfex*_<=661?`F@<5)! zR^k&PvB^E5{8RNL1f!kYu*1Li(a)rvNjH;bip1*@c)mKbqGCvSBYmr&K|)YBX^+MxvJLNrtE4VcBY+hLnNmj9pX@r-6zr= z&FgDIOha|_1EEps&f#k;WjI|By2GAKVa}9rMtnM?MrL6-A!z$to#&i2zs1>8=@I?X zCC%ztJ;~%!aNj$k3Cx%0M4gIsULM0%Q`BIrBtI^!;De^>9~N@TNxJ)W@OO4hpKSW| z8Yey}K_{=lVFl&#JiA+Qa0a`7h`XmERDt4l3+J9CyP}zHAL<(lhLJdB7u9tfm}PTY zwYK%(D`$dJf#>1NOo_%hZC1x3GOF6>kt{#Qh=)4Qr;v|3#EDw0g+v)-&4dRYk=@Aa zhjF?_$g8n?Jf}&lpyiET8&e=3kws{b3K9*&U)o)4o;|AC`@^U4gbu%oPkBI#c9oy+!R7E{IQF|04NPlW7P`j)GPl=&a2o_E_b7qJjFslK z9@1IvR-}>QmrD!F)!MA6ee6$StAcQK0XXj;T}~+PKIcRXV5Fkm6Gzdjq_dS(_-I6_ z)J3Z8Wz_HgTGe>oyRp#jjYoOTpJA?**?@SR-DaivlV(GSr<^cGarh6z&96ZiRG4d& zxzPQ1kVhT0*xj7MwI~9LdAMM09Uq>!znNyyG#ez`gcbYX%~m7!Fa#35e@pD5?lm!I z`fMKET%CzHy4ph5QxE?JPHLA7U_wJJ#nim|h=E4ah)1ScU3!OoZHlkV`)4EvRcGt~ z(?fQ$dSUEyD_B{*h*Py6X67({fZ(t^S-KX?NWfoU)%e})M z!Nu@l`U%^rg&4|MrkZl+m{o6v?9!0H3i>JsPgFk=NRzr(^VRy7&gJ&`C+$_a;(D@Z z!^1`=%89@F_BjDWyg-yot?9;*eEq(B_id~gWb1=2jJ4Tv<-AwzuVY#!i8#o#AYZ-@ zrpm1b7Gv|gznF03L#VXR@o-*J)I+xB_AR%HoLXI0MZ|KDLbcWqGIvd0lKk}o=w|r$ zvzu$;y{}$T>eG#gJFJ7c-Q(kLpyzws_1!A?i8|HV5UCa1z;3Jg9w!bGq4CDuX> zGm{F!dKwClFW3BL8_ixh?sY&fV~GISs&P86Fu#EdLpNuyWH;4|mqdMGHzPgMt0vaU ziU|i-Y@*Z9>|dVAIwo@^howwc=_{G2q7-WLmn1zB&_N21h{)U?4r#E}OKvdISa4P7 z+Y7w|(jd@lJ}24n+Qa_h4jb=&xLcm`DvIqOkQ-4l&q~9r$lxqO37-Aww(bOO-*L0e+dXshxe9u>I? 
z*3G;dO1{74LC6eri9`X!&VoXcGf#I1=!6Qyqb9n*K4@;RqKEWon*a}$sgXz;?>#dj z<%;h?xYWYCsF&8I?OqH=>a7|5C1XbNvL6z+2wCor8}xj3e<#6$b|jy+m&P`2irLxa zUIgR?0EZvxKm@e5)>)h&f+;H-h*i!hqd-Z2h2sfjdNG{{V^v~tWe*FLAF8FdAF)gK zLrHVj4@N+#M)GF(}S6ULU=9JO6?F42HX%|8Vl^=K5}7f;ZF+x$K%7oz$W{ zgSz|0go`?DLY<`(g$!3EPb9n%t^XY~KS}AbnpI$)Vo>LYZd5>00_6MZ9P(Fw9jOE{ zKeaP1NaeYxvv^{D9FAa;WDhop%JG8>CH`#dPc=|;9NP=%d)>WFB(W=(=*Xpa=V4+# z=$S5`fNAeif<&J9bE$*mU+iT$$GWl1pwoxQjzUOb~Ws{ z(x1Nm!e(n04V$4U(}*vjdFQrWM$oc&=n}&Xrk!Hev0k~9P{-ni;$NZNvFyFoguYV18xUSHOy`7?%**k@lsv`h{9|!3ckq`8FED> z2bvjgd=ZIcOCd4R@9Q?OdI`fLA(fi#L+g?9p;46axC>p0dJOZMrsYiVpDj>UO%MiEHj7#4<$RMAaPtU#Xp zmdkebrW;qYtJ2XOf`yqYC{1qKIioK$4M^##*E+gIhB2f@NWk`?DTVOiQL)^v$rBWe!I9mm8YDkH@&vh(5_{na$|_d2xPQptxAXUlEDQ@P1KpA&-%)TdcK*6#? zl*=-^Ckpxm-hcq4QgC)+V3(_p*Q}XP42|Rly?+!r$ZtLwc3e(6n6No9(4~5egx}qB z%RHBWrQlf}*)(k!;)|yb^&4_drbL0R6A$cqfA?ZU1kMAB8uUcye!5q%r!BlTgE8pRf_@68+eGMy(TpErX6TvGhEu85F;_0 zRa+WV(_B$aVUXbAXBd94=+jo1N`x*+@9oUhwzLV!Xjf;iK#cFOA44R~zDdzTkn20$ zqXe!Ikj7?4L6O|Xz^S>QgtSw8p`ESZgH8JbnJ#9aBWSBgA;=o+q+8bMsg%s#u*MN~ zAZSe+n|5;4(qaIbN35ALVaGf&UCGiova>bAyufK_>kiK{;&l5qvl#{NRY?o8;Ax4c zjruNFMRify=_M7`b3X2fAjv(y1T0pDax5brwC6+OVQ5Q%FwVNibCPZWNDaLmQ_uT$ z#s(K7yu0#rO;u#tTwf3$l#)!Te4XN{6&rZ585WX2dD8D{i@UapZ4?LaSl|9EAKfP; z+%n5cDUs?+Q55hBzq@R3=u3L-sogGvs4^1 zgv~J_&s-Br+MGcdY{E`((PT#0Aogw1x~x;1M*(rDve#+efLC_cn9gQwJG-LD)%BJ6 z0U#3c6tXte>zMs~suFA%F{lA#^;$d#%4vT$a?Hm9-r#&}$J?Cwx{E`Z~VcZ0V1s5z7;m+r;qglc3F`6vm=W z&gj3eHmmysM~*Lx%8a)Vmi7))T~2TNc!C7Awe8N^f}=^3CKzhFi4Jm72|;UfPA%B& zLHq*kDUQXFO5n=KjKCiIR%|A3u1{=?5oU}e!r@PBh1pd<+#YYfZJo_J)ySNew zWA31*z4LJVp$#ScmHx6YLZNZNEKosGMh~w5O5;>z3m5+M-hxyDObjUPeD7&NT2hGH zNA3>>)+7U3Pz&3%sahdKb01I6D5+&e9h09(1JqY$82S`Gg{1f{8!Q-_1B|u0 z2J^TIT#hlhHmlR*D!h+MIGuUc-Oogp!Z084%k2*zN~pm9oH!>zNjz~zQ5?-L)sLAi zc=3;4wo%4d=8str+y6ndHFiMIPKYK&oV150!iRK~p*_@c9~=di^3}Cn-ocTZ#X&Fk>pQ+#+8P>t@)8E{6b@9x^v( z3Z)uXZ{)lr<>%xErl<(B`A$iiA%KD5K%o_xbUK6cwKS*JswR^{XZ%5I zpc}@Kd1yUy+|dDsm)Mj+jt{ZGeO4N5=y_EGi(Q%pqEX^{9{|v%M{c;Z1JPzQ{Jy;9 z1UzLt@^>R^>HNT#c`iD5X|mK%m|E)fFnP^L;R( zBS0footWQw)NBbI%Sz0R$`@LO-eYrVci82V;VrohPO)+ViGX`WKm;|GC`C(%+Bhv3 z_xAec+l&iqmocx{t24G_{$jmfEevfCWW>4|q1!oQ$x3?V)j6inW}*psfi1#lX)DDY zS|rbknz`m8Pk?vmXIH89$rVjgv2ANII%Pdq4h6Gpj_vEWGV4m9X$Ay# z-?TqiQo+6MfZyaOf{8LcerVf_Cn-8e3yXAKyS-(FX{Gnx9=|i%&}n^5?w3<*9PstN z+cOUJ;;64Kui*nSrFF=xO`2{y(tScD^T&1kNMGGS7ttd(C$G8eSpDP(8B*rqTym!f zhI!;kkasj&ph`JjU^6OHdO)n4ByUKBfe4eItjK8V^Dfv1(xPED-G;`zTXx2Kl8O*> z<81}{@STsyl!@#n(g&gz#>!rJcl9k2ry{W_rDGa{B9d|;iB&3Z^deH=MMX!v^i7Pcw_PFTO%c8$4IF+Y)x(RGPMpu4waJW5Sy9UFS*5bxCv8fKbJWHLf90m!Wu zZ|2z-*QZ=J^eYS8LkpE;QQe06G~QfC3m`lTSweYo>&@A}9i41$MVVwVr#2xUl>v5q z@^5h63;n=H1uiDVQet(>8X8;G+Iv)wJtl)gQ^pz!PMjFVI70sF>`CErF;iQ*blVsW zz0VDVGYbc+tTx7{Ni%%&Zt8Bc;|7;r0hdT_Qswp zu*9|&@^3G#OA{9&0kFJOOmTN>r8+=RLBreESoVse>Kv-snW7x>gp6P`_ZL*}-ry64u9>5z5>+|Rn~KI=oq6W6BDknFZJX`cV(!V{jFU+BYcRqh1+)}kiuX>96jV`U;p`q{_{ur&sP8W zzwJ-|+rAiQmI0@{V{Im}fV5mEB=TAjY==m5{gQLR!c`dOF|fJWYD`?dJI`R@hy!kk zyu0FJQBco|ar{Ys>m ziCQTmzk;C(Uo0hvwc6`!qEgGlsHpbwn@-?VGuGshF>Apamb&g1kP{Jf&jnWE(R7yT zuCdme` z#8L1GBM1FB>+t0ck&2SCDRBwfhF0Z_Mb99)c-eC4eAOj=DGTA2XBjUxcV@r@F(RkZG8ou_00ye$dq!?AsI_r9g6*2>uEs_i%w1c2;CS^dum7Fl z0*fGPJi7KeWWp^mF~Q|2LKI_bn@~kIP^Pg^2M#L?`afFXP=(c+tk?pA=_E4z_1_F^ zL=uzHe;?l8P72?x{V`})M@Rlc`ZP_f2%>@u@$#HUjHoP+IPgmGM`k=D^ojVGcApP0 z>_qJ}v+QKOw-%6?$n3_E!or}_&NvgFtL^XXg#$3t!8F=dn1JG^)ufy!M_w>`%>KlR*uWta^^VPwV z!za(4{OZY{JvsQ(PYWe|1w~ytv2(Q#Hy8R)w+WsE56ByhXbtz^T=h;ws4q#60%JG4 z5D+|6iMl{1FG`0dldi(&3H94%FPD@%{sG~bJ$DCa5&TE!TM7jzNKiG=hAIY_StW)l zRulGyQEAF3UG;j^E;N1Q97%-xU>tml52PC^n3LE?KNx3n-Apt 
z?kct*a%4{~j=p_;bM*2J2f6wuVlM(KEE=Srj(ze79i!z_d`t|p~F#!R@XnC zMr8OwOicQn zyE9@{Z_!V<-_Xt{H{6LpJ5MUdZpmmY*{hg-iPbOVj(8$HT1`8Kf9MwlRE4%6I; z9a(4yo!HI^=xR?A0tFV!`F_ov1j$U*)a`_xVxNdtB9s(aa*KGaqGaS5OBHki4#l8CDlpS%>#!v02X4N(0Cz0r@apVs z!$r=(!v%cthORqt0bRf!fjyE682Eg8B*m0WZi*daJ_A)O2qDxvVl<@6+4+GRDPMf( zKB%HAkGs&$9U~#Gk_&xHWM|p-PR?(X>`}I3&P0ld1&y6zbCkH5x3y(_kisPoLXcH| z&YY9FMx#^+7Xq$=e8KZ(mIRwd{YAN;aYf8b9aYgXIVdFA+2|m>>oY}X;?+F`vO5Q# z{BC5Wk4yJeTDY8BQKLnmV;EyIRwX6QZe?T5)r2BksOb^k(NK;MP!vQ=qAbCyYh3f@ zeuJ`UxwAb2CX)cVHV{aSmL;70cONwC8{fXgyqmUTgS8DZZX_d7HR{&)Vdo)f{pi-; zama(w>aa1O8wgMO7V}dbY6&_Wy{{y35f;KA?>VS(;}n!y>@9Igyj+GIMgQ0zqA~tA zj}91DKupwY3wW)&XI}R<2cOtur)?>1bP4Nm#!-AC+4NGE2SBd+Pn0!vDlq9)(+hc0^23-wvR>HrP+_c*J!k8vZ3(9+iM+?EDzV4ql@cTA4^JZ z_1__7dTWCE``1Da+rGJpM-&`AiaibSYda*CUxTNnRX3&i2uAPo!0iwd9yvye zjm9j^LhbrbdLKq3*E^2sy>GIaR!N+QkS1@dpl(cx_yz~E2{)mW4B}GV@nrq|;W$6( zz@o8*pyizm6`SHLn_FxFTImfIT;3Yf7x9v*|M4}17@ABd?}H zbLBs5tbQ12;I*^zi#I|JK{Y}bSE^Q@XX*F!w4uP>Q@sTz*K^mk5jIZ|g%>p+qwNGp zr}Y4VEk-Sa%g>7dn5F>dbSDLs{9%S3vlesAm^De1Tp0lC#6@jf04yGYs7aF8oe7Vg zg#grYE!9zm+M1+65Y<8S6P6zd3@MUzoA{YYd70a|Z=k{+6-*z%i(F@|4=)6)2I<<^ z#%b8L_|q8(AEK%pkH)lT-^38SRzahLNY1gsaG4kw&}WgNnu6Y4zpls z$Y&NW90dB%ZJf{UyXvNlt_u4qF-_{->KE(#U)3C5#J=CoY;`HC-2cgF+f&Lj-o&Ws zA{1v=Q(buIQZ+{#3L0Zt{v}c7cmHBwPh!?1H7o?E6)7tP_VdpvsF-D-wD6lVStog0 zjrmkk8g(Bl#Resr+a*zZ{o?xiVm*XREhe~IgXrtQA>K9h%m{8Do7VO@jHSsSq3!pr z85-j@82zdBx5^J_x<HCZtEgJQN~c5HJE+@@kpO15ns@Y<{orH>LKvG z`Ze2QKSo>b0x320k*#x% z9aU^&P+g_V7pGmQW&)S=rBifnH}Pia>#7W;@YdURr8CrqNf1x&670x)S%{_mnH`x6 z)K<{QHOLS?yCK>#D)YR?o%`>2H!$B@(3@R_cnsMp%F&TPSw&yOWl3L6(pE8Q;Aw5Q6V3;q^Ms^cjj`8U)w) zfElANEWm3yi~2U&k1K}5#R4IM(e~i z5cNsXuNNC_o?geO)lNF?T&P$qF@so(sSlE&s|yQI==||q3d?~5nOa+PN;3kPREo)! ze?PfYlAh-~AkkdP?Wh6+?F%8U$|5*BQXv05$NI!?LMu{*x+*sbVapjLX{z69uz+c( zGRTILKLhG#H_harkyTYx*5+2Y*1g%?`-rmD7v~pIzU^M{%?lHq4cfIXORLH23ba+m zgH?6KDyyj>?Y^W$5+jHkRKT1(pA@Q>IU<73j4vtrUM~&tyW*`t@T0c5@i@_xfsT)7 zMv4r12Fm-kWj%L{b4)>+{fO7Nb1*~6e=sl?DVx9sa#HTZOo?#Ex`}eS7uO0GihGul zpWC$?EPnT||MYU_zy0D@|F-}0moIHf{{zMEXh*a|d=wpdH+1ulEbjv<*XB8ANb2gDT6wYl|5LnBH zgmmzFRN5YCS0x|beg@uKiG!o_C-x7|I}phtP#h`DERSyv4!{4y&vC5d^e|^nJnPEn zlI>bV00iIYwkLiKE^k}#GdSK-lZ=~kQ2#5hr)2B@m3sfy;R-r^wuvb8u13PoNLF$o$b%>_ZxtUk#*<*IOoXp-2kR@ zPU$BdBrQt0`KGvRRLcEgxi9DEz8`lD4YLg_>AYEY_0TbfCk?7R*U@522+u!m)(gpH ze`QA-H6l{yW)<>pPI}A!<~vMQDR;Z-`izt<;2)`wL;P0>mSSeddc;`q6`f*OM=rv* zH2+w%?iy_!2;7g>R>@L>yXBHN9?A(t0iNmfpxeMs3LSPWA9ho>JAu)p`w(?1SI@1T1u z40|k>5JbYn^Qej9CQvwCq@6Pkt^|gqhh|mkI{JGEP@buEbFXQ5X^=|}bmhmV#eM6e zZ6({V^wUO6E;$ws*Au~zyN?L z>J)Npq!VWssSxC z#w~-(4&SvIQ}`*#WzXw3Hd$V3tNjM5cOriMOty;eJ$fc6_K=+3GJUxZ#0L|mgi-h* zLEB(}W~pWg<`h89MRJbKb_M{TXE`O9nO4hdgq#+H(vvzf)I&cv8;zig%_FI;#fFtd z#4ud)2ewUJJu?@9W1Hi(wRh5<=uldid=Go*wJol8bKkqNy8q?jgH4i|-ChwuLFRx8 z;+($&2((?-dV{=fX?yeZu|>w*w)(-1DI=CRf)zsiB`-_MjHPV!p~aXagXC!2wM-kw zQ4U-f+*wp-nmZvkDRpK=pa)lPvTd2Jat-o!a86u%gS+{+kksi!*M)wQ!Lr}j*djC3 zy3+(0bh4v)`9<^KtNC}kcP^1M8#@Xr2{NoBFISQw{52C{g*y_H+yW1j?4P0SK$y!a zSDW4f_>?a&U+LxK*N4NH?O)-N8MJy0l;M-{p}|c(!E365%HWWOMkVv z`7&GS1`_b&Aq2VwTujQbU{xkeSas#h1rG_4?VfXnEA_01x=kzTj&?+maFlwN&x{=x z<7?+Z&qMQYIYK7ED~@Rz@ZV~x1>4UktrZp#3X7~%ODYNv2{T#S)L}4OU3xO9$_qyq zY)s-3z)Ym`8WF*U*3-qF7u$3t;pKyFbeo5AuyRH3bUCz4G&hG1z%O@&h0B(%r~h zKF*30k)8F zTrZJhEy`p1_P5vHR`uV&beZYB24k8_Jd*UQbZ??)0x)Aw7<=m5$@K_uCRFc?7VHA} z=qJ_png99Qs;b}^{~tZiUT&@6y?N^rD<(=TxYk`jk`K(tV&hnjz@VZV9WO z*7=hOvYyPXDs^V*TvJ`r{Yt7pt3(*92~1wm=c@6XVumVK7)P zNw9N-os~&?D{vcjMW*F+xsn^=dJ9GMS_}4L(}wkLT(=J}30xyEX(DY}F6q!` zUPUoPI*ow5lbP>B?m#*oES@m%toI29H`q?5s`3=seGTd2F!G=8HyXOq<;j=%!s}NY 
zWSkW_a5WP?Za;(u^23Glb1}*jNg;76e@Gb+8brqq+PQB$Jy|apP0wz2CCS>ISPe>} zR!M-v&sBq8>^~QP=YRufz|_Yf<=yV*=Ea0BzmVYyx=s!)Ml9i~JJ0o)$Ec47wIt+| z{wudg*BN{lQ4W7TyTHw)+TmtkihHvAeC-*Y5hb~fxRItyfclob!4U^U5Cq-;(a}mg z_+;qm&uHJdHm#`q&1SHQQp_;A(SUhDwlyxkS^Kk-A-52v<_!rLr2CZDRaTD_pDi?A z0yKxY8?@=^i=_O*j2L5WKN_yosC$n9YK<$NSO4fv{F0nOc}R!ab#casZL;%~fw-a8 zCBPd8Jo(WlQ{>BhBLX9T@6K1QdWAnQlWpO`j@Wsca?MSDiW z_LE2sVPT}>` zXg8!mkT+J00W+ZHby4I;7Wne)2TyNrhF|1I@{Rnknmd~ZIZEw-DvL5rapjFa<6w|un0jVSEw7g~m=v}Q*U2~VO3#iYZJ%OJ>1uSZ~UOB>RNx9T^V1TkY zpJYyT*OM^qwk1G1H4LUzI{v|B=}P~dmJ_#Zoq{eA8ii-9DyJK~HQ?Y12OBg4QmR3h&3%qT_> zNan1lM#wh?AASM5u|?r_3g?Q5*=iUq;tr?#p<-g9HdCazfi#d`&%tL8O746zK5qbe zO5_2v^I!9I0`+8YP0VO6;+>6m-#3RyT-9ZOICziEW;xBIxeJMdePuD>B1jw(xhYw! zVp6!0-1K8!Gi6HNQ%6@iTCC46EWK)Re0=dl9|)YY56F;hYn)PuPARNZH)2mN&%b;k zQr8B1gF!K0u4}&@EAocc+<@-K$-L{$_4b3^><#0?$xGkNEaJ5L`~Ira{(fEX)kjcx zDhZI~e*`@`Ip*F9*+cs5j#Mu}8F|#p;_mKkkP8QQ$4k!z)~`r3fqrXGeO{!s{>MLEJmNZ+9gHn7ay+o0bpfYr zJAiBE$)BipIks($1V%AK%YHJHb|=kRX6VKTLidU24#QGgta&h9mchqD?3FQ;rqUGU z)KSJ;+$ID|fojbvm#v04rXuh1C|9bgjs%!)YnJAwkj;!ui|TaUpQ-9t%H&NGg6X?P z<5fcgQEayh-`1p5UoOGw!zKPkv{#rwB@96`-@TE;Czr4V%6%IF8J+>R%J?t~N zy5fW;i_S?HGMt+p)vvph<5KuG&_M~o?KpC!@jJtLHFjaTvko#8bK4mzcf!@ z$RJWz@Qnx^PgvC#w|a}4q=q)DVNwFEx7|$V^txs_V)8vG)Xe%4UY)m& ziv}QEYm%T@C0n}7m2hr&(P-j<3SLm#YI%Az)YkdzX#uPu?~}qZDniVuPjkYxt}#cU zG>em!Tu+ynWE1C9--VTaYBuJe>*y@ZD0Ew>?;ZFM(*Z`q$FYX+BVp+T1)ZL}GD~Ge zU-h&%#H8YcpA`DXY8B|)NWEP%PSYcM#TeIpJ4VCaDU;XQ#3nJ4m;n<7$ms8KUc6Hr zcWY9d#D-VT(G-vYz<>1O#p?X}@^DFc8JAREyx6B6;KgZsrzQ9NWX`jd)I0Ay}nC$ElyU}~k5RE`{;k_At95)@mG|HY0kO+ou{ zLS=jk##t7!B#3Dqz;ST)#85EV7Wdz4{lI=QOHQid!OnmG)$g^GFwrzRkoT^qZ|%;c zVB!}HtY!?%?F2ITP52oIFbW5WyI^OXP4jlj14hY-{Mqkke)ROo;>|r}0JYA+WjXcS z2qXisXy(D{4K`z5QNvQqveH1pS4qrLfAz4yfJ4v=5W9>q3env&B?MM*j*ge%F``xp zJy2kO|LyYN^8r)eiiZRV2$1ZPHTF9Twu@_U+AR(9_?i`=iRADJ6ZYUY)w|3kM$VbE z(H)hkcF8etrUV196ls`Ww@c7%2F6@{p^SJ8I*EJ{oFc5@Gx!cFFE3@MFv?mfmbCBC zd@xO=47GlJ3=0BMx%ssi;x)CG5xBUu2=)%iP)bNa6dXJi9TDIeH}@?%=l%f^d%X1( z%8UY8aHUaJzOxQ@lZL5PL%Eq##yU6FKc38BCY&WMxG45Ch5C) zXtbcu?&gln%!>Wl_2k`WQ(!zFPn|^Yo*T7va8oyIW1>!iqzHNWBu2+Mj5Ff|$kF$r z!b?2V9JP-|{)OX2#I#2)2K0M>{Cnb#fmh~vVzV}0A;_E*@5)Jg&RwB$4kG6ghM23( zap1MVEG9J&&0UfBFufr?A~idatwLNR(MiTT`_V3dz95Z*g{Sbj*BxSJ`}GPgO$U)> zHsg;^1##(oP%r>D-5F|oDPW+#JJZ)=s?v&(juE3)XbrohVmn}$%%04dT%aVSFF>607}OC=st;N4Emp}uDN(<>ClCbu=-+PkkCB+6gHp=$NI13eNX5gzJeBKDdn zKzZCmJ-7e_D%F>81K)gXTZ=pCZwSSOUF?3Z(>(Lml2cY(UHPa_AAW21*|$jdHPvf( zul|W#{^nYS542+JSIhyF_oINQi?6v0EY~N$lJ=VfP^I1Mhhor}-@^E!c6l~U zn!=o)9(Z^Ej5{<=)s!LtC|Lo;7(=c`jgC2YsM-k9lMilw33nG}bJvhJ2hb`lY-zj} zvkbLSM;jZvI;gj%z#fMTnYrkDMGRZKf3Eif@Z9&52XhTK_4)@`F?teZ*hV`kawM0TQD30iIu`5&VHz|UyO7Vp{ zs`OfPr)cGwpr@u(ryHXPkECTWy)ARw4Cd!?&InyU-096ML=QMp%mlpIltlwQ7AY3c zY7*tnD%`uMpFDP$w5mb=QNMKLF~31VcK1-Yb5`Z}A1~h$@TyXdXYcr)O$)%Dn2aua&-21j*KO;IPBS(ABrGGb8rg3dFUO3c@b08r}MY3*TE9 zNrDF=w7pwiy*CcTB&ir;Z5lQJYv;eYsfK&0a+>6i1YxO*TgIu;>yMdnQn8EE%QNNG z)yRPyqtwl|Mb{h#;<<_R;m(!Prr~HQ*q#d6{Igt4RoSwnIx`u6I&&Ir4Kw%Navq6~ zj#73=`?5mRCQ=b7?n+GbmZl8uq=Sp@2?*WOaf%k-vgg}Q8GP%K(F$W6dsU6}Hmr|U zW!aNGK-TDy_wbe>wotuxpOSR?@t8GA>E|N?ea|%}P|d`PFUAxinblAQ{TWH$&8-x- zE#hKN`sXyxW-L|+!fs^j!mewv|8*_4MT}v{Ai;TXRrR(mXqT4xe z9zdVxpH&!}vZ{S$+MH>yMOYe0CWg>$^H6F-J2TlLwe17MH=e7m6!|mUJ4)h&l zXP<#`!7mw|`O|w4)_U`*-8ocuD+`GtYkgC=tHygEM!k4vr4t-@=C&_LD8~^Q!ShoG zI~KcEmdz00zO9FenrNU=E!Ql zXy#Fr0AVXNk7W!A>)>qx4hCL&xgKZHN!{`3Jx*hk9s^}2_)!2>)sC&$q6K-`@7|u{ z<;N)@iAk?&<STsr_vB{*~B4vlSsR=4Oimg_X>xwE~-yVyCR#xt1{Dio-8 zL<|=o1GaV7gim`k+%p3H-?w`mLhIP3v)HAc6G-Mp+XcS0^g^-vrS-qr8xy_Twl;Ah#qNf?y6Afr zvd=yV8IJk3?#U5xMJY&6$(Y6{?woU5nnc0UNMpqwIVy5_0h{i(zqbtuaNM(SuIk#U 
zG-y|=}1Xhea#L;gEB*TQ}aZ$afbu;rcfY3O60y$2hnY@=R+1MPB zQRF7)Og|rbT!&EkwWyhxh}PRUt|)Is@YvsD`dduC#uBtQqnO$WXG7$O!ivHJemeYI zoNes-t1FWYa}c}!Uv)N{Oo6boalG<>c)j2LlqT#k;*ty|r0I-|dXh0Fzp!CNKTqSG zt3pe|ktRdPRjOPBgi&I?557EkgAWise`TqChe0wYa>o; z{33NIFPmD}cMH;W6X!XNwy_THeMF0c08CcXATV~)K^<`62to#zs3%h6;RJ5t66>9U z8lfR`yW*3s{Rsy+@iP{zYN$!K5(|1lvUL*H*Ot;miY6Vk``~*e%d*$h{A!MAT3x>} zqHTF`VaBMOZiU>pD5~#})=O_If<7ebFP@k1)XgVYa0BI!1{(w$hP;{&ES3{bhA( z1?k9D*YX=r6FEa+Q81}QF)KzO#Y_f~&2Ej}%#PYVHzQ!`sWn(Nl6D||$h}Wx&FD(D z+@Rf4h7mz`CWZFo@eY{Otv2o7ipEkgtfM{>08=b7e~CnE^7PNVMXPT0ethmmD#6Y2Wghiy#Ligpd2{eXJV%INVtCLtr*-3yRc>|;bn z^mj;;D~g3|#=-pK7L51$T8tzhY1WFaCI|GJTS`emve2YyMSPJ#wF{~b(L|U9E#Rb4 zc`0(cT(wLy!@Bu#6mM(7!STD%1dD^~t8?Okg=>&AsCkK9?Fn?ezEiE}69q#Wdz41Q zn>M)QwfGcZTdq5F482EYCc>!{ranM6tW2P|u2}(WtHnK9WW5v7Xs)$2>CvdI_jV>Q z@vYp_n(;iycY7Pj2G?)&47Cvpcnp**-cW*6k0ux5cx8-^iftdd6GOG_tqmQlRvgp7 zV{AMni>R{tV zQ8^hzO-WMZGO_em=NPeNogj-H*2l*8i5lGpcKZtO&<}mQap9n#P~;Y_1SW~aH(5^^ z9GR~C78l4-cO|s|oS<$toqS zhg+M7QcT0V42z&HoJ>6A7@j~2VSk`geXvfRrN+F-sN%ntee6^f6Bn^;Lr+o}DgolE2iQBDCOZ_8L8SC79-$v* z`p<$mm;TTwSxTP5#qNz?8M3k9OUPWDSK_3@Nr{g`u6hH4!L&U1Aj8^|T)B~7F{N@> z*!}b{g$xa8CsD@?AGZUx)>RV{#RBwDU4V@huzZYv*fym>x_$Y_fnbI}pmO+=~bg0{4Cym>lI z!bFMyF}m~%k3Qxf3N5kd99j~P?YPq1oGy5Y(dzQB8f=o21#9dfO!`I7*K6Fsl#}+f zIc&5iuW`cIzbL{`Zys|Hi4?gCugw<;bn8i;x>zC`do07HXFscqhDo7>1OD9>0e8&z zUH^WosYN7s%U%Z8PATf>KNpZ%FrPiBvitngzxn!1UEeC__etJ%b;TAjE9%9u*TXJqx?{_JL;_MZy3 z{MRP*A5*xQ>LsxWyLVQ%>+m|M!M<+t) zn73Va7BrJ;R0M%~ge^C|6K#5FWJ|x2_Qyg|-;?!Vhr(Ol4UvjaGWUuqzEd}Cc2FPY=5}M{t-^Fah2}kN3Bg|zpNjNj|5`tuakdhq|oW{MkbkPx+j-f&y zo4tA^iJbnd69Ld9L)Cna8;C=4qvR7Ynt#ioNO;e|`sOB^bq_|4#q3}G{$KuOtq9AD ztIL}fWam<-3(%$+qp><3K3I=_oruB`R@dHHqNci3wkr%3Ef|htHk)NxvAZSUe>ft5}KSkb#tXAkQ<0GyPp$n^~Ln}*ms$4eDgLN zN^dhwNH~w-Ms2O-xLI4}WO>+DEZ}x4aI?X^fh)RG$CbN{-!`^-^9&tmbx6&kQA~1* z>D#eqgw!$12%qdXJFktV-RUIiY;J#}7nYW6ZDUUXveBWNreV+)gd26yY#`{y0Kjq7(Po-Q?v1|PACFB*cD^Q>KSs_E zIQao5^@wW9AEO(%UEKbM=uF5`5=(KkF70<+1DkI z=6D~Nf8+8Wh~o@#++_YEcZ}HdRN3AdXVZoGCr`5c5GyI$QAM-;;^%>8;$_cB9*;2I z@rW}&HH)p;ELXFAD3v-kS!TZH&Rc&;O$C0MrH^HJN|nXodis?L1^M#r_eAf30E))CgsVL?fQbcdRFryh>_1 zgVOB)a9tWp_Z6vzmZtHg)>^Xn|KIp6A01t|}vzw4zX$tj?dok4ZBL zF2tM*fU$7DDqDY|mfJ`kW;R7;tuFu<$2`3{mj`+jU0uYY()mjpJrhMBk*X_{ z1UC`H%yLEnIlW2W$vAzaQZ5(iGe^nU``n`iAqV!L^r9-m9S!+g+dSz@VIfoMRynZ^ z2Fg(~G4av8I|5+>`AKQ3Bk$8*Cvww)-knA_G zdA8W-dcR&SI(N|sM3R}fpDBCGFHThXYt$l2xyS011iHPoT%zMr@*ac`<)qf9Bx4;N zbcjpTlhnR=57OQy;{N(8DcveajZjjN5jiO?%hv?)eX$&b){*er9gH1Eqh6zO;E_C* zXWP%2( zEc)RCyV{Y@!t)s|QH)wcp7deZ6BOE=1lS29b8crc0G!;DS|KggMXyNP0CF7U+ur6! zIHBg={YK@0{mm@Vg}5I^t?lSU*?H%vC-_#%Ycm)H^aytQd=M)U7GEv<@T6pwuycBD{Ed$2);`S|IFd zjoAC0T?u5QE~nFsQBTL+z}#XMb)4KlL-@!SlMig&fAq@|5xiQLg^4D4s8`(5xFKQ4 zl6%_=Z*`Ab8zU}{^Q^55iu1}Ovn&bWVhNzXTvE#-Y#*Rfl^FQlzx|?BO6Vg~A45zn zf|ltj6Lo}rAu8n@dNWX1`d5s2YdweQO+fT;n^vYslvE_XHIb;&a#nZ$=-imSS5Fj` zARoVbpeL#uD3q-WGk4iA6o+zxhInE`)<{+F?&Jlidkm3x&9N_1nssB1=((gbmV$WB zu!^08Ez_KxZn!+O%CnR*JoBu;h7V@+oGv{Dtw8?RNxyE9p28oov?edL@^A=UxeO7nr%i{W=a+u&9@b@%BfI}4VFjd}lsEb+EoH~dn#_GD!i zXe$!Z9mL=dOG8C0?Zjko)yMhD<7s`ilT1wv{iT?)ZD1l#4)^PYK6z_Hnc}lPufKcN z?KH}#xR90NujG9wCv@lwN}C8mo4*M5O3B}7s#m!6t4o-lpQ%)FC#QYFF(@$zvZdr! zNq;I7O|tf?-Q`*kqMxut$u<(a zx-RydAg!4%FSJ_Nt~E&A(>yV)Tf_YN)t|^6hCDraStQ8i`cMy6>!TBUc}{A4{bp5E zsD4mCygG*qcRIvMPr^%t%oaC$LtHQxsN&G)SLeJ*94G67u}ZG6hkLf0Xmq=KpUwb= z;WnYAn}2k|nFjDeP+ad9};=|rR*LfW;T3V!@ptmCc#Vcl1Iupg=lj0v<^HWI6kLg6X= zVsh_vS?gQ~H_3?hdV{2|qsVQyf_3FLwA>eJ?*LF$8bKlOJl)NTu`S)gU`d$+6@bzK z*%rv(m?_-4=4H`i5;rMKiq!ym=09-cqaWQY5fu;&rU<7H73{TFm_Lg1=HCcHR&Wb! 
zqMQ2HA|=`7lIggWkys5jqnBrBm&|zmF3Vyy{;eTN-$U)ZjP`wd z7@%%#92drKi!s^^SpaSq6frU*jmb1+aBw=sTZnOaWG9fi2?t0lFH52V>{nO7I>nV| zuv{}dix6*{qHc>HGIh!YMe&2Zb*(QEa;D8_3ZaRQaB_wae4_G99&fSt0`xFu2*Ayv zg=w|f<_#R%cJ!=v^qeyS$@@Y*cC!Nk{A;y#S*PIRR0sMi8K<+TOAcdz-bQqN=JJhW zKYYr{zX;PzMpDQ|YL03$#Ap=rZMLFDA4hLjr*Jh*)ypY+IV%aa1PL=eWg}ZxiWaWp zbmQ5Q()2Okw=HhI_gHjidXt!@>?cgP6j|ccjdwc)J5o9$*GQGc?m+m#y<1jdeNYI=ay5h#&t(ZRB?=ArlZ4DcNoS8B3$gTymhfJP_ithOZR<*Tc zPNTNi0NZ_)D3n&bQHg$Y0bYs*lE)9?+3jZM8A-!L59{IAlCber9UQ$C&7t}tR#pO- zbZS{>FK>%A({)}Dz(jST*VuPAX79$5zE;o5?y`6rvjj8bS zY2mga>=Kv@4r|I!AEoe{1UMxELAnrCfLCoi&e#tT#MFZ_M7AgJ0Ch(PWy-=3=kHK$ z$SK37(@}^U=x=&CEa>cr0aI*`h>!#YE z&4wp4jaX!pV<~L%KAe2E{|sZDhvyS%ME%WlSoqHY1mobn9ZXvp7Iq#@SLq9x7GCc3 zQk6U#CU1%ubZB3~Z4~!M2HiMoZVtUg(ey$XM+ih=-c;R$`{>kX&ahlW2Q4nbVC|(? zwXOCXm+97U|MqlJ-Y#SCCF}3+UsKf>B)7J8s12q5`|tnxpH}bJmm^fK;pOlD>F1n9 znavw14n_~Y?sLTlf?6qLiTb!l!Y^StKZs0t?^<@TM^=;ch zI((v1XcJ@m#yM~b7QcJP14VCk{%!p~PtLyoF{k?TfBff~HT+)!V6Q0Hg?!Rfj-#PO z?oZV)->;r{*urAJ)Zwq@=rpN-v zqYvmEgT13_8cr<=+l68__n2(S$6|5YJ8u2botxnh@bYMTdvFh&v^r>hYfmU~d+RLs zxkUJzG27OmtcWAOlv&+ta(dM@Q5eq7B+{|3(8%8%JZ9vv;<6ZyTH_S0w{Q_c~%X1U`fpx&;OLna!)j z9D?geQMYY=adN3Ls2y+?@l&A-n@dj&Xm+-tlO>L8?>?G!b^RQUu*A(Cik2u8Co$3L zW;>2%&6)guHT)_kcp1UBv%L8g%*1_PdbEa(T8uNhsXPN_Z=)M z#NjB4m)>I>3;Kzi7DN@rM8v~L^IK@-4ZToBdO8%>xRKtR6Zt86C7c7oA9LQdr{mRqk%H5;c-_ za@>$PJ3@e30~kC&_xOx0L&C@n3fwoJ6}uqHfO`~Xx-kYAj1nYTtn3E3D8%F!?ymqK}91>nm2Sn zwDGuu{&scnu;Xy)FwMc6q=r}&acg^bv{`?K;zR6HCw1`pHKq8GMw@D^zzh!l$<+xl z4t7ci_ro|eq_dT74E#;gCWNUiuc6gCZ*XzX}zVpYO z#m!R(?vH-7bG$s@@r31YdVwB5ogS*A6v17GL^LC&warpd{8?sQpNyF8#qe2472bA?yJEwrQk-Z97Q zs!QUUNHej5uZ~qb@RiAFEmr+2;_C>`O=UGnvmoml<=P)*b8#KG*Zw5IbhvR}SoL^k zm%E$(s|~r=U%rNfCg%78??gockwW#KJ=2AaD)^&c#|sO*26{pa2D%6$d>xJc-5)$M z2Bf|&uAAYm;17gPnG9+o>u3jeHvU0(bMIyOY*NJXdDS z=r`4MK)7k-DWo71crd0)RmI);2G!GfG(42BF(pJ!fRx7`@4^8;iHw^Q$08UdDM+SCwvP7Y*Ly?g?J(KCD!SUd`= zBYj*JTfm(39?j{fy<$DEEFHjgKtxRk;L(W$hxD2p0KSAs+kPi1gx$e;=hGn%X0I9Y zT_$9)BRQFK(%Gbez$}jgqL^xYY{QyP?r~Wn+jsk*O+yoWzzBIuq+CUyNh!-7K%>LU zIK5HdHz&M}gj~(F>N<(#%jw+x)Qw|2%*0C^Eu#q}XwS_Osp>i0wvD9JU;Z7d0#XZ8 zHy}7M7xEduVuTM_*3~E2gs2`O`-)ZlphH_dnF7c1%|{bO4*O*SY)xSDF#(_Zs}72U z3j;tkOIYc~N%$goEhT9OAjoAyVwNAj%a z!ZLAKvW)1pp*LJc(mNUahLLM#HBlNH*IhmApQ8(|th z3-f}`cv5M&^K|**`D3Fy2m$Wr16iB~s=-f4-hhs67-R}(L_al@sht!Bhw7N!dvrnA zFN0d}6k9AY6<^S`Om#Y;6@WiA=b~NogsRb^NMl+@Qjh?wAQZZF0)E>NW?a#VcN+#lt9h ziIh4#Q?BouJWf?Oo|KTojr%>yIxRxJDea3ezB-|v3jckp5I^&(EUCs}H-$;A-@x}Z z;;<3;69I0BZBF6UI5^zF&Uo9wD?&%(sE$pDz{m!ulB!{YcIgCsB`dJfkj&5&+-7D8Sr3Syetetg(x~CkO|Z>|BrcUi8+H{&fB3BJ7?LpoDnNDTDF$VK8!gcBte^u~{f{Em@)d#y`P)ktB~o4rfV5n&o7zTXz9hz^U>> z{C8pbG|t#!Mcsku@*2kE&2`<~>uXr<*%K#1#UR2j>5|P(7i%S%&Q}|DjPV^i`D!DT zSm!1!NgjQT!V!};q;}_rGPc+HvpXDkfE0Hgj&l~YCyfOA>oK@qBGt?!v z1+;3cTaY=WwB9o129Q#=gf^W>max~0%K)X>U84$5Xgql*<>u>+kTX3jB6zdg*uEn} z_ZX1)$a-gqYNeh~1+}UEo4%60A<<^O6?O)=Pg`OH0LXx1#w!-^qyxpwAe~PqgH7f} z?5D^h#?+6N&RoB8vs`x)m$)8{^~rwJYg(>bpPrw+ zrZp<1d}3bKr9Z3 z@!jt({$cUt36X^IvZQ;g$c0RkQQJp&C80G^ zOrUh>Y}huw#dz4oFKI3NJZ2MSm~ojLbfpBw{TVOqsrm7xDG5zg2KKOQ6;*LzO5$s? z@xqp+d_zVR7UAX^=fbX|uM8ytuewlz|64D=1^YjHeXU9lX$Tq#BvKy`B19ekD^&aR znu!_ymZ5j|hJ!=dhqQ9M!Tz_bH+?b;&}A6WM0PZ5G-)z#p_y5bR;~@p&)|1{P7(0i zYe5uW(bqq_CO0lf#_t5HB>QImNn2cMJFGEY3*WlY&4k111MyR;wje2kN#o6)k5J0$ z90&^Sjk|;^PV!e0yx(tJ%vI^Ch6b!YJnEBz)v(7tCMaOX@P0M>55L*m(5=? 
z>VI7XJ5Mb#QA+sD*p*IB%qF&){72e+IT+B>E>bhcIlhY&a{+TRUE*7(FOp&JyCm`n zMk}-0rY?E&eEa3ejWqNVPpw}KWq}lGsk|H+z3E2APj=369?I|3(v-H(w3(`7Sep9r zhyVmHPzu@3Lzk$mu@T6QP*l6U#iQxW=C`BP+6mq=RZfk3(_!)ET6LUA>4CN%G=qpU z^sSdKOcy#ES*Ft+?Tcaf-db2T^iRU!6YI2t9m06gp@7EYd@%(8%dtl9yOR9chXE#1 zc+8)d8qnQ`El1$r9Yy0?jpKan)=ECMlV+)Q_T6*1AKqQI0`sP{cjpbHx(H<~Y2Dk2 z`5^69;AR63ZUp_Fy%5iaec#bTnc` z9`LzbQ6$FV&_X&hPeBAAny+Q(l>R0#g-HRj^bSP7wR94V~)mh zdSmtCg)h;E@Xfb+iaTFVfw(lWjVizmrhlDs*DxKOSgk8T%7s4<66tSf$jJb*SAkt@ z4zLhc|0~=<_{-0R^W)>y(&RCSl4knNum7nujE#?yPL;6zv;5`J@KRD> zgpE+U%w2?rhl`_g^4%e;ir>CG!u=<;@!<8FgB?;Q7Kf+%pMQ3_e|+)%k4@94gp#bg zsI#=UNSp#1?Kf&1;hk==(@v-vm{P;@j}Q;fWZ9Pe+Uhr0$(3T%oKMpz3h<~nQot4i z(4)-96Vt|e6y{PNB#CcK&kW%fZd2+}3iEI#<-|98a1+5e^u&^0!ZJEfoIw?s=%NDf z@>oin+E{FW{5W#R0}zRNa5E_ls|)HQ0Ay-&n42lDS7XQPMY8e*#NhMgySIci4BK=R zb{#6eNKCz#oP@>&=XhPscm$OiLvBA^1@s%%O^hzSW zG?v|GkHQsKT(ov7SS=CLwK{XA=smz!vY_K=km8U_PC&R~t^0)amGwehQe))j@d5>k04JkK9xvqAeIX+isqngSTvDmE_)|P; zOUxsj^0}~~F#^y=l_}LRQTJ=m6yq|of=WuvbFImN8IY8A)epdf9dA1E?3o+f^<_XFJYsr)iqV$Uhgcj zIkJH<30Tz^=l&Q%tG%Q`Om6nx;jx5}nd|fz<8)fI(mOPy1p8?R`y zJMXjg9%mSqvKmmwJ>%1LE<>yAkGe+r7*&I`qqLi7*pFqd_fWb9vq68_dV;Qy9ve}WPx+fYlA5eQDJd~wx7T4I_BI#8*n1W zi21>_l!zyx__LKOm>HnJGhT}cHf_|Br!_((&gagZgN z3aAO#pXEJ|PlVD4m*pmf@0XcOjN;z63L7^0-OUYCjd7-gt;mv$7TgRIOAtueveSiDr0*dah z!uo~fx*+e{-sPR^-Cpx}v1L)$XX11USoXiuxw(*@i#QE8ZPU4~VktrrnE}!=EQo8%2x6BoR1a3n z&Yog0I74faU>urV7dDx`ysMq|gZ0E4c4rrR@I}t2+(cTd zz|wFB22M$g5pe9C1^q@42&x^wdbFK2GsGWX!Zczv#jBtU0ZY6e6B z=sgzeZirlWUZNUD%P@kT1NRRy%nXpt)Gxi_eHRBpSk#OLN~+XiM^9AbFm@t1J`H>3 zw=+8V?h&4bvF>Vg86EqN??;4sh-v;IS2~(bd<1@4ullqB=NFRa*p(AS z=gmlS#hGn6(ajFNvu2OPp$AUAO;+C^J`cuXmC^ZjKz=3qaMo^;w20fcKi2 zKo>@z|07N2!{}mcTtmMhA22MQ7VA*y1BPu}Ch96@H8~2wb*_55CErD^{hL zU|+t?7ZQ)KsqUyz{P}IHm?PxuX>+p^eV}@cynVpQzX47rj(1IhxeK%e&J7 z8_1N9*3`%u@gA--Hp>b7fZrdOz4X-(o+BzFQMe@MzZJ8W{_0`LA8?d#Rl&d0DLx3K zx4+|_#0;_WYY|9uybsL(1M`1R=0CD#$xJm>wlPoR1J_e~;A%NuVfg8w+G17KQ!1_~ znbl7=a(5OP2`4Tie4WiE>b87U70}ZR3Zd5jVIkGB(#%T7nWvfgI$ihJf;Q79c}EHx znq~T&v)RUb@8~_pN#@Yo`fO|~xoB_*2S-1+AS2Ikb5ELknq&iy*>7P+#ATewj@oDS z8eGi}jiX5gF1zOEhg1(nOp7w{fUC34NAOS=INy2mm6N#X)E4*a=iZW0d}|s(o`1L7 z4#BWCx_5p05T1OpN$>K^D}r2Zj(^`nvc1=1dAxB()+t_Fh2_lT6Q|Rs4+Z`t;w|3c z5e&0}yKu3<;a8b23P4mf;L0*AKxJ?Xd$?8chv>F+5p0rj^dyOgV%s|AA{zRGF;{9? 
z3~?)P$nIH4N6Ov#-4UJVVJbhQW^tu?H=#A6zXfb5{ zAg5kM%e>aAlPFvncxnkUl;)xURJ%5Q+6vWl=-Sf84w4$JLB4sY`n1S<1+Cg>6i*&% z*uH#v`bo|1A9Xor8{!Y*ovkAwBH|g2TM~zyAcK0N2oUpAx4@++lFf#*VD`|RWFz0; z|6RpFY+}8H$U1O>7DBc2+PSGm?q@2J$Y?~fH>BAn>z4>b1<5qn1ZVGXV8y$ zk!+x+FA}1E_#;V+3qRzlVquVpP5K>Gj1y_mt5HKw$L)aV1g&q$<8nL!%^lWNu(vro zpRPxM>XV{tS)Ia#h$ui1;sNFP9__x&_T%nYfHp_V6R(#5O9kXS#9GH~K3IC{9n#^} z#raOVld!Ngz;3KRL51z+-xZ&3*M%JzNQ^}|)e_6m{mG!e9Hhkncf)0?1i44bG39*g zt_hD&frnI4#{zZ6XXfS+EwJRXlM8R;5i~V zPE}o%Dc;CUz#aVgZ(nBQK)odHC8oZsT5^_ev@wA_=nJNfCOBQ%^ISDUHfqato@3*( z4Hm)VHbJi3uU21<`u(AiV-*zU2YsLY*O0X(BLUNO*4H|p*$q3hmW*VfHeO7+15!k0 zdbHMvJ6R-+;jKv|Fl$~de1Z(AAQtdK;Ow~>Kb0IOsY4O~zv+sWyk}I)!t%`Px`t|r zR2?I}fOm=gi2@fl0L7-vJ8!V?EQb%6scf{d>F$sx2WzN#4uI5o0VB?r3|C z;^4Erw{Cmzy1!X^F>glCu#G|GTUNe}b=wdGm1+ZkqVGD><>u*daJnL-&JvM152Hx^ zY1rxcBN{y7OX7{L9;ZTlhtdy@Mtc}FcEQ;ujb~FRe{_2O&XQH)k->_riPV_E#UV&z z$q&w@;F62$A1)LO&ud(QrVvPcRVMBq?yCsyD4cD8k|(L7b)kYxhSN&=#Jjt+Kbf`T zGZ{FT4WGF~`$H zteMsi35(r*zO(!BqbkeMbtJ(`IjqvFU;Ce)*@F{+Te8zck|TSSC6hExF+Q^WG; zF;b@v5it!r0Bj_4gP;QxhJAxn+(ZgC>F1@I>sTz%CDeC8HdLN~0ZM@p9>i>wv$?to zWAu>NxdBej%I9-^>?9(k0fciY z104UVcUXr|3t!1c!;kTjHiWfvArJImSzL-+w zhp_;Xq3dz++r#m?;aMEXR*%qW55&G%LS-0SI1h&*Lh^GM?x&DJW7GPCq)#FOM_-UP z#?xX57uaMTEu>ki@Bx*n)Yz3e)pvBt2_9771fH8hdL9l`vM#&!xW(+^a zf~N7%()mdJRCI$=57T%;n`FOl?52y}w7Jsc{7gt3)5ASa66V($@gcPsOFW*JkfJ%#p-~xB?5u4T0s}9dk zElSra8aV|d6VT&Acb^S9Cn+QnM3cC2X&hiq=|hK2MV+U`^}rQKSAE^Y2FimcVX2X5 zqdYp0NW;=J-gp}Pk{o`vJhTZl2of6?41MM5WQDQI%(ODe#j$5ZF4#Mlx@DZKXpN!s zd(2T+`mY-%tukyI+W(bTQ&yH5@BW*1Xl>BBj;01RNY1&7ly&Hpa1NJoa&~Ht zLtdXrkk(6#CR+}Th;X5d8G>V{ythr-Fl&I0igtn%2V{Rh`4Ve?{EOfI_Se7t={kdB z?ahPb6W*(7KbdfemEIob>|$T-GLfSsAxY@$54?@RY|Xnoa*%Pt4J@@-L@>kTA&aJ@ zY8PW^tG}+(>o+5`%!oJ>9JDEV1P6HFzI5}3GhRHxQ25rhdgQCiosCS4*rM3kLTRoX zrn1wWX?7h=^Mm!}aj=@Kk#EZ2;=fEgtAT-QW)uXg&6sCV2z!WIBw&VQc3&$MG@0V4 zFGuM>Nf9Z~v&mRVhQs#YDpso~hh4!5jd7$BR=Wx>VAZ-8C7wY{$1JsHI6<1m4P^P! 
zF(<;4UuP*{`!+SU(fu-|8=L9a$a?ni3zneMW60_#NcNn{z*1DB)?+i8BG=QygVV!n zQqt`-R;%+vJ+7?sQbmDvp_AGgA?tc8ALi=h>e^-CY}I5S!>4Ynj-}Zs#!UF0I!qDX zTC_DYymisUa97nJ*uEwAR~PVs=|&kRZUaTut>KIlEzZt8QGQ{9nBb|-;XW{Z($2^P zw8L}A0sHGrk+voi*60}$t{WKOvPik}196)BCJl$Nu?9%gi1XZET|=VKm{10I$twOO z#ed7dJ$l6$JJHJpWglt$o3rY=i2KMgdUbk#zsa)&G~cf5`wGm6xXAHyGoN6^{&qSk|aAw=2*0&{UR%b&g*Bjf2bp-}ISRL3V zFokQlnY%`dO7bt__0>ip5x@O0JIl!!B1(8-E;ukLktMo4lB@e_$*ytXlDTLFKG2n@ z97lnVGt>@$^4kkpPIw&x`fBI*|N7sa{16MNsksg6P{n37-Q9yCx$1*!;EMyS-BFpT z5qGZ1te9)e$w6XhAv!NAZDj|&3V@ev$@BKyr*%V(uCvO)F_9S$Q5~{NHt?E3U{v4+O^UqHp~ zZ^=cly{_9|&5UgDyPqL;AuU~BxO@n%<5NS-(@)&l3n$EZ8v6{h25avJ6Zz#5M2=tqnMwB&KbFWqg{=D=g@@( zHAlxT@0cz`oT9R3*@95XspKcrL~pm!H6|4rcxm&-W;z*tZ6f1(@#6&^&n6HS7Ol0m zSO7*k`)4mTJ>R)`Cd4)u&H43;!z(r5&b3_aytPDsD=Q%%VXn4YNCGaAXKl!qD|+Vt z>{s;cW)A}D60mHd?R?pG)r0x*Pkw3%$D&9kLs(sn0#iQ#NXZy_ug!E9u5gOYBfO6y zft^gr?Sv5(*%Q*K{!>3dD)z?e2ERcY%QO`zZr8S1#+X>ZayV_QY25m+IX4a zZhlP^0+74kUyuGJy6zw?XEyZ_`(LN0`yT5>6v6pyN08Nqa!K&-iPd9bG$w|Me6VR zw>dMA0-Wh|mxANhx)K*Qp%HRQ_QgQ_+P=@GAW|a3MYk<9j0$Nun{Ae;+85dTCdy!d z^Lb1#=2FR5;tZwRIk&S6^5$ZCI=G`01zRRe)O0JA?WbXVKq<4On`M8DMRNVi(s}yN#(C>=N%fg)vp&k%lG`Pyfr(yW4s5 zv>Wi|d%2nQdbM8Pe7qKMKim2Bt3SO=a(wgsk!pDhIaAC>(G#=ERnUU-Q!zFBom(Pv zOHjsw?B5=uh#xIDt;{9~6gMN3pp**c+J6t-fW2s6GnKW;mzlVNJC{i^H}y4d0_y#r zO;h}oMOoxPHt7P~;exPFS%Wz!hjkb|WH^(>PoWm=iVV5t&O=L2lhT?mr75I!e5UVN z#AS2m!-y}?cifJk6__10f9#4r6$c}5`4VYL_Xt7&e9bQ0=$yq$+f;Vd#P|D6Rp?$v zMfylT!zH-vUG3tNiSAq?$P9 z9RTaqeF$$++|$A8O=X7I@)hk;o~C#wCnTaMAST(t!i_|>TrAk#8!lAMSNMXt6xW#N zoDhM&l#K~@zS*+32})t?qiH#*Xd1gkqD!;W5OzO9vBhIMV}gf1iNh9iR+-XGH&WW~ z`s1CSVKbUO&4UqqyRa7fC&&9#H(XtTXoe*AG#9H^2<_>~@$%|~z+~=U#DK-yI)~Hah{^oy`r^@}xUL%$e}&u9ogpX9 z`(8$m`U%7zHh$c);GVi91v|g&N9%0=GW~Ck{Ie~~_?dYFAs^e#gjmqsA6U zCw(iiw3D8Zokug{fLS{*mq&zfNpP3Kg@HO|iJA$!sRfq`u$9UnE zmL_^d6Au&_x!1E7xn@C|ILML_(`TUgTS#3kwHm+|SY%aF{4uue&xp#zQ6Q1Hyh9>% z)0rgRGFO&mljt$l3sADAx}Y3$_a1s%4GKHL7bwtD;6O4*8s74NvMM_CD@{OfzKMmOSK?{%$dj2Y_Pp?e^e7W%y7?;FQHnkzpgAi+pl%Jv?EUCZ@oipVCyy+2G!NVpA zukezE!O6%0W!^||!qack!K_`-+U0`<^66ubmM;2AH34>?zOdBAW+0fTVB`u2RGeNCT?^2O$?0j@T5Km1}5&m5}cM(k&9D??MVU`>2uGe25a6_TL*; z*T+gtv8yowMw9eis&DJjHBp#*^^_)zg+j79ue7*^ZFg*or+_Pzmm-oq3@5|>EtGQv z<9=iYNJKLmEwIzKlU%qG;To~&;u|oB^39LUejFU=+@^P$pZ=|I}^ci}~ z^35y6GJjPXye~>?7lC&bFFE3UViLw5K07(W-eW{gDI8!xnvLs3bP%IXNnF-1uuf%_ zO}lk{&Dk$jI}aZ5miOc5qjvrvx@f5^4g`Bk6IWgh?RTF)qOB2{Ec!~|(atVKC=`SC ztab5xN|m;aX`Vm+cr;Dt72jw(T~az@rVDACE0#J|$9VN(u+=(v9Jw8$!{jHBSS<%6 zj-)Xha6*!UzJH>W^E&`YW*;vhHBW~oXfG)954ffTmGsXTCOCDJ-L$2dY_JP=hM8rN^gu34;!>xCRO{UP-@{_d&BA7;pp;0BQaM+j zWV)-+5+!nxEiQ4rzwZLe{yu*ys;>J3y*uL6 z{eUF9s=F(el@PZU=CE+~*$WUQ%@haAhaY-8#%d>fZufF{;DR@j;Zo#lhU>EntWuXmSS12kr3_ZV-zFO-(b6*doFk-MQ#jGb)bm97(ZdB{k)HL3 z^y9DPCE&A46aMlykk!{b4ux3C28|0#kF-oBH%nuE^RNxf3kMeuA89Ayi4n47kb(&> zgpw-kop1>vWVyX$i@2NYR1gOoAG#u(HT`LnNk`T10 zz?X=j&5M=FLAG38mQraQy1*i5Hkn0TV}k*f4p?`#^^Ye_N&2Sajcn9y9CpL?AY=esg@8?J zH4P6}pPyVsKe6!P)e=lU zJ$+Q*PTIH5lLg*wU1l3LM_E}=jN7BNh$-`Gy#$bRfzgY}qzP@4^%usspcACHa5glL zl_Y|?!Z**eSN@kwQxjp?E4_{3tQFSAngi*IdYlr6a3sCcJ%hVXZ}x`xts|AaMJKT{ zyRej;*Bo8zwy>u?gUt2|^E?g-(wZ8&5fghlS#E-p?1VNmFxDk@p{my2R}Gl1Wv>o1 zS6k~`t4Py}7X_T<<-Ulfg6RATX7HQQjQ8nYg?JE1{04x-wrGEErN0%@OOX;n>vGW+ zn56vuw8`I+-#7c0Nz8rq9FuM6{A@!6-+SC_U$a=TPWhui1Skq=69A|Ugksxn#60{~ z9vl)(zN+UBxrt^^LxQq~{M%YoNU}MJlo=^rLd4z4)!P$Ax~mYMoGwFnq(tNujJ-ik zGY3!Pr`JWD?rC9*|6)X|96$Ok_t4x#?75%%{E{uZe`sT5PYpa>9xFE)XTGJnbhl+n z@N(Z$lSK-@e1P&FP>oLwg|7sC6v_t3XJ=03@)pea={ zA?-?ji~hXa-+rH_Rz$xkuoZ3(5;PgSW|DP2h!5dY@L)a!Lp>3(F3otEAk?Z1&-Eg7 
z_Ull-8x6#RN}yME{A;K{M9=Sw3#wNdpA!}aZM>4DlIX3NdYf_`X(>C9tu=R!$WAZ)m5P%(r2w(rMb%}@Q|~KI&e=BGNw1w)I8Z?jc#en zrE!`xuhTO-n?%slqp&uP-goWql1r;DfSVa)y3j&zMKi=*;$AchjLGrIMr!R3tVoV_ zDN_?nWv#zkuX$rrbq5YAvZFq{3T%B%9h);gJ#P?r?`3pIm3<#bLiYp6CBzm8DG2B! zP>k9|xNftgaPL9OF`-askQaTl6NUXIv)34%hVsSf(cZqQrb25t$|iRJ{&9bWwVU zUKZA>^C0{l6?g#k;sN_0XkwMnoRP^ok7w*xob}zkw|DIf#UWI((LD+o#y;o#n=*aZ zW!}3+{X84Gs?lx@s>L2aY~}a!XAx5?Iy^n+aYgsYK`;5Mo4`hG z@6&zvyL&$ndEdVK_yQq6c>S7JcNGOFky21mNkPdH(5fsQ8D(=q0fv9lg1BxN<22^m zay7QjE7Dcr{?V%Dx_!l3*(kEQ8|pqiVda-k!klD|<9M05)H7dKSK!#V50}OGc^b71 zWp*!4uQ{K;?7gryYyS^59lLD0(HZKfQuPn&ot7*LQcHd8!&4K35U{lCQsz+R6-mh1 zuSs>(C&!#;{`-lNkZq~R&%ZNlQs-k_e>D~e(57#op%k$j|I{@C-% z85$v(Tr8RE%)(oO(U2|5O{9YZ^LwZaQHxodCm5BA*WtH&O(< zl?E(vmKHt;vTyw_v9DG(rpdveg%v(i0Ec*`XU)rTYbp+m zji51)4VUfLq#X7qknj!UG`cL;0kG>thK_OFW2^*($noY_BWji$!S2b$^()FMzTaH)bBJEdW*ffW;P0~$ zX7sfP#T%drqr;Hz6+yFoYBf|sd2K-iBaKfgH6W#2nWPx@Gh|E-^m#zq2H(sOgf;f2T14n}fBnY9zD?evCa-4))c zNafgB3-*deBT&`TG~{eA%I!wp_rLt9evZh=LS+v zM>H6PSoL|kIkPp^fdj(ha}ZEYtgr@ei_8v`CwRQGz$Odxu&N>;oxOBZV`R%_o!6+u z*D1^nKrX)A`N>aqmKtWMzrNI8UuqCa=4wLzt-nGyH4=*3>eDu*r_`o*tllj}Jwq?WyP*%(VkB-x2^i0o| zR^1q!Y4=)u8z|1IwfyUrlc=i4MS7fV6Z-Ga5O>h&GM28nPNF))CZUgya`^Jy-8DMZ0JMh(#>yn z%kEpSJUgvR`XJ2uGls}gqhT&k1vrmUnd=dce^Hjp6eh0Nc#ML;0Bo!zwo8pGDzVF~ zU#Uq`^W+`PABXUXS86nj9p8y+3R@k%iz^I22EXa}R&=VZ-XoihGj_>Lm7w%%5{rNN z8*Zx0Zn~kWW{uL|MrzI)o1UASMK2MPVp^LAzFdHCDWmh^94UzOjoG?=7`E=bG#(Bj zg!)bFQ>0r$KJGZHI*lA+iFFO}q;$Y;7Q_^_an9dLZZo$0l?KDR%&m~7TH9!&$AXk) zb9i||&DCx(K3l>fmZlB-;prb2gB55;yt3tR^6nx=@zFP znyg18C(wt|o5x)k?IMqtd=tA zUZDwgi?x6Hu81W+Aa#PWbZ+CjWdJ)$&(BaWPmMR%B@0-+Pah?AA7GPLN&kCJL%i=* zPGzU1;D=im7Ql#%p8GU4h0Z|OjVwXy>R|PGha+=fFhMpKSd-ZS$gV^X%6-ou2Dy2f zhfhL2eDE_0M_V0!BsIkYa!eoq>IS1`SNf@3I`s$s0@`Su?V>{hs6<`$i-*(o0=ft7=T=^mpDN%?RnAw3P zk6uFm@+%>xiHR!Z zSz*+QIkAYsVf0gttzp>$hOI*&)s$pdq-)(z$MKB)DAS5t8nmkBJhB-CUu zcv7HYl2uJDWbh63{LwJm+knu0NrQe`@XO#+RgGj_Cxk@)tZHNk#Ho>V!f}>4aPUr% zsg;j&sm$4Hic%wra8cbD$v*N7OwLr?BHwIW*+Fv1kYRGFVcS|n|FThaUJWHWw|^wAEQP$O&u{DpnPMVu9up9Zdwo?oBquRvW zpq(0%2e7rnHjDPw4u#sI)P(w#goH;6+=1XAtTU^Unx@eig8xqmRr z%r5#=tJ?dnuiZ6>-CTX!`j+ggn~y(S2rE@m6lp7d|NSonrEX3AA=nq|>I+jQdAV?v z0+-BtDn)&@)?czxuZfV@{kUl2?&@RDtpr<|)1CaL^$T>f1i?tQl|&y2|INvDZn*?doG6oqs0vY&nm+&z--7!s5~i(!M`hDh&7_Z$!BIH$VX2LLoYYm=e*tEo@=<%V(tp7;=Y*d}#Y2jb-LkR0OsFr)CK>j%p5jinqDPLoKa zICE*#hEetp+-XOWU;{x~yjXIYwFMxUHrU1MB)thEjLX2_UycI?S^XL7*A4Z`2O57a zmZxF9=Lm~$MYS|OurV`Le6z(KoQ0q30Yur!Wzv%M*QuE0Jv?v{W`8Fca?{?|L(mc@ zF_wdOC0>opU}l8+j_TX$F?svs^>*a1-fa18T1L91I>n#gzWKoilKD+1nMoZB6G4ED z%m*2L{t|fm0ZBg?XwwW`rr58`Ks%KWQ-EvM0g6&Re-y7PkZ{vNK|y4!Q`oMEYd*Yk zJG%J->Zi={>AuxZ`Nz12jY%?FBnR;z1GykM9@Y2DZdx=~#r9YF|2H4hl5ehBQn4tZ zgUIIZ&bP5PirYyBj)>4mB$&lSoXLqem24J>$fv}x`Xo1XwckL)?&Ap0DuanwLLvew zW_nC9AzUN%p~$@D@mU>(5AG{VBmD+y&xvqJC4aDX#Jy>1gp*6*Z#K4spUCrpR`O~k zgsc*L+}P68>N|RVul4*O3Vvfn!9c!W?{`}QridO@Qo)9 z@6TjN-8wj4lEFo60>#`3$Gj`k^nDo7`;Y#CSz(d--@vKpL0`$V-oJz^Aw%TroX8HG z@mV`&jr$sg#QzZa%pXYJH<#pX@UIQ4!#-rq3Pqt@l@;K_k$+>4JpIbV+a?w86tlHw zQ{yKz$!6kMY=KZ4j>YF6u;Ckv4Q6m53+0S}eaj9aXJMrj^zKZ{q*AsPGpWeff-5>4 zJ`YSn4y0LUfoP=Wn~zhT>a(mT#SIUR{&YqhC4 z57wh^lJ!XDoL5UKq$id@_bhwS(mN>Q4OjaDlc5!0=)T2!+UqV*BYwK+QDEjyoUAmK zRZvo4yAEHD;uBTa`WX7nF(RDM;B1)}8}C6eZZ5WV=6Bel!6O8~9I+{$QAQ=xS6*LJ zf{#SFu;zAe?Ie`eR+!P#JTpIrhp?3O9VMJ8I(@x-L=sWE51Shaxl<{D0rc*zuDn;q zXR2?r#N7d;`;XlS&*m)M@+riE0a}GBEr^fYOZkdb7|AMKcA+=aU=Us@yl>ybYxY&4 zzU9Dnqb5ZA)5&G%2-0rd%Ey`aJSeDEuv+^oK>AxfAkpS)w`VYn^ir(L^6|TAyXo2- zZD*Dygk7qd;poYb4<)MyD6)oQH*l`K8&waAoh_)xqsJ^}Ebn(5WO0!&wk2My3^70* zd{Eq{3ksKWaXx`RHPc#eeAyG4W!retg*6cqpv{`jXHa<$gNIkLWrsOJN#IGQ%lq5NLvLFhr5kMiM~d#%*h(8^Nv>$4S4 
z=b|>ru9X-x9P$aVXR%QK;nAAU^8fjdcaKM|F5-xL(~?&#k*Xa*2WU^n zJ-=Kh49E9mCi`V)6SLcRQMru3f4{4@LbE+FNDPY-KJ0zdHwRyyyt#hUaC>}PE&c7* zVM*QNZ8&$G=~+!b<_DBIMCPIrjtBUG5RY#ss2k((CQ^>E`|r2+9c@i+GHN$YI8*|> zyWCC;{W>TeR-&HWI?DQ}^qoV zOs`LM>YiAqG{AU#goIf|T|YUdP|w>ZX}4YdBr@mptKxm$knvBbnvy9kG>9m<<(De1 zV`!-%G)Iym1u(T+YE5R|F0;g2Hek-&u{X``bS3JRXZCU;SeV4T@rGRxnsx~?g zx38qFH|bqa$(Fep`^oPOl;Zvc18b$xQe0{|`2JWpd-DuUuYf^CDSu4C-De7*;u4&m zzcZO7>IO3b6a*#uycJF znQyN=vlF-}mNf(C*~NHU)7Mmly-Jq!mvg#m=vH*i`&yd(u=uYRdf$u%obvQwWA8f` z*Q?Jm&zXV;XGa@p&e>Dv(I-6Hor%Jw1Z z`Wj5+t*1v`8pF%zg|uLgDRG<>Ge~75lH_wE-Pr>gWLa(&e`r5FSn6`>)93E9Yhf(S?gc$ z(8w!Z(Yku~@LT@y-?tt;deoyHQiiJEW%hquj(oonC8s@9Oadds_ln1C4A+aA=31fj zm*_-#v!mCZT+D(v>4&w?O6F^NL zO;6lZ!_kX(7n4P8ZtZ8V133NYmH1m$lHS@vp}Qy8_Kf^3 zvrONXzoqHyJM*`;_Z;>9LHk>qzb||yXjo&{og1IQo$wRB{pQ#Iv<=`jtjXwGYpstd z$9M@s%o--@a9PQ%JJT9c3Vx$qV%r!p>Pg+&5Mxw(|#4cS3*fVxaiK~wI>n#Z9(ydBc`z>Aw-u0eTrM}|! zA-SV z8*i`Lc5-P72KBAms^R3qo~B8qm_49v*bK1)$kgZ87ekgWN$%g(=`jN@2?%<6l7}OB zP3Qy&nJly-{`#oOhejLT*rJk*p~YA03Pj^|OOuI15)eKN$>A%BCZF*I7owc{7-reQ z7-j$5T{Z_hT-M8q!*x6HEpcdBu2wZ}A}@NF2;$=zO&#*OX8U@~$sPuBzmetfq#9_C zzaCye`(0Ddm0n+{7s~zm2rFv*jm(+fTWOy)lIbO^$o6jUGI97RSdV1>#4O#wD)3AJ z2^2W*;#B~RW#(%|*+)5S!+J>aJE;pH^&>FuGHxc4In#i2LsR+FnoPF7hYk zjmbbXiwehLXdOyz#3w;b1r=D3tS-x=CtaH<7>`a4aLhm_4vR2wo7dqAV%6Ye`L6xR zCe)>81KUT@^QYugC0#0}Ok?0xlwP!h?6z8(D2wjfCHP|3J3P$Eb`CN7_oFG*@9QDMjwIBA^mSL?t(xxO-coyPK< zack`NpiX*|0a4u3FZ@wQHJRyke94Qm?stMwsZrM#i-~IP?NDU(nzi#0QH6gnFzZwi zdLFL1S`H&sJt5_81?UTXj(GrgZI@jbNYd!1L!50h)B-JIYZeyK8>;{tPu^1mPmeY4 z`dj3BUS{N8HK@%!?HlaPq=#nBz%_A5fk9pj#Vog<;vLm*q0A;5B3E6J=*1&#_B_`3 zGVlzOy~JrysJGW_ie?$;251(`_w5N5wxW(nhlBuj;#N$5bgDs|>J}C4%05wIFLsusl(x;P1>x>HFr zr6(W^wlv+`Za}nc%Su(HQgo#%)JZxWI6)M_5%mf=$7E8pQFvn3ps~%$Hwn8FW<9Q?t;qI^ST?B+;*vI4aAsr8n?RJWQ_|W+R#ep zaho7z&=XVLc2J2k-*)%LjVjg2ur+2fd2KQb=QTE>%_=Y`L zqk@E|V&`&o3m(d^^M~AFsIviIzpHGdh<>pQ;|2{^P@V}UNe_yZ>&3Cm{b&ccBFmN0 zc?MZ4o!>CMJgaD=&#R^T(Eny2wn4_v4{;ylX*Dj#@A!U5JF$6ikL!4-$cvXJ6rKCL z4X>6J%ge-)0X?=P&WA?*vFUS>WX_XGgM9o^YzT3?)4{kClBk-QVz)&a&Q8`hKaD#+ z0= zMcgl!g@r7vG}vsFV~N;VI}xWzbqt48 zb45+*G8ZAYx+vDu-42Q&_7GA@|oAg`OW2jd(9q)V(fy6WrQZL3VW& z+_T}DR<>D|R8sffr)va~mJ`R?iw;XM8&!oq^^IA}bik#)tlrC)<2vxPhHw|TMiLJ|4+c1X zpQ4oJ(FPlv*DLg`3}%4n4i?vDo5j6XP60K@caB<;=Gf4zmVvQz1+6IrlOPZ?^j&jG zXB>&isG!inD^@cY>lUBNn-~B|K0vYkcu9`+5sKeg1b1lpwX{TR zmSe_Bfp)iM!X-!!VvTk}2&QbHAJ8NA%g9^7t@sY{+S2SYS}REU15vpJUEi?os&FN! 
zHQ3(154C0 zKAH>1k}b@)L_)spsGp)Hreq@9u&6%xV;**fkM)bfrhzhix1&4P1Jx}6YH`qv?+ApO zx#(*B#A)QZna$h=GYn%os5!5S@v-tdrrGLSO^7*29$oHc5&yDsBrv#ITF za{;WAxH_jYtT_}?9GVWWZvO#=p&L!M3YKGbo?7e&O!3+Zp2-z}_q>fR+b2Dn%*x@bT#~X|AWnJXwUP`3g-=(73CqN-vRy7YYNF&Bq6B4T@HRw7R#*xdK`C_N zB2Hd9Eb;4t|9bu6DLFPUGhL~?vBZ%&?3Wa25E-hQ$csd4IH_uPp{P{k01!q(MIkxh zL1VtrM{Wztc*2i%*##y}PqJwX=7c3Ef_eqjX*gK<{OPLiDX z5PaKHcJOp%Z)ForEGU%8QrXbH_|J!a1eLjP+d;%K2>Zl2)oA+07=ufT(lZO2Lf-2F zMO#~JIu>wIeg?pRV8g9W$uf6fWweEI5aEtPLP84+8rW6W|mn%g{!JY(#=G4$IGUfZkm?E)3LbvI2u1l|#t(>6hXxbo;awq=r$ zivH*=Za{RQQMj0*d8Qx%!!hw$OO7QCSTQ#K@Qlw>vlqbAJwDwhL4IQ9HrM>3Z%ir3 zAoz3Me}0{-m(z)A##mS?$;oa6ThOUkum#pAjmC*MsG1s{fW|JG$4ROm&c7*AjPt#!iVl9(r@4N+r zWmB~g*K;sTQOpsZjr9J_`{+U zl^W#=TXtM&Z}kZ3tJ$Fm@hGrX`BG^7Jl8 zkjPK1$Z5J{WRQL*xknh%Wrs*m&*RSa|*m!0BmRw#J z-%oBnz$zAApnp?Z5$!Cu)PY%Val?L8@uRf}K3gPuFZCOgA`79H*hyDV27sDr!>H$=$GNOS%tF!0gm} zQTM{!2i*HtF;_kvO3}ml4NHOC3`{h5MtFXAhAz6p@+SGafV{Gt!N>!-8hvKb^LfyI z8!zZ{OiLPIIN)8;U*|rt<;6=!&EF)oczy#yroBd{0h#!9f*Wh=HzG#}Ky(hdmE02L z0V>I3$*_a0Dp;+fg7xPzIbUIYk8n>o5G zIGo8jYz#k)Z5m=CN+;U1wghp=Nf?{VTRZ>lq%wR}1arJ6A_G!^{_&pK`NQZ)?;TUi z7_mhAS#I5Q;O4TEE2kF~Q&N-)oVIhP87}B8U9`vjEaxt)Nkt}os`2UZOU3nUo+>&( z0s{&%%+2JR#|zSOtQ0$UikKvMtjrlZ*RPDqNY*Z>AI+Cb9X+mf>)&u(-kSa6=y@v` zsH*k{Tg@#U%Lw!TH#62DECS4b=KPC=yxJ+-;(YbqwT7IpJ%p*@@X`O9#`^zO*_7T_ zhy$(^B8FDk7#gTV(0fCw^7b!GYvDmXW?*T3^W;f{l3ZeCrQ4f%(X*cqQA#W+m~;z9 zCkH`ueh56qtpm)fFjT}Cee_ZXU6X~RDUxPv7Tl6pv)La0wnI&s2wXL%%bY_@p>eNh za-FGId>F89b0FiPJQS%%qrXS(cT%IzNoYG14fSRY%Fi~3{F19PkL`&gV`G|0aiG~* z2%Pc9^>mxWKEoZI!SZ?%Z&cF@z-O1sor3~5PQJ|ZaYzLcYwd24#qrN9XTyoTMDu52&OWg=JjmoiSPCMmQf&RbWD!*^!|_a-7Cu;g@vScawC)mrfbOfeaIR_4G;q>Vg3(9gU%W}b;)ISZgPJCiHD5El;y&EBm^Eb~bp=6fp> z%}iHuq&P*Jfcf+n+&;L*g+zIGO6<6_uIjn1Yz&lo6|n5dFE8!OoM1*GB{m_P7S6!P zi0#E1k1w=yr<{5etD8u4Cpnf}o}JVy8cF_cZcBmuV#m1xu?vPa3nci)&KZWSuy? 
za%_nQE}K*Nh{2UDrsboqigk}jRQ8d&i* zwJuV4miCGcai^pOyH_DvYP{4KYY2~?8F+K@C}YSw8%W4RMZr(f5u~VT1OLUDyDzMr zBBem+NVI-~bCRMf1)y1ru1LV^Nr|<~AVAJK|2^E`i9$JUn&2`h>1ZPzlx!jA{GxbM z7J^J-w$hM`cRxq)ne5FB#sOz^SnQ+lnC6uWChX7V0bzr)NIR2GK8|*r7t>>Iq7O?$ zE)6yE6^ucVUc9d?;y_pKDT()TpEpk+v+&+uB;3=~DSc zs!Mi=-0IHl*id3AP}D6(%XzeYZ#><0u>5n zql$S7Nrf!MXhLJ%r8B*i1u&iIidPmmLt7WSfrGU%gl%e273;~hwG!x!tpxdj0L(q8 z5fUdSk%eR|(@hz^hF}0SWxEEBPvCrj zHgHS>7Y3Ojn6!qw;&H$^!qk++7EZ#r(zYC#ve>b+<}s#Z>>8|Y&&^;64TC@!T?|qw zGQ!Sh@w)8o%q+o$`H*nD3c9Ecb^u}N`pznGiYw$3XD@nLLqL-fjFe7F4+ATTozwh; zD)`-dUABsJF?fQoPO%?OrDOH1&0(%Ii-v2vhl zo`v;pG9zh6IS-16&OchP&T}7k*Ie)1MFi>mewYOwS<3ka`k%cR^O~d2gUJLX1Z$XO zayBZdk~n&%Zg5V6y`9VAxlobFY1D%MT3oiiZ+pg_=g+NUZj7JQ(U=xEhf-sRi@UIC z4!C4t@I($`H@h2aC%Z5k^0`Hdr*rWxn{uc~a&-W%nPou*AR7>~Euu#%0Ep5#825^6 z%{;zHN@tYE6_XM5fqylA@}dRYD`+(rWB7g?7%{Yn_nz}*7EentuqPbN(ff{l)-QJtS{`*WzV>&O?S|be|x=i4+!| z634h;cdjoa#eXWnLFE?^x`A`IN__@N0Y5;2Ma?D2@Io?sj>{4A>Z*`>j2YwOVtI}t zXLKTfk-7|LVQFi#IYq(+sR0Ulj*VddQesZL@1uY2X272j?S_OWT7;!ZtEJeIr1KX1 z0GWGj=$IPHt=ur6+)YwsG_|#2LLs9$XE|E>ClYp2bT=dNyJs+UA`c$L<4IOskjp^Q zX%0t>uOE$;GtE2$I2^}_$ z4&gw#LM>m6&Ku3EMpMb7+A=9bvtJP>r|`@sf7ppLH%q1H)rIVW*1~Zn#UL$F5;=rC z*GvnKez^w6ng-#l6jw*ZHAHiaG@K2Gya{PE|FjMFMJtKpHV||fT5iQye>Tsrp7w!- z8*!L2H&G`Ghsl?C8Pv?@fp)}6>m*~Hu|&$^H-&anS)Flh$BnLV%E%0jv)p8vJ0lmk zuF0^}2>W;sW}`Pj%24>pO+a0urbj{bG#M-68V`;=)kwCPiWTuMmi9)H*SLpYI4+ue z(3QVm_eS7#z(}#axwnjVv6q+rYx0;Qj0U0rK<5@%+eWlKkqffxG>(H%2=@`8r@ov8Oo zm;0Gb+#EGQ-%mI6{6#3=KoO5*ugq zK?o>Es0q93cmz;ZWC_IXW*7ofEnz$3-bRHSvy=Sl$Wdp=6eStp!qU*y3~@|UJd$mK zf1ly-5TF#}AR|W#4!bnZYO|Ud zU`1_s?<+mx`w~f3qVDkGqEHN3)QAL!9#Zd#H9;teEYTF_1oXPC80k`EIQ3O9aS1)Y z7Rj;Z;z=n8J&UgV17lF&lg)MX{1B7-Do>?`#00nlYaxoUw!FfYh0$}v zF*@}HEHZaU87MXzk!##0ETfn&{GI`mEnVEjT_}QGWx>j15>c+{&aVvgM>e`LfdHJw zR-svudOtWTw{MoVjm%2{%GH^Sn#?`2S2N2j)M(EoL|YC%CziH~kqk2zgnlLIhcw67 zQvq>ZlG5ZY5s`*LbXm=oHJx5D7lwVMV;P1rVX@J-8ls7YnahteZ-$!^W8Y_M^8t^c z=%wv|OCHx36t7=`3`OZn2x5s=Jz^6t8*^ zYl{DE);7*UF#QCWrW_F)?`~3*c^*$#8ZTlxS@Xuo`bu16xMN#m)KH=&CW!3+!WTSy z?kSD!N^IyXuCGh>2Kzb`@f6U%Jae#SM3QRLxia`~P>fxElR)CZv;fCYd!Cdgj^5fx zhn!~vH294XGu@Os8hn#=azSr3TUj|aE{Y3;!(-}-0u<{9-=Q?8Gw=c+E%azxP2B3> z4i%E{ARakP!)Pdk-z@lO=EY z_;GiBSukG|=5+kHT}#sC6oqK8$4;t1{v)|A!%ui*W6n}HI%OO;SaB+On*Ryzw^!JN;$WniB*8%L$!9R$aBxlZK7ZeTvt=!7yM~Cdp9e3Y7(48STy1V1VD!4j-M zm~i(=ZK7__h}CBj%0){VGsV+Q_Q-vPgpw>QoFMxOqXo}fcGicckpPJQhd;Q-5vK*b zA;ck6J7FPTI~@7ZvL>5FARA4_^+*qE@2lr%f9^0+#?&Fu;odR!L+!x~!aapiX#(t}=Zcki*VSCCbSV%BB*lQC@7}+;w*2_+%t7-)oIN5h1guap3qYEX zxe*L=iUQ*bv%PB(3)#}l37NL8lLQItkaZu_^^@aNEs1ZKlzc493pte>mwCAkEWeR* zReJekE>He+k{?roWzoA54BL0DLQ;-%Y*N~=P?z(DoiBjmQG3UaTB(dQ`orwDiA+YhI5*TrdE^89vgY*P0J%qE13wYVt zFUwgu%d&tHEN$1_R$56K?VZNrjJzY-?OqE^Oz0xp5Nx)Y`Ln|ycy_nAd@2Ie-A~SY zVyT$^fHuf>T^Sl^5KHqGO6z1`lWqBi63Ct)F!yV8pmZDEU2z9vnn>~qJ7?Pby2~pZ zy`dgi%WF(8?D6m|nCWGT|J%d7JvCm)z;(rYu)0EG1&r$+aP3EQER^7EP|n11Dla4= z^X?Aywu7#`KXFtZL>on&AvEQ-+}-U;o>ePpVxo6&QwrR9uxt6`AWHokjd2&hC;hKk z_1%>L2HxX`4N~MhgVF)JJUBg_;jUSM1@`PoD90iQ!HEN!Ch)tcu|km%0KB$2Y1qNV zz>RBgyaf#|iWUg)LDJ?pK?9HlZg*=q9AZnknMJ~4byTi+H=Yc5n44*A?1N|l9jL1$ z5?BfObssOJXJ($Ua5)~np4`ZOVauF%ICEywLxZOAuyK6|z$40ZMa?JeP{w_^7u(f9 zV~`G%E~AC2g%*~`1&oR%Y8;>vxtIe$!Gl+V3XghXK34KSj(E^#hG<8(iQEn9TMIsc z#amC{e&y54tCzLFTGKLsIG0iGJt2!)%>j>zfOd8HaFLdQ<#f%f&2HC4B@!v&kShh5 z$jwz&F;?$Nk2Qne7Ri)zID3{$ofp?%K7U+(2z|>OU@i&SKXyxDY*#vs#LaQg4I3vd znloH}3W zDcpe+YZ6AK#lT%5Tz1*ct*0#LA~o5W!xO-~XZKFa1uj0>kxYSMa&n7hDph>Qu6V$^ z^`mui@zIRX*|1;T#!vk z9Ykkwz4F7n!gW6x4tW28{Le{fUWCO7#EUgFvG&0>kn&G{=AH)GjqS*+*-c+{1r-sZ zEI`M%=;7owP9|wYdy9j53hKP_LzP^QLNk>nlD=B#JWF9}2dpc2C1Ev=-QISCfet(B 
zG-merA@j@tk~iY#h8l`HViUrU+Gosb@Q9WbxSXK-~fXEK3K2jWrOuppC%Itm`S(D&VwTc;dBd%zBmY147 z?!=TkN~c>bNJIh|xkfwb@~dr?`LpaWC%fXLdGhc&_v9X%H)s_d64|{D!oc2Fbghp`;=Z{!UoGim4Ke z35_e&Tee|n%Vf9M>6jEIA#K+l;3}3YX9N-AL;>A5VUmSQoQWAAa5Qso33TTl;D+G}_op_PeaCXVBIZ&uvR-l#NUBj&(eTxSxXs(V#DpKq&58|rq z#-peheJ40)u~UojgL4GQNrSY>7Q|BZ9OX1hLiV=&hT=SUGar^#^Tyi(fS*tTM%ymp z+7HS&;(qZCq)NPw(|82NRQEg>(_%|xofBUuGhZyViF5Md1EZw0NHToX=M-iXad=aX zd1HSf-Rj!v`Q!o<)~Ox~uJps;0}*E_0ID`Y8_qDL3q|O%GZqGGb=(l%T22<`P$m_Ag%uOaY(A^Uowzh$* zDB3cxS9NH1W@pdz7JRhXzD!Ulm-DX7Ja0{|wwtHUkvlDi6|66Edo-Ml+_p_`fE~^R z@8ZqC8-5IJZh&I1Ni_2$Ix3A&7Uu{zFija2CQ~qOUgXddo}mI&KE+dK{TL7!MS7Qhh;jY`51sNS0GeY zi$f%@vSd5MQ6fs_q}}MNFCC3gwM#eT+2(?I(Pl2)kfzM^9qVQa?q8!$ex2)Hy5Z7% z@`jG3kHXcGyC68Q%wzi*u5t-WX&Lx2LS2qN7*NH-!^pdk?McFYVJ?u8h@*+BJh?I?<%p*hEDY6!DO`(qe6jo>IwG3nMD zJ_{Lvk5d&xO}L{12+@Xke5~_01bOK0rTPDwgEG^-wxT4GJeYW}&8rQi`1_5kBzUdjOLzaWcJwZjT~pM#tdV(&Xs4zJDNR4 zP91zEQYH^v27)E2(>^G+bx#R3=VuP)dMP;?XFG0K@=h>7iRy?q(Akp5*UIvttY|65 zKu%h~&P~s{>#jTRx}|Rc99&^yir$qu?n)`Ih^sdJ0(s2jvRJ|ZlJU>FD}Ek{2j98h zSd2^{ajBE=QYa=b9%}QWmSuFD)ewwDVdoV~sVl@>pirtkq+P#SAQsMNIT+_9w+In> z%LfszeEF8BxXMb5d2l+#I!a+%5oB4h+!^l^lH;LJ^{9AxQX}LLp z^d}XcAtIx*+>z(9up;jb?WFH#*B&5&iX%Jlm1`T4Doi%|(&}|HFM#_CAkTLrxaPmN zDQI#hCx+&LBDfR<)QzvW#m+6R%g=Ku@zFOwuujC7-<~s=1|?&2+AMlAsbL56=tjh& z)1Wd^4{&7obxFSzfs-s`pQUS9}*rIXAMULD|Kb&l| zhr`hYtU+Q);Txg{)s7KTw#&|tQ0LS_%tn`Fie5s45=R2k)-|L z>|q~hLde?W+(Y!9c@bkJpf&25R^0tVU26UaVcRmZ0X2NDg~HSDFC>t=5AsL?9QFyM!EIH2j;wSrEpKiC44hu)3k00H%H_^>B~|D8VU4ZJNkg{fA^ky$fuR-1{k0RncF;tot#V;gGbU(m2j;bMdejn z8)4|Dbn%=5-W0uPQpPgyuaslwCH%?Uslv zg2C)HfHHCz%NM-v9^3*$?qrsy?XJF9WuDC#s*?5hnJXGx%Q00(Ww|k3l$;JChc1=X z!Y%urkKEAql@wE2mf0gWTK+IE4}7NycQ?4H5}DEciJHMm6}2w;?sE;$NoLF4{LMx_ zp=dCGGTrJZ=>exRdG^YE#=Lz83pN61^23xI!)fbWRpHi737hH0vC&@d0mp*1O5gWJ z(rC;62Et^dnf+N`xbO7V z!l|=Qx&g|PKijEaSb#m=;O&zAPCPdI?0MMOYEKAUI^i0N;mc&Re3KH5pGM&`qPvJJ z!Hvc;>^ilQa5s5-Ky35mQI_+L0PD@^QanrftQ7M`$j5Ys!3i{i0Y;yxa( z__FB3*$V-TE*27wSw!C&c`HqgG&6gQba8%XNGQmIY(_IuMCxJ@6bzEt5nD`BC#jU_ z21qM)LnF>fsm5$o-8n_aRqsikA+)IO!MOHYgW^JD&+VkzzN(6UtPOUi<5KcNw$l_S z`OsGz)!1EjbFwjbs{e7r88IRc^$nzO*2dLzT4u7gj~cN7SpQG_WOda*lrK?Z(9sU=A98g}k;P6Gq7%-(L-Zb|Lgbp+epuB4B^ zGA-E%H0M<=`9j%YZmB!hla7$BwoEWd2p6Iycu5O7zf)z%K-DXa6gi%XT|k25#3Uyt zQ-RsXO*)l1AiKLard?7TRgS+csdf@=O7}CDqUHE(qV&@Ew=0<(xibMBDpwi`QU>ec zW#C9X=(jZAq~KBG`LK58SqvxmOfhgyJaVeOto;nUWa_ALkvsG(t^`j;P9Xx^?wXNP zn3xyYW7&JZ1XGZ@zPM^!KwZJl+wD`Ql^(4Jee`_6`1 zVu#Lv=DekZuYl=T31@PgO}+VM8FWAVW)=3!PRT~eZFoSh#Fj3)>288D9gD&eXxv;f z2L>>G|GY_pWBrJPhDPIopWGO>&PRlXJo=xNvo*QatW1{bsy2$+rpSVlfFu>>%2H*? zG=yGEoFj!%6r1rwibVBd*(F8>Qq;<(6mf4uL*etIqaNF_TTx&vWNkNXA>KC@Eg92e zk6p6w&dxrco4OOd&Hm9kM1u3oKW4Gt+BWt^GccC}H`OL*g$FQ#=Z3fa|S} zH6$Wzys6*J;CZm85uAkd%^8?=tyGV3T>kS%5=+SZhF8#2oyNqRYfPDMZQRXJ1%Su4 zm`iq8c4z%l2(Qq0%-Pm_bcxx{|LX1SuvVO5(0wB7#f4|HM{XyEG?WEsxDL`!Wm8L zRa{jeJRDnKT&oLv(cSvW#wxC;k-kQ{-;wd7X9f+5@D+_XJlbt{-#K$4oz|gd{Ad}$ z8`q#SW!twob{?`&Y%_*|`Ndd=FP_VQn0a@#He3ojWqq?{y6@^xGj@WNx?$81B?6L} zI;ScMx*Q83Lt`7nNsCmL%;>upYqP@+{%{j-w@UF(on1F+4k%GjkF8T_^lKlt?!9`12lIU1}ai>lWI zq<9$|gymR2Z~Jn`3vWlF;tt$KDw^7s!=hdwABj7)iHC#;dvq)1a#SZoFDcHA5HS6a z{iRsb&Zl(`k9Ny>ST9#%pzz#~EG%ZkRC4MN5dpk)dS&r!rX;B|9B=&7@flB-&ZZ{? zSpLD;`8os#6ll_Qb;|I@Nm$c*%{;yxMi-D7Nq6?B1;CmbC?=N6assl{d74Y5G;1ma z5>@5CwSaiL4ng6f3T3c)_}Y=aO&Tr9I|>q{M;0|_8nP_&BVuWpj_K>H{<5Xo^fL{% zssMzI&dJ9s#aP*Y?Vjlui|PXy#OOGDl}z;8OST2EUDVChssy}s|9V7P(Zr=FH3D_vD zaKe~YOq3uJV;dbxXa-nVSN40llCY{3IOFO~*CAAzmhS4t`Ls&4fhi9$c3x*5?mGN^ zlKujBro2~yQ?_X_*#4|TfIea7soh9iy8uc;SsFJv%hCp%xU+&;IZx=`x0!hyo6rEB zMiGjs%GOsm%POG6Yy!{dl*~Nmju#^KqED1UI|s}Z8IEf%L=n@? 
zXJtvR0F?x)88KXtH%pEp=Pu$UYhW+{0;Y~(<%f_sKZX^Plbx}#7+`_<+RP$nJBvk=*&wMktN;usz9|U` zv?DR$$kZ*`!8s&P7|J{46&uu{!5ep>7fWPDlafhbuz==9r~*kH0zjKUhL)oECk;A> z7GVZD=ygp?WulEOZJN9?AaC_fubrjS%D@-7&O=^eqW*Dy#xtugEa2s%toDe$IZb+L z0WFKNluCzaqP6m=acuI-RfELdAngL12yvZ%=L5;t@d z3R?zMvG3TZbd5SF9xV*Ie*BfBL?QNX;~ZA!Jd`mebL-JCjQTS;myNHeIYtYxierXuJ_L3% zLFkE`$0!M`b=wI9_~wVz`~B11k=2;5L5-F6y9GW&s<mrNHIrq`T`K#1Y@z44@zU<(Km zXGDW#o89y<2AalpZG$!_B1urz`v6%x4A=KNDC1t?XmFJ!Fshw$rnVoRV?}(RV{69j z^4Qw%$w1RiUvZx?MrE>DStiE3S8ZJK@08SaYi@aL$uawvNJK)qxq7`AJAvmF9~Hw@ z=3Sw4@7;*^@tm0zEh$6RM!5b5d{5aUHpO+W7ilb&a;vrV4Lc(xRXSlPmlEO zCtzR&y%clD24-eDz_Y4|O%j>W_`JDOVkPHRm+qMpPjYXQ8y3j-dgtvjV87!O&OQx^ zp%o)F5NR&UWVp>B<%*KLO*&FTRrIR+k~3!)PM%NxQMzaF%o*d17=*i|Ce{J12^Ni+ zo47ZHD%YL9<+{C$01adwfhmd135VgT?`U%N$Snn=hsKy?x>>HgfP=noCg2zT+Sz8V zU6NlAY+cyM`AH5h;%)sW69Z5jnunGNX)LX%h=M*~(^KM1n%2!#aXgU{&Xc!CW@RCW zY#^<8`eCV9F%Y5jYTU8Q(@=9_tjj8tI}!m{ls3^M$3Z)u#)652-$K%JCQdzpka>gY z0vi_6^>R&&+0A!FLIP=A>)3n1ENINZwaLv}P)a#_mx!|2EB#_xz19U3eQ!DvG+e7H z#VwGLjO!puTid%qwF#Ucu6M@JeU0slZn>uHa5tIn~Qn<{Z{I~S#6T03ub)lE3YZNFyW zIVVdWFflS;HO75j1Mh=2QKv}G+XMR$Xcs|D(;?~P9>Ccqek}?B{vUck^O#HE&_z%XXmw#5P9N{z(3k*NYhy1veUo^u5Xwi%2=}$vEx)2Ba&l}`2Ggcrxhbj;>Q1v~B|IJ)$|%_# zu9l*k=7mq{`xDM`+ja7W$pdQ=w8Ux47CgUFMuHeFbW3%s>>wn5tl z;W>92&bq!?F)xzUH0w)p80I)gF=&yr96n<~2H0~p%Ne*r<54(~ZHDaxG-IdXgtRnJ zX<;rZjV}&NP%*jJ*rpQ6{Nc zVcxj#DvkvHoR)4oSI8UfmwIZKwjRY>*41dE#sFM`NA;X9fkC#_) z|6f!IxyJ7AibTVh0?9C393`%|nBd zU~BK?%wZ!t@+MM7$K6|xry!GPIzZeF=UMGLB-PrlNvy@w)V1j(W81iIag}?3i^;|i zjE)XlCNs*QHr<%(gkl1oo_E{x@9sbE?&sfd>`4Ev+wMLw_acec*2mWQv*f|78wcu~ zt+xRME)j~sox!hLNk;^`Wo86=ee&xn5r4Sn8Lqe_4LM2+EFKbpS)F9o0WCmd6Bv@+ z4vNE!Ne$;Rk`67uubaN(xP^n9m4{st6y{EyCbb1F&U4toKpe>eL`bj50y|H2cZRS~ z@H#BdMIP4ydsJ8+p>2~~`4G?zL>z<|YVRUobtXw^hr=`@5tgGKq86@rpxR-{65Mzi zNnbKnI$?oWhK0tF0;*%up>33M`wY^o#&QoP&FjK~xv{c&E*r`Kn-#8=T<4IX%@>jw z+0mKh@3Dnipr3{do6le7KwMZX9(&W_b)9qEH7-pxC3+3O%{Ef zUEhcGJqCyY%L7+G?!C<+jMBwZ^pN~ak92Ppt6{n(P0Zp|fV9e?z<2{PoG8;s+L4|z zZ@W)-RarAPUw2L@kGhjPF!85WDK(7$v=RGBau$T zfr4Ph-Tkiqii5h!xHEuGdBC`9Ud$M3Od*KIAQ~Qu)sZn%fdr~y^=(SZX_?WAxT)?L ztcl(FU^TgY5jos~y@V0$hQoNk&9GDhL_r!90G zR0nl(o@`TtsR59dSMKL(csagV#q>2f-NzA47*T_KPRc9RFteh~ke3=3c(Kzgl;FiAF~2^w zAvHr!2H;f?dzi7oNXt$4%-ZS#$;d%D3tX@ciXB4igCso_NfbSa(~E}uWxB`Oxp1yY zt{ir|Pl*KaLyWPB+jR&qsx*|DFYYIPRpV5$ExQ~%)(}b-KIq03lLtkXD-^)eJz{QN z!N;t(>B$)V1qUZ;3*%%RawyIL=1`CKqHpE8^501Y#$v;mAH;rjfA2at?nC2;N-8uP zYLR4c8VG+DYeb?E4exz4vI(Zus77Q?y}9Mc-x96%@3~hA`zpMileuv0L7x}I#Crly z7R^vzcVp#3)W7FmLdPs=UcWMljNU&Y> zJEJPcRY82ZFfs)Vq7 zt|Nsw@;%+7EB2sSJTC=R@_d3XwhMJZRQ!uCFHQOsWQ{!937O)0c2&`xs19(7tP+@1F9Z=V?(N2x(xg?qlUXl3Jr#6LK z)RC4Df)Abr`V^(4xE;5xs{@Xx1vHqo&mbv@Q-z;nl?{!RjDf!W;;yI-bFzjuGGT9a z+4wsuS-^DD6I+dt2gPWnQb1~-bsrn9a!6KZ-|u2PhP&57r~(A856 zT>fj2x17pA{3=d)1Dd=fWA94elfJ$JZXc3PhApuhmkg8N@vJ-V&`J~K+*?~R0g%MS zu-VINh{N*LL-&ex97~#KHVu?sweFM1NG#BCp&4$aCu1A-=S)b38vv&ky`%i^_LeEN z#JXiEqWE7Xo`*du{U(=z2h-PfxX9Yl>K0^E@DVT)dTs;{4gpyS_kZ#?G}z5BoOunv?)w!8B`mp5$G=579-=r+2;J z1-E%V8rB&kC$COpnq$b8b!Q7aIwt|AfBkdrngj9a)PiIm!WQLoXopEKpK}e*JpCEB z8FEM>TgGt3wZMaJk#n|YR=HIdq^cIwoRAHBl;u6?TB;~Vt>T$Qa*La&W{6h?b*+F1 zO{g2l-2kXXe7v_k80_BN*jzfbuylH5>AqT7lPHrr_RpPKIyEQ4maGZ%?bi_7XtQpw zp)Cff3^0HiEUlD9DfAL1nOBZmK$0Q;X)BtrD$%S}rwMk~Ty&$Hr?cWPU#SR@tE`I3iFLXjzJ%g(g8f7lV+pvP{mhl8vb{E7^a~%PWIH=Z= zDMX-~Fo@VnEc=Ee5DXWUA3>bwYFUx&!u!KWNia}_{}N6p(qB>zK;C7PGA`ppp0juh ziW^QJ$PIY(fqwdJU}3e?bA{)q`o!{d^a1(y6qq!^TM`!sr>IQXJrp&Nei>?#%6pp` z9=l{30P&@furfHky{58c>)28QiRdhvFpjXYLnZtX#k4GBi_S`=A;1!ii=tIR0m800 z^nMAs47=w=H=5ifNGcHsXx~oB7}WdX%riG1xX4}01aK^(GB7!U`|3B&g?iJ|Y=*KC 
z{TBc_c`}b5%bZ1kI6dmzL_Q8Xh!WKaNzW+^b^3e6(OLeBn<5#sH*}n3**l-o5EeZh zapV&Ydzb-vqCw30r~wj%6jiytq{4o@Erxr@x!vQlgoh8092qw)lzZsMQeWwV)MI4% z^QbzyX4>0H4ZaFT4WUm|+^mKIj&!(ER2q*IMM5V!_321PhiGky8DG+- zG)D((Q0D6}0;&D=shio$J9pMs!8(qRu>2A2 z=JY7G0Wz;T;n)G*Xj_2(2jTQUedM>_h&Mw;Mc;iLS{H<*`R%tm90%n)hr+pEU5#}1 zS>a4xCoiMUx8DlNe{mNsEho|`O?U2Wgvv4X3br>()@O#6Eqpp_&RXbCH1u^UNMl#< z_4)p{-}o|7Nk`p+f+gs){UPVREv*(alGCa})qIr-ov6LqnM|TgslH;5{$a)bxUc4f zBNKn5Yl)3!%~Y>imNA&hV-Ws`^75?%b*O=0+UbX;c*I%09)E zVUHc>w)jF<@7&qfsfrv0F%?>qlOVnE`5Cy&pq7sv4-N~P?(|AWH0Of)g@GOcRYT7HJRy& zQ_R#FjM&YaVk7v$LDYq!f+9Wq5L+w!t4 zYqnUh$MM(VOhT9eJ@JDF3`b@N#vN=JH*zNAPq*@nXmQF0;^Qz53vm7Ye3^pDZ8&YL8zk?s%6~QAs-mEB%X2gPje{l+10#fPfT5l zxg~9y-36#LSAy|tkZ!Lt3_)z9NHiF_O+`ncJ$7R-aTi{gqgF!$AqLoTOvd=u;Igz? za4mmhFnt2m8mc_IF<9&j12n!hIJ#&2G~{YzE#}w4_MBy~TJqt5y#dDZ$S>j|{1JAP(9XrLJ{-OqKFg0BL)cTHpfE9uPA15;@IuTO{h}K3 zqpWdXUBr_3zcypsSwZY@0cUdS$=#K_8fK9xEI? zqlngF{b@^GqT5Fs+-kHT^8UCN^+}tH(B*r7w4?h zXys9G*mm%^!(>YguCL4s%&lQd$%HLWdQHsT4JS)=-eZ794)y0iy0JM?Qd!<-i7{EI zeT_R!*n>Xk^?C|oX#ZF1Lp+xElzO% z0lr!P#n5`Kd047MyBy`+sMhv>(0`UA&Je#QaC{yl6F&9ig`FSV&+Zp@&;8ADFr@p7 z`wRcRcPnn?d-tvTmVe*7!Vj+Sz57@9)%Wh<_wH-=_4jn8SS*l=P|9$? z6Gs>O^MyGfkAJA^K$aotGRQLk;0U%mWI7yep?T+EiX%F7Ja;M|=T*g~+8o$#d>9g_ z5Qqe>0Xq24yXpoKA$$PW#qpsU<0LG_J`jbVDx4e#tHL$py&i)Sb}X=K6*Xl*27ja3 zQnW2Vd|vq2D#gSvFv++&Lfe@8ntl7=u2rQ~_pt@A7wBxmR>KEs@dO%zixJ7_k=e{| zSd8%Lm*WLNW<+qbT}}oastm(}lY=L!^y!?gGzDQC;>2Jw-3&UI`#my)-UCg>OPn0k z*LE0|`PNA97^@nYTV6=l)MQFRS0@b3phL&nyoH@5B8a{jM@rDWv}_VFoG1?$`3>N1 zV0~+%usWIkH0BS-J|i_Qrm#kl=wAD!cN033qB5)>v|~z3!}^gJJ>r!j)Fox`o4(;Dj1G4V7&0M+cSh7@#hEH}VPwP4*!AT&YM3hm82fJT`?U1TFA>DmQygfc8~ayiUu-O~gu28c551IdELM!9zqpPGVgs=5YN5r~eqsM$b6XrZN(C=JZ1TKEOLTuUg#EkB3`Y9YYm2q8KxP#XZc$^<3xm^Co3&~ zL0n%hTI(x83|=s8VKGWNJRT%;LGd8QptM*U!)9p5=@29QEI#lT-Vh~f|9*s#o{g9{ zhVp95``!)GF5-D#;?JOu$pP=nAwrfe@q>qsx`4H0;8-YKjTv(gOS~l0@-hO}uMS6y zCsQ;$OXTXYNJ<|N({!L zwE_6wA(KUlk!kov%`u?odc)yhqSN3xGhTn;$A_S2ptWa(a&#!WoeFOk9S?6e9*+Mj z&rl6SGrRWyyZRh#6qCY6XWMhwT=)&&fG@_Br)&6%Kgr%vK^?p9ffgrUm*i#v7buh^ zp>xK*XBP*{55KubpF5l8VR%U5_itAZ5V3dE~V^A3t(9J!_7C)Lb;g3XegI zW;%onc&T{q$7hHina^5|>4$tqEjwmBn{S@XG6nh^&7 z=ymXXMQZxwEt$|DW?+$(95P7>mxo}aWy1+xoL!%1G7b$(PUPX3dazpsPCRJOcS0-I8nq?!M64lZ?p z{}8I2O!Y9s2hPu!YtKaHij}DzMX3BpZ@+Bz%`6x+Q$&kIDi5hrGFY^We5Wh_a+vKd z%+wbX%A|F{5|;gBKUcGbLL>9`6!r9Vcr$RC!77ynde0>PYRD%QmInm7%yI`Uu=B^G z*p$5fgtO?Mxs}NM1YK=kx}SoQC;ZObPbQ-4R}A1!iT>)RS&v8NklB|(RPJFAfO#z% zZ%`4gv7YSLpVtd7e))Ng%a89?UGz`?=HLFwWr2}<`0(L(T)xufp>FtliON(;Q~t1M zL7>ocq5(9;%e&ECw}sXkBOL~>s|njS4(en3Z zxV-!azsEPcNj+2UPj}tbUDxFL{I1)*i~923U3d4s`{%#W7(W07|K2KnE{zpS%@q4O z^4Fl7c!s#tA1SFYP6(IbEU2H)34t_QXa0Zyj#7%4d~}W z>z8|09nD1D+}GDjlZh)~SvViF7og`V>iXG%>^-_4TsOx4;h=a}ue7)}s>R(_qjJ({ z9URsgHRlr=6}l-4SmY3zHKBefaBN-u?6EfBowF2j3KaS-Jo2p1bQF zu0)14VLD>_?nf&kJ2~*^cFvJ9z60*~h2_Bu%i$gn!MbZkATAGktI>C!*!-Y*$|QYh zPAQ^I5V4oh?(WgvgJ^rd$;Af$K~xG|@gUlZ8*V-nd%kBLM900eoxyxn4kX{mIR1_3 zp12nONxrW>xx)WOT4h+LtTDvd_-&frBf15cN?D&!RI(6^+n4g@Ql= z6nqDIpxuA(>+JnSSZdn%XRY4BUB$ZL<#(QYc}{(cQlx?7fUk74`rhpVhzLRSz>>G3CU2NGm07=F{1OsJD8)`Z8V3yMNn6 zf7-2$>Oz5+kudy0RD}y2&5sA&`NhlV`QPt4Hocr*!RhlG6aKjEVI=S3F_`l5`N37R zOz7iAbA1o7jH$&hL!xvyLcgR;YlnOeBcI=82E<9dJ30^O?@C$Rq1=13`%!m%ug8y; znHf9CG4(`l_4k=uz0?Jfe|2KS%KymaatIh=GdUyaY<12aYX9uBp(_~-ls4~D8(tSl zcB`{%{av_Ke)%bFo5_gZ1BJGUe|30IeE$}w8}1LKcl19MDOUU~;el+AYIqFqDX+95 z0UAoE%eTC=t)GzZh(EWN5)?n0YD3rFa%>gmhD!;Kgs#`ZPnIzlMR6{uO zrhEY4JoJ6PZk{go%X~rWty=_lIO^UM5~4@By7s#Mp^R=Bn10G2omGrSZ!}Yazr))G zZK|HVAEqp-`*rmQj(c*J-mkwx%4irrv0oQLemr2F0JnSpu`1s^w_?D`;Fp+g#ICfA 
zYQ$c95*MY-;4mq1U&N2MyCkIK8XKUYnekJ)5doH}W`NZEX>q>l0hWD;MmnvY^7+a3IR5kVjM3j9m!#XU1#nRddxa~`E>K zY-w}wLe1VW0ca+gGFC7Dak8?~cyc+O&eBPs zs(eW@pH9E2A~P>k7p{B4sJ1UG1>MEOkp4mg&!^j8*A%f?GoDt}u}Hr7=gxe6Qw4t^ zpZ|ajjVPZ@qoSm2BMPCyzN>e=OX&>GNry1)O4Y!+3dHoQOV~8xI=Do@P(Qle`QaT-2*f3?deC zzc{+e+B_6H=VX6tFl0vsZRuHT0KB5sN`N z;FSD*vuU25WwI|>I^y&Sn{tSP7z-^ic(F*;M%DSO_5q5>!XKLD`?S|RzJO{Pp^F)* z1qC`dNca3<`-88Sr${q*81Fh;ya5QdBAL#IJvnfB43>I@S3J|^C&IyaW-BeKnu)l) z-eU0$cxaqOf-kE7%+=vU!tC}Bngv?>2OW>!mZK@LJhRv>u%cvyn8`kO3*hefTzmWW z$7x{P>oMDC*#nVHFlDjarjkWUScl-v)4=R{kV!(qO|6f&UQMmLx6%xa=nK}ZUchf5 z)~se;ZE5%=D1Fw=8%4uUGV! z(A4%Zr}{CM67%uaFUqFX6Vx+rIo)_6BjM>;vk~A;(B^P= zi|8divbe?cB!-=4%XOSl6wMWWeEIId&%dBq{maT%pMUt~?$*Xn+6LXe3Xm=NL+-egGi4ZLmQ}!)MRIJiN(OCae(>A4Jwuj+E zS+SDVmZhku2eJ&v#%-lHZTErQruiOEhG5aK9{GD~r@vir7as44gJaG)H3@A?Yf{~ZD7h<=x7}ETO z@7QgE$>GK%J8JFaZ04tJGO43)Xv+)1)!4i?t(!YWRhz1vxO(J@U|gNK3LG+P>*zf0 zJ0@%!i;WhzBwM`RyO=W&szNP0oY@Ik)6*7#567G-43Xm!M1EnKxY0n4D0$XPh*M~m z@1nS)vMQK>u*`AU76YlUE{$uuz;=zRFJ+OIXoMDtiJ)M`)Pm;-YJbxqGD8g>GaoD~ zo)6DKQ|dlH@XP2Ib(5{bZQ?8sksJ*!nMbg`_RFs0wMQ}^>N=Tl1+u;_2v^-dw(B!p zH`dU`$?mAEB-0^&{5`YHqo`>tRzfM#sYZn~M1IM256q`cd-}0JXg7$DwSZmZ0za&Y zlsqoFqpeLpumj5eWB(I7ptNnv{q~zV0AQs3=JRjA{rLIayLY!?yua(=;v^Z!?ar3t<@{#R%O4rr^ooWcA)JWbyS;~96VYMe&ZbSb`e#V!4MNXLT#cU8_|4X}T{;>UX;&KJzHF(A&?n@5LXuz%BB+b{tYo zW(-TF3tki2>R}sdysP=Z3z9hg;X#F8qluM-Gr~lIpV50hdoJCt|3cWK2XLY=wuFW- z{m_i~d^+R@)k2G{8T-ZEI{s#3_#K{iYmGXkmv)Q$+cKK(6&tPUexq_owpO)V*>ALp zTRgT{k=f&d2XmJhM;p(WQ%*-TV2$m;l@R4RSQxZp$~Fh!7XetUw#`ylWu$&l%thRq z#$Jmq=Y1Rd8MW2^F*uyIVgtLG6(07Apll6a!6}v4M{IYpgCag$piAZg$!VJsEBxRJ zba@uv6FbuqXfMwZCKbucaDqS1VUD#f(U-q;O zXYx~J!*uXs)rM1czuNx6ZmPY4+MrA=gDvNR#m(owAG_UfZ!YDar84_I;Kp#G&&(Cn z`yuI$xoo>nb&Foem`a0B$LwLigHrIZmz%kl`y&onR!FegDvMc}{LSpWd5UNd`U?4+ zROSL!^3$5LpLINS&0AOe8W;lk*QN|+E5UWo*}&@`yo!)1EZij-uUUS;ZN_^f>x@01 zbf3H58DP7}JJ{c_#`OrRJH^03M-jr?2;Zgj=%M`}BmFuf?sZo_4t$3FzE(2PZ@Mx_ zFk@`S*}JkUyG1W?=PgZikoDCBUVDwLhQr}HS0)PPNJs-0b{^`gk=0Goi=KWkA7}WL z!!ok*L9vYdE)CEt$v9KgZfDLG#EhU`ap&vTHw>gC`tlkRTfgg07UAIy&-Tmdux4JI zv!jTD=WWJG6F{Gm6VpI{Yr2qAZtUby`*dX@;5MD_8FQqD!Bxe;b?_qmthuWXyrUfn zFvNW9gG!T=aVQ0-d>>f398TQ9P;T2*y}uY!I~v8h*9bg|AU4unVoCBQ zcb{4D7yckCPB9Nxeu-A@sf2s@P2B(S{*OG03u3d=)$H@tNT2BnntbnG^zFT$@m9fS z1&5WFtzW*yC+OX(UVOLmQ0_thpUO>@rJ~;5O93bxx0n0N3;&1J@I4Xw0E`+k$<5{y z<+_QL6U)^IntQ%*FH@%Pag@1w&c8$EfJi7vhc0MT@Vc@ej#`TE}FAy!D`s&n+AS^ z#|q*Ct``K}Ab<@fJXi%V#QWFCn`DvB$lF#3wIo&WY+1n;1!dTt%p##@gpbg&R~L1H zIt9N!*a!1s(I%$IgYg2wbx&SDqcaf3$mvb9KMbMIx(;=MeKUw&m<17;7g`dWOauy=UX)5uE>WA%5dsar^& zqM)VnV7w1{xNa)`NE=@|Y!!Gk{yA_|KKO{qtRepvUVB)ml-8fJf;4os&`wDC83Zxr8 z6lX+7kFm)SzdV@|%Izug89XRHGndBQBriw4!jyV})xp?cak6Zb>oDb0p7=563oC25 zgD=n-0)($j*EI+dQmx_U=Gxc13Af~=%m`a;TE&JoaJPKO2LcP!%gKG|5I;;rfkwH+ zZ|k^txQ`nr9I+DWl~NNI&l2vNHT*nxYeimc6!*);!?JmAX7gp+!{t?Bk2%akqH5jaac;dHOhk9n|X8lU6BtKCB>v9MV8G{~uKs zBiDF^yDyK$EjHet)D9~9$t&rn7H+SV{j#db*p)Sw#}tM~aN||0eo!gd*7v&Y%rqFMLA z+?eJ=5aasv5`5kTKQLJwM5G&&(sVQLw&l7JZl~w|Mq2~@9SH7k_)xgf*LiO^IP*7R zUKJX{p0p8*n>ilS_DN(-$g3)&`N+?ewr=rjVI%A}G>lN@;{A|ztf`1QYN_j}p~Nc> z?c~myiRdF{9eUv#N%U}k`U{c8C4urBn&$5mm>r1bt9~iY|MCIf7CPAK%&)|vj@8q3 zteTwDgBXM`FqG5+H%Kjg!xC?QFKddGw4FiWFeFom&hcDlRIf~1r8Pti*49c@-`p!0 z0)!*>uTeUiM#i|u&FCd~@Joa&zQqdzXbmSwjnUAt>B_^kzed&a>6|%eGA&#v`aqZf zUgv8A&U^UKMGtNp@-%Wm>|bP~>y*tG7XSK3w#qs@dQd;!fM7lcoG!dB?wjJK07FKt zq2laOR*)-X3_BnZgB0%Ew=WDEZw#ofU!EChxLtD_}!A3VD5UeAl?$1AP)m~$M1~BQG7IGa$X`(W~Q$#RsE)j>Y>)voX>YiQdr#F8% z5Poyh)$9DK0@OkeJ|35Fm!JTe58><{-F@hCogqBguf;4AfS{KXv;{(Rv_u~kb3&!i zD=gM_cQbqV|FZY~@jX}n|Nmn@&3<|MJq%9_^Lt|$hGAnEhWSl~(PS8g`8^UU)f7@x 
zL@cRRq4`lQtxQQ1p-_@)BovjR`kwPRw=+DR_h*lD-j~ni^8M%ayrkRnetTTc{c%6% zaUMUOzaE>IF^+re8GGy01o>05bKDd+Qv$*A%4cb%&)7?UbF567ecCc>f=rEFjW6I& zOKlt|xIa^_ac1TWe(j3%{B|nhDf^xrd&0(x|9|}S$!vb`XZ(vXOlO*#kw2NjW(6Mu zHc7<(ojEy!AFue2QrS10*)w*)YsmIHHn6D;4So${G*4jPF=k(SYX zYTo(nZiC$-HI>bb&Ye3u#XlzP&Hs4Z*`IypKgH-iek^;>k-e?S?yb^?&BtCdr?7wN z?0sPNPA&V`y&t=n=sR^5drNqnQMdjKXZ@C`&iYZQ>~}BqBU+}mFa00;H4NKfCh?yG zn_o0#f47a_p3U4_`K#d3?9C?gJ2DN$TV%LtUPCqp=Ff7QNo6mQ&HGfQv0rYR_qrB4 z9J~Bwso7pC-*~%Y+yTtnZQTtBW6W5#PqUZ6?1l|&Q}Flk*jsVz766o8pZz73`eTgx ziDTGVc{ro^Kk+~Q|FP_VIf)fAYbyJ?G8bH#%QsNWZ!MY-yLfjRgT}ODFK+mckS9*) zS$c~g_IiU~ma%G_;@vf~l`+eFjV+!JPEHn2?liG*F` zvY|9*ia5$PGmr7k+2;2`raRe{Ki^G}&pg3^C_S2r7aRA0xZlWK$=R#lX09&#M(FfZ z_61J%9sujLSP6T-kiV#6cW&jE^XyLK)8~iIC)^ z0Olnb?60n{AIN9kxd9ZrEf{~xiXWBlJY%;DlV$_<^FuYEk-(-IGk3_mw_l@61b+ z-MFmuY3Z}OR?P2nCV{)3#@umBpRqfoNtE3}O+IaQ=h>a7PWH0#m^THFOH zj{RBcOk+^GF()IF{VB_MW6)Tmp?MTx_kraX2C&o2B!6-p!cam;EzRr?9hnY47gL;ktF5?1w3= zC+ychnW^pD^QXAbGVM+V7MO4Q^SuLhY0N)sV0SHGXKHpGi$GrXh(+@9NJOPqO>y1$ z>7Bo^g6^^pRt@Kf(nDu^!&!Rs7j~HAEITcgjw8PwEe-fz@nhHSJQsgn@Ic zdfLy3n{C8p83W>te(~lXk7MugvX50uYcb+xv11gwkqG;_E`OJEgpthNMw)3Pvmep% z+uh+a7Z1ZQLfJo0_AiY6;}5Pp#>4Y)UxVFbT>N|N>#C;BV=wkE(5Pyp85K=0$H%8q zVxUjOowa=`gf;akpWMR7(X^XSxyx}r!Krf;+S&BrHjrjK{jTpzD9i+ntcB|e@jmiTxSE%7n-`xstB**`z_FW=kn%3=Rv z*}rREhSx^+FOmJb;mMw}e@W~gE8Z*L!|+OD|D5b!5i2!~{j0?OIm}88!(;9M!y~Jf zha<4Ik;J?Z_Ag=h`G5Iizb~M;-~!K)fi)e^dpzg)l-FwSH9l*7pDwe`@0qgC`mYbz z5a^#2Sep^S#@A<$9slCQ$x~l`b^1(P0B=2#{Yz*6@=`?3G%-Ym5f_~2k@6ooeG-Cv zdIozrD+bnP;JvAR6MTE}RM%Mln@n=$vM7FDz6 zEJYZb6vc}~-Uz;t`6DhkHi;{^kpfKOchhzm|iCi3}vEoF5jCEv+&r!H?cx$~bEd9wUO z8kXn3>q?}mG7R3&WR~yT3nJeQo4R;E{XI+WE$z#%>=_Zkn&rS0|jDOnQ z0}qUv-?3~NpWoTYStNIEK3A+8)!%D_C!ZgsTU3-r<(2!*wauc|`N1e6E;#4E_Z!do zfwdWOXXBLaIOe?J^}Z=`=G>Wo|2?n0lDvp%ZEm0aEZ@*wB44;qX^k2GwVQaAcn8+b zuvEyJ^7;bVHjdaU^5x#i_uuo!D~cs_o!9(WJCfyF@t(+c_|E)sJms<4)1PgJW5?qH z_sY~_n~%6) zXTn{_0DsTaz}n6PF}vad?mU-#sXDU7a67WaF5$=)!|KTPHW^2@*u0KxaY}Gxmw_)1 z8;>ba}|3yvMWQYK*ibDk?2jq4l7AUh@3)p$(u7q2bU*(8kav(5BF4(B{w<&P#qv{#Z$eAJM}H#P7wi)n z^0`pQM?#C=5jwO`s1tF-+ag|fSZK(8p+#t)2R{6`@H4PHe7A@TjtVWo`g4znIHefN zvHn=(&-+aH8$T7g;ys~$9}0CA3C&0Q1aQUhvr)&o_XU@6P-ynMLeC*z8rEx|{!>_f z>JZwYf7g(wq(Jzwh;tAdsMmph7NP$U9}7O~n9w5Bu>!sy>c5HlPoeH4j88VkFGr5s z2O`hL146S=XEE~UV|-R%d))+=gmwvt&td%`s3!~CIU4;5$2de_oWil56y!gJy3){J zC)Pg{<8TezF$>${T7a0R5kW$YFrnFHg$`{dv>oPs2g)I@DEwxBzvO1PU%2bvO_gqmCr>!-?2gNAMfL4TT!P z!VkfEhGIX?LmdT8#d0Us8?jx~<;WAtUx)DHR7YqhXlH0FvP-I8V~Ib?Ey`I zitBDib}z)eq5Qne*WU-qUMiV!KPbOO;>-I(2S5ix`E@*BJ_tG(Is`ftIt)4-Is(dH zXYlr;p!Y#XLsOt*pktxqpsCRD&G&^a&x)k~lbQyFxbOrQb=p)cap(~+}K_7=c z0nLG~f<6g-3c4D)2D%pdG<2P*5!t5P^}lQ8K(PJW8~}&0j=k7r^T0?kC!WJ7fS+j& zjKj!==0I0NbLDdOf|*^_-n9ntv=Gbl%>li$o=WD#X6u2UpUruPQ3yQ_Jq3+6Hvp@{ z&3gRI4dO6Xn*DICqmsFy97dSAf$r2%WbRN7BL@C$sGqq39YzQ=6dDF~SI2SGaRXeG zIRRLHXcF|c`2y5oq#_=9fRA6Sk;{d6ILsZ=VI;r67seVx|1sZc_*bfIa^rda%wg;_ zIfs#f&6ewtYTE37&w_i#RM&YHx{)t%7(1W^(EXc) zfBZ$E7d8vsxCP3E9hb~*Fb&)$e6xEj-v6?QpX0*fknLPuxBI$%=5k}*0iipfJE8f| z{m|Q=m0=*8mxxO~;O$)Vo;xD)xtni+pNZuTXeDS2Gzpq~Pwcv$f?|=U5LyJi235_I z{<+AL0nLPNfU4$+J}&aaKx3h4P}Mw@z7cucjbAL5$3YXI8Bo>rth^@HlLK81&4(62 z4?~Mhjo^*$c^t=bcl8$i#&L&n7+MVV`(60fdY$m6Z>!4yr@?0(e=M$5hyCBlpPCO5 zC&|$8z%!|GL1;_S&Id+a#F87_gD+1qJT2|za;afx=L@b0V%e^_87HwfNyS69lPbBE zXeaB5Ml88uHucE-?a@xw(*?0yPn;RMug4p8CZL_Hr!QhzPZILTywaD-+wtcE(N5Mg z9Ir6wjaZJKU7oV64IiV2&`yr?qlo2tCwITMU$QP)k3Y-7$LKM%ljFP^u^i`b zEd8?P31B&RO;4kp9G@2uOMbH%C&|^9?qAXfWI6blC%FVEA$lBb;|PcNH1JRQ%|*^;NHCC?CddD^nZ z>=DBL@jU%3d4^f?q}t@EhCHJ!c_v!&Ommw@ob#(A&s0mEIhH&NZSvGWo<){C4_op) z>MqY8qMlIXc>?X^aqMZtavp9%9vSob!XMKy&NbO{zU|jr>Uqgh&vr{ack+mOY9Y@n 
zmOQ&GdEU0kQyY2qS@L{n$x~>_Q@UTY74dBA@lT#lt+Cy8W_^}}A1fPS{c@bGv0a`9$g|2k zKRL~FJ+lmJ?CyF)j(0=ki9|iJU(tx=c8@XRJJ*N&cx*m!;16d3AG=sfo*tGwGjKkY zd1W2)I-wErlk015_Ai6GxSBMyY2EimcIs->j}hq zQhyQi;#Dl)0euC!9r`lV-FkLo`7Wqj5C85C?@z+7Vm&*t{4MC4&^MrYPgW zySAt=#3@wv$BB3cG`fyhz6H7ydKg*^b=RNcSnjSrXR!Pn^a9kcF6x7ZLY=0%_9vNt zd4O&2oAvMQkI_KHE1@9`MZ6Tc8oC3T2X)t7Ph8p3#9@qX1 z9U|&<4ihT3PdMVk(2U_?`FyC~eIgEohCto*$K85D(XIe0*ONM0zZ zt?!SC*mYc%P7>=chQ>`0v3Hu#)cb{orV9;+t{%%{*L;O&7d=t%JD@V(2D2Uee!v4_ z`A(?2eAmVa?l`zpP?;|gbq|H6Lf5&eF9G!@LQ|l7SZ~;T&(4Rw4NW(fyY@5hDbx?m zhwgx0Fx9o)hSj3oM(7r(Y*%c?uI)~N^IId@M?+Jg>8852yS85NH=vu$enn)Pa@YTw zz96`K=uMM%%@?&*a66!KKiY{nVVhWf23i8W2~Eir%d?>F_M`b&?(Vpdi{(3@dC*f( z)%ouo$ge=z=U`CTpQUAmzXE<}kch*f(a;^xBB;CmY$+$&=R$Wtk3&`KjU6EJqzn`) z>rF!(nk@Wc=o#o`s4-Y9PldXxHx0|vp&8J1P}O>~yYqKo9LC0>e7&yoNiIKbE_WCm z66%cLFM|U zAf5)D0i6k*4V?>J0L_Ljh01<^a##W&iUKzX_H7--Y;X=w4HutY-0e2fpaPQ`$xWe9?c`d2zs8?&^#FyZWM^uDWhB5`m&!NqAvNqi4kQO@AWYZ7oT0{-+qDc4Nv1NJ65{z?D!b0t!*HuLePVNCwZaPc#k_hqS{Wd7#C9G` z_dZg7!*L7h?mU)wQ*e3ErM~=)35T(wjL;%Kp$P#(FI0pdDD+y8(8TK^k0Y4JuJhqG z+KsFv_J&{XI; zQ(g01YbrRSnNTNmuQ|+)?51Yy+AadzVQ5-&-rkkVixhbap(Rk6KddF)?naDg=iNzY zrOrZ^n(DfqxGrLO0#wWcN4C59;1?(OWavidh6=)O8ZR^#T4<{4dSiMC?hJH(ZxLTM z)s;7r#PUkeXs8^II5T!_mjJE^n$Vx;apl4WiTu&fkx-f67)-az86(>HjTI`d9~_9c zK%>Wr<#EttXaV#T)ZO*N87w~sy#NhN6?s*!ADrN$zvu0Uaj;n)ZnM0p&GHB=zj=mU ze-xI-xT!B1%X8fMw}cV;!ybE&~#`fwD2$F zza=!q{D9SAT=-kWLlKAH7V#FWHxHVHJlRmEk63R4bU*aAukcIQkH+}%IIpbG7=NLO z(BsfR)Sqao>vms_<;S6w0!1FbYC>;T7wTO@=zRF=8i{xv{E(U=F03UqB~)k*@`XaP z9m0<*FEqNc(58rs5wEBs{9@!g1wCF{__v{kQ^YyY>^jI-S7=-Vp&QK~$T*B*)V&(I z;TOI=T*qZU=0{XB(LUN#SFQkD47gY~Tp_qPa0za>B5)(YopHxSinkZfb9_}gf7DMgUo0^DvpmMzz&^{|kEI-#<=wWbY zdhwW8Kk9b$6MQmMx8IeJXA9cf^*aUQS^(Y~b?dG-9P15*%Jt?VUI+c6jd;Cs8Y*AM zRBtPMCsg`_5Law1mP>yG{4o}O3F6mJ-pj9m|G0(!8S!>(SAxp*W+I+z@z*2X zWbtzmzh?0dAuhhhcbykv^@~{lD`)j53y3zK)Zh%LZY&jH@D+-@mACiEAO2pQF~X#ElU1@80l7Q%ih2 zLewWeH*SaJGVX?0e$Lz75)Vc!Kj$5Wn13eCA0rVvL};WSmamIa5zFuAq#@?tW8sf< z#Pajc48-#D{Y=F2bz&A`{yh)=n2(r$UxPog5%cd|@W+FQtBcTBj+lQBfESK>bEN_A31F&4i!x76Pz-Yvg;Kw0uiTHlRtq@N} z%-(J=AJY-HMm!sF8^rSv}m&f;k@b7~!kMrs9XTz7r_x12!f-jHr``~{BUmovE;9rN2 zVJ`hXN##zwUb(l*rR+G3_p=L!@sV@C{HD$Fh~fX;KFwV2FwVsCI#TWwV0k9{FYW6l z?=ZZ&i199CYOGvremAf*U{34X;uYVFa*Uk>KU_wd1FNm)qBlUkF}qUpsyk_)XBQrn>L% z%i#7Z^6!BwR^(5D`$>_%0j^9pF@MUM>c0L!a85P8FRInk5Bvnh{11Rr z?Qa%%)&4FAuiD?W;8pv(4ZLc9_kb^gerl@w?Rym5w~BlTxSLSB{pB{epg6VjU3qYI z75Q**F^YT~xPef+{dXw1G(~)_?}n;q}f9p^cD9=5q& zst8_QkJ;sK0A98JHsDq3@1a@$P|f<&HS3?JS^uM&^{>~g|256}-_@-DGtK(XY1V&3 zvwp80Vm_leHETo1hJ{BHwZb^dqPtbd4R{b`!@&$rZXcYJ=-QokL)-cr9E z|B9u4JN{kG`ajjI|15a@v;Lc!^#}G6^I!G#P7Uy?uXmb&SAG4`0lez#m%iZT_}iVo?gKB! 
z-;SROUd}f=ehGLv-|YCOz{~k&$8Q6_3u-Tt z9(dLMegIzf*KR%>126k)$6v72Z^!>;so#z-n<(aSHK-jQ3NG|zwfWx+d?&^HUBL}j z|3YW+>XtJdEMylVZ);N|?W8~?GI{B-bgzS-q}5WGBJ+wp6`%lT=?Zv!vq zryai!yz2OT0$#PhXEf{oS+jnRzG8l=*6#qXTE7##>h@|0UUhqQ1+Tij27y=YZz_1z z{$^^{zf`mSr#0)@3SM>m-`1@EBhC6xfiHpD?O&I{{jJD*^b_;B0@N;lRd7ud`ABf8 z^>+oYT7NQl)%quZSFL{@csc*=u2-J`FXz7h8=}aM1gBd6bnvS6KMG!T{9gdCI{rH~>p!Gf z|F__Oh1y+z-UR30UujZp_s77i z?(ZAHtM)e!ylQ{n2d~=SYol?e&556UkYA+-@}ey3tsj6RIh-S^UW^*9`JI$+3`i-<$SZ_ztpU!M6;fo z;N^U_tH*Dkn3r-s+wnEPtLAU4nLh@+>h|glUUhqo0I#~eCWBY)?*j0uws4s|5o5t$0r`V>i7(|)Nglw zf51||9Y4=fza78QQokL)5xm?V?fAFAR}JI86m%FbG~nv~`=v$TPeSdEZ>Pciq{v?d z_pc)FGf2$0%22y{s)K8y$VX`A@1mK1Ao%f$^`vX^3&5*xua)3cx7P;ns@rP^c-8H- zAH3@J`cyOj*P8jSfmhvLo`c2wROKszR~?^v;8n*bN;7}FX8s}IRr@z{~y1 zj_(a#?q7EN2=H?L+wqeu_1K-i=345p;~%lqW5+*hsmHGV?Us7%_)EYY&q2+4PHEQjqh>vSXx39^nBMhsRq(3wCmg)${AmZiH?*&*?q81# z05<_T(T1O*$uHOB*J<*vY4RUw@@K&R0<{~TKQ;3Q4j1!Bb^Jp$`AAK^izc6}$xi?; zzrSR6{+a_`et*f1UkP5me`5E0?akoj`zLn%KJfDW0l~AE{CHS@@BamO`F@LC{-43$ zmhH^n1INnrF88}IMu_)%tsbSFL{-c-8vTz^m4u1zz5- z!*0GU1HT4p$3Fw^WkvoqaH`|;9(dL9ISO8Nd@g9#^M__V0VBnHR;|YgUT!bD`5ysZ zZZA8&3wXJ`?D&D;$3yM-2f)o&nD?oCDhZE&jf9|5mg|M%ck>%R@Y!YDB> z?8dVixClkQEx5jl{7`UH75O>fRO` z<8uR?*J!bS*?s>c5L_tKuD|ubwSwBspJ;HspaV>GKmJ3(-4C_PKMCA?MLrwcDne&u{pJM((aA%-)S*)0so0&{-fY7LG6yu*T4mj6WiC0uLw>x ze-rTC6!Z54Hyk?JRQKnLG2muEKaKeB?{CZj_XJej@7#Uj|V>rYB&B9H2H<#*DB`U zsLAJR^2OjULT{Mrem?&NuEPCdyM>zSp05io2HL}h?*ncEG{c6U1@3X^(>DBb;C4do zUN7$hcN}`wRQLV;4%{uMXBrCr;fnlNa5EM8dEnMU z?T*ju!O81^?PlzLd|tKicKP48@OJr6f>-_g_xdg|?^XG$$S>bNv3tMBm?-kA^0mQt zfZFBnqRCGJFZ*kEy|L25+kKwCRg*8!i8T4uR1=b z!K;qXFW^iF~kuR1=X!K;qXOz^7X z^B8#5@p%cn>iE0^UUhuF0IxbeSHP=|kJnT&A5_PuI(XIbi3G1YJ_+De$7eKn)$y4P zUUhs{f>#}%&EQqXXD@iw@i_`!b$m*|tB#M6A?AbX_*4R~IzElTtBy}Bc-8S43|@77 z(!r~a&x7Dq$7daQ)$w^9yz2OT1YUJ~&VpARpBvy+$0u-_-u$TpUUht;!K;o>Kk%yK zGXcEn_$&agIzDT_tInTSz^jhW2jEr5=QMcL@wpCOb$kM*i}|2BK4IWh$EOSU6sXi#tVyz2fn8NBNLwF12A{io$Ee*|hbKaYdEq{v?b z=Q~qv{s)4qtH?J7*F}-<1x|H*27_0fKdInV=g)NTs`Dosyz2aU9K7oMc@Dhl_`I&k zzpu$3*W@p0^5tgf9ZyA6it4%CjXcwzeSVZugM?NPm_OAlYd#0e@~M?p~?TO$$QTi+gEM63R=M(Vq`)1bhuVvi(I0pV4 z)QV`Z@|g@%WnI2`c=$_%V=lEH`nBAf_L}(1?%_(Y4YR2 zySqQCHGe-%{u;)`YoQpYGN#@+URm=8f~yI&8_#;+S}XD$!1Y$-lfg|>u7Rpg%o zw?~mL1b1GM{{>v7MPmHz#9^!DT7(i@>RFuhrmH=g)TVs`KZ4&HP_! 
z=Dz`6wZDPcV!MVz?Y3_uIMwm#1zvT0Qo*a%KOem6{8jwNwj zoZnKhzgvH=XXOLIRe@H6)-=`i^=UYk%lkn^B7O_6XRW_akHT_!e=0k^19*9VDm%U# zc=>v^Ct~^f+FDN{mdpEzIlGB*a>ow@FFzl#%Rf|;zYn~8zsRnh@!;kCH0=1v;N|cA z?fB{7=RocF#o$&b@;TtvDe{}ZC8J`N^L!k2gAIV$a zKdBB*+;`GpG(vphDn0&@SgyK1wgVpzmG!vWAA4!?$(r?y1b@F`J(Iw>JDynQXD0YX zQ0wu>$}a`y?)9dXU#ZEj2EPd^=l@nSjxhWz$G2VJiWK=H;La=Z7s1_DAl`{(W$tEAl5b^Iy{B zJsuI;Hv}rTZ>Sl&_O~TC>-(SX_}-fQIPh6eyL#q>vuKv1>hnk79p>%J&D{@m0ZjbrJ1qB7VZn^;;b*4^`yD!AC0c zEx^myJJ#>xS$BrKXUjDr^ zJ3a-x{Cj71`~>jw@0Z!}>ENp+iuq&ZJuK(18Q|sLgR|r3ftP;|&W?Z3rXK73S!q*` zm0xR9kCopDUd|u8{p%%JkK+FF3V1nx?DD?>ep6Smz2xV8?&kB`;P)!>?}9H@pYeGa~^BL5}$f#B`N=WFn3iu?ue3l#aMI`jL}JB&BLUqX9#+v^IJ zA5!G6gFmat-vqBZpKpVg$3wgA>#-8^QgQzKf|tibyZiy*n)TGw ztS3yfo+g_0L}=F2Mzfw6&3d|O*3(n7o+QnB25HtaLbIMRn)TeTSajaMthdx-$8WUMW5>UwS$#y>&tIDLcswSqH&oB> ze&AKFe}cfPUjI}CuX;US4ZOVmu{(a&1~0FF?Dz)Y<@ng~&A{*KrFMND3I32G-v<13 zMZN=gxxMV_j|DHcmmS|7yxd-Pe4=JO{Wa?uqFK))EGSPoZW#pMsa$%WnG~1uwUk9e)D6++Kn=40o@`Pixlmt!6zXWIYbI^<34g z=QquIZfn-#^|+YFs@uyCyz2G}27mANaz8$mH0!CRSx;@vdg_BO>8rNCHv#{*BHt2x zg??)J+kscDCswnb1kHMqz{~k#cm5p?Ud|soemrqsMg~LUbUWb-~$Gz&F8A%>nZY1@EsKSM&MOH zzlZ>@`uRmPc-7A@V!_M#V|RRr124Ci9p4kY++KEkU(I?3YSuGMv!2nK^`vUn^MGbO z8JhKEYSuGHvz~>T^*pFq&%>JaJfT_7TFrW%*Q{r&r5?NMfgP55?D)4V_1N)yW&Y%Q zkB14DdF=ZA=l8(>q{x2={$EA@6Y!M>snv57d=o|f6!`Iq{5P8Q{GeIS70r5n2Y*s= zJa2(l-CiC!V%$}?R~hiC+bamX>h`JxUUhrb(5$DfW<8BH>uIT3Pqbz|oi*#}u31kX z&3XoD)-y`8p0S$sq-oYOMYEnv&3fi(*0V&jp5>bLJf>OCQ{d(CN1k8YoiCmNFONTV z{6_Hd_+!U!122z1cKiDDk6rzr zS?aOlPgv@)uIK0Pn2do9W?8S)vTwxW<80T^$gIgXP9O^ zDd1J#Pe=o=`hLPR&HQsT^DoiN|A=P(r!@1g*UY~K{2r+My~6=Bc71*K3Ak?+`4Vuy zLjQoazrv4W5r((r``iCmc>gCk7h6hMd55{&VN`?4{GYnXUrUp3pvgDUca|pqm?pmk{C=q2d^-s4oFab(oZo7( zo$c~h1E*R~BzV<&dVyE1Csnha`I_~t(X8in@CTuGA(BiPm`Yteks&$JRbq~oFczPliveg-Y?Yd?_ZqJrK4RCUM+0|2a-M!n_j;{$mc${cw$47$i2DRh+f_o@cE&n*pda}Sjs#wn|aJh>7 z4sh~*$kyYV;d$?b*saOGugM?NZ));opAp+tb$se;@?F8J);~&_K_hYy65#Zt#`Cj0rKr>B^Ej?&j zuP5h&TMd2AhTj726KJu8zq2N>%H!bvg1YqAQCgb$lqJjt|Gq#oIKvzU2lu=aF!0*1+%@=s0qjOIX-UA();9_&dZh z-wnia+`XO`{cQxk8DiPK1L6=YPev@)HwAH3EMJCL_IDj(xxQ_P<#_Ey9E$cQ5zBf@ z5LdzSe-X>^t@whdU-Dsy<#fdI0_()iN!$bT90;^xf!^vn@ zzK#cs@@Pr&*Vy#r z`=E6kZg@nIe09$@q~Fdnn)Lg5wkQ3Go-qdBm#ynv;@OGhS9`{i{_CD`r2mm;chdjP zGlBH4dG;lJhgW~nce0(49}BJZhj|So`LuY}QwdQZ<{Q$`iVmyCX z`7z$d%`x@!b^?if%pY(l;^tb!&BK=*y zhQZJA*8Uz~ehb1s#{5Xa|IYVqqZQ#_^F6>^*ZkGkE|C7_GCz?1TV<}0{*f}*NdG(5uXwh^I-b{<-;?lt{eCt25`I;`zsUS8 z*$qhfnZcUBvmd|WaP2)~5+{LEnO*R8Vc$a-q}8^!{HZ|C2^V-ewZ z_n%GXdBA@T=`Zo0NBX(`3p|z)`QPm9;@FhAeYS3KVZAZlLUW%z-uHwJs^+t9|?Gq^q&jZN&2q^>?ZyEfIXyt7;gYr z`*kv4AIbk1@Gj}!2>5{XeF6`9@H4YDf5pH%cQ>`=nCmC3;M(3d1Ae**c&hWio-f?n}YuK$Rqq$ zg8n7*9}V*GWVlP75|)1_;a?8&B6*)+AJPvE=Gk5IHxDjD@-2hQl76S)a-=^bxIF1k z4X)_9n^f2#cbr2k6!iKPEo`AMXIwftn#cT|{4`YkI=BmF)VGD-jb z3bRN*yTTmOf40JW&(p;C6jWG9^1oNeCVjt(OGrPo;!@I&skn^vlPj(u{nUz&kbYK0 z!#GRSzq;ZolHXSGDbg=sdHBP6UO3^#|E}U1GLKQoFuo!31Xfx{@--_xOZqXDHjw_< zO3#!2f=U}bza`fDXr-Mb|8k|>q`$w?K2QD%(mD>uD!oJUzf>w9eMjX3q+h3UA?Y`% zTtxcOl|Lr^{*^x?{glc_Nq=VLW2C>b@)xB4LgkaB|90gsJ^3qX>-Ze0Y#1ek|3&4~ zWS&cv&yv1Tty~NRetmQh2ZyB`NQ)!!arK&PtQLH|Hmq~$o#)o`G@p9s(P|_UR}3i z<*H>!ze&}yq#s)~ko1RDEl2v(s+K4H?5dSW|LLk#NPk<^5YpdUwYt}zM868EhLZg8 zsEQ{LK>5PbVyUu?-SCT^oN8*kp8rg zR$f6w{Yyexll@jMwaoa8Ttj3j-}YWIBm%?K>CT*CX)WhYLiHRdNsbcxb|y7waFx(!}t(l9GHN&jGt-K76RjeOGg4c$xnHA3Gd{g$Bxq~9&{ z0O^kjEhPQS(1WDEH1rVZZw&o}^xq8qjPySWJ?a%l%%5*UkCA*(%`Zs5Ud@xFpHTBl z(jQXuH0jT+d6x8_toaS;@2q*A^bgg%K>8W~X{y#M@lYXUIS4h8M ztuEfwaiD*#Zlpi8R(H~0A-;Cua^BfkE5W-vu^sbj&GPO^_#f1oN9I4r^7khAi?tSz zym#$v(hsk_g!Bj2UP}6zwU>GKCD!|J?G+^dO6^BT|C8D)y_1MM-`9SE#UHn^T=7dl@g`InqqNdIl;R?`36nM?XtoZCs? 
zzs{?q-@MN2q~E{J8>F9BXQ%fNVtgK~vzz4Kth1N&KdrN$^v~CMm-PMX7La~eUBeho z)YGQ!0g_LyTS)q8b&E)UQQc2SKez6ur2kRf&%8$w^?XtHD9M+w`l)&HYu#fc?^o{x z>4(;%^VF+%isYNu`-=1j)H_4^sr9}l{pIz}k^Wj%&uC&Cw$}TO+#`0m=Eb63-crW8({&YU$H(tA8OYRBKe5=4$@DkUy<}j)~`(Z6YEzc{kiq4k^aN= zYmola^=p#;YxQfB{=xcnNdI(wdK}Kz4o*{Mp9YOczhZ-CKI4gb)wDqil8iCx2a|qU!(pU9 zx8VrVf3o2y(%;r_H0keaIEM7UZa9wgZ#EoH`oZD%lYWcviKHJFK8f^4g-;{>Dd96n z|H<%~q@Nc)oAf^npF{dzhtDVdpTifDe%VIZq+g@a64GzmXesIUZ?uf`(;KZI{RNF4 zA^kOtR{A_j%){J9Pm}zCM$eG`ca7GQzR~zO(y!e31=5dbyovPNH{MM8{TshT`u8{9 zM*2${zfAhiHhzWlw>N%`^!GH*BmGYrze)P%8oy2Ye>C1h`W5(>9@)cso^9gBk8QG# z%rm0NJEWi9>FW2EnF zdV=(0nw}#40ZqRm{gkF>NPlkAuStJR({rT1sp)yrFKl{&^e;3ujK_%m@gnozBK#}N z&nNs_O@AQs`!_2g{YuR)k$yP)lImU}f6HcA%;!Jn0{AUXk=mnpYcO^(hqBuP5K>MEg}7sR)%qy z*uJw{EhYIUTP-8~7g?SoM4tRsD@eYi)gz=|C2A$Lt?8 zjG~X*4@Yey`Av*JM)Wr~>SdCDC+Zc_KOFTQ>7R|_zbtV*k6ex_^yS~owVogUiYg-c zvaP=${U)tXk$$()@Mk6LTh^5rndf?nt4C6#*a*H{0pr9`s|x8*8aZUCYj_5+6*bvgy4^~ z8A0+tw7HM;e{YjQ`oV3-l75Z0sb%;#aIJMlw4G3<1>wiFO(XLRZ~Fl0XSPiz{hYQ_ zNdJwt8Khs-b~@>Q*EX{Z|3hko>0Ty=6KP{J!XSNd7RZCzjxUh(1K}zcIcm!3VV~Ci%MU zJ}3R2?T(ZF&~_(C|NeHTNPkwlv!uVW-8ZDawcWR*|6V)(3pv;A@>RR=$yb{i@M{yRLqd4&Ef+zJo96_wL~5*Mk^`sO;C5@PF)3mCWN2Q_YWmbKjb$c1$SA zw~VPp`iU`4(jOB;ZNkSm^H`qy2!CJ4;bfkojwAiX5&VUY_mO9;ErICJmrw2%Xai?_B&*?OU^tW`%ApKpPrj!1GPMM_tc_(`RI@>9W zv-05A^o#%@_g6zTax$e_8sZh?)HP<45GhL-7fmgCHw*1 z3}Zgw&*}CvnSV>StEB%yw_iyAT(|3_|97|FNxx>?P128vyG8n`aetHk%D8_?|MfTz zcIO7y?NuD-Mf&IBd`RCX-Y^ys<5n@g49Pc+uTJ{i;`yC0T%%05c?(|e?lc^>OAmh{*3c!2b`^++fEy*;Lo z{--@M%05QabGkiT+Y>gI<-Y*7?sp$1yhQTfCFGL+?S$>5U!~`cviz6C*8B~78phLv->v6sWS-$Y z^GJVo&o@bbZO@&gzq{vd(m&EOpY*@#xsUWO^?Zl)|LR#l`qg?JApP)Og`}U@i{Amr zb=)TPIz;*ldmSeIoL)yre`Bwsr2lrW(r?@+jPzsru=~{z z{p#1J0m+Z-)0p({@6*hm-%-GNJe<_01V&p}c83m!PJu3f zu7W-XeFORt^fdGW^k?XA(7&NR`J#>>XeDS(XdP%cG!ohd8Vikw_Jt0G-Um&EPKC~e zWOU4?&MXzlHt;y$&>(0!2?~cR}BUegN%@^&LX|Df9&NEc83*PtfboTTri8 z#CQZkD?@8T!=aJTcF-QsWavofc<2;p7IZOm1@sB%GtkY@SD-tg`=AG)2ce%s&q6Ok zuR;HU`tA_@3x&3Tc7pbYra-4bABE;Z_dpLqKZl-%o`?Pf{SA5>>hr3oqXM)6v?a71 zvHpeLbMp`Nda_11(ogZ6-qfo4P3Ky#r7peLc1px&<|FSHFb z4muE;0-Xe11bqa$8u|is2Xq(oBj{;p3Dh%BtiLw2Ewn3i0yGQy6!cB#2hiit3(%WT z-#0}5YS4Pn=Fk{uZ|E54Z0HK;YUoDjYtVhr6VU6>GH;6YR)9L8O`+|fanM1~vCwJI zrO>CLFG9CNcS7HTehfVh{T6x^dJF3HmZ&cfS_xVk+6dYT8Usy$4uXz_&VbH`E`zRx zZiBu7eII%h`XlsrXaLUlPG|?HybkDvco=j7bOm%3^jYYO(3hck&|T1Xp~cYi&|jhd zK!bOQajOb-Lf7E>u`%KpXdE;VIuJS>ngX2&oeG@;T?~B~x(50ZbT9NM^hfA*XaKHf zYe8E=lcA?@9rGjfcc|Z9^b;Bh?FAhL&44a~J_X$Z-3={<{s6rR4c;f#QwQ1#+6$Tn zT?~B|x*K`~dJ!75U*u~5?FbzKoeW(JeFmBbEr6bcUV{D$t?-V>U(Zx$=_{2XxPBW3 z9Rd9=R$NbvLL3tz+PB&)bTs^N(DBeT=p^W5Xa;mTbSAW*iC9k-;?LWOcpl<~(AzD= zdb1H%#`W5Rh?hZEKtI_a_(u^x4qXLZ1KkSl8N?f)FF-d#UxIFHBl6}Veg*m(v_Th9 ze;(pu^yf{)3(t!7yAZz(-3#qlAeO&__EB|&x>*R z-}Ss0hyPvAi*fkh^}HB||6R|Earoc$ycmc7f3N57eE&gSnC_p$KQ3|@1<-Tm*BKl} z3G^~Ff&W1i#ub9wk2s}=@DIZ;hMt06fR;e7L2p0}^W$~by1Y%_VfaCJVm+J8!X4Rx z@MT(%8jME;0ubG3KbqXzO}v; znWFt|sNAmMvxG0V>+0De&V?Sw>ml=IPORcDE%DcgaoZc?ye0m@62J8>-=Ca@C$`BY zOTMcXUR?j*;jQh(>jbCafqd63?fz6>)jQixJEIK8%?E4NLymfY>2I<5k2m z|87hBcM;3}9zrbpdkV3v=X=DmKbI~1EyR4U;}4&MVtm=3(J&vC5zBmaEpZFPa(i{K z#61wpdIunu>l=xf-R9kVOh+vHKhM&B8DiPrHHdK=W#e_kGT(=Y`QF1H#}MPs4;Vip zmfP!3%krQiF&?u2)e+0_X^dFb+X}JF7jIeK7qJ|#;fUq-pM*F>lxr+SEZ4sZv7ArO zAeQ5ii&)ODeHQ)$#Bx4-j98Ax*NDxVg0LDcSooh1%l_U#EVu8!me~K07(bcMfmn{8 z6S1tn1!7rW8^p4|oh)&83%@U>4y)Q}$n(YPmgTz<%lhB5fazQ(g1Kd<7mF2hD=6gyuk3o9dcp#Tk)jB{T`h2}zUn(A8jsm}x#RV*|Iy5T6=e=hVgH0BtOUGs#W7F<)PwI8vj z@7iudytw||2(`A`WFB8#UvK%f7wxv7-HrnsbQn9K`Oy8)+n@0>jl;+{dDlA98}l=j z!^nUxh05c5@@G81YrD9?qFv$yp%H_)65J56d}W%@1hbth7dlid&yk$j&Xv3Uu;3D# 
z^E0l)NP@10?t~UW&p>ZNjTXF}Yu+<^MBd~Gkv9!`9C`s77b%wSfbNGLHq|w6-cZ4N z4-;zL&RGfEcWoDaO7JnzSZFFV6Pg8`Z>nn^=X8fTPWz!LGeqo&7kUcu8E6S~^;l1` zu8|)6O3-0!2o!qeuu$ixLNlPVF4kq7(~#Fa&nAfL7!T(1M=_Sm>y|GO%j=S~A4Pk4 zopct<<#pG07C!W4!Sky5W4igdtyA0_nZ;R%Wj%`!%j+ufy|O#kS!*oqmm-$+tVAs9 z{{!{#S;QZ{mqb1MsLLM}5X=KmFOs7P;=`$^O<>kUWD{>+p4=!{s_I|Q-p?>NMA{nIV+VoRKZSoUWN zV%guFh~;(pdx-hbmp=|$;x7@)*GWGjmi7OGxVC6+)VeJCTM2R6FFbY_>Cg;lCNv8= zADRta3S9wR3C)47Hq~`M&F;=;w!^sgIDe(*etGy4e7VC&g=Rr_n(EpvJ&E_2y?@bP zXbDuDUmV#&UC%QSdj%g2jfEya6QSYz1lJTA4UI9?wH~{?v1o72o3~%AXQ!$9dC$Eo z+RMBd?}>OmG}~1Dyi3tu=9S}+TOji9Fx540$Vst02kLi9#M#hdXy}*1mwD3=r<>|D z#`N{%6V;e~L(JbX14X<<#$!CqU3#UAQ^iZkr)50Fldr%ijWYwyL+mEGe74x*w##^K zpn1rY%g2oi@Z{~q_ePz@xb!k+ydV6y$(~_+`9T>^73)7L<5}W9o8o(-P9t@&sQ)yU zr;hP8>-j;(SfBV_D&xnCd^hFtbYHVR&)>v&q|Olcd8r`dOmDNknlhdx*4Gg6_%#uGiwL*iR9PWLo7pd6pclf~a7eh15^j1hkiph(8!MgLC9 z_{EIW*zl-fLb-E|}i&KVC4sizRuO{Pk zQGb|>CyVXZT*euq{`NASA?hjRGri4Ah zs-HJ0g|0VxjPPSj)z2G?_SSk+#*4hErt0S{oIuxG06!y*FW1kTX|{8{o-BMotgpya z{k(_K-Z~BulSSSrQ}y#kqrEk6)Krl-+Eo3#F=%hin>0=2O*U0O?+vuK=Dm?A^4>I6 zKksd{x8_|sOXOW)s(#*;Xm8D%Gh5_cZK{6Wb!czRo0=u^rkSdrH+F$&FZ0GO6mb$X z*;M_!w-?`=R~~v-*V?(T9qYEe&Y!N$_xSZbTpkRwN(KmS$P2MD$diOzd zpXZz0-}8Omd;hrq=pXLcWv#PWd+${~YoD19<&6)eect=_zvLVFp&s2{{*&+jhVM-H zK>wE<{^H9NynjxI_VT~y>+*sAFTPhl)J-2s+y6_x^?&30*L<4`-_N(Tz5MUz```op zU-SKoZ+`K6zQyh3Klzq@=>IFPapFV$ue`>&5A~}h|8w2E4}Yk)m;dDZzpb0^2l~Iy zx8lfqz9-tt|DJEf)%Wy&=WBoMeLK@${`Y)4e4zh3U-rg(zRLFUpM1kU^#3K_=nwV3 zv5X>ALrP*R+@a zxc3$IoRvp(~tU4{w0UV5B2Ew@}GSFH#z7G@A>|HzB$JC?fcuy|DJFD z2l~JB-C%ytcXNCB-}5zD{-5Xj-1@%#WqbMG^Zn)n{om)Sta;BjtiAjv-)}zj|K~&b zcl_+P5A7Xl-}Cuv-FDW!ujjOv|KzKzf8YPtx%fNZln?!*8viHXZcXp&z1quv^8MfT zgZBsezsq4t^LxGt?d3oD&iK&(f12;k5B(SK{?GZoZGB(=t-bswU)1)#|Nq2yz=!_G z`~Q>g!Uyl`DedJy`TlQmc>M4^{om!V`O*9KZS5tOx%v3-qXa%m;G+aSO5mdeK1$%D z1U^dOqXa%m;G+aSO5mdeK1$%D1U^dOqXa%m;G+aSO5mdeK1$%D1U^dOqXa%m;G+aS zO5mdeK1$%D1U^dOqXa%m;G+aSO5mdeK1$%D1U^dOqXho{lfb+lJ?X<;@bOb8{r|tr z;{PlCsj=Mmzp?-K_lp00E+f@kKmErCv_J~zfGePf4uE+Qz`fc*eD zdIt7NPu6}a59}8!e0zd@i6hv5CINe^80`P$4E)|A;0L$^KT-hvG*6blotB4b+bcJ4 z5b(gEmlHV5a0Z9Pj^MD44-SPc;P9Ck9By&E9o~3?!>_I&AnqU-L_jcA27-CEEyM7* z84?hrJA+`m2n2h1Ah<3D!FK|nUqu4U*zq&F0yE8#Win_>mI#>Z5@6nmLDIMIdYxgQFb}9F^|ixX@e~Lu~1^wqF2_$3)_9Zs9Ym}nh$e|alqLbuApwZaia=zOu^sFs&_SZ<^^`OhI6#Nt&d_0sJ9OA! z9Ml~LDo}^LF3{ni6gpJ9L5CV69L4^*$}j$e4sCqsK>u7PnKk!I?3nBUPJIbD4YB6@ ziG1S);H1{KDoI_67@Q_LgVO;XI2{pzQ;Ptcu{${TlYsLaPjFtYzG!5fGsWQi8SVME z2uKf4Ak%n27S*g@ikWg9=|F5|ca{{o0y*XaL}Q%11N(MF<*bR9wZeC%{q`YiCD?IP`Ch6ev@iGmIZ0hEi7%*J)cIJS_w$S zJdk!3g4DkgNJoX;Oap1SGf3Ac(Lj*qxPY|Ky2pyUWJo|-&FxO9RSMD;caT1@W!@ym zgKAbJkb+!I2L(ASLehWNPd<-RQfT+jVsPsy0k^>daHB`eZKm$AfE;P#f!ji7)-7Fc zT0&}KX#ZBZoeQ`f)(Gbko7!l6N3Muz{j4jv>72pso;$exiw|!9u?M$5jF&cHp`QXJ zg1d_l+&h`hOeZJRwW%vesaR__k`##xNtsv*?tQG=?~ubY>JwavDKqTqd~o-Vo{|Rc z%5e8`a9=0@_nrC&ok{Im9=IQJ2ltC&a5w0l7LmhMjZq<_F3ixd91o*&b#GJ+xCrjg zZTLFEE+TOMmeRhp?mtGX*}CV0Ni%W*4@X^}Yd9cY0v>%_!6VE8JSMn<#|#1<3!TAZ zwa#cI1(gR$nH+g!P3f%wkDRdFKf$9gl$pkQycB?}2M=VUbT$DFh!KG-S`4yO+ZHh? 
z?&c1%O&Y`_HtY$qJ+!@uj=rEzt01;(NOp@4GCcuV9Ub;e_qZb-mgxerA0^Pq(-}Gq zl;hseX;c9z>xnw4T%l8>2K_Ic= zAuH-spiBRQl=#Y#6griLRXakbQa{;elL;ZRjP-(7uf&DYxz$+>&Sp{ARMgt}#)eX^KkfJIL8bGXVAxn?5Ug?z4 zW)XPpGu?Pfu9zt6QUQ1!r!7}_td~{-UUfY1x=-7G6hUWM&HCebyjVN%C@I?}hR%a| z(0QZ;I)~~Wye6enQRnH_91mg+lB2%Rc@~fDyv`juZ&!G2fzAgX5tf{IqN;lcolo$g z^A~ay0iC}#O#BHu;E2Ld==`h7JcaE-?Mav38oOVKZ5x3uelE~uv>Xk9F6xM!^K6%i z0_d`cj#yi>aupVLL|yiXpv!^gF;mDJsqjcVd^PG?D}}DjM&mW?b_8{O>gz4~Pa;eJn`Fv6rZ`;0{+;LIzyFhnUYj-O-F4o_+Bgc{?&^`8nYbv>v5;}A+ zbpK@htSsoB+7z;tNocTz>|zqkp?1z}_qFP?L!o=C7`i{_vE5&LK=z;&d_62$e}XmF|Kw2wHrw-0UODo2J?k#Qk!lx8%cAy^3fO2V}=lVEcAdL z>8e`#X&a@aHn~Sd2>&|tI9I!j#j zNX{-rJ)^7#B;;g_GxUt37K2=hX;kkZm(oKGPoQU_!Fq~V-qA%RHeWe@3OzR|gSJA? zU6!LtEWE9&?TIlD^(=6Ko+Y(O^y~CCctFoeDfGO=hn}}XmM1~aHrv)VjN59~EF;C# zuv9|NujTgrq36#b4~)=@XC2>(oJ(r*iWmI(Qh^3RFPR1{Clw1&uYtNXYe_K=^%~8Y z;9iQ!eFsCY@kZq@g72Pav5;7NxoNd1P_JonyW`L+$#nS|F;yv?_|Pj=!nWTC!K;z$C3C1p6l|yt1IT z&bBp!952vi?I0y>p|>Biy=$GJ_r3b~^W|@pwF~bk)(7 zE{U(3GXjr$rK%kZ-n~N&L9F+LyFHXR$Sel$g?IaUkP}FOE5Lh&Huoj=58i@3a+P*F zzbn%lI{#vVMa*6+obI(BemH|r$1Tm<_Tl=gO7u% z!*1}Q?wZdE^93dDnkh$Lg3q>yx%J?)-3yg+yq|g_!K4%mz zOTnj_?xSbOM{lXn;4#@cOC26spm9)PKOZ6M)8Ga^-}9lLopI_!EcI*Ye;d1cn9n5O zp>5VZeaPXrRLej=XDRfX7L*79(C=!f zrUm-x%<4#l{6hR*v;FEPjM)JF9=JmPKr!22sj6QD{iF2{_Yljrx;f{`q2RlH)HvoH z?N2c3KP4Bp*|rTMmxLjA)7bv=Izj*Kiq6-e|3S`O>wjJb{XZ`vM~0yO4PxkjSA(L- zQJMb14pOlR^?$BGRivz1V@F-Q@=)^ww*SBApr3d!fRU3Y>;Q=i4Cvwv1N@{gV59>K zh}HJrn!M++cuKH>oo@ zZ064QfI2$nz8tN90WXwYN3jFm(C7T>0s}j1?2eNYN6hag&e9@F1ZB*cXFGp zh5nJ_NkO^@1|HJvlWB-IKZIrh<&~= zD4}*~C+4Xv1YLtcv$-`Jw2ZSFgZ9f{(60)#90v3Cj!*HRI5b$Ql3jPW&(pBXk5mU&pM*PFLm2D6I+U@`>(|86EZCh27ep5Gl?Diiv)aUDMcN@cZsE9 zLJAVr&HDxW{eXNo*@JJE1NiQ189JU=lI3^}>szX?8&2v9v>lF;JB*V09lmWU91ea? z;kXj~B5KpFlJhde`X!6NFXx`uLOd+P0sJbh2ffI-+}gz^{K+=jdx<*+ez$F%jf@xd zF#M@O_4kjSb{_oY67b&~(%b?3cN?seiLE?h*B;iN>a!up4TjKQdWg)l;58PvIm3|t z+6AtpGRDxbgp~Ry7XJZ50yVfdX?{syh|&dyOqOFG7_z{qt08B~DfPV{RT&4#=7%F36 zXm36Y9qIr>Sykg47#d}|+=(3bR^T%*G+ynM%?_QTzaNYPZnq4%K@R2e*`c2qM#W<3 zbu{$)UUK9YH1xIvh8hjbzc61-VCX~Rt(T;{L4y{OmJpYiS{s9gBNo&7_ zluZeju7P28wyY|0Ce;Oo^|9vG6ANw(4Z^9f-C)>o+C~LEY@7!Sn`%CtM$Tr3+|7Yu z3w7&HklH{87?xf)?`u3HQv|~{DJ?N zhUsWWo2AS^s`3pDtBEbf9frN25BN@lualY!Xt+q{ae*X9Uks3 z#~om}k24G(W-0D~{o-qv){|l%ZId&pZO}DdB1fVHFnp{ShDY#V_zX+w1X9$EJK@6@ z8lf8wc;y1aS5V`RyR^#j5g5La!0?@PWWMfM5i!?GV0eWThF67x2!?+i($WOOE%aG; z+PZv64rMn+RO9G*x<@-m^ABkFQ`+%Tu}}-ce~}9YLx8IT1axv?1Nx{h%!2^Bcmb@O zMsNY)E)Xz50s(V*5Re|RFAV}VYtUhG?3FVFoTy13jqFdjLBK_K2mm^=h0=Yi%~Fuh zD25a8n?fXnK*pjfAjfy0KvzpyJt^^agFv^)t(PFMhpKiT1cpi>aE=55*E&Ps9#;q~ z&ml(?DDaFU1l~6^WRbH~x)~P`KSO`to7BjJY~WXv;Ac5XgCL&5>nk?M$pM07oL&m* zFNL5T&Ja`*O+JC3N;e3)#A&Ud+v9z6V1ymDJ?w}M+!D2jYt{yk+LR)4Vh0-GYp$%r z!e~{39gG+o@~9t-P-~-_$;tRo^Peyx&4nG26C3$8j3^AdejY{~3wihhjQCtRxC@MU zq`nZvjzk9Z1Lh}dkd0KVMkBp-RB0W)ZPdRYmRA~dnlv+7$pTUy+&ua*-uG4tBZtaS z1&kacgOTA1@p>3JL*H6}{fD5D3l#}p!^kvMQ%4xNPX8!{94CYU8sc5Wj*3$*9}1(E5E!-EQnHs+ zs;N<;5T(B3X6)vPMrGL?u46ZGn5-H`?V>~ld>D1q$Or6_QJdC`qt%UJ3-IK(ma-LS z>^lb-RYf16ttUb>)~s;c4x?H^UD9FHljhMLXl$8w(?nveu3fsF_rR!sD@9K1 zXrg>R5=Q&E!syY69j(^lJYw@{Kcm_E$zfMCdRjKIG{i<#v7_g?!RT-KF#2snbT4v_ z1|4JUIcH*wv$=9CDiWH-KcOPAD~$1|8E_sI1#y4tm|mQr8zYa{GmjlZLB%m!L-jLY zO!+oa`zmV9r|g(B2*&m?#NWX~q78m~abN@*JKQoN7JE@Yr4q)DqTONoJM>2yyd(0X zVC>Y0ybu^WH|%mDJ2p*@=D^r>B6jRHRU;q9?$;&JA7f~>Z7RVGjhHjov4;+j!`OJS zksQt!!Ps+=nUGPvVVt*kJjKmP!p>9?L=4Wb`{zhuw7)?F#hJ4G>Z?Uji z!UlU78owhYxw;z(q?Q73!M!aNXYqJH9vd8>zy}~$QJ+X1y&zlmZS3bKg5dEX9dg;= zX!8E*t!5DBOnN^#TYkiS@39;Nx1PB(>hOe#WSC!NXHcuA$gBne$PDuQ))k zAr!J9xL)fdA$M}zAo#86YB&~p%TYg&+q;!ia@M+g 
zD6yVUrvCu)@j6>Ba%)vJ+ywcj%2Bbbe5tA?0OadqqKAQ86CRVz%I`@*{=FPi%0I$f zE`q`_1ZzRz);zM6X(-_AuVRpD=n_`3y>^ieyT6lwg5tW06Cp?qii?)wmpD>!$wfIVeCQK8h2?rpqIUEkD#kM>mGT_n@@1?Fc5tvi;#1P`? z3L(8E5aJiHb0!-y+F)Id{j&{?aX9s@zTsQ!*Ij=v6f?eN<}2(cjE;9@LoR7~ZRGfM z6mnbFuZ$czq0iV(j`^5RPbMalUObVMZ$}|U`R4@?Qp2zzZIt&DK7@QN1r^^FR6h2g z8s-M72vxlwsAf5UYKb$bG95v+i-4+B2CCx>sH!@E>ZSlxX63Uhpt`T^v<5FX3t83I z2k~wmQvIw6t-!m5nipH}Zn5^oX1rT=0Po&`RKIS=yEmawX%60&he8K##kPgSdLF3Me#;hM651auP;f{6U(vhG2car zmO*H(`AjycOyTgI(0dvjMh<5iYb_c1ziF zEXxfq9s^;AYnT2)N@ExZJKHkkIF6fVE%3pvw>4-EX}R4PaU7?mYfuQnO(+a%R+NyV z-XaLo8TH)EC0u8Z3F_*c4;hEn*hR5esUnA4h`PMPpB= z=ZMr^O{yGnS}3>s4Aed9l7-|@tDGJR^&odv9ccQTYGz;U3n{rQ)8YtHl)^;=>hPFx z--Eg^YNd%)pLPbd;W0XnM+zmZx)Fi;k#^82oQQS(#*kV@j%A?!m!AHFE#HOykNK5a z*E5I@(bLjE{ZrJ6M{Kw~cRa(xLLKix_*AvX9m40#z`q_^e^WPw4N}l7j5I1w+YsR)O;%{H&od7>nP9+*!+pUk&@b2ZT4* zET2P6AV;4;M4y(SBXCedO-3+Az9IL^Ai`f2^eY4T9=9_N( zi!?RJkp?1GGzD+QsgKkHgoykaza8YXOotzmL+@;xwvgI+cfBv;AT)Abk`meTV<%` zUu;Bkn9FU5c&)!rh&5WvFU9`d4YfC!@Kf~l=16?Q1{B31Y}pt@$Iij=%{gUD;HY@}{0xl(O8G>V+6 zl-mWck+rIndWgJdJ!m6^RFOV}$S-SC4)TZOYo&zPGBu)5V!kSY$nPWZPZ0T={@#4- zd)ot|oa?83Pb{EC-sI9dIZA@4PW7M2vF9sGVHzpGs(z&qHRz!@hM3bWhh1=hT0tYm zs1bX~C9zhPPOdNtT{nnQ>0k@FQl>=~JhVz9?MUi8lmb79iq$EvVJ27rQM0vO!^pA6 zrt8;8agM&+9OE)w=Bqx(==F?w1 zIj5NSJw$iX3P+HNFm49XY20~@HgSs{-5R;2oQQ?A({ZSq204-+}B^Kw|I4DB^v6b46<4Ci&>3Si#(yEOt$NmM{ zPJ>A0l$y0lJgMGv^?Pzuy_;0TnD^d8{Bwwny=2?+id+_}PaT2S+X6PWh6X&VQEZ!b z{X%lN%nf2+MC>Yr*xz{&*InE1Ct`bS>p6tEdkV!3afY~n+U28|yQ@*0D)hDh;=-*{ z{h7P|C@w+EN~21c!W4im=dI?g3UZ?&`5k&8uW!c?Pq6*-b(+0h@5>4+xG zpwx34V!p#e-XNHeZrhSgN|Md=cfhc>_Ch@l%rLn5VzfC^ch;aBTT{*y;C#~^y9|K#+B*^r|hnWN#?e}F8g!-eu zGYP0M>@KRVYK%OBs!xPnYlI1)?R^J-(yD68fC-J!39;;imyzpQVdAK;iyPR9v9=w2 z>>D4ltPLh^l_MW^;$9I3$DY&Qw<%5qAkk6K`5`9kJ^Z zZI|JQ_s*I-nOv;56y{(?ZPb5*8JP}UL;MXoltrp44Jel!of4M)BTUpPgoW(H2kHw4 zU{VL`!M=D{s)(I5CMNC=m=qnSh-4?t|A|BOdF7(Fy46RoiDGNuF zPAVyUHt8IKNjE5j#!k8$j_1LoS79iTo%EvuO@ql;-+F}dsHcXCc zcW%x_@GhVgYy&!4Co55VL@oOYdjO@U6pl*x#llB~ne@PGpI z#d>mz9?>bwwPT);3YmtiCPj=A-+?KaHR-9O;x)BcFlC4S?i$RzRdgKZq=vx@J7Abu+cIL5cw!Bnr7AwD?fC4#Aerdx-}sburHlcae=sNFPn zY653X3eCpb&ZH3P6VGFHeEp~#tlmJqP;wEqb-PE*G676oW0>RKs}4dWK)uv2$7 zc~Q?TsI_-TJkrNpbqos|9*TG1Pm=F?r{Yi2!zLBN)RR(}T4yPAU^-+wz|^+dWen4y z+Nc@HbZDqulEQRo)lyKp^hC)0Q84vurRYnT`g0B*?vJMR&n3t2qiKE(u`6+Bnod<> z@j1@7o;F(djKXFw+t+l1I<69?sX6sDZ6Z}q?6jo{^lzA!>%vaEZa()ICl(lP{0AqN zn{J-RiIt`s-{Zt8({&3@tTtb;;zT$kc+Er8jHLqlqrVAG#{{nxC?WKy;Ppt9u%trp zdI(C$J0f`9A0^y6EO^}$C45ybck-#3|}1$^IBiWKmD zGq|ZtU!dY=!Sp?HG#sWk>hC?oLJ!WMPJbTKKau^UM`DM~@W~|G#MU}1{&U^4EYt{M9jJHBBcaj_dDu2S+P zY~oe~iKkVza!C9s#8PpU6i-nkWW$U;9PoLkOs}iJ zfkEb~pUIhd3OhfTvC4GqFtIWrdpE$0{P5$)V8)G**7Iyq4<|@sl@8}2DN;-RP0C#m zB(1VzlPX*w>2ht_AH))7+dPmQO*WsnO`6J-ZdOPF<)&F|l1%_fG$1(4COuU;J%Xff zX(o=H>Dt=k2s!cU4zmq+#WlzucX?y&6pgz)sqK9RcP^KsdYI{@qAC5EeYgmFrmwPN zXLjZYimAg)wfS5vmTj2i+XH4!yfYw$WJhZdb>^?5nV;&`4D+ zCH1Z_^Bw2V*3Lw;_zcVvIm4{(4lt{~7-og=VO9(eW_{`iv(`&t)(!#8Dw5O9IqQm? 
z9=KUr0<-FbFzbo_ezkK!TN69Txu9ACvwm}h*$z%H+sz(k_t0+Xw!O+Zf13nm z4{eNaCqb%)t`5eK)!>{~{09I2WjM@!h*^bXVP2F~N0{lppO42%uE z0(08otT_tKyPLDfeEJ|-`vaPDC^oVqJEwwk|K?C&=`hTxtzEbnFUYK){1h*Ei(pQ( z>4rZ}t#4BKk%yV#ywU7jHw1INLxnl)+&x?vHut#s;^$~Uh9Q108W6)_Xai`7GWP<9 zq4j&ihq<=|FxUEse1iI!EXOvJlHi)MlJ4F!x#Af>;vX zy?)AG67CZ^>reP}mY$|6!-HJl(NW+Dt70L{Gb=ms zVP1>&A)j3H(?5Jpu2nj+^Pb=9WW&QNYtz!OSD>!V9S^NG?MlVMN)1XvD6n?vBP>fb z2@>9&4eXepe?_2X(0lCsZ*m?gk-LjA?N#?4qY1zA z8mFI_tJG5tVy-4@u$H;1c7^0yPHeK-lv~JDzf+>?Y;u#{`&*{EO@nkyb-k`nI#X@d zJKSfgZ%ZNh83W1R(Z~GCxk%O7I_XzT^@dGMH4n`fD&3E6yRJ)@2 z^rqJQuI&$eZJ7TnX0ZOw1JZo8NhQL(#$eAT zG(S*Z)Pg-vq51Tp;(TR&!eQ)5Pu2W5x!qZqPygci$x@iVT7P#3_Dtf#{Mk2+poCH zzpWeHN=h3H!+Rp^YdNlC7%^vR7RY6=Ai1sEZASRyf#fD5d=)0qu?v=mmpa3OO!K*( z#1h>+>LR(BZJlByH@mCwZ?NEq6c&8eGITe&N$+>8f(6%`$M}+>*V-;J?502qzNMu9 zu@uo8GA7et9=RO0i=0}D7W`o_*O6uq>w#ZM@d@r!E)+$r@69ee5OFXJ7FO!c%1OZ! zeOo>$XE=!LCD1Kn8EglCfG%3Y*%2wM1DD||r;XHX{pQmyR9F8oDH z!`8Ycw8&R~UxkOg3B6s&E*h>CjwCh0N8}DE1;j3D4+R&^;BfIpM~yIwACSvgx_|=n ziHZDxD$d>oTy=s)pIP_yCgt%IU?P@$1-k_n-PF~@kQ4FS$z?mVs13oQFHAQzq=K~^ zGLg&OtvM!QsnYOy#F}ot;Eeh2j2jEcU0;2xfs|w$n(|1Ui=}KoDHd-f<-T=E&3L@o z*mpV}4_V|=uC{X(xkTglL{by4ToVtAzP6NHBNYYplj$XoDhVv+Y50A}9Wb8`!LnIs zF$t+!28*R`?BY-ai{nf;Pm;?iI*}E3OfsSzEIg+_A4G~8IQ!O;qy17$Y*X~;J906D zQq0kPk%b4Pa*!PEh!&^n`q{{-sc7*^PKb-watd_u0nX)GT%wEgBen4d$X%KkT>PsX z31EppL-X2o+eENLto-5!c8RC5Xc{aTX}YzSm{Ro*#}ccb2CX7>CLSz_(x60|yA{Hc z=}xd@32oUcg(VdZu;jKUENNh1$$c(rTtaXAF8NacDTF|ZpBto%c7~MkVn~U1g_KlV z)^z7hF z=E**k+G_{%WFt!T=G4oRR4Jqm@PO1&8g!p|qUKKhlVFW(0P`e>Q#Mch>>-t2)J%P|kS9uz`qnG>X*+{8TQq121H%;Tgi=5b6O^SCEUg&oXeSCpz# z&?`Bq6xmAsDTKNUX;SWxr*(H=)B4}{Sc1nQ?&hypfzn26?TWB?1WF68Sw~%ujPNoq zHZ9a(=S(V+^mq8AG|W;&uUwZ?tcJ8)MUWQ9`ORsE?bx(eMiX74q^1x|3O#h9xuncD zB0vF4`K`U4VYh}5v=f$!#jvy|50(z*0w(;VF4+T5^hZlaa_4nvFlVKf7I5&)(wkCP zTEn^MOWWwKVVBY!G8mS*-|1dLF63~dm-V9K*ky$RSa!^bU3N}!nfBjkQg+4%({)|G zCl%f5W<6ol@3`Hz?9nDh!b8j1EQZE2%axlMNk_CiESr&Z-^xgQb}$mZ0!9*4%t(R{ zGLq|PdE`z;60?huOhd~j>|-QJyBSIH9!9bhEuWIZNLHie33-epcP}F;*w08R4=|Fd z?Tn_ z1G{I2h}~gjqxt;5aKxJsJe^(n&V24Jv30-ObqF4kOSK^u$Dwq-8=EdP6`#dkN&4z7 z7{4{29)|<6>*mkIzGXZ}m+JOaGG6qGN_uxENFV3`>GX*7WW4&L^bn0*8RO;W1?drP zkUmjMfdsD@DWuQQKP+IplC@|YtD0+OjkpGP}hYJXo!?ht-of5OeiB&d*+*CWh4;Ek_sHEolwI3t;tb4y`Fm zL#r<$Sbep9FR}*1YwU3!t*)ni*uxdAzSGuq4Y{B|tM6rzQ~5e|KT?#TJwKe3Q8a4x zW9{HBq>^_2%V3p~!ZHU~{W@y>kFdrr+&NiOTI;nJZ+_J>^eW!`#W?$y8`VxYGr4JO z9nO4HH#;6@zKfXnm|feXiyuOYg0&aYNNx1JPHxyWe_VKaH_a$w3biQq8Z#?9kr6dy5Tw7a{ zhVx9Yt|Wq&1?#Sd<8836EllXduKU>m)=Q0=KS@!wxqK`+7e)~>Sl^Xmk=UbNX-198P%xy~%Lk>-8$IH~F&ZEeAWX(5VSFG()M=I%swjYW4ar;vW^!HjxjoJ;? zpE6f%!~ywcuakJBA5CiDp<1;5rpd2?9DQxB`UDSsVksI&jz&vhy;je2!~r=RG%Qu? 
zW_^!cuoJ9rGF*krK%mSfy8lkfLFF0@8-|7* zF|Zq01RG*iVRP6G^G(-c@v!WM$Uq}u|Tlrqt&aA^)oH|eET!>LFx4}>#H~mZ= zmyyks>ZJooOPFesKV)_{D1XI#A4C18B!32XWHJX*GzBuVIMb3@O7Rso^GgJoZ^i6J z`_>*yv3rKnzLwqSB^MusjWpl6(Z2|ff2-y7BQAkSuJC*G_*m%g`v76LBRyD1MjVH~gD0Fs?Gg}*P z@nGXK57_vlGi<`mqwV>V%UgTJ@h2CkY$MrCPMl|47;N5RB6sP|x&@nLHA@?a`5c$r z-9#h$-Q;lhb}ipRrZWQAbj7;UjH9DX z*XNVZ=$+h~>?Um%IhmxLe1X{BB6d?Q&0Y*_ z9^?j_M{@8`o{&au&UvVIi8J%_ZA-Zu^K&&P%*|?j&F{?5xm3vP=J<$$j`?U#l!% zz?O{gl2&$0Ru-u>Yx^TomuoZ+CC%OBXcla#(YH<~h22E3<*qWd1=XQmJdpZlSrztzHt?I)H|2t|)dN+Dfh2)&v*WI+wuKG_7yEC^lZ>Tqug|kG8JcB#QOZ;OU~+?izWG zD3<2_ufx`jDwhn{dM*6WMt18Rl^rGfW&F%-Y}V}N(O3C#nT=81`Qv@Z&rgM{6%&Fs zuvvvh!xfYn&x5REJ5c5VlvS07GFPLlOIav$2g<5eR?md2Tb!4jWtKwL-B|BZ$a-eW zmZ44As@myn)^`N5{;XXbjyAi{r9>MsT~@SF=my(F3~Y0k!nU5&=|>xb={`Uk!;FUC z$%%LRX--%sN81J?*fuQOsg>OpXU!{NwtS7YE#Q`aTdD(W%cZm>_0wIMEl(PvYnd%= z+-BHT$)T9pF5J1x_M%Bj$nMXb#O%@Z6tLMxLhgSF*+wCo{UUOw8nWLQ_I41h@5Y^w z>~|dH_Rbvt?Sna9+tpIoevadv`^o{f-{39{=QgzWo7(%okirf*N4sNO^N42jc^mh< z9ci(Vmte;_Zp4oLoM*k`QA1oM6C2D$=R1Dl#_vQPu+xQm-cB!W+|ChtOoPvOPXF$t zH;HzZYS&!EpFHLgl{-&SJPi-aF)RRbJX20B%FZhYc77LPw}{;(P&!FqSI4L|Q`uep z&HcW?pM>d(u41p-I=e;4-kS%z{9-0pVAtqzOA6Ut>ad$V*j*Fp*1|49p}MQ=u0`!P zjQvcPW|9hTTjC_lOVypA?+?AUgI(*5h6Fr``a!#4*N(bKdo10Db{(i$Hxu()LpGm) zT}23XouX+yIokESt?Mju;*CD;EgpUZ?fStLcD<|FpdiO9?O`{+HkD?$=hY|1VqS*n zs+{SNV%vU{IHuD5$Lg(r?`x(SG5uoQOhV_p6it)*69w4PpSj)C1))2R)ZIX8a( z26i`1n%%-~LN45LL#7!Mu}ik9YZklv-=;oOuuEoWx3%o_T>M9!hiaVEk zMCPiAXlz4cxF;H0$<27rU>YEzvA5}675$=$tWTR;%X$mq` zKP64Urt7h!Nv$v2LYiXu>>gwLCt3Rl+&)6sK7xY4dz#v*eZ!BaV9ys*JJBcqY}*!v zy<`dx37g~8Zhp$!&5ulHSB()vl`{4hp&TE?=Fs;#b4J=jPKX$C=xd8P(;OjZjtAtV z@*szL2VRh~Me)J_IlB~JMnTRw`IqI8(VR+A3w-mzhl?XWjO!0s(2u=kYK=>;nCwrx9uiu@Ww-k~Cv(+EY> z-`IPFfxTKO?5*Q8LQy8C5sJ3a_WQJ*hIV^@Gt?EKBD102AE>Co=wy$ITJ;aspkf*{ z?PClZuA|~y+lEW1xWEbaxf^VAP;r&M)`E(!8Z!@J7X{kaGvuBR?CYz)K9Q8&*0+92 zE?;d7KgF!C;?{Ma-zLTzYfud1E#$+#{gmk;7ua_qi}9wUH#CxajJH|{`+!T7dME8* zypuIvv5a>reHwiSXkRN$%roA(xr}!emu~g0))aijc+(r-`<_Z+AH{t4{glUezoRG$ z%4%J0H>+;AE)>a^9{Y+vfs+- ztNnL5eYO9YDc273bM^Pmk&1Rbb|9Lz!+~X-c0BNkqdX|k->F7%nLIeyUu)-!;&QbP zvrt@t>H1U@NAvI3;9vkpb1k%BQxtCwlJ~={ln>utWCf92_t*r0(rf( zXM!2o8~Vl-BYS7MwTkJ4^)2O0C$ausSEiFp-?D`1YL6?McX2m4G7{xAnlBtDwjA>XACmvB zVZsTLpQ-I5BX!-v#xH=pR?D$iVrkWVAtE*OhV67V@42Zjjq-x^$2wuHu z=oBt^^)`xT8uL9k1C!s&bj_6{r^bxuvH20YFFKNoRoWg+2R)x0H<|7jIj0 zBgqxIJ@RMO&9{=KZIA6j$%$4M$Y0VjJQdl$G+0}(%*S+vg8RRq{FQXzS_)^e`4?Ov zU$2uyqGzx4cYZ|A+T@Zj$Zx10AAp{5s5In1q$|kg|ETH^0|m17_anMFK!G103P#t> ze~Wtr*+YT4W}OB1kmQi!pwq zL6^zdYpCGUmVi8T@rf;<3|-{RJrpdm9Q8sMQTt~j_vpfKE=gI~o}?^nafPB$`uj_r z*NGjVC`t@Pvz(wPO$tRhhQ?o=*N#L*N4%iuxbCs1^BS21imEsSv*@O}Dg%nl;bq&| zqOY_)7dfx;)2H8eUgcAp_SAV*cUA3IY|-!cJ$`atCFC%KV*Xtp);Z%%%!K7^@nq9A z3)2|HnX2MsRlv7Uyjs<#g)PpyGw@5M(Z@`0ST(%0R19YtyITtuFpc!e+*77etoP4m z8W|@j-p8Sn4cSsCKBBP;W*U+?e6k^l!zUZ!IefAqMuY5_2DKLyUvPlpn{H5C$03#t zVtXil;tIv|mD=JTHN8hN^|^c~5ecD0x{0Yr*5NTseGEN!Y)RLap+|7cn-DY*O2#&i z037G7vh{?L2%6u+kxbnTUmU5fS-Sv71{x-RgHf`azFA)~v0eL2u^yl?VOx{(KB>$! 
z)O99D8gynEIbyaRSV&4``g2lJoNB1)Pf9cH$y9g)b%oDCNfLMQvSbl=x=S*eLJIif zlIvy|_~XPJ-dIw?X^oQ4+UM4wwEG-Nz6nv%dA_BW>#&7>f`q^%age~(xnr2Bcw&8GxlTuDF@Jr{-|_*4m!38l^)oFj*3ueqdk=Vcn}>K zg31IkC=1$;PHsYF6Stw0U!$`0EL0{!WhJ?&tUoF{UW6+DFOIG|Ac~`l&m0`=f!uJw zQ3Mq<7HkoXiD;sGAj)9_O(G=5lB!8Qg;*kLB8UnE6|ezfp{ZcU3aD}B3fJgGK`a!F zVrXQ7;Hsc3%-%0-(|fj7$iU@OX-f^sLq zwX;y}f^F!?5S06e1I^uQ2 zO3UW+Qr=$!Xx`5LH1D#FnD1^w^Lq~v^S{(Dc!`%`sM#r}`31V0Z}75aN1A^^ zg9hT|fm1jAPt3nyPxGIIfO3%kpEE6R&`5jXWo?eMz3OXE5QFBd1C+CU4woG65f1wRJi-L&90uKz7#TF?bofF)b7 z#$GJgZbJ)lCMpZXf+JdK8D8$C{h#5;!;+yG(?{JSsUyzge=E&Wiz-Q9T0c~tNiP~TYq5;rB|Ej3Ve z7!|yRs&Qz)Gt?Ao|M?Q9B7lT9!@0I(oa$^`SwT)cMf=^sGQeXptNwR_9P~1t4HgCY{}TT4vZMQ{lJ2iF zXbua>xdv^MkeptAcA}7+#7E!$8+`Qbf6K2r;LYuo1MB%jdSH7+QZ6||`J!^51kQ>F zEZlEY+->zCK@{)c2-C5<_~QitFcJ@n=In6J%n;t69Bb2h=aTA$lTUw7 z3rz_K&ksF&h!!?-yjfw}6?ZN6k%eenY2ky4Z*OBo%)~!i#lr6L`XZ8F3c(R_W{l|s zg^NJbCOPL$csLWiU6=;1|t1cUiyW=3Zs8esLUqaAdtbJ(8xRM|KPJ$T0;yQsyomDd)b%BXy?Z zHpDzXWNZ^Xa>KlLC)w3h{Y4S}q|3bbG_m+rB*tUGk6Wcj?gjl5MvuJcO^^K7OFU}l zMUM{eNso^Ap+_er2qI1tL|KXeJUoI3kvY+?2R%C7i5~q@PLKXf=+Qr&>Cvq&^ymRM zdbCVQk5;*hN84+@%0rz|#=j4t0l0GQY3ZnF?z}kqBGfBeJhqgVk8R-PFMIj}PZ&=1tmapNJ#UxH;&VYZUGx>6li)5t`;tWG4&3j2z#1d6+L+lH*2?!_zrl2)H15LzI*r*Feh^Aa#$&`j0<@ z`@L@Q+%K;OpFvi@V4{!G@k(rKBg*bT{@PRkII0xm&+V zgH)!Ad81P}5s7!*=_#p_p7PMC;V4UcdkGTLH4+&T;~nX#eh_s+;v(&)2}oR^zrGrY zQM$J80F$kah8nfjp&infZRx2G_3Ahzdg;&fMxsL3bQ_7NT7(Q%^cSi0HwK?2d*TwY zS5{cg2=UZ7?rlFcx!k;g6o*%=t0gBP-EuBDDNOyKgPw|-rRoq*{j!-P zuO(;U>s*FVtQO@X6wSS}6$%IdgdW`39Y zS|55Q!j7K#+MS+R!Zq6ZTw8i3-i4ll+5gOr9`p=gsLq^`(KDA4gt|Dc&(<|_o$-vR zHo!=ZE-)3jkn8}C?2;~oSwKj%LV*$i3UsDZpl6=D)3e>rU)CmwB)R?u~R!S zR@m64{dRz`v02OZ2pem)F2{t8Oe>1Q#v*O>abaVw_I9GMF-tqqOW2sMm1GDTleOB# z!p69UF=@go(?rQWdN!bX%no5y3Gdgc6m(W}7tc@rZovw9?u*!>Ly;s3QKD)*AO}nrP6waT7*e)A- zHj{^6W1D#>Hnx`Iy<-{2d&ib?H%4qWI$OZ;-myhmzD*8bhzP}SKG0A92y_A+*2gy5jwB$L2u%#+R4_ayi`HoVN zxzJK4TUzR&L3vWK(~Xw;X|Pc$c5BcZsY)=G-jj+7!&fa*5i&|kha^Z^M~VnQ7%RQgA{eIfkdnxL;=)s@U`eIR?IxtK?X^UPvc7HM(2D0}la}t{Pq$QKT=gfqUdyenvZ)a>`qQ## zK6jU`n-&)?mTjqCbc|&BHbQo+hq`LnNUThr)GvpYf%mCFEGy8UFEC&>$}YemEu*j) zh-DSvZNRoM;CaBdi|lFH4W6}T8)cgR6x&9?ki)iWB`v!r(6Sfq^t^;?$hO>ri)|51 zqF&s+eO`{}`JM*$7Jo4lonJGlx`UoyXD$$t?PIv)FG$lNraW{$tGw4~tT>O(?=zjc zjvd00cz%E7Dgzd}zV;X*p1-K~e@jg9kn}_@d+~_ZWnZ4e*BBV`>F474=M^c}v9MBo zLZBCLMPeg4dv}t@DDlDso~(zzMi(YqHlIM!B;#N^VvIHCnu&3~Wk)sHy~v7TU#N2l zC%K(m&CM;TiG=Lv^g8uV*e8iQ&E|EQ3g?n@ncK;MZbUCk*+GuQ1xq~Xh1tf7Uy+shend=Z>ci>uZy)uL2jbt;Cmvc$|4t+H??YPqJjbu&-Ra+FHMU0`zh2->|2Fod ze>XVLzi%1;u5=B>EDmbtJ)mD@^q({dQ66~aLLm+>Y?LTkKx5Se#Pff$FE>6 zz0_BrmmolQX)L^6$FKNY>iAV098c2TeD4@l1QIiy#7lE*=%s~T^b%yTUIMPQOWQr@ zr5t7^3w z$dAdvXajnUZPe!6IT+8c{rnv{BN|W&o(b0H@8npnal(6YbkI(+w*zVR!t^0(3MNVqQOybSfg>a*jr+`X`%N}Cu&19DNtJAcSM@`>`&Ka3tGekQlA=y43ac3qS)%Sq(5LlO z_n%(!j>O0DzX=Mc5%q%*)z9Q+oxVUK>d#84-l+e}8BYZTSKnYdT1&3@H;lcDqgCY< zF*rJ_;j=UxeZuJZ6L$CKbE|^s`Zr?dTs>+)?nWMpWL)K;`d0qZ^-uUGS+1;}im}Jn zsC-R1qy{?AH}%_xJp}!*RoKl2m9NvHU06B)x?(Y&9c@U|kfY7IfgR+i8I>o=Y5A5A z!9vS-sZSoJ<(7%Z*U|FMD+5!UI?}S7+O(+r`A(-+Z)9-W;ndoJ3_hEjT3e7|Xqr=N zJu-|)b!xRB!`SUktzRR<_$^MYqmW_xUrwzy$Pl^NsdWf4%-QG!1K99=k`o{!4U4xr zwYnn1vSg>$S;!EV>C`#}88)tWY7N}w)ar-~o6?=yK`br9sXYZ5@)Die$05TB@Vq#+ z$EP^8L+d38PVElJaG}-aApW%5_(y^L6SXmJxc!rb))X*R&D=Wto2Nd9k>fJ{B^xw| z8mhPwV|WxX>j5>6(N1?IXPT<}>+qrK zCrLH!AgOFSNiE$*Qj52e)S`5f3N$XMS*av7eG5rV-b_;CH<8rX6p{*4OX{LzlDZ&? 
zq(&u@)QF8FRlR|vs@9X#zyy*yLf?C*G%PD@iBU98{Yx5_zFiuYoF)xhhm7APNW%i0 zsqrU5jj^CDOT%Ok>X(LL(2k{Q&_G(GYS5YkrRq)q#Y@#~;N6v~Vcz;)sy0E0U#c#J zWNN9p7-IiYb*=_4k*c#ac!X4)tdR_ns^gT@m|#PVK#*lT#^D7<8U{Ic#tJDlHh~q3 z@DyZxAW&mhtMnjtk8r2PH+-Tp5ie@;<_4!};1&N3IC|0Lq0eyiU8o)9PEA2dYWm!P zn&w-Pj9oBw;U3$N>F3%nMqtuqM@`GDKt{D2HN|mo<5ysBnKlX3wD-z@3q(IgjSo>1 z*tjN5i+eBpiGS#YGHR-t{KW>*^p7=Dq12ML8@m+q;o!`yL!egIVNcF>>+vBR6UWhw zrZ*66p=Nsy#5B7CC{r}|srsda9P0uW4$(ZMy!t4)5M^M999XEmokh0!bAm+kCmLYp z+vcTl{)B9kX%sKWHpG<`M;l}g7N{AfZF5A!1Yg_(_|44o?WuVw*KQm|D6O?@)d)Qb z>qBk}J#s4&-U&U5D_0y6dKj(gpkDeWH96<2vtNqGe2vWMy3$Yam`I3`CM_YKJuEvW%GKw;4BAcd5-xHrbUzmV)*Xx=ki zSlY$Ysm#YG4p>ReEX1ybnk%8FSD?oo(9eG@PDogq20ay)s$fnQmI83m+$~Ux)SX(u zcz1)0U%pIOdR2cr6`hN({?Qwqi>+Mgj?Tr@&R&4dEovBdADvsk(>l*Za#eM&SN$Xt z*>g9~{_#>|ISL7Bvyi26kNx8*$nx$$wBU*=`tOex98txzf@r}8RV;^uwAH9$4`ibK zf-3ZoiS`|;cmNq}U!h86UbGO5DnHAO7RI5^bk6f_!W1Id1;48ko=6)!j%IikweTQJ@Dvz4f)e=s#UNJ%a=9Rv z1#;;imke_JfyIJc49G2%(ke523DN)# zuaO3XMtr`JR$ZPb-At<)OFtu>Zx z#W9^z|7xSvK39D<;`k=;?a=DsHIW)}(LdzsVX->=np*(6Q)@%3=UBGAKzE8Qscq;^ zksGaETsho=?!;amu0eNV%IlrbokgMZex}uNSG|U!e{xO7iqJoqe4?p_ze8zt!DPc~ zT79C`_8__gKcV^p_X<~6P4R+rR}Cf`^iR6Jr49X)r0Y27xHcK)KgYFk?qeu~kJViC$JbFJAcy((0 zc;tSlpEIo)k}eDl0alor2^qrB_-tWluEwEV7+M6gsxTDhXfI)C z1drzrP3H0Zp=k-i&{A7kqn6T|S#bYv5{02}P|Z*L{=>kHKSCHLgZuxIA`BaXYL@f+ z4-4dN4O2lcR`XsAi-KNkNEe31@Lmjy<-Hgd&+jxW8Tyh6eaXrYhQWM4PZ-AdU5A;n zg<;LSU&C5C8mI=?E+yE3hDdJEn&ZKe9I=L#+qGbaX1KmmzuOKw6hSM^`etwJkZcfQ zkrcc?RY(d*pVPGFHuR)RN^7wjt#u4S9kkZNp4RqL(%KL0Xsrrhwb3}HBDpdeFLI%^ z(`;yMl)kqy+NP`W@Rj zmQPKAY|{FFrfm9~HaKukL&I?1a>H2O^9FFUH=ImB7H`y0o`@`8qlTIcWEqDVn$nSF z3Tn8Pf-LaOG_*R=hI{aykQr9$8f5MQFBjqjX^F_(rUAX!-0VUdp1~@P%nVj^nD${6 zN9J73=2T=(;-p~>|M6slMt6>cXdE8WvXeIQ%qFq%bB)I!QsGtk`*|#_;R|u&{MztX z{INO+BC?IY+R?_9O4_*IhBoe&(#FCo{->}^UGY~VIkK{@ubTV~VeiJ%a@t7UXk$gy zlC$J^RK>=3=e*>34*0;PypT$Y(gon%ia)IF?7`T@syLg}@&)y%sokzG5n!i=@Il#D!sCyAH9;^mtM*LK)iBXfBFPz9Hjg14{|zE zf8CE9P1j0`$?4YmiC^L`oAp;d#&b(}2>QzT5a}cFiUDjI98hW;b`l2^d(kV+hM0pm zAPZ6~aX`9m@D3c1T)xzd1LCH6enqd`h7TOOL>YTezzX>2ue|$w+zHwwt6I{EXBSV_ zdC(^170CUX-CSPfhkuK#iTW1LeWe*Qm+TGTfZZlPeM2-k4O=EO4J)@~$#=!;?f;c$ z6$dL`icNtwv?)a2yk4FK@ZY8g<7kt7m&%?tEzsSXBH!huzY-$PLY7_L@?BY`V@u_k z3!#;z!S;Ey=@0$6(ef;r-nLzyIl`Q~MZOEzm)hmKWaYJY0c*&#HB%P3hX9@8y}TTGvJL zT><449`ejI;E$5;^3(e^$umpJEyDl?UGAorXPI=0XLT7b z{w&W*uSoerp0!ecZLU14$X;wJf*)C`zaA;iiZJKd$ul#{t7plx0xQ;=zW+Y?krS z+N`WvR)M}P=HtD2+>Gz`i_P<_w%p2^Z)W1@Mb$t4h^Oap9a7POnwM~^pn0iPcrAxE z9e_8`oUd=J5)N1m(&ppVM)mwlYyOv~)kz}EdmZp^X{}OwAqtX9uFm90+N*QbbDoG- z|B}(GJ9*N_)jTP^daQ9|JX#4_*HvmuuU0gUe2-Rwv*qe#cY3wmjb6P!b;A$z>MMix zAX?*BZjqri{+EZnMSsF@yny~RPKzB#TWq<@r^Q)GTcSpFVoQ$3wj8ZaPC=`;p%z^RTAhztj2xQVQjv&O*Q1uJ>1cHa zYPn(F(~j0iP)mE|8V&m6m3h}Q^d~m&-;4f~nNEI%{#2OrccMQ%y=luo5ue|u*C6Zf znqVVdb8D5@<6l+=OY-S8-)pY>u_W0#r$kuil=$+MD@j4o7o+~5*9LPE#cLl|#WdmR zYU?ahR4YCt`3vgB`y_wil>z&SK0ea#7kVvd_K<$!wa?}B+I;ROxdu_oYd|Y?EmJ>D zL?aW>wPOOkc3w%ZT@H~hr`NGNz3wWd*FWTD%=K|D^!j8^di|TaX^m)Npgq05sAjGN zjqEo5{RE9PRsA*;jVy&AI2yrSIO6r?{Jmd~=XAH%_gm6tpb>$lQ!!}72nZe0>sA!t zb!|47bujF+5r_siK@Giergq2!5+>>(30w^m z?ak4;6#+!;r@M8EsQqo}O_h?~oC3=@QLDgsBkG7mqK<-Pfv6Xx5cQ&TqK@&VH)n(6 zov4$Y>CFXB^ybnGqAt?4O(E)HEl|;L!>)#?nXYLpQG*Nn<^~ONCtv_dR7BmSEqY7B z1S7sg!U7sT8%n}Lb))8zu!x#3O(ZN5z8ex258n+5OLnI>Qw4f68+vpQz8ez8Kp!Mw zMgZ85ur7E)B<$5>y&t`KN_Xo!Qs#CMN+zgLm-aSD3#eckk4q$I|iJCl@kdx))a{S{YI`UbW39Zt%U>}l&zW7%s`qCu?_9cgQr@%%_qDtge?Ih<;;b)mjt8!2hlAL&cV z0;`sMOUh#1Y3p*WX%{KWv1FVgrBT74pR}&mV1H5~lhf8M<(2iMwAQ?{B~ zI~)qxL2c(OiJNhF5_i3|UA*dL#Gfd%TW8_9#?1)NLT#p0EOF)mv@$@emOx;T31$8~ zh*lC0$6j2=EOEA@ZPoDI2qn2tmkCWbTaOmNk#B-fhOI}D>K>cLwhn9A1ZTpnB?cwY 
zS}KK7p|unUr4iPW3`$6UT@XsUl(g-6dCk8l^sNv|PXHCBP+Fvw?G;LM zxP!Gcizf({rlks{5s+iA&Js$YwNe4n?gzoKr+J^PP|^*p-qirs#^*3<#}h*jiS1I; zOaXhJu>Lh_@pjoKCZrl;8J1O#rdcW%|MH*n7ph_-u}e{IJ-R|ezBwB0{QYD3#c zRsVDy_pGU%iLf`QlI@=v|9FDEy3MKm9j}*s-cArmrCDs{Er=kU`30$3||lb!_E^R)>X?Ep=SsPLqyD++A|Z z9()kwLOcYk$%PT+t~1HSsJdyh$;A>Y1&Yi{fzoZIK(6AI^b5MRZZl3=if(QF3n#^H!bu6}*3NA>DR~!8 zO5cr>&ZAqqHsB;Jx^-X&PO_j|hqmLS;sl&jk8T}H$4M>dmL(AC`xM)sHJ2|8#<0Bi`v*`R5X{ z7lD=KIJ<}w<=q*>6_Gn(+`W0GP2cb%$!{wUZztJp`Y+N+uCEKdqe+mC#!mE3Q-*Z3 zLL+IFj`nk-ciyAf%2V;ktbZ_Dc}XhyAh-!U|!N#A^yWGAa_J`wLNhDU+D zyPEpJr&Uy5=|wIlRe$?4$@j1N5b_IsO`bnuXJc)|VRCprWR0PkZiA^2)pS|!WM%ok z_g4lg%Su6IX)vduGR`xC%D}`9LuIktNGk)=I}nvE;-*^J0&c*SMS+tQm4T^$0+p$q z>Aiaz`7Trz2#!`%2BF^(s0;uDLr|F)w5))ZWeJdLZ`o8x^5ZS(EhIBk*FnhsHh_4f zO-)eKd(YBHHR6WrzgBZ4T{9$}V|4@OMA>VN&fP`XI(&m?uwAnU3s}C-{}u z?2)f4s`{TGkBbhy2v`4M-aAsh4j48c?6Id04m!{WrRB8|^7t`uq%GGSman_3KKwm> zP_4hZRlcry;?05dLCd77)#8KOLGTnGfb+MQK6o>!>M4C_Z!bP{op|dweK<%v-B})I zEU)Y%k8jf%hsxszX`Gyej>v4GBb={d9X9CUNWPG@zeW$oIMRpX72?A%2m0_!FZ%FX z9wzMY^!O(}l3iRPhclvNWpQhWbz>@Yj!>YAHJ2tohrNmubZy=xO){A$9 zw#QIv1`Y~XZQE9E5!#Bk3vGGmVH#9tLUj^UFXE&(Z43Bf+ZGAO5tC027awl79{a}b2 zXw9_NB||CBT8f2IG|*Tq0uGBPD8oYC3JY~BaD%Do!&7|5f7t9!A7Lxaffru~9ytNI z0UjC%LTgRQ1sEOPV@V8mFG{+B?iT+x4n6v;#j`h#d&T)H9tG=1p2y?w zTFYM}@zGMlh$ndbg2`Gp`e?1z1F+B(YB%sDOjxcVVavFg{&-xr?c_ z+ktk$t878=3BZii+RSIH)>=ZlPTJD03j*ybccEQ1&a|r)0!FA+C8b@DWwh(HllX+g zBvCuhx#vy6KLn4bsEU(B11Yb4?X>e^WqKc;%q?!fjR8yr@@lD;?voctDDG?K)@f6!V{xSqZksJDY@(+0f=0M#3Rh%0alxfWdkp8BD=l}ZV@=G@};jI=BQKfd_l z6Fx7Iap*;G#k$d${Dxjk<`I(@(}E;9;)|&2ueC&zT#&h^ZS=N1w_S# z@%UFdoQ*xb(Tn)%pReL+cP;oWCRw~2@*?YOc^JLeVAG0ylg zl)gv_pHnBk$Ux$Ye6zO

ESJ$ML)xZEl|6g8|?w_+;wZXA3?G-PRR?4?OW2!3X$A zvjv|>UCUI#$4`r^1fP|lX9+%{);3Y_2?WBJB=E`!K5G5t_fDN_1!wjF)>s~df} zhR~Nu9LM%)QHA|^JQl)PFSje`%L4*^Spr09cucJo+HBMxwSLi^fVvo{TLg6rpe_pP!r@pA$0|4ugyRt~ z{+#0c;h!JWctMTzS9HT}6j}7OoZs@p>GSOC_2Qq4W>im&~Sg#i3@1BUu7&p!&dsxw@>Wo+qb$lZ!~OyR^f(*MFsT; zrtbuI`pzu~2xZ^(u1NkL8m8dZ+PlH}P&YKRR7u}`Y%q19p~=BOE%h#li0>kT@ml)s zM?;UDlEtBp^xaN_;g)1^cjIW2WQmu(_%5$}jAYYe8PlEvx9vo4Y)#ns=PmMnQ(`MZl`ajqpZR}o?vn`;)2+Dj&;-)BziuG)x7 zbGdQ$egdy~KRI~%ar%D$wAHu8_Xchvy}xa(e+4VG{Tmsq+V*dJs~3G||3+c4xnTc3 z)LQnkmLm~EU)*qNvVRi?ybkv7FkGoZ|Ffh4cS=8u43^DzNIt5F+k;2TYCmrwg(DL1 zI#&$H1cvHU@H)Q?ye=RSuT!PtbrDVsJ&-XBTnbp}&al57!vM}&pN(;Z1H(VqG92r| z@Mgkrt~bMH+!$tF3^!`NIfd8fAck*ScFn{%uspdQ;f2AtiQz6_epX2cM|9cvnY@ia_1_HJ1wI|;fPkG zl1!fT?I0$ZJz1Zrl6-g7XAGYHSji+yELqK@K+qvAb`A(0`d%ejU6DA998^^;hA2pD z<%$_tY6?B~mr9a&WzbM^1=Pnb?COXmhpR>%!?VVO9o)?%#p=^OD#@kK$0ad|rFO_{ z96ulY;5bfZNjr$+MpPv9$D86Sl49_tWaE(ecvHGNlXMD9(oF#WXvvV{P0X?_32&;k zZ0U_Rb(-@B;Z2X>RWchzMS_drBYJ(fbzQr+M$!0K{_?o#IlD|^QY_c4f z&AFMOQOriKWHz;OW-GO0w(uHk2RB1X=-8smLr>zdxdO8lwf`)`W0KvNZ8*&7%ytf8 zwhPOBPU8;)T$t@L*q&Nt8wcqS%ytu`L#S*I!Cz);gc_CYKaHbr5S^EKbSKe9)yx@5 zv|hF0lZZB}W^@P9Hd)UqtmRtkS%ps10`Qd`K6Bn_{i|P>fZ(~TCg`L~1 z`-BWK=UgD!`PjZc`1tUbZB=2xI67`3+QjUK=*FHVrT~K`l@tV2f4_sAU`l2`x^8NJ zVlhq|ID*-$4D2G=t_t>FZZzI7H zUtRk=a zbC#Uh@3&|6X8_s2>{$@%VD^=6%)S`{y7sTTT$ugs^7+N~@4d{~k@oNXjK62uzxOx( z{hR&!06;j{zv8Uf_OD_9*P#Y7Z zoT0p0)OrlU!_~E4fcw+y%HYpP_8@~HnH=*g-}{Ulkin=&sMT~Vk{s+Tujx;AKQ2Fa zpEMNfzpWy>Pq;Bb;m(9!&P*6*$Ar(7Oju~kgg+gakYvMztzg(Vu2T3gVRvwJ1QU+Q zm|zCO$1yh0l?ip=fpT2gC1=7lPbS<0Q^;{;AYsBY7nSgzUMg{1lMWC%$2G~O?-n_( zi3j2c$2BW699Pf6G9krrwIi1KCpxbB7t5w;kf-AsuXM*%7A*TN+i`UYmMu+lTn(rG zNN`-0k7baW@~7i!EtchGIj(BQvi<5q8YU|;=dO2Lm3(=~eaBTREjx6$FCY;v;=ZCa zdb_}?FZpoFuEhX~X0ZMe6|ojLpzRv>2%vDH82 z;@+^iP7>~&6t?FDbJ$>&-W(B;ta3OUdf{j0aDLkIN|l2}V_$+@0K^D8lR9FDs|^!A z!c*^pogkeT5B7j`o(Zf4>AYlU`i8(9?m-t`fVCi2r1KVO*Mv)Fr(3rCBAuNC z7*FOX;dsy4CqPYMjw1~kgLK|};ER*aj@R(3V5xG<)%L6-=e(*H9w(KpTALpDlLYK| zRDWe7Io^a+j>W(P&KxgRuFS+fi*%DEIL;X_Da=DX&Rgbq z5Mz?nM_UNTh7gHN)uTg$c4B2<<(g_@YO0z2gzQe{iE5T;ZEONLn0wuMDmj=o*$~Eh zJPL`_FsEK&`3}tKYbA5~-}I$3nNzI5oKnC*VNMy~I8r&~>)d4Wj7%LGCeH}QPUrPM z7Rxhu;7Fb^4m&Y$xyds|Vkd(mbE<^kkvyX>c4~$eZ+bDOf5Go2&j?Q!UKS?`FN-pS zm$@mz%ghAfCHLG4FXO=%BE00DTj6C4_(6o1+;c0uT;R-{tTYHOBLFBSyi|kFR(Ki6 zZHbrQv(*YO{lV!Vyz~RFuJ96U&Jn^(83$v%MEnzxOWl~<-;>ElgX+TMDn1*_=P8-I z9F!F%uj9{He%Ey9pwN-bNgX=krhYPtkE$xAR zE(H)1BhT%aGv*(yvlIAxnX?z4*PTC^8LD8;Vf-5BFAdsb!k3~!`wux>8*DS0IiCmg z0&_N8@qb0~BY~U_1$DvxJSeEScKRh0R9h1{5(ROvlghasu79dz&hI$@DkVq1*@whO zcrk^eJyY~jGKIf0Q%pAJ>?Lu26^U&m&R@Y4v)q|tp1>5p=sFA}UQxL+g~S6~_8X@7 zy}T-r#ETF=VG2mVRO}3L9KaNX+V78HCDY0eVz+Ox;xylWeTSOgUQq%=#&H&RFFDRq zDVd9#z+8OYnG1I>InLtlCC6C^jB>}BK*XBuII{@I%20~q%ub*+%Y^^i;Q!Dx$C)+o zKWsShyW`9S+G%$jXNn+q0_u{tIL=%M<#^aa$8qKuFeM#l#%gSbIL;KbYi2vnjM3Ok zcbw@7Errimn9f|rRQ;-v&T599h&Ie+s!6^W#g_tSFms8liav?fMa&+wkGXu`FlG%} zA3JqZfXZbh$i*4Y*P(R_OsCt?x<$rIU!ireA=CU+E}OXfV7=9SupTA^mrVF|1<>{h z?mk#=bswxZCZKpn>~g`0xs-!QJ)FKJV=k|pnX4dWu0B1O>i|3EI>Lpyj<;p5p@g~4 z(3S~uo8N6uV-^kS~jU=gFZZeU*X#;&V?d2I-G{S%ni#$nfW=_p>_>(*ENiot48UtQp)%Eu8@?uv-Pie1a#zi7BoTsf*o@r9cH`%ruq+$v`~T6YS&YPXEtkBqr!%{%}p0JlqL!rV8+;xjop4u5jOmV z-4;(mQOxZ(P9VJzSwgl58$c^RuX6j7`yMvLDw*3(KA&ue;`7Od2sh?-fX^lyRD8zR z5NONXPPj0)GH2$dvtw>o-I?3n9?b29lqr#nDIJ`c(k($)?~Rqc(}nd>iNbnyhOmAd zRt`=P)(^(Y;kp~!r0Q7ht#?v&wEl)fs{RfuN0ys^m#QPvrGOBTszG>6hEyGdm7glW z)K8JB{Su^Vg%#`%^OH)d#tuvw>durixuY|*lRHC0c@RJv+6?obG!#$@-$+AEFyBc- z8F!k7mbx(I9A~EdQ6uq@hQ=$Ias|wH($ED?DrFr1SVH+wm4?DlRsJO~PHT`Qn}md+7d_wo09%kX=A;Y9( z9^qI5OyTe^y` zLg@j#U+J*MXoYk?TjtrbFAt`g`9Z-;&^r$5AESkKY=Yqdgz 
z$poJ72xbs(3*oW+QwWdtWIa`I-4yKVc6Zh*$ADh2i{ds@yVG;ePd5@INhB0B{@@KD#}B)UItS*Ig^=1 zWD63B4DQgHPGrHj*R2d93rr!hfCM7zhkM<1W4-Rfn*oqy~p%qy@TLWAOeF=nFysEQzsO;v))r-{1E|2Achbj23|W6qF`JS0W`oCBB)^w zC4vYm4RHvBRfjl?;9K`P_`_;M9DHGAA`V`#N)ZQV{goCXYn!ONuj)Og59__Ex2ks; zw11;+$~^o*tv%}v&X(S91yvtA?#&7J<59hE&#M1@fWreCM=iwRgRDD713g3p4wqFV ze1^jjAH3tOgLga(-aak@>(kqv^%=|u?|AFr9gljjJ|Cx{@!N185q=<=0E%iDn&8jJ z=>$Lc?$HDp{1i048-5}h-vvJ$jqlU~1?BiQ_@QWgvm5IZ&cO-ewYbk$TA%OG_)?Bi z7+=J(3*&S7P#zEK`S=9fXMP469|PCJ9zEZupz#Y5(D?A0*>z|_R_&KYG$9>0H&~yA zlZHQ3^@%H=SwQNe^i5Yu{d{ZL&srX2yf~6n#+&!okji9T+rOkT-FyHxOU%qC`?~YU zzOG!duQP}2Yt!C-OZGK!P0@J>?z0s*O3AVBz>#PyJxPv5Ijj2Y4w79}^*Icbhpf-J znz>s^Ws`Y-q%4>9QuWbRtW(KyOM|Lasy_9VD}rRX#ZdcNef%8rb?K$@?OhY4k>w`y z^S%SBqJNd;#zU<-sCGQ_{T52U^=7`?;s3MyW&6AeWcw8PvVF2V**=^r+uNNZ+uOBI zwzqSyZ0}X<`_F#aUJLfUS0LNV@@0EZVc&;&vb{whxDES00oi1bjR)D4eVFg--po(g zOXa8bV}470ncvzz%UMZT=>z&@<6x;N_^1IPRPSl?=>`?nA4=iZz3Q}t5y zTLR@QD8p~;_ozU&BPL(AV^N-L$AVnhj;I{jj);A-9qPTZ9b#|xfmbip2Yx`<$v&78 zwCO$jU^X28;Kx3Q^<^KV^QNe^<@K6`>+ADHyiM8FV#Tzer(`pec8bIer#a6FB^EN z4;%QPHyh;HOEpOB!v_5X<<(F=Fm?SRHmI^LbTrAo%g4hfTuDWrFeMw*A+SM@+}R+$ zTMzn#>j~(SI0(LCiP}m4C&-w;y#w8o12~(LQ@47ALI!o zm{p)OLWAstLH=-@1th4sR$kNpll0vIQ61a=Gr+z^Y+wfCa${mMDoDkmSX~?!qpJfY^u)>^ze^P2M-rEOkkx z_gdzQTWh?+v%~%k9T)SgTR0sj$4p1!kP6GC7s5radjW{r9T|dw^FwOt=p#ZwwnY*f)h(@s*i>;V>Gb0IaUOJf9uJ$Go+?rF5+z%rXhLWP47uODW;=B&=h$h^ z6}S;6^c@|7uI?l0Ox#Z)=oLmeCexWWrG$lEQmR5O1GAs_+{}K$xp~NI219Cn>+X9trQxoNRKiE-ll*P+t%zo3R}&*v5z0rk}BmAKhS>l`RZ&bcSJlY+%?R z{vE_7@kJ;$%>{;yP{ObY*isT(;R?gT`GOQ{!4(z}UHKw3OoR8?%D?`ZNqi=lnQDA7m(0v-iD@P?ugG8+ zYCMM>kh*nYRRlL zJ(-1_9QIre!+vyxVYodT-mM!9M_GBer!5R0<_yCp*ud~`-pScgCugVO3na6%l`uTU z6^6(24$ih1uAU`x%@~yk!&jrAB@W-nJ2F@5$Xx8maNPM1$DRLhY3Dyz>d4$w(}P;( zQD)frk$Gept{-L|mBx;vtVf{X5oVyRHazcH9;cJKQi;27}SAI6Q<^ z3OX9~Q=`3bWC&WkKZ`zo7Dm6Druhh?|C+XO9E_23?1C@5LdZ%O zT83^i;mXmW5|nEbM+DO;-(Xf8LNab zQFbt9@nu40r~cXCxNgR#R*_pg$xqVdIaa#dw&`;}bvFurUgSDj1`j*zP?hxvR>$Bsks zk@?8E3f0Gn@3(wB(EPxByc}R`s4a|*;gg_`3aglJe9@ixsJT+rCjlo2=97paEX*g# z*zth*WE)@BFrPeQbqw>-^Oee{m`|KOWqg0|(z%{`Gyzr*=SV5~H=s6pZSjYw>p%Bgnq#5*t z4an;yj&09j0}OQR3tr8DPF~G`E-f4ICms7UR`XHE26{L^pcStMUDB|cfw(RJ8yKZ$ z1834eXI{;~1Xl=jcZ0xwyqbZiq#9Vss~K3$s~KqNB?b<*hQLuy5EyQ*8LUjNrGYaH z$#u$Yaytl&YmR%N%uqKT+M-M^GTob}+(wO0e^+Lv)4*kQmp)Twelk7DS8nwRb}53u zRdsh-l^IEU1aK zA7#d3)15Ds8ENK7xibCX6y0bDtZzJ!smus4Pn)dF@U>J#Dl@#y->+6?@QysI`+koy z0|P-1D>JC+?pgfuqp-!L*3#KzcHMA<8s_qWM~x=38(1LYd|j`CS93Ql_Tt0abQHZ91rQ;Z4s(RiWj|5@njc?#gmy zT3-FSlghL#zEY@8@fEoh!u#Zjs>@Qs@&n1h^QoU(D$B^vY5T$N8r!mXfeyWc;CxXp%f7+sYj zj{7{!$`Qsz-`YJeKkg@mI4e!N>aHH+g1ncVwRtLjz^nT=WO2;)a}gYlcX!}tR( zF#fVDjK42~3Fy2vVT=b%SY;0rcJ_n`*L%Q(I|592ualAPbYhuYEW*L0`&=o%afE<4cFiEh7Ngg&Z$-5^^8tx2}a7mvO)(a+0w}nYd zYl)Mre^fV^w9XwSrQnW0*8isA)m4@fLMLS#9R6V`)^w8A4kqozyOg0lpQZTn$t(p$ z>Sqjd2&257;)Dq(S&H27^;wqu$pI!^GGIV|GLPS5$!&BJF49_-Z0P}$zQ<}&@||=y zSa}2{J#~diulWp{ob3XW{wISVTW1Jz(rHj`4mpdIJsuErN>9#u)1YrL z#Vxt`H4QT6k&72-P%SLj(9eEpR z@C@Xwp}}foDl&h2gw%Bwkg~W4(^UvwaFm#`v8Di|tjZx3YiRIxJ*k*UgY&Vj{o(tM zK=1_}slckbw4}lxA7dL0{wbea!dgG#W5S@4vlnQH13uIm8sg+3hPdXCGc#$(FfBRL zlZL4DS96%G*Fg%Jrn|C=K~qOUlx8q%5C?)Zhb}M?=2Hreol5=?fZCA6;4wAx+o^O%Az; zwcgW{iwYXjp(7Xmpdl~!k_%lX^#M{!klzd07PrLDpjON>=8f>BBc>cjrnd)~TO_6v zA3;O@bAq`B?qqH=XvnWvTzr_hjiVv|)a~pJmxus(HE&gZL5UMaJ%*;1W4xyjfLa4VLgpTY5p%dL8G(rZU z>Yfm~QURfxTp@Iq5<H#t4-iSX;dY>j2(RxA5q2_&h_HbO?Er|V?+=l7{U9>JON>PRNaT;g0j zDCCc}V-St}(OPSWt{(uC?fS#y2s@apwS~#`GMIw*pAykWoPzhCQr}yg@{cV{og{;) zD{WxvIcu2uW&lhZ*dL~?^nq#TykXj#elWeS7ffH|3DZyXh3U`wKuq7>VhkE-V@}vX z%rjev?JI-WMK%z7!Wv?q4S*Sa`@@VyJ}~2iH_Uj}4`$-^Gx7SFc>PSgeimLoYteq1 
zjDe&nIQt~i%~ggFMw8{H)Su~Q$GV1hG=*YV4BedS3bRfamu{n*)pD5iof2l@{50#H zF$~YOGnn=?JxQg4t6;E-e#htEIbmNO$o>6%XB_#u}DOy^>nFxywz&-J(?~P0^!j zh^7=tZTf)JOibGjn8^KOZNzE1FA9&|4VW`bWsXCK7$?=jN(#U|D+JTB+X zTeFL}qJH)p$rnm*`EsIAe6!Zdo?21M$R~tE(>Y_Bm;EiAjcUs1C!7tYbH*7Swy=Da z66ORMRO?w@fMG`|%k#x4j^%k7BHCG=Q%{&P#eliqb3fU@99$&lEOUf88e5o?>jra* zy1|@s8O*sVhdDPLV9qTq%S9CIT!duJ^}^r`mg~;lSLQtAmYTdGj<=gH;qB(j5e`Y5 z^C!2Bj-lbia4aEW z=HFsmVeIroh&%2gs+E!RMu6Jg5!61@S2Tm#KQ!P!qI!Y?)S-5wIwpifgL=Lbs8@t3 zoI$-Ym<$!w+d^#1L7m^cOiAut33s0b>ch>-OlsS1>|-JBJ*m39b;fMsUVxWfv;+0G zX3q-Z9!%BdsB#Uc@3t)bh4cu#JwBbnbe(bUPUHQo zAwIwf;)CkG`!Deb3>)?zh@XCYZ4vp|qv7Nf^0QM!|1TjPKd$)Yw{~U|kB)}YwXC?^ zMT}oF`QTfKPmh^zCC2Y|hWP#UHb0S{_4W2yocPU8vTdOS4U zDj*&qH2(bUb-$5&)%BTYh`Vp>jFDpe71QFch`Tq9zumlQHt`@)i~kGp_pQbF=Pe6P zlKTO-H8)8Q1SxbT_bP}O|FgMdK5>u2gn`7p{q~yoq(>YkvxoRU&{>dpyl>cBNv&|# z@Hep{lMmRyJln~|{}Jc;7@vMmJt~YZ#!`<`uBMq695U;F;=Ea=$Ge4N?QOwwV)L+0 z`5kpG<32$$HpTY_%v*{^U2$FuW|4q-|BmwY73a^roji?wM!BM7{!;E0G(Q_1+~`Wn z-Qmya%GUeCJLt;xrpXuS%8r(q=jqBWlx*nA_f1(=LV5Po?IU3R2}uo8@fpm&@X+T^ zIwLXIc8NIuhLmWaSO)WNN%?E?q;vx)+s}U(hW=UeUt-b$asGQe{>=^+*oF>Bg$2&8 zumC@b1%pFmF5-eQTuoaXWgJ`}6pO}zvxH(*eeEcrI7S8wg6(0!oTtK8I?l-h7Oc00 z1*u9{u$$W<7wmU~1?9ZwE}hn};2RfMU=UzIvokDsq<{s#a+BgTxeY9o@py}=l}G3T zRB7E3auazx`oF3j#Dz}$c`od|Q2AP17*QziiCGY)=E!@zqYIbml*2#N1 z9+cw%Sg0qk@XIgc?syt{!NU8-r-kw!sF{8)@1dg$I}79vQBTN9iJMq5Lyv zy6A*X?!1&Px>+bk$LU2+bL38V(a&1Bb1+?OTPSy0N*CMf)-TO`MP+2Hk9l(VmCLWEAvka3+IB;@DFW#?%^G3K?G%M>)>o(5lnb?LT-ci__;uCQ$rK}yH9RYY3Nuax2Z5#`^#<0h?wxPP;T8um)Pj!)(7a4 zfjM%k^K?mYq1T@sH+)^y2wo!ly#F4@0ZPF~O@MLXrVJ6lp>pawaeN0*>hc*$iK zSn{nwnINad1#*g4Htv&Cyz<^&IeAByyg4B!Q4!Wt#3euJ}a(j6#X_MO%OL?2zyUhIEbGdgh+5qI<1tC@&U}=14$6RsgjxaJ5mhOwV z86qyd9@g_mSXvh{buKJ@H!E$msTr32+&Xs{?HyzOW`uAikLzP)o|5ZztdxvYb3(pdUn1Y0+Xt5Y(OX<@I{=ms z?hnhS`M~nc-mv_5KUfZ4u>6%LEdQ%7tmxebR!r6Jb$H$XkGC%(8T&)kcd)8;(vGpJVi%XZOa?vsppMb?JPB@Kx$B#)Sxn{ zL7pf>3eUI;oGdLvtqPS+10@j4%lA^pTDq+ox$SQkSvv9t%N?fCH zf;HR24K5)y@fT^;Vsw1HKvH|@v!EBLR|B#gRE4x zmE(0?cChBp@TTtKT6@d+YGqr3!Ldx)nvD@y;#xOrSUbW%k13n~pld%jtg%tH#fCb^ zife--49{TgjL;q`aqa4`M`K}aN$6vHaqaogW^Y*gZK&2&Tx*`5a1Peqji{R~u6N;MC6Vj%E=fAwO93{p zPU8&g(j8&lPFq-aq#LY5!Ft_Sa#)96pX(5k>tceIU5qVc7rp7a79G3jOxNADgLMzN z@VxG2Df?19!M;?LurK|&nEj;(cQyOcu}$nRT#~oWbQCVpySAHzi|uV8-Gz%SoPO~W zr(gUa(J_7;>1c&mZ*^-oCO40jL^@ja~$X#K5kK5}G5}QiNlzK&q z@j%OGlW;b*v3R!dG{EF_M|c{EUP{7K6?!QNPlIqc$-;fPR(1UV)T+uti8ZVrp_hd# z==#sKvanz2`e1ZFh4tY&S=iTf{j5S+SS?*2hjq>8l6{z73G3J9$U?i*^_z9FP;a_^ zN1-fqBwfGT9oFY-WuZ&y`h$8|Xa-$hKlZC1I9w2NnJ}y~!5=eW@`J3h`Ei27ZS0{Qc+CAa>J7xBDNYkHxcosgg}#cTQiCTwbv~>O@b_z zixWr?;0~DteML7+(GgLsH%uX-m&DX!eDo)-n{|e%L>y$PM7llQFaw#Q3W*qi_w7zM z%+Zpd4lEKw)kPA7VRUvRD6bZwZc_cgC7^fo_PG(0{Oni!B${(fHP;%)vAs_0$_y zw8pGr`*Yz{7>cX5V6n4BPPow)14 z%XhjE=B5MB$hmdmF6Fc~6am^SQVG=ry;vKnaFB|>kOmG0EB$k{l!oS~l9$ZekL` znc#VJ>S!yFN@#y*%feq8w@@SNn)+UA5MRH0xy(57+cJ^$8| zysRM!g@HR4XcGFU-YF|&caGAe(K>b~7Z)w2t;Pk6X{9V+S}6;dR>}gVEye|pX$!PW zE9SG^SMr%wRqNJ^Y27)9juyR|i`Z_9fdl7MwZ_b4>CluugQa7RZxu&nNH0YX82H)o36#pBRQ5Lv=)uyxOwA8jnwbZRr^~Iyq)7(1 zc$U^>9CMAOp*dzBOS|F*NoZb9THpam%axFn;si-L1tgu8K~j|qBt5lEFkRxo0vw=-%0&FVo37dX2|LX@@@?Hs>KDt7( zl>;P?;Ld=qXu(YWT!7?h&X61@gXCrIkevJ(Bx9&oa=sHJmoP{!uQ$#S_Iz@LWPH_s z3VX_UMwMv+_r{p$G_P9MLqdM4<;pMAvyCP<7=!Yt7j0h6sApwZsTU--M24S&EG+$Qz@4KfY0B)No}K1z{-B3x7lpW1WK9Imc^=u7MSsLgnqPiLO@F1idG(J5?RG;dNL&!MC zI*)Xss=!xHRAu=3iK>rqnbqb|!%| zEG1h^X^5RILdyMUWu}<&oL@Cg#qTfKksRK)>07{dI2(qA(1*u!&@5sc(nDs{7Y;}mL zf?B6{(9dyhKro)3bh8VB@xW$xzA@hH6VZL0xOt2cHV1Q99_tTwuz4B>jj(R#i43e; zQO`xKEv~RRp221g!$GZair>5ubz4McZYV1vuT>4Fcazrv4afG8*S?Jf-sH6x8U@K~ z4`oUAQ)r$EXl9 
zkjLm6B$CHCIb9)-D;!~S9rsCnT+CfjAM3f>(Bo{hZjr}04J{*&@$vYR#|c~ve~gBi z$02kxT7(`W*4tz3^ACmOv9FFi_Qd{>CtPnwo&dH2!|*o$8~gJc@6Qv-Vem;-1iXPQ zR>5v+*rKdA#FHlsgAB+MbuhUiZt39&TYOB9`j96?w2xjinRNsG8ls~ zw)~0tw(WlTI7B9B@HIln|&t7Z=sojZ~+VghIW!eju4qxF? zFijm;OKu6LZt@4<_*Q~|5+w+z>~QZ=NF9p9OiWerCZ@*nO5c~+LFxit@%zfQeyMiv zF}G?TyARn-S-y53%9N100aHUkYQ~gayT#OsrmSAn9qNr6h3i>1ka{gba|Tku4N}o; zkcu%JseeX&w1T#24`jYmMI_jZHyU1si~9bgCPg|3i(iizo$ z`DftWiNl6^wdIVLkoJ+47urtH?+W; zW(dxZ;cC3(M+-*MjDeOKSgP43swt%xpnzmaaW)wBB; z)^fixhuyD`!B%?|mDzp0Gi-Hngstdrz7;+D7O?vXT&B3Mc7UxT`09UOl0NQ>+%A1T zfJ+wleesiJ_r36QX7@e#9_YTKRJ*?6v<-WZOT?`c>IVGA9>hr0D5JwG_FypGI@xHy zOeik1T&2Q;;(Ajz;Xzs5ARnqR^S$cU*!x3{u@H>JjuN*na)+&JumM}JQF%RK>jA@S zPsM!vQd?l_3FAv^#e9|VMWbTAm#OWUV!m7{Zsi@iW_bO%LY-9W`J+Oeh+P8HgINkS zrexUq#9-X5PzM->j#8*G3GV}i+Qaa#5el{3;Q3Aw_rdULnj)^l{H?uWUbXrAjf#1d zSnq%Ej(^|5$ijIAh_0xZmlt|so0w_O?>;ZB;lx(Oyu{m^t}5pF8(#NT%u^d)Emq8n zG=zF9=Ee4eOgF=TBZ_!W!=Wt2JhUnaig|+!;hBng=sFdlh<7qRTd9c0RWL%KZg&$i z`BwnG;GMrnjP?zZy>Rdz63xyh`qDF%wz(Y z>5b_pg)|1x>J7>oRhW`2bSRj!a}G*--Ik%-Hg z$|CjL#u1r~wcY1pf28Eb9GPIO{+>4L8QJ$XElKQzCI;-OeJtY36f7Vm6BmFueRP zi}W=|Sj2&OFNGEPKj${sy`C1N{?;RoY4;gH8#KlAw9N-4qdfUP_^yJt!2}ijs z7#9|UEI5suse+4p!L}$@*fx`IA%k&gQptjWo0NiaQf!e0cjm}~-_dPLb+X_Og|gsJ zT3LuS-L^q53z6f(BnxrErA`)tUjm-+3j6FkaoeWPVB7ZQRpGLbQiRWxg;XMxrYxk& za&it+qSwhbU0p*v6H0HPI!>uKK6{5*J&X%2OhD0bTcPn;4O5okaAC?$Cvn?3qwil# zSz-R}2PSlwzWIqMD-Gm(CPW!*&oLp!3ASD5?!bbMZo6r&TEc|5U}6v3>S|Rwru<~I zab<$9xw?S~?e#aGFrlTc;WATpneO6^-tT4t#V9Z)TrqaqF#)@w1lwA;8e`jI zYuN78oZv*Flk_Bd9^Kv_BSv8RaPBm_eXN#5tN6}gdsrcfR?zJ;btLK!x_zNNY+vFE z+t<3o_H-p|-{}I|_wYvF9btQ&Eo{d?%k3Cwxg9-uvux#% z)!hNI2Ii1RKbkdMPa==ftPm}UOe-XjlWEpe9f=%Ivk;-=2-?7UK^7_`up#eJM*vwX zP+veM`=aiEO!lm=_Yg|na$nZ0eD2Gd^<(pjZnUp>cf2F*btS^?2xLzSB_AL=-WdB0 z?c;Bz`)MB!?swjYNEA_`_(~K&jqS^a+-~qirv-k5j1bUKYALeg}9;Mkw z_)EeTWtZTUC-L_s?_u^0u1sq1aYa^xhT}=N{tN#~+9SwMM?#D*QU*Tsbi2UpXbTQrW9MArYXaC@5 zCr}h`h1@vF+xbK5bQhs=kc6V<^^Ne7Qra3-t zkTcj7a>m$0POvrPOyjv7C*|=?y)c>PEU<-~rBdR@*jvLQsY6;_!ztnGG(_WooDHpW z+6AbVaz}o=GvzNjH+z8D zF631??6eDc6(;WV=4W?K;+#9@ah{!9IM2=l{0=)WOPq}o=NlR9vc@FNu*>x`*yR&y zizRH)nQWD z)40~UfeY!z7(U9Nm8%JNUFPEps;psGjh;b;mO(LBwStcBsxL%Ah3;z9F<4D^HM_tr z%#Yo|AQl_`gby+Z;1UM-@?i-ce3$_m_jmn-9r@i6c2ir}{h1r=?%xe|kCee~^zhpq z;Q+g1bJ$I5x_h3U-9)>@O&n`Cz3J|iI(E~U?p`C|;H{GWl3UvEgi9?UR*PWwhI=FC zI=sj-lSK|ML+I`j{A?XwqC>^*)A-Rky!1se-{GZ~sc$!jmrjje9CUaoHwFCc@Di2g z8({a9y4!mlUbNR0n;l-X-ktE+;YD@bz+(u@g z;V-C`s2!dUYB*`6NtKO7M`%)&$#w^^Nkk+|(C(=pxPVx_pxUCQ3|HanPqRnv7q!P+ zKwEBJWuor5(bV2>1ufcpwU4-3R{N$yK2W$=aVsxPxL9WS>Q_27nHRKM{%c9kVvS?8!iw?|_ zMW^Y^5uN}k9NY+O*_bqzBj_1ejd=&|4U-^j^@r$KyG|6y8P#^ zXzSZVHRwZ;o8ltoZnqrWLO-wMmcLwG^U`?XNe5!oKyKl^F;|2OZ|f~z(cYG(Ets&1 z-6A82HQwrS-8bW@dsI{6a+-ycc<$Fm&)3vltq^lfE|A-5Amga3lRe}748g2`RV_NWggFT}iwn9LC{FSeLBn?PQI$>j``qvI$`v>n77T;a9a2}>A~PN zf@x5gO#$TXedzs2Fv*{FTP~O)Tju;AKxGGgBbee^;`<5ZNlgkp^@zPY{urI!^~haC zWDg&^Or;Z}9{6_BsGAc0s>*!5fzl+)*Cmvyxr9hk?+=|qh9n}^onY>4{Ni`A?L5sZ z<}uF;vU$w&0t~yxvK0Tkpa?a-bU_)9URWSO9xten%2oB2VBxrv>5(XW(^1#rCmis? 
z0Vo`BBx2qP1>{xo?X<$3Z?F}9G|#~I+X{cw2~x#mnrHEVyk@@DRybmE8LEion{q{9 zusjIz+8d9^ST}uB`d!xTCfBUwy{fb75_Ac5!FL6nx^DhFK^ODTYdqBinrv6HOkSicJwemdCn{uTx!fkEJ3dJFl_aV67Eivye1?0<}#C%tK$oKMq{GnV; zxhWEx$2J9EYuTnjxP-G!Uf6!N$sOCsHaX$S%{DOx`REdQZ4=E8bA|k+d&xICn!oi3 z`6l)N`6deQyF&^2r*@I+YiRyuExF#E=6}DNTt}t{w{~`t>(vkY`;qHV{w^cf)Vsg!o>Uy^<*g>x49YySRs`Iv7 z%_Y~IipVuqrs)<{s_7=4)pScbqv@7-TGK7zl%|{dq^4W!2~9UuiKbh?aZR^Dr5XiA zG`&xVruP9+_qxt%6ai;63g6Qjh1V&K!sDby;Z&wkAljXMC!+DSBN|^TqVZV}jSmow zuNu+#D)IUXyuJ*tFFLDnK(srDiqjegM7wi9v^xhxyK_LaI|oF&b3n8^2SmGbScQBE z$friWSmcXBJ`wp;$QOWozR2f=eD28SgnV-3quBmW*!~aLw=V2kC-$Qq`_YR1Fk?Rf z`&o_stVF&Fs^e2K`HfP8A?i$%UD`SOr2 z8~M_ZFA4b)kuL%H;*c*E`J#|dL_QVr1t6a<@_8Yj2l6=~A45Kh@8A=@gAe$wy6|0f z;=5|cch!pT%7X6-@Lg4DWy2LzH_#q*BRoJi+ZlAJHlW)tfbNO|=>bw=_RiADe7))3^*OS^lj+`# zdfCu6x_5J-Y{+@KH=DuUT@J7pznQ(q4X+o;{40%)Ei(UVTiAQqc>JNv|08~yvH+f@ zMi#)+)W`z7@bi@g@H92DfB^igWdVUFgh$z>!lS%0;gSBN@TmBd@Tlyx@TmO0e>(j< z_Ff>N)3WdT2hzDt_eM3*xt>JaTOEuB)V-!#d9#IU0qAWjPgB# zP#}m0?0t&iV8V9+;q-s7_utJ4cZIJ}7~cEa-3eEP?~1s!&&|S(e{MJHYri6IsvCn4 zZBZZTq7wJ9wxD+MrnqfV1$mPlvuKI9&)baAf^YojzTtJrb>vM?x^Hy7RS& z-}ri^hP-}jE_qAdKuAKNxG#dMyx){@mG|qL%`0z{*Pohf{vfXrOeB@Oet0|iU*vUX zT@&IqWE^JPR^C%?;3e>}#m%Of^4M$3jKNVHX0l3FzsTQyW_)zB;T zlipJMuDZ5))V@V3ch+%`#A-8#Mtm=oE9>@7rS_%9#m`L z>J`WtizrnZsxpu9(op|GQiA~qH3g{IBQ@DvC0~<<{UO<$)X04ENT$RqK4#s$8+^E=iYHwoRnqON zq}x~F4pv2~cmOl0k}9c^Dyfnxsp6G<$1AClDyfnxsgf$G!b+;Jk}76S$G;U^Q(skz zH0(xILG-B!u&*A|KPk3L3EB2>{B`jQ1!^7Oi!H6Q-?4wvESHb5e-duxC$fJ6Ef?(B zKZ%o%IEr7KXgv5g`$yfh$$>1azLg(K7Ts(t{5Sh2wxO_sEUGdG-(mj*pnr_`#f7$z z-`PLD{2>=O*MI*TSyXJfq9%*3JnXMyf0u<@J%NHTn9W8km}U(H$vU#&Pg;;wNERw+ z!A>n%IFc6R>dC?xv|x`xE|NvrJo8OK0rv=7)PfsUc0CF=tn9kz3I)d)6qLI`!Bws( zyDkNlUne>2D(Z}H;zf7(*u55ojF0(_=$f=6x+d+2u6ghs(KXx=6})za0@QjH{9y}v zM>o*-=?3~CGSH8agFeUs^wD~D6}LiHaVvx=Z}qcu?5Z=>$MJ32H7Vhlem>u(U5m2@ z{VF}XR;Fdw^o8tN4%H{?*tLyRpK5yApWI8d2mRJyhu=V-i+4R98n_MgU%7z3zOJF3 z-19X(zD4d?Q+-?5ph{8Sjy)xJCCkiRw9M!`8R&n)#(lz`lDm>+=5Ch6!=DD47F=P! 
zjKO{L7RW3dx-ey1!6I?yRBvkLHj&YD7AR z{U`W`amUvM_BZkmqV)s+AX=*(VgExuN?TJyd)$WoZ?T2{APi^_;)6Qz&!&N!*GVHqZnP)kb*)&!IPSM_z_zX}N;c<%A?cND zPM7J2VX`?L#tb{z94AAs`?5J`WESps5jS)PSGvK$G+Q{hAMbw59u8i#Tx};4s?Fc@ zBNHmk-|Z(8Dhw6J$b_<9aIi`V2XC65rjrS=CQK$VA zI0-Vr!%;l=GHUz*IQYIT)Se`L^hBlnpVLAuG6a;sh(W zv}3rj;DY;+-vk$nBOJmZZxCE6aR>@76+G{eOBoJh!KD~ybiqZB^St1aRr@+q*e|c$ zGfUX-Xn2(^>=zBM)xv&X!>cpGeqwmNM%b@1yqYBJcQ?F3o&dwEm%@I}ht`+rgf6~c zI+V*ZDpLsau)ble@<>I)sS(QJRSid9Dvy*2aH!P$?Hy%NR(++H@`$S8tf#Wr zsl9iZvM4oVVLBYT=qMhlGPszPN7C%XL$ywD=yqL0o3c34a$%mbxY&H-hO#)j-dLwB z`e>kcl*QkSEHX|?Hb5Lx;$3}bT-e`Xmo zlPtlM2#2$DB;ikbcuyf&qM(NhxQ_hrK`mJ_k{&*(Crf6~!)N($Tq>oSSc-9}htD%O zd>y0hYWZ+nDy5oOn)-}&w$uVtrEL@n&c*NU?qTxERb6ZS?d9EQ|!T&rIg zu4iXDwd{2?jtCP7Vm~8b3j!S@|@X(G8 zP&A`;`Z2O~G0#jG*n!b+%A^ z+!=~5+dy$OUzyT@uT<$>y`b1oi$NzDluK9{au+CW;$mHf2iE+6uQeIyD3KvHZGTHL zV(zMXk<{wOqNOC&%*_L7d?!QF_)dnT@tq7w<2xCW#&j6D#uRY8H882OP<5+OD8$JRA4^MOUlM*VF0hn*@%W`$6VF zlVT)C$4bfRvAnU+n#PE?cCVu`f%S3+8dYXNbx~AB!-UgC@miZgW$i&+l zwh@0v#C3pU!e?+y#j`UWLmN>I9MiOgpCV&Em>#&35juKo4=%FA|DpNX3=*L?% z&%FM75|PD4#A5}mh$0-}VGYMl-kE%ej7<2UXAk24K8%cnW0f3r)_-xE_#FxT6v`Ch zv0JS(8%b!M6C8VZTXUL>jlw+u85wg+^dO;0+Q( z@hAL6j+Df%ktlW@S28%>!wrrP;4glpBzBD?t#f-5|Ja6dY;R;}(9duj^Ho2#dsu0% z>PLnI*1E+LzYFwuw4u_03<*HZF&W|;OeVnbInHo=p$v|%c7fxY@upesaJ&F3L09PG zXAOh(#1D;_2I5yHhvQe!-HG^R8(#Myeu>6T1@TKTSUV6ubuT#ntsNYfJcRtb4DP=W zKPSVOV&cbmeS<4`eS<5c`tD+Vk9l(j`_bcXvErYx;y>J=#Ks*;l&(du5C81pG^DX05`sVTKeG53EZzS3rN@j3G z-$*Bp=o{%_10@Twj#X&N79vqPR|%+y7tmyl*#{$IbwZ?CFGMEvgpy=*Z4)BX9HC^J z1C$gL3X#Pe(KoV;Bl<>G@RdEXk|X*?R%824aztOu*@~G9BRe^wZ{$a#OR^Bf_p(A% zYK{!6AX*AMh?W8mqAPJq5TdK3&-&MLF%QKnEela{^Rg)i`fl3$I}{Di7ihlQ$BwB#Sl zr5}ZAb5ll?a6{h~R43ecPEW`PoKV!YUJ$AV89P4{s=uZudP_LIzNU6Jp(e&kJmD=> z@z(rJuyCWYmW&i?G{&p{5^D6Ody9pdqJD6~&kIh>^n?>T`@)IKec;3oy`fa*4W+(5 zP&%tWlx`g$mYzo1ojy?3tuK^~^Mo?Y&sSE^56Y^%p{&CPPO|=R(tm(>GOjnA%<2Ot zEBnI9XP$7%-V08R=m)3bz2VdjA2@ZXKb*QZKs-%)!|7pt;Iz6goX+-y(-*zq^n-qI z`X6sNGuQ{t#Px?WIRnHqmA&E2qdsufx-Xpd^MtcAz2Iy{KRA2J8_u@+z}Zjz;hfh1 z@my4IIG5N5&h70B=dRVhjk=th(#gj^(Vn zgNWz;G+I^D9v>uZ-J24mFXmo*PkZXaj!cB|N#+~u<5xmbRd>yhG6wuyTvl`v8y&QeP* zvbeX>ab>N1Jc~o4W?X44S;XRsv6RQ}5Qp%Maj7~Mhr8*xIHX46ZzL_hj_|$;T7IK| z#d+Zw)NZLMzmu9_DO)NPU<^Vnw(d5!7~mRO-r*sZzg9r`Z%$$buadFV zv|=EEicv=yyFn|0uo%v568P{dW>~)VWb$e>xyDJrzZMDjw*@okF-Fmt$r$6Rr;25v zBwDOE&CM@ zyh%J%{2xziao%xqaTS~&G^0rFK|Fu|VgFOK_Z4YVC6_i;YH2&wE^VheWN`lNt=#1_X%LqF zE0sLCd3~Cmo7bmd#@Zi+^JxtwsY0W>>Cpq>Y;GalOY&&mJbHl?(7aRx_ci1H1SET4 z8H(xN4*b1ErK@8;%`L|N@}qQb3zkdqSFWSEztRgf2o-EiFWBRi-RT9#FHmia7k@;m zym{m$nwO-fdjlNdf;*nU2!p-v@zO55^b=mtlV0#VNb?XyXs;(;5S35!5)ab7Vr|b{ zx_2hMFbJ<%EFHuls&ELsFa)87v6it2HJn2)#325#H@z?mp@u&qjoDv#l*$lE^yIb`Tt1z z?trL{?f;pD3-`M0UO*vYCmNL)4a8?5ChB4XJ9f-VlvGIGH%b!oKvP(jDr#627c2-^ zp2P-X+t`@9EbLMgRFq-?v4AEoqRA7izt7y?9}6>Q&Y5ryv;L8qv!Otg+D(k2b@=^!C zbrw7)jp2^7-lQ?iogLf&oUJ-`@IP3GNJBPfN;Yhw2aobIb3+_IA2)nU5B|nklMP?d zgEoG4ZiwXP>V^o;@oAVr4_0w5Xu~*q@S-O>sB(WA2GWBMxL|VqC3>)f%O>-tJmCzU z`u+6a3;PWZf=_VB#on}x)X$)YB>UAfr2bucNQUJk(%79If|H^{y&cDXB@M0g(3|C@ zairlAJroE-N~FO?4~@2M*-jdc&_nN)+tNt`H2YVPdjGNu5v1PFej}9BdriJM!Eh*K zGI%_PW>2CHc4(>NaDsMmM!I%!bc%NIC-l(g7VYBs^w3JPcJU;7=zEiPaWI;#UDQMm zfrU`0T^!Jx9g2q|D(zxFv{k#<3qqyZ#S$EpYZpO>;m|sX9onMMe`y!Bl-|6lU4+Ad zQM<^gV~6r>WUh8m9*#8HML0rC)Go@Ha^-7w=m307N4T>?HLtNljp@SZHpC_PoA$H4 z79fm*LF7ZX&BEws^w6VpVN3ujA&dzH@THa=_OJ+}BNb0CVRWhsJM4>ee|N*-emKSn zqYSn=lY~)e9_;X7WcGj_9&3Xj-Dp2wc6dBWiIdpj=_X-xJxU8Ny|q&q6@$#8k2UanncRfSg`43iLAztvAy6V7-CuB#_*Q;lud`GQ)VCPHse$mt7(^aCZEV z+`t#9AvfT<;|9^O!!LMjmeYq7LOQsK%rf_2g}r*S!r^>+%tA*Nj`3!NP-ZQh;l>Ih 
zeOciO4_5f2zzS0&R%kYnnZsz|HZz$CKK;ybwD3O`GIItk+~1QG;+1-4EWZsEo~pD3Hd51I zCN;Gwq{eCC58tGQA6-aIk%`n4@WrAg4+m}n(`@K~8Z+HtQ)cEs$uYvl~FysxF-v%E4ZRnK74pubPUN@6YBye&j znm8X^OxQ$=)^L7I!X;YtGiS*pRMMg(G%ZbJMcaH0MZ3&md^#;E(z7D|)*>2FyI|4C zUsgmTU%NzOgexmz-mIt+wXAbvMGt&gQI`ih;x4cwUJ^UfTgQ&PVG`qq(IaoCi1Bef z*^zM;F@6R;G9_J%A4iXb`52DO;s#!he8>&FBm~y{GeI;NxlUTVKi5f%S2;iNl)nQ&yM_B6<;7GMB?yZIC4BhUc-)*Oy7FaaHOhiM1nrq zSyr(_pIkx=N9qYXavR;Jk58aS9@-b5(IVL(~s9(_u8TAWi} z(xp#+M2quF|2;}?3bhFZdecZ+yw}rEe4rxdqCO>9V#N?=D=weXYcVUnWb5BopZE?f zZlBz%J1hQ^+h{%N?!}IFFJ0}bPl&%99H>u@yfPwQpB&-Ij=mn+OT&(inDfOncJ$rp zyS`>er#iMb>y7#J=*N!V-qj}=EAoHWC&g869H~!QS$4iopB!FR`-wgoeoeROlPB6f z8KyS{t0eX5`h+6vvikTEksXaHS9a(VOX}V!(I-Vtq6-a2zq|Z?l|HGc+%`pTs;`b) zr8nhOZhlQ~BHrw1eC6iT`lPh#)z9=vVDlVZH>KeZb~Nk!|NT>+G(gLa?w<1NSB9fU z%W69Gi3WT3NqS>i=^-C|Jaop6R@gL?_3^n=emKjHHrUR(>WwSaS{10ZT29!}KP$H1 z&>IPdCa86Vgp>6>c8ux`$9jdh&1T02+3S<^@efrDr%D%CR-@D7CF9u0N!{nLW5LeN z8};$<|8;DVnwiLtwXLD*--)&G4Pvulv)KF&J+_cfv*rdk zISWH#YkMeYLegp5wx&wRB!fWmX8CSIZCn$6EPqYF?)& zJN9P}!?7+d@OE5K1>RsGW;i?k)|G%{x^lTH8dm_(IK%O!sx)1IJ3IclDpA)&kAI_O z$G@*yaY8<9P$lX-D>IVm=oVE<4#^3sl$=48l3U)59Zy!};{1BB<5{X)TrS9WtCDkb zRUtd33fW<6JzmIf9mf?_Vs1d?raJjRj4C(BxZK?FMlRBJ{C|__M}`xcvJ0c+;{kPV z{Y_Ukt1@&W=?T3mL)Xk@=uY$qfehV=0jdlg#@mUvRB1VfDj^rCO2`GOl5>?g?2hbCrLekD~VkIx|#1HiYAIo;C%FssX(6y*C zbW2njy6`5Sp>PS_j)2zahFqyErjob%^iTXsUhr>%@F1QEDxb9HH3}7 zIaQ@Ks_{6)VPdeE?@y!|J7gef9;&8;AKe_N;`k%PSp}roOO2zfX2Lnox45PpxfRGc z(zm##9J$3c<;X2R&b7V;rsjUjW9R^A*>#Kgu%@S;cN_8fe$;`gUbb+ zgqhO8Se$EwAyEif)zlJ%YSj=_A@l+8mZ*RI5FdbFkQ$o^H}$b_OrMJX z5Q!R60N?b5+(j%kRE)ev$OrV~o29*B04scE%6h}ez|h|mcJjRty4-N`Q+|W0h~vkj ziWpSud&poX__C85_zkKem>;Jq0{M~bg18R|9oUP**uk4*~v#2qQ4;y;>)bE z#H{^M72;^|Vb-_#3DSXsa1(K$Nx{TX#QWQ^pIRsIL!u+km02fwGwUo*W?k&YtdSnf ziuVlbDxJalbA}jJ0)O@=@xKtieOtt^+)OcSC1}znYjw<;IraNx%$mm)`o}}-M};@ne{qX^|Q8bp{^~N)U}CP|K3Pl zYcr@T3>vyZzu(omp1KyTrLF~AsVkl!T+J!eHO)j_jalSDfQ3Bp-%K9(rIQC<>*y!V zv&I}{)@Kzvn@NvT^taXLzq%#77t5bKe~YjFse|Zy)8E!j^9(fnwviixXb-6V;Tq9< zRt=m$e8g$-p6s`r+65Nk6UB|K{PrszBMhgOrwS7qHVPA(opW{w6WYrvCkPWBn#p%k zH^ZraO^zb$R4if*aB6QjwOf(bkne)I!=>+{0d<7XAK0nFu%b(bQ>PIxP_Y~$J9XKn znL=R9@a81)U1tahHJobZ;Z+eFz6#TDmH8L4DtjCG9&P)606a~SRT#cVt4}AZ_H84p zPFlz+2hzYYa*$Omwpli^3cRg>thu(M*QWzVTDY5O#XvtoIihPS0!>zmS_I%Ix&KDqRjN`A^ntHJn~jJ)up$ z=67CKP zALr^AiiB-ZRycJ@#lCJ-H?hk22c4c){nJZ2Jxcu>r$Uxr>;8XqUN~2*K3&E2E>G9n ze(EFkF>{v~efFLIv_-yVy&=2OaZtM+&Q9N-3#-AWpF?U(J{ON&h@BCy^~j^|w>SB$ zq3`9ours~5kj0t7T6X50vVCvTcMXkxo8{tE?s@pk_}SBZ3}>bTYf%V#;mpU}Oy8M* z)%$Ogi?hPI-DPKDZ@TQG?{&<6eUjl!;$)4DoyjOK0k@_k?5(Nn%+_$Rz;Gsa%DW!y z%z+U4GdpvHt7bsC{><5_3BR#36;*+c=zCT#!sq6?ob zp<@dnI!1%jYDfFa#d+bOOATi_F3$dp2FKJOm$5CE$DX813!8l2q{05Mhh;cRY8QP> z$23=D*2ufN_)+4lhu!^K`D$UVE~R#-F2$LnOJTcoDed{Xl$JbQO8stKiWTu9#0wD5 z170@p%)m=U+=zG_;xULv?b4+N=j&2~@^q>2GoK1S^QrJNpX!OYgg6CW7w{hD=#pyp z=#r(~x@5Xbm(-Q7OX|qeCAH`3l3I4^l2B%n6L1E&74Ra!3jogpT^7P-gwqg?2VNZT zVt^M7yhx-)AT1nep*gz5$MkH!y}G2JJ-Va-p!x&V52#*1MRk%;oy5+3U1EElF0mz7 zm)L-KEyx@SY11X56B4aREkbGmQuC0SjnoW;(-1Zy91EVZE-_}eE-@O|k-&}sb~rG? 
zfMEc}*uA<$l%I(56H$I5%1=c3i6}o2<(tqQribXJ4s=snuFi;0V8kadGR1A4&RD-o zXG9&0sDlx8Frp4d)WMjsTW3r|nh|MnNQ*&QG}0oGwq%da7>@KXq#KYPjPxL+2O!-a z>Da*E(;2;Tbn&O?*};2t@m+g#@g2K$@$I{G@h$nf_=Y@Pd~L2S-nmm3UxF{^#Ft~J zq7_xdC`hfvS8mv?OKnCPDwT>#rJ_=)8AwY53?m{HBO)~huqePH0gC`E9I!CJCSrlq zrFdaHNfOB3qYDb()^I-i7iuSLhqQ+J0_HCLb)cCYeKoElY4chqCGlO!ET)?4>;MtF#{(J zI7Z;a0cR!tBPWv=VE#x808S8agF!P9aS3p`Q9IyzaMS|O>Gezyz zC7=cgs6hg1kboK_pau!3LBdJEFhCOUEfVl860!ly*rQ8G1C0?haY&CrdNk4_ksg8c zxqycOZU8(O@F2hg03QH4KhSw0T|zp=;OWBP>A>J=$KYwM$p6W`F)sAPBg5Gd+=R&6 z1L@g`Q28Yzn&{c7Q0FBh(&^cmHrE4W#D02qo-aGQ#D|^z7WPod+mqD(6VSu?jV^0o^-dkiL2$p}o?Q}%ol8PQ75GN=%f5ta08Ic%|#5ykXuHQrgsh+XvT zCHus!WW+Xl_PTw0J{hrzo^5quXWOT^?KPZzeEI#|?)4qr*|~u}?A$x0x6{e+E_>5J zGQ9l)EI|!#>Bi25*yGNU;Z}RyPh|MMNzyoWZgHjM2QnhadFTik;b*_>OGZ$8<6biS zVQC8}V_Xd9KCy*QAj6|eC$^E{W2@43km2FB*PP_78h3VXg}rVNd8^R23S}9bhXL)h z;h;8LDqS>(yyb8f|4H7;F3n9NZ)L!K4SB1To?BB^`3ZUJVd;%iWH{71SCHW_)iH_; zf0Ldw3hZ3Ug~;QYuO%0DF1zevv1SESQO@nUGJLmYMfQa+7HL)#RBr66S-~nZf;B4~ z4gE7UUqe#nk!D4W^H8Z~MQ~ZwYq;7ui#BUkPEJI5_Gh|#JPRCIg@UBS*hcbvU1 zMh|fQS|~=twLzO0?P(UHr4%umTEr-rM~vz)iBWA_J~yhxla=UotfU`j&qXnun8hf( zf7-+-ToIbYsB9ej#VGJrOGdb{lCfZ^i&0@XVT(~F4wPx^>xm6Xtwi@}_+#1Kv=GGW~Hn*x5Fl!_~m|G(_ zd(s-t*^^d?UY9K5mq2SU_;17tUT`3>s-o9ch+bPEdTqsp%u2!ve`F=gJy^-X%kTb^ z8e=ZM`wwc2gQyfMv6gN0py>n9Q`9K=GUh8XHpqvuK-@Q|(SV+%MzGF$h|`)Vdk+X9 zrMm9oH2Cykvw^mhJ20DuqtqATG@}b+pK|U0X{z@Bv`B6Vd0GTFraVn`Vm1wT$e+b& z11VcsdedE;raCd37EqbFUYw>nF`K43F`LFc1G_&9;PqqQ!yB;s^A30ec7NWfW2^{; zmcWFE`->TrRl$UZ`-?Ph#%j{tU$#>xEC8@zz(N5F2P^`xNWh{2TM1YkU}nGy0AqmRG0|iMEE}*Qz?^_J1C|C@ z9$;3$Y5_x?O&x%BS!g0np^1{2CVHA^qCeuOLn7*s7?w^G!x4`_dL&>`fT6rZBjRXd zVg{EE)%czN>NUEuX3AF@rmRrU@=gvP!BifXk@Ja_HBR5R=@{tRDn`e#1S$nPtO-<; zS0hVAkf8oagsw_YCtX?6*!wsgkk@WZEa)Wx9Gb7LaPZHxee z@`Q(Nb9vbIiGBViDmlY?YYn!t+Mr_jP;IF&gU%5z8D_|z<*ArgD)yGhpK&)-th9IK zvW@a*kxrPT|BTd)7U`U!A}bx@%SuOLOzXF0H1;OcXr19#Ybc!%+HF27T@hAT%u3f( z|1^UBoHsk>G%HPUX#S=@*Ox1)bX|5gR=T6y{+O;SoCFV&r3KCdZ_;&+ikzo(owdAt z4_ya~Hl@e-d;v4N)PYEq4=b&AVWqeD%m`6uMu<8yLe!ZNqRxyEb!LPNq^1AmX2wI* znGvG8_X|;HMu<8yLa=^(A%+BzC}%s#3vs8l`};bpK-sMs+=wnwAZvVA#M z{vHm*8RcV!(6P`_?L|lWb0MVc-qb!vX7>3NbK_}Xkxl;<4GeM?o7^7-x-k1Pe$TOg zHKkjN!M>{O-ClIWIBH)LdiEV=|9KLfX0WICV0O#Zp-bs#t7G(08W?vm?6AE5p`O`y z&D1Ys_QI+km(d`{WL&}QryWyc=&G`*e_3|*O?OL;rQrhVk>3 z&gHD|a{DSd*RQnog6!nZFy+_y76qw7A6E97$jS!#va+{%r@zLxDEYN`TfISk?OR$l z&gR`LzxFXLGxTO<(=4*@V_FvO$;uX+WM3^U`yxg5ZK7qdX4!W?E&Cx|_HCtQCS;bT zXJtFFlgPfcI#yPI{Q};Yu!G3HRxW+!iydpa>^55;l=;2nU! 
z@MYx^AOGcC_E0Q`gxJqwS(gthhyH9i22vlfj47I5#WJf&EYs5RAQUH-<$1F52{w(V zSY|ehWix2`j1;kK5-ksZjg>F#&B{N-1xzf%78od&MJR9V6w87XvQ8}X!EBYQit&rcEU$bKKPhriJBqda@Nn!R1k-mYeE_q1ug zragn9^X2|1mJ85?sanqwofmq;{ZT>HIzM?wuE5G|SA9m%V843*fi&2Q`>QLzP`Un^ zyp>*jJxo3uSh?w+@-|37l{Y!}H^@bWR|CG53nOYG-jRz^Z|b(u(H&vB0S3n)^#OXh z^GLp&)m{dRpjrDWKFoLT^ec}lbARQDYrT7CIFvfvJAx#c;P@ABNmnO$8}6N4Bh9^2gjSn-X9qOg z+@BI?t+{t{tu^;fPw26^cePBx@zoLQ+-Y-vB|#?Dy)ythYVMs&h{2KJ+&5I-9-Jv} z544--$w$qV8@uESv0+CpFvoV>g0-vXa#b70J|E^NP0_A4a34VqNMEhc#>SYmv2*b= zQOB*`%+V$>$0JYX;2JsF*wjAE>BfL8Ogj;ls!<8TD4(Z*#d zDfe)(SY-(ka^5FoY(mwdAFO4cDZN^+SlHYkvra zYc=hfF319C*BD_DLc1o;qFsZgu31W*5aw7jj5`0Fru_j^3m9QYGo)+R1gB`%pj~Uc zsq;sZ_J;s~YBy`woTARe4DISi)S2qeoa;a)fuO~#U0tKNT-N?TftQh{UGoso285l+ zW*c>G1`SBFl*m=u)kP-l>V3$o5LERQd5PN9+2!nGElh0ob5L(DuHE3dkvg}}UNwL@ zb5)kPAJ zvdY0&FSOqg*mu)@m+j6f-vN9);M08#m5Ut5mePK#tojb^XPv&y+fW%*vE7&U%d?O8 zFYTAnmsNh(&rrFYFQ!u=R8Uz8x*E`5;|uOoE>s{-jRm}In$DF~K5~rwuROJopAaga z^D{$LPradPLZvxdocWkm&EQ9ks&HbcDhAV3SjL&A!ZOY@6_#_$b1|P#s@hk|3Wb zD6KKdC%`MMUZ-BPqDyN!DZDO$(mLNm*d44rv-_q(wCc<23H4Z`NU3mk} z_wZro`}bkz-`2D96X4H=unrwNk84s2VNQP0VSIxk%*vl1SP_@YUFS&o72=h~D)42kC@)E*?6GonPh{x0;j$aKZkP zo-nOQO2RNuNy$W;?hYyOw{=@bN<1raJLNsh_Rau#PrMsDzk(}}oHugY$w&OSYt!>) zJ`m0q;V--3+t4>vSXv(nQ&JcH>CG;@sMvl$f3mhJ{WJYZR+aWbf6{v1bG-g!(Nvf2 ztVXssJ=CA5_hB`MxyO&1Q=G2mCj5`G+IVkPdx%q?pq%=62d6&X&Z&>LaJu7C=$Q~h zZ3(BWt>XFBJ_Nnt;*#l;7P5;aQ{CS-T)e?kFW%>=m-IaK()ZKOOk$Um3!^;f_^8Qb z1iN&;ddS;!{F1s+qv?bgKCv&|y*&0WIw71tU{085a~Z6+WR&)h^p;fhZ@j&SsJGze zxlC^vszO$l&8ya1qVW)~xAZK1eX`yXY1Ugj`HlEVgh_m2;2ZK2Pks)#uin4!Gaq*8 zk;pE+^ktX3@pHg^_5OW7zN|J_ydPY8YJqq^kV~fA58!$YmwmYbtNZ?C&_=rN$2F7g zdtt(e_a!c$avv{Omq&ZB%hPr2@=`s!{3VuNu|3jW(c29eJYstguTOgbzni!F zyRyrP-t6)QPj)%Wja@z@u*;_6Kir33g?73i)y*y#k@OFXz!K$GL{f zmEWK|ZMf3R&mdPi9j6<~R}QWs{S{Y{HeBsfxzSx*T;$N~6c-n8IfSc2%ir2BE>0`e zezZ9Ojzf_(yU@lZgg`LL@I{ED+!>dvl4_hMI9DLop+ zMJ@aaw5Y*_T{ZF>)gn9^&lVRIxv{I8IKN?0wvJuh$z3-sisRgdMI-6e{T}RUQ6F~o z4D>d{MUeWr>U1?+t*d|ilDt2!thPyZ?kl^vP2QDVk@va03#JyYw%GnNkAztH+h|CU zmemg}M=>D*_AB^Hs*-BO^dhcaULVMvWu?Q>`EfBl%kFkcHikKl<;(G~EA^fnALuxP zFbzF4%}~GAx&N#jFHNoOV)dItkG;d{bL=cuHU`-nm&?Yn&I4|8yx+7_CPV$csizWI z{So_>hqAHf)Qjz`{&eWs>4y5UY4#sjea*D;5r+Ckg^ZJp5v3baW#a%JR)5=(a!lS) zN$Ve$4jdux*jJJN8yy)}W$K_KVaKPCj*JTJ-=Ee0@4{!pX~TXKiHD$Hyoojxp{1%tg!t5Mtbqa8!Q~1Q(yD*(gH}J{UMiJe`I!d< zHubPu#ryMV6%V#t!+E6V;uoOe1R@lyh9;s9>k-456D4LLRG@|m5yD^ttOOw^7yD|s zg;2wV&vYaa&%f~)_f5peE7u-AG}qA3nNExYQ1`%aD7ZAbZYGdFZ1mkg67p#yS1AzY za7ig)4wsY?=5R?VVGfs+66QeGAy=4_VXt$N#9OqnA4pGv&cS^XB>JV0#DTQ&_40~9 zk{HO_V{G6%w6TTn*2Z>TN8>}j2Pbs!r6i$?*UJR8jmDuSn$(>(hRCcj6gJ_&VM+>0 zp-BcaO$yhs#$^&~{I(BkOz~xn+uT^=;S8FxjW*i4vBs;etnq<2Ykbw4HFfX7n)>^( zrjb1jO_MBQ5=$48VC*(29^qVsGwjI&#AK;z%6c)WlW!PFs&%@i#eBm^l(<@JBEC;j z3?J7?(WtMOv>fp;9!hLh<3WgXNft2)sxC=A@x!-dVq!ZV`H5ii28xNPYPy6p>Sk#A zM5XRBiHYzA*z~O$D?lt)4P_(5S-4`N5g|MQA{2vAltoO8OcxWw5gvf>M1%v&E4~-P zqEdvgQ2YYTLRbL)_cRHiUAP(vp}5o32%#N_^BjfHB82wgmxsCup=qd_5E`YX!~wce z4MiX{mw)q6KI?=~KI?=~KI?=~KI?=~KI?=~PaoFwBYKE@ig$#jG_K0rl*vVJK7&=S z7V*FMs-YcX!@S~AkMbYs`RMZWe( zX4hYHXV(J+c73LnUH{ac%V;cky4SzH zZcCh-;m!^s+H5M_` znJ#8p5l%&TAHrEa?9K^3*UUw*;Uby~_*^sR@;PX3;q%e#u!!dDbkUq<63xr;ALFt~ zG)EvdmX~Dq zrZigET^Hw(aB{&oJwL>7S9W1{;S}obsLD;7$XTZ=yF0C1=}*pD1$Gy5%+cg*T6OF| zau#L}?|#^W-HooyeoD@URsYzRoHdk}8pzo|i~(|v&p&dG&p&dG&p&dG&p&dG&p&d` z6Q!*3Vt12L$XO|!oH<4BuHQz^RMNYd7ILPM-rZp$X9_lvGmtX5yC?L-m+bDb6mkY; zZ_kXQcOm6|=1qFH3KT2xTMk^wOip7vIgLg5899y4tRbh{y0N=8fEVJItq@OgI)=+1 zoetx&NT&xN6>8n5(?As$LN*xgHX{~^*j%KvAq?TZ(?b!4Ow(yEgh}b^SHyX&^y}{8 zJUD&2+sP%w=M`4Q|0>SQ27g(cm*JSPS)3PKu5%UV1#(v&^8&cW+dSTMaUNc~a>aRK 
z<>+v6ZY1tlhI<;Ar4qwyFMPI3oU4BB@B)(&ZW6I0naDby$Z710JZt@DalY z;OEB&%-jw>VCFj16b!0x$<1(YFpteG0_{FEl#Nh^8cIXR2!@XsuExWo`PL8~13Fa{ z>|S8Se>Tb&=hJ)d+4nt>Yqa!Uh>qP0EvtAT*Yu|MmX_Wy$u-^Sz2&8?o8+2d^xo&w zrX(Bg{Y%U4r9ulr%zxOE-P_EI%FFX%_i{biy+Sv358VHIS3KCg+XB1yUy0qr^HrP1 zB<8x%HV?Cy8<`^JhFiqkfwZk>x|rLYw!!Fb+W=SA_NL6*{=r+A7tw>Yjn}ibP=2Y% zi-+JUYn#VU+-)E6uaLjQg|#J^#eA9~=5_Fexov|*%(Kz9?df9P5!#mPuzo|%Kc;QF zeOcQ+er0Ss%CEC+^|;^~+U~2Y##)utm|BsaO5cs)en-~DDa1uy$9GV9T{Q0A^14Wc zd?K%lSg7@4?cKoVk#7vB*maQ(>Ei5@_HR^pDl8+*XNy$#jwN>80Qq|RHP=e{dPn)Y ze)9FsE5mDOkFd+{4xl|IHoH%w-eXlC4Fz_0_-IH~!+B~r9@eJC3;r&w9e1(wEh*yp zT5f0dd@*f*ZV}J#qW4Lwfq0* z$?i|)J$XKuA7{@8xw8A=-t7Jdp6vc-ZtVUVUv?kbN%t)RyPqeq`}=k5{xOqS?LzOL zGmF*uMAi62)dT5$n6|C^PgX!-eFgUl~8a0Dq}YF3|0F+TJ3w1 zEFTJoDy)1c^vbX(d2eotEG;q1Qm9FmhGKt`B|noa7GiamB?+M#HAE32Yh|(1n?1M* zj*Tojec6L{jxVO6`(@F{pA*DLZoozi1~&3)lErp}V%78pgaToJT^92Y^5Y{_iVn)F3JsuxQfn9OrGuOoV1jf|#>LQqbP&@T z9*wGrm`4W{)z1Hn4uVk}PcmNJ2ga-Wz<6~Z7_aUF&b-f z^wE41nb3Q!_!7>Yd$cT5J``me__lm#7=83_o6kPE&SCe`(SEg48s0WM`mr=;yPVx& zZ`vkjw+&#A;Nbkxul?Ag`o4z8ZhNH!=N>76?UoX(yQGAod?{fUeH@r4C1mGH32>{L zkd`AQ#J|oS|HGd>o-lwt{-Qs7oYjv#t^)Z}ZdLuSA+V}$_-kZ8_Sb~IhQB`CBdrVG zEv*aMC9NAs|4PZ1*7@a0>%4NMb<$309nF!_JC(ctmUeta|4JXg{`#dq`^yH}Cw&cn zzcGOQJ+D9e`=fsB?_|UeDYyTW658?@ zoUk2I&cwY^PVgQn2h{)CE#>s4Pgd@da=btwRYx3slom$bDZU)oxmCv8Q8x1zyYi|LcZ9BFF-P;-I0i9SgMb{epaz>Whpny@t* z*dNd*>p*}eY(*2cq6u5kgsnk94FIY?Q1QvO;*)K~C) zvKw|u*=RyWzLagvld_9)rR;*8Qg$9tvyfy4JPmLo;NQ|GSx7?PZAWSZGMbBw!jO>x z83hA72-pF@_6N2fP`!XB0gvpJvby$2SslBjtoB_}Rug@an=fUd(OGD878;#}ubj1? zKH0rP%0i>F(CB?2FwrLmK!cJCK^6^~NYF%pW(Iw76l4aFjYSel&O*sqC^-uyXQAXQ zl$?cRu z%0%IrC_EDdHv=2}+y;VJ{DMIg2#Nqu_=Ca^6qp5>$UGC7Z+=Xl{JvA#+@2$CM&1wm zvnPM`W1TL24W0i`t_4fx0>qXeX7Xp9zYbuX$NRI+`~6s#PhUgV#60Pjm|W?X=$+Cp zkvY;Y7(>5Ir(K`!m3~2Y{(|oOC1{uQOF+K#i$75Pfa*fKqIOCfx^koq4`|oQ9nuDL z=LU4=hDzG?-EL_ETC@Qz+JF{q*hRZybEOU0AV{ZOKY+#v0?g74F(8Nr_6M{p9t1OJ zS0ZS}(XLdG1%W1Dr?kG6cCE{i);H{s)>qK31AC?QY>%|wx?5V0nY|t}dp&0Mdd%$g z$h{B*MqqzQyN;Tq^>gtXnkS`q5j*Loxjf~RcfV~pf(ZG%b_I%offt5aqb~!*33p@ zcI}eZK7jGkd}(b(2kdNMn}M1JR3lL1 zfEokTPiWU=U@rk`I8eiYI+AwP1A8FtY6by@_*yRzNFbm&Qd(!Ol!iqy4Z|U=l6KwR zA*GelQW{$NaF>*}iFW;!FQu7iS7)A-76+Ob&_shK5(FqM4W*?`qFv8H zGm<_fpur$W!yriW1GX2ir5q{s8GY)yLrQ(PS4wT)Bc-bYA=oamt=gGWPF!oe3xW=mt=gGWPF!o ze3vAAm!yt-DXBeAN@}7{|B)*t;kzW+=+m(|Qj&Ftl=LNi`rckCDRPgLG@m|Q4w^8~ zjH6FK1z8Zt0zl&r8b1(tfj|T{-6JJ-?UoWdc1elt`BEajdEzDd6q{P2bElNZa-_s! z`t)Brq{MxDrNlg7X9GK(KE0L2PcZ{cExROBL%w9H&67;dT*-t*)r3XWRERj%7!wvaQ#Rt} z9g-;(q(+d&fi4Dg(V&Y2UIg&MffoimtZ=4x=u=EvQvgu?f$9fTFQ7_5#Uhl@h4T!0 z6Ei%aMtuSg<}YPWQ+?Re&8kV}Tz;S%!kXw zJ8j@U*weixk~N7wb*7UnX#Zz@MW0ri2{xXmwSMeroy4Br>uq=@bYsu@n@ENWeP*zb z3~&Q7yi!QUF#2qYnPj{}pM|-yXR}OX6A-?zkWE0?1cXgM_{vN+jik?3_!*vkE3jt? zK(f(iP>dJV8%U4ovwfUvyQ!<2;n{&c?AZl3_N>iBEGGI~wh&8NIlv2lP4k6d8}{^P_(3 z`AL2n-slMr}6Qu^Abmshg3}W@NM(8KJ40kmvq+ycZ%GuiSDefe28_A=T;wwmZmSe)F70=J^Tt^4WA z3^Tzg@a1Md!%Mt2Y_ZXo|3MB;%nQS>7XZw^%g9*E#mKJb5^?e`%?M}0#P;M??EQ<_o1B2`MbYKd{E=YUZD~-;sYmQB`Ssl41&9{ z_@E0g$Sb}2RmI3dEK3cwAXKl0tO(&I4tg^}scI+=p_LZ#K}5Rvz<{ub@KA(%^7pj4 ze*8UcUX+Q<4N50-jTSOD7yl>X|6ITdx*1*_#eXaQpG7UnTqplfV7B44C*03O!Y3jX zY>gT+w;mz9NFmgQIy#W%&tu_3(WZ-t1*tI%gYa-Qv;?6@HH6Z`!3_u|^O_Mdqpc*I zw~T~CCpjE*G`u67%)^>EZ#n)G{P*Nb?0f@XZs&*crFVWfUxw!|;Y;%TNWMJJkE-am zmMoBJM=mCdAM$s^1zzQ=e{)tZJx)_s#;fcNTQ;-dTv=S%}_Q zP^+e+cNVC9tcB>Eh2d%ly|Ym5orUO~g=XF-3)MbZSOiW8 zS!e~cB!w)9Hj@RZ7-uij$--wE(#=8^X*EO-#-BpG;gC%s15(H$xExuug!kEEwa*r- zeYRNbv&CwkEmr$%aTK?~&`aV@PkPP8WAM-r0;{h7=&6v2ZVH*{rI1JnmcVzdk3urN z6>?Cgkn>QWzEQPkE9srrTOt4BBHg_^d=;v3QE0!)O$DT%v4=uOhzfnTa^rZ>BcO6! 
zvFKrN?*Ccz2%i-@J%mmzZ5=7^wc7g}l6T`<^p&^9*&CkA6(^^UnXJ$y7d~Tj;ZRsL zqrTSa*g!hJ_R^$aI=?xDys6MnVZbVc{yc@$DKveOW_t+TahY7A3qyGi&;r!q1fPLh zBlry58f+1_YBkiBE@nT~(DI%Ng}t2`G27{*&?db?A7B)U*+z_UF*}a42eV^b75X=v zpuKWaG?K5Pc@0DF4G%>#PEa&cUEYOG@5UD zv01V9tK-D2?chi%nt!_}8WVpw$!-xsG{1l~6QbG18ExKaoayGhGK4fKnmyeV&0%2L zP~;_dMRQ$w?Krt(MIFeUB77HeCmWTxXS+L%+=)R|9{VURF19<-yTtbEseZRNpKMvO_HpF;Z`c?$;(e>D8GyMoU6dgPj2AlC%5qOliR%&SDBZe zLVXn1-n{%2KVE)Hu%NgO@lagH@bXh)x+$)cc=;(n{a5bVQXcX@W8pV5pM)97YQRoTR^{5GrLQlA6 zqb4+p_XLeXPq^+uP2lQu8jV6vxH8m)M)RJa(PM3QX3*u-RdKB=y=kJ$y~^3&bh)3K z;@Z+%aedrJ5rlNQ`~!{PRoe0uT^_Ahgn{MDuhZpm_NG|6+~}nUZ+k1kIQ!LNx;)!Q z5vKSm!bdjy3c9>VR0OEr?4-+?5F-4*3yL(lDncqRDALReip+LXgw5!;y(s8pI*n}7 z2&KHB$aWNTg%=bF<+)h;F-1YQP|$yQK_B~}piW=K&5IZGageCE4dexVY!E`+-sVrv zOC%S?Z5*E!ZqxXab3}{d$K7Npme(c2G;Z)4xn!KiZJC8EnW1s}q^IKcMG9FGXC_Os zb&6Yzq`0l|RNRsT#ci91;(==fqczyACf|rV!d3tr24yRvb2a>t6F-Jzh-}+ zzGiQzPS=S2vFpjv89c-2Z5r_no?&!u**CDk?%&|$q%lk2###}F zO_Pm^I3|RArHFGxMT{tQ?Jw`kp7rxPiuhT@zvAWm_=diV==&XWK3^3g#;6WB{G0{% zwLg2#^tz>pKf;cmBL0GBq7ZT0ROb>!?3kIX3y~5;TMGc{AYQ(|uJbf@kUu?QKFVJiODs*rl6N0|O6Kil^8`ql6? z6sBLD;W)KX|AW!7=@0#y_zTfH^{X8<%M$c!3d*XJ^lPF>h<2m$MxOo$iBqml&}j3g zTzRT!_m#Gk>wg%j(N>k6chRptqS4j>_42G$_Y`gOw3K%hEmkBCSI6ub`ZZ=}L8yLB zDu;S_mOG;LKTa$k?x$Z3`yEaC)%z+p{it6Pu40C%m)K!~8JQg}8xa#Aw-9tQ<*{{si$3@!ie4~%^EK^qNW4oqS-wg5i%180|H^jw4 zJWP|`uTeaHQN!EtSMm7Itk{MSk768w#abth{$g$J`F`JtwN`t-U&Pv9H6G^_Nd3V> z3ho=V{IyQ3Rfk5cIwEUvTdBo6U5!|Sn?{X#)2Q_a3lO*KgY=3=v%SeI%=WL2+b+!RaQ=E%nB7@!?=H-a zv-SU2m`!k|7G`eJc-%+!!YnfmzQU|@jmJZ*-@+`6r6yrkH0mhK`at9HKMXiwRydY@ zVOAJKON3eDG#=0%DHLW6)95swiq6eV(e?LLbZwz#k1M^Int;+q+=F)@0Y%V=0 z%*Ihc_n}>PTbQ-4+-??TIm#-Fgjpx;jq8P3SzJ}yfIx4ws>E07djRUbw7*j3T@a#Qs0rwiI=8vSyU;4w_2|JEXSY|~)y3Lc+m^qDDw z$7+o}&qL7{Y!mw1GA7NK9UMqZgB^xLG7*P4ZXYc%pklhALHM$YLGBLCV=k*j?a z`C+=y^8<}%FO$%7lg4wnMeuXccm|t=o|PKU`6)urR*mPfZGzuB8qY7fDW0ph3BI3b zJa?oBzTGvRg%-hgq{j1Py5M8dcwRIKJ~oZ#9kbx`RO8u^BKWvyJYSfF9uG8Lf<^Ed zsPXbL2|bE6Uc=Ib9&0pSWBMpw)4M5N-)$3o=4revDMF7AG+w`gVv@$|h(#d#HD14g z!W*6*QiSf0HD1-d6t6n1;`P)dbl;}&78LrM(EXIg+siC;Z_;@8l@;#*PsMwtKExXq zMh60AlSLTRTjRagEDU_6@h;gW44SU-uG%IH{7U0}GhKKcz(+`%q3Nz>Ht-!ycP~ll z?vpMECQbKuO@eSr(|wjja2uxS9$^-QXPWNcrU-7`HQj&SCb%utbT@4gh>NECFIxp7 zZ4`)K%Kszj%LAe+*Z<%5#p5|ggaaxJBQ6Y!0&XBKpr~*_Ww1pwOVUbOZc_J_rj=AK zI4X>|FfIYPftpEbNvTQQig{-rwiyUQOJ?@e#N+x>mc`Tj98?|Gm1c|Onk zp7Wl0o_Bd(fk4JV^^j&wDCYpVp}V8a_M0)>DJH3D77hPT)R zI-3puyh5N$+3@cU3G@UT{>vbB1g#QiHybg*BCt?4Vs4GVo?s&iZ34?kltrevuuL&DvNrcLXh^es8xg2sMRQTgGE(Y z1UZ&P9jy^$5ODwG6)ehGA;>SVs6dqv*2>o?ce9mGpXmlo&#S~VMx3L=gRLp|X7kSKm7IG5&r)=b;gT=gEMNXbz zG4E86lQs~(VKLZ4adHiER~;aphCRenTSwmioyA-V<1y|C9s{M%)!E27TNOEHtsv*h z50cY4P$G}Te5bjq9;_NU3j_IVRs}hmwv(KBmBsv6K~7{{CW9GqX2zKWW}FO_Lzr<)HDHihfO@eL4K z4ia}aGajubZm4m5FLB$q6L<4&;%<7KxEuEocReJ*Q)qmF8BZZ;w2?-Pr8T1T+en^a z#&@bnBjDewAdMRw_c}>qiDNF<~nA) zjokgrh>^lKadYmV=^^R=;U+iWCcn={4#IuzVk0F;vxE2s8#xq&HOSR&TnHN(f!mtE zM#fZ;h8t|;s0z{$K-oqMY4Ed=aY#1UNCSpC8>*4UO|OsexbGw9HsD!H-%H-SjV@4z z$1I+WOvIyy_1|YnjTD8;B| zLJvwQ1C9@xq|{``x1Ca|(Xn*9l#0P|kG<0U2Fm$FLH*}^n7&=Y$hEC%# zLucesL#I?}=(t;9=omO<=2aZoxtw|ekbrdf#131 zh@rK-($ESSt&q_Q8Lg1f3K^}C(Fz%@kkJYmt&q_Q8Lg1f3K^}C(OL;!v|TIOt`)kr zLf2NbT`TI;iaNETPOYd@E9%sWI<=aBN5Ln9KN50cju?Ev_W|Drd>`)2Yes! 
zeZcnt-v@jj@O{Ad0pACFAMky^_W|Drd>`)2Yes!eZcnt-v@l33A|`WAKKA} zcJz@-gBRFdV0(e>1-2L1USNBH?FF_M*j`|Jf$ash7ua54d(nR0E#L>f7x-S_dx7r- zz8Cmj;Cq4Z1-=*fUf_Fy?*+aW_+H?9f!_iC4)Aw?zXSXo;O_u`2lzX{-vRy(@OOZ} z1Nl?Dge!GU&gpdB1&2M5~0fp&199URcn z0UaGp4F*R;y}?m;)Zl=Q4)k9~5oAEN1F{{E?SO2D40+JY0lgg1%K^O{#gGM^9MH)D zogC@lC#MV?@;T&l$meJSjyB-H;C06g+;-H!fy2wU8#r|0(1{0k7&z+BQGbs5bKvl` zkOSEqvN>dP)R#ji4xP9W<)H(I4jgU6iy#*|a@3!r{q5kfgU1dYJ9zA-N`oCT?a@aw4Z8vyOM-S@gLA!g8g;jSKp_puY?4=|cTosJ{#KccK3}f$apg6WC5*JAv&4 zwiDP+U^{{B1hx~{PGCEM?F6IP;P-&v1AY(qJ>d6%-vfRR_&sQU58B^@_V=LuJu#;YZs_TT zo^I&rhMsQd>4u(e=;?-@Zs_SQw;0^$e{S?YH~OC&&z~F5pBvAg8~ASEyMgZpz8m;% z;JbnE2EH5kZs5Ct?}q+v=K#D61E_ZZ^$w6z;uYVU;uZTz@k;Xv@k-Ni@k(QZc%`9Uyi#{e zyka{lUa=k#uWYFluas4YSJv(juas^Vuas;PuM}IvD@EWpgFhGiS>R6xe+u|b;5ULl z68tjwNu}6(w?gb4*dg}b+Aj9?Zxeg_EMjl)l-LXWUf}lvzZdwu!0!cqFYtST-wXU+ z;P(Q*7x=xv?*)D@@Oy#Z3;bT-_X58c_`SgI1%5B^dx75z{9fSq0KW(LJ;3h)eh=__ zfZqfB9^m%?zX$j|!0!Qm5Ab_{-vj&};P(K(2lzd}?*V=f@OyyY1N;`T(aJzxq4cuc-MgTN00KM4FF@Poh)0zU}+An=30 z4+1|3{2=gyzz+gH2>c-MgTN00KM4FF>K{b?gQ$N1_yORfe~IW{BKntz{w1P+iRfP< z`j?3QC8B?c=wBlGmx%r)qJN3#Un2UKi2fy_e~IW{BKntz{w1P+iRfP<`j?3QC8B?c z=wBlGmx%r)qJN12;KME!VHb|zmi zu?V|Z>;yi10uere2%kWNPawi45aAPu@Ciit1R{I_5k7$kpFo69Ai^gQ;S-3R!0!Y; z`~tB9`0xot_yi(+0DRim(es z*o7kOLJ@YM2)j^(T`0mX6k!*NunR@lg(B=iu><%W!0!NlJMdu}im(ku*oGo(LlL&2 z2-{GEZ79Mv6k!{Runom_;I{+69r*3QZwG!m@Y{h8A4G%?BDMn`zKRH6MTD;+!dDUD ztBCMb#CG7rKM~=di11IucHqOV6k%72uq#E_l_Km)5q6~ryHbQ*DZ;K4VONTRoxjR>DcYz2NR@Zr~p z@M}c)H6r{P5q^ybzea>#Bf_r{;n#@pYee`pVk_`lf!_-JR^Yb+-w%90@cqE|1K$sP zKk)s)hs`U(<`rS{im-V_Kk)s)_X8icujmIp>{=0atq8kTgk3Abt`%X|im+=%*tH_; zS`l`w2)kB@ZnR5@To-jR3dyT5k8d&pGt&JCBml?;Zuq5sYLix zB77>*S9e78l^+#-)?=ct8g$5q-z36s65%(A@S8**-Y-79UwkrTDi*TGOrZ1Lb z)QP^7;(GoA`cke_+DBh9JH&74%bAXmuhEyv9pn%CQoTbepfA-q7ADe{0_~gcp)dKI zH#6wVYhArh(U&peI-S1M?-=wjeL2OE@E7{>VvK6gml2mKlD@nOn*!f~1 zm1y~u16oey4|=*^PhZu)gO2Q-Qe~r7va;N9|4gZJxn9PnQ|*J*Q9b%~vwqzat&X~` zUzh3EYxH!fo?fM=i}ZAXp3c?Nn9;O8!AAXTk*u@XDC|SEu4ALH`_#IWjmGX%tI9?X z(xM(&?_yDiWF3g&qj6T}=n>I;bR6!SWNp@>A6n0Av5YDswe9kidT5GDvQt=6x>s0I zf`tjfl9&pisJccd!o)=p4kal1wV+xlM5EBRR||_V zHMJPs0(oUfVLAh;QlwVl7%^eV0JbL!OMcL1N|$KsErcc7(s^Ntwsc-tqAi^lmgw`V z*gTH)hntog`DjE{a!Q-bph=rDwYi2(M*o|4q>WbBmlLIp&}xsg(W@=)-B_+I?%hOu z=W=Pf*?ajoP0w+){!@8-I2-*y;h{U~=!dU|ips9VxFdh6QpgH6`h`J!^tM<&x*9j? zxck*J3b}<_^&XZeDa4K?OA2YiGA4!8xm*9H5SwGLOCeS)b5ck-mOLq>%;PLj$XYCf zQb_4wKKheUe6%0yr4(Ys+9`!ZVl9Iv zBUg&CmXh?=6LsvgeDW|KdrveU`=Ck-)Um5t_qLF~_n9wjR>%I?)%P3ewYmjLjB!^J z6R2o%cQ~ksWnE)8xml17{)vs`rufEGk(e#{Lw51F>7A=i zivv?xTz>D29dvMAlp0sqzHuM9lHwFXi1F%mX(8ziIFwIFOa_a4aQ@VPsd0~EE}i_n z*+nLjzn@2tXu8dZAklPNV8I*NJnp$*_S0nKnuVj+^0-&rPvw!`YB%X7zu-9yO0i9j zCpJj2&CajBkYeqwFP@WPeU9%!rPu)DaXW|_hxz*+awS*0S21@AAKK63&iBlfh*8q= zjkol>s+Wl=)zZcSyb8h(S@9|eKZM18aa`hY9nn1QbG!<|5B(oCSEpuy`W>kM0QC=@ zN<3TEP{XYP@)$P_xiWDFH3T~Ep)~3cGex+`eKcZu7&k=^;ihpaP2;AiQQS0lFgHCA z!cBj2ew{&|G&+L2=#yqthCW#u%}p=3uC~%A>%zHd6K7NCljyiV(4*T@6f&LJfqZe)3lquMXq! 
zSeXPhB6T%#B_6+AQzQOyT@5SQ1GoLl7@y}`RQv;Uc#=nA6JEC~}KJA@r$kpDz zh63atMgCdjUzk7m3m)H+KW+n$_qT=-$xz$+#v{sG#ck!~%3F&)4^=6(k%8i^bjjl^ z{)#jF6n*ej7XPIife9Ym%HqG#56iachhxuC{T-OfH zjxqB10hKs;{O|efD>Xr!|EDINFg%}3QWM5{5LxSt$>aHp@^(vpOaxDupvJeU2{ZG; zM(~8ZJo%`aaKBdO?S8$?mi#aiPgrr4{avYAyIrX&-Jw`le-SmG&aqvMucD*Ot$Wug zRb>kt^VEc=a2yR!ICEIBt_kG{A2`@VI^tHOn&3ciSDw)BC9U*663r9(5JZzE*hHT2 zOB7G|kEA9J_R#&xCudkx_#z|U!-h8ZrCnm3m{ufVNA6#0YT*`5cx=8Q6 z>-w^gK5}RAqT4+2wSvELH4%$06I*bO5KnZ6^TaOgWar5^sc=7?T$;y@@x-s9c;d~D z@Bw=7>FeTK^j>Ubja|VKZwDWUlT#!2%CT!56DsA{)hFdt8N;1&EVk~&VyR@}zxh|U zBbFo`lZ`7_lA%gAo?%I`4YIL;s7YgMb~i(1mhLKg7PeSB}YJNv|J}V-~Tb!@K2}EFM}Q$E5MhOR}Mx zB^^H?8~UuW;SftYy-hY)SkgOSd+(S$su?_?EXlb?j-jsQ|C9}PG}(q92v71lu4c#v znIEo|4b&<}|HzVhYUF5C?u!aJ3Q~Wu$WhO-aYCp%Zmd-vafXe{IUq-sIB&cwM-}gp zqYBx$d;l+E;|ll6hS_Y~eU8BkWkdQ7*^t1-J>accDH~#{KuZ&e8d@Gw>sc_{X>4t<7=-{yRzOAc#1Acq}d z<9AleVYWSTn1zk6J}HM`^JCbvZ2ar>a#)F3ct9P0IEs%yjxs4b)=xQxH3e{;ge z|GGz(S7|E6-s11Bl;r`w{2y3o&XU;BlgNzI+Gj&rpUI+ zTe5UHiYJf3L*GAwCr@x}l4J>6Mx_LnoJG{+Ij+w?ltl{Xa|vbc^V-+xTT zGNt6DPI;Ux-Z~(QU$Eqds%0$bNnWv67901-;(nI=_zqdLu;k~or%!-+p1i(eU9z0C z%5n5wdC>1H8T0y1dC+aGCdv4ROg`JKE|Uk9_)dQ-4_a=O2Zgd^=QcS6@ByCrj6BF( zEf0cXpYN0h;f5xEyH^f5!IFQ|>K@X{QU+DaAs?}nAz-Ps%ItlX649D@PEO1{Acwrl zQesei4dE%H4#?~fOG$S8`CggrWGNG!nX6?M%TlK9kg?J!B@^`7EM@KTc=5*O~gQdQe|H=%W z`mSF51HE{G_tPbEf~g>i@zhU6p4z%#@Y6i?GjQKv{v z(!$;M^3>m4*B+PSeS`Rf5FI51p=dPW6QVSR38O(s(g~q#!nDN?y}&2T(#Z=n>74Qf#*fqqD;wm^<;UdBWk=-AYfj0VOW%|?mzM)A^i9}?z6smVH(?w4 zCTv6BWI`G2f!~BZ@S9|m!Gpi?&MA50z?<^MTPNg={m12veGT%)V7-R=% zE8ln?Wg1V)8yirj4s;vxt;pYk{4(&a1+5gc67Uy;UIc#NZv_5E;BN%}M&NHWfo=pn z5_I5iB&X!p?w*ui!>0DvZq>`L^&gX8>t+-Fd{lnTcSL@z1?1*8<=2`}r16COS_6s! z{xutlSy5(7>%mlVGZ&kx_=GjPNrInEcqKn1ici?mvF>MbGo^yW4`&nhAp?GDvM{aG zk&N$Jci=a26JaisfAFkmCpRPgho=cs%lN1zBtFpo>J)-#z1TOB+^FaHd_`MGLS}y0 z-}r=g^!(<6h$-rX5BCt$7i>aTH8K6fCS2`&Hkynt-AnMuPWb8|iN`Z^^8ks*GxlR8 ziH`#?Nhux0C)`E^FFsM!Mek=5v31tepz9_L;S&wc^;-#++E1L}yEvF6y~-wLw{82F zB$nnszDk{_qU`T%;u6m%XUMoRtU@FSl^%O5Nxo}tO6L=`us`a=zgS2-_{tonA`-uX zO)Re#redFN{MsrKe>sv*tZ3gLlW`>$V(MlS_v>oF&xfcDV4T1v*6GTe#^OI>I>aWv zsh6k?;S+^-FrO?6ZKfX$s+84?ve)!q=1tk8wUiU2LUoMh_P&U#1I!U;r-I9rI zD13%Z{L0;0P7;u~X(RDF*+eW>GkwG+{xU?J_#X`?em0vVdM`aJOzR62l?hXQ`d!NO zZk2_p#h%R{3K<1^NZc1}Qdl*K`-x2&uH%*LC2=>{B;!G13T2bV9w4R=HYur+nEtP= zOGfcY6STITq-s@&8_p)(Q$gbXudT@tK52>G)($plt=`t_*rb>Bwl2}ydeTOe{hdwP ztvAgUy_E;FR-Uw9YvoDUvt)XeO}e19vS|gI^w%m9msLYdYth=b^@pSh4+)>d{f7c1 zA)x0p=z5s-$0;q6Px@TH6U7!1*Uct5g6kTNrU; zX0u6u=nWRs8_cdZSen-6lLw(2a5gzSgiju!-JQ5})WoXayAu7r1w+-z33>~r>vt@K zO`d`~hUXI8(^2>go4ioJV<*_;du=3c2%EeV*&ngV4-HW#KY^P!oK0R6#V5a{D_Ej~ zSL@mp={;$Ue&>qxJC}p89D=QK*jF=X&Go_KDGi8x@~-ZAYbXh5JE$h_(~cI|s~;`0 z*9xg{(@=-ivRL=N6-`KZo)u zBMf{Bde)T8NIoTB;ZqiOEfUC|2MRv=51;aI!KHnC$}k0LSd+WTje-8KB+=;Wpy_NTD{TEM@o${>k?%SUmzX|Uy&!-V;+Gy`5L&5_% zwBb^Cz~(967apkepL{etkjK)-cRo2WJdlZa_TfE6h-V+(lj-hE2oIKM#e&7|wvFMz z#a;Iv3-7k(lZ|THq~JsI!hJM~r)4cLj8N0&>q<0wh%MY(ucu3Og-UdVmJ^;<#MHFs z^TLBX?bTswT1EcX3wfGte&4e^?MVLTXVo-2cHf70hqAQpMF*Z#(>`02^FB|zRT%T8 zn)a{xpB?1s;=-0{o<5xL^dyzk@bvWipI)z~-_L1Ecz1E$u)S(}2~pEG8+m$_R>PoP z!(gQ*C%p!8c=|Q-meT7x#y|6^3k5!Psq5W~;awIs^)cs@_2FI5vZ+tZ z7hdF3pLT70G`uU6O|5b@&I%9ygH7G*?w%7K47xkt3J-eqN|x)DEYmBwCh)Kt9<;kd z(!+!2^>njm#rW{x>4M8Y@~Qj%Cs&6DaD?pc@W2{R%b&tK2Lg}15Z-w!icdY}$?Oa7 z3~G9G`kdGE!aJJ_L%vd{zT-c!L3-x~oBCmmbgr*TI_I-U=QbRW&Yi|J_EDXy)C=_0 zNM}o{q_fycd{*ksm@B>0%BFrYgiqzjxD~BVZ3CMgr1Biz2?6OGc2k`z!j5!x>J|Mu z1=kqv1;vOIc3_>uf0kW3tMA@9+YHxCI$N#NvA^u>-NEYAo7&Y`Ebu;iT2D0~rH$T7 zXRS!V7)7cKDZ~yqi+BNNV-_8Fg-`vZAaJD(Z`B3NB^FZQN7alcu7{3S-fwZ<_?6@iIKqA) 
zxz(<#H%ZQA3(2{S&z-l(4)~Nv@p73|cGvmkB&qDWtMeV@uT9$C@wzPDQ%MikNAhXk@1=*m z`D8br_N&usqi-~Cqep*;qI_%GrVPb8M8TUV7*wHr>C69)@i? zy~9F}EMn8I1MNr1`6q&cHKTfVeh<|49iT^p+v%YuG)vJzdZfIT9%-nfM*<`Gj6t>Z z^|c7A(Cuyzhym>#ZjdaW@qP0fUyABc~JqgR{(K{A;IDyRw zcl8aWwKm5{f!3a2Gh$Sd!Dpm8C&;w6Ka$UwVWo%5cGIJ1=)=FV8S3lwXxTP;IK@Gp zq=(J=U9v)dJ0{EM5j6G@JCqu5-guuL#RkY33qtvf2VGsMwAR69tUg4Kn!)^kHy5Th zSXQCXbMEeRS|{gWJbA_|sApd-MYKtJIF!xU>J$&rIx{L=3%F79ki~&`OO!!uHW^QNb3gn)1xsE5X)xlg94{TKI0=bWGSEFM{U*;KI2Ddk-=vC z2R)A-ZPIIDLoJ$saf8hyx@N@ys{`0aY~~=~{m5nxQ3tvC%m~N+L-dU;k$mPTj5@0` zlikCY&>6QlIi>6_a(9hZb{j=La~c;?}E`>|D(a$#nW% zZGd=Yjy4z_MU8ys5;yj|@4BiFKi{oYb}rWkp9lDoFO{8ZmZc@|nU6YlOUf?W9%WZ0 zCx2FUL)NYU-&CUPF8(aJUD@SzU42~Hoy*x@mEBp8P_$jyoz|W6fU?`PTiJEF{pD54 zZkjJ2HKlQcb%-l-kP|d+jdT1B8W-tt z?4xm#^SP%fT#cDq@7?s7I!c=AmpPBdot{4gjZy2mqR_Zx^ZPw~=5g$P!0UBE z`$y_bKOz;8tPb~ZSGF+DyQ*c%^cb( z-=T~85RgF4G+_pkE*t1v^(%dl_Kl694@S7dPtpgQzY5<)A8c8&@C%+fsdMz-XvhyZ zn2v_@JIN#((t^2T8lue|(-3X$n1*O`$26o~^CkrSWDlX%?f;E@7wK_NAm2&zO@HH= z(=Fu7^LP!DYuKpsWecB}N3QiFbzM*OA$1l1dE^?lw|t4VMY**|Eykxo%m2~}`+$78 zx`upNP(i-bW4d3{bRgF>9mq9J2Xajh{dP^8zb4mkhEE~6R*L^na&494vn%A9(P^DZ zu0^=MKu%GFnmNbgmdG_T_6n10Ssuq<$+dLnSN|i|FvA8}ajoGW(`Dsti3{nnYOnYh zUDnk3%wD=I5L~u|jwotPsHY<`Smwe7q({xXSD&k@)2FUZ`{QNhJnr99LwF{(foE=X zni7=OYAw4d7+e;vG+kZDBYEa76?3eawQv>0f1LJ}AEpsYS>|!=^uK>JyZ<^({D)mT z5AfHtdpB(2nWyKINj%e|o&NW0&UHbQiu({ennYjsOg1SWn6=Xff2G<$8)}x$}CU=>aYN&G75@B+llU zKk1dP%OCq5&-_iV_GL}Y+ja10#M@gm9d9>kIu2Oft&fNU)%m25XN7ohis3+Q>&V68 zK$F0;(yw5h<3RAL$s!K)%mV?5M=aJ3PyblXrMl$%51W;aO$#zr3qv z*&=z?M>vB=&GO(ZAD;C!&hk;Se$%Do>QZn}Hl*b0Qd;zUv!3tM^UZp`SI;lj^ZWGt zVm<$=`JDxPma=eqhdOJ#UZzwpb4xE%s+ZA%K+s3);9Ap1>$NWNXrtCW9zCBIGM&$w zhq!5cR#6bAi!H5gt-htabJsx-a4BX!`wO4-m$p2SM!=#;YCJ@f8s4Bub#*k!R!ftt zH8iRGb(&OmkS47?K$A-M)1;DpG^yA|lZvWol6fyp%H2bgvUby?^j$P5rHUq*cG4uH zl_o`2(j>WpCea-<@$Pn-IIxW--m=id>vf7X{SC#Mbx5)1V$#xVQ7X%~DV5gkN~LXw zQdw7_R5ny9m5o-VvT3JM*<7Vm+IK0HzTHY?aF0^iw^ym`uU0B=*_6tGeM;rs{fdLqMe4M)JK>J6@zjkJ0=n{~u(-$AP%X0uM{`S@!Bwc1(sE3KZuW;MD$e2rE=&St%< z=hy4sr=9LsXmuW&b97|LgT>hZoo_YqB3 zTZ+hM1r2=GwP-%;dw1KX)K=&Ae@ktrwb!OC%UNQlwld$lJE`pl2YZg%avfOeW2<-2 z2dJ&cIjDl#);L2xqqY)9%2sMy-TrDewKY3JqNpw4d2s`^`J(u&UmYoVbYD!6uBH#! 
zIx_arhiYBpev>|Hc7}Z`ef9>M^Jl zXfbg-G&Ho>tfy17a?9^HzFQi);sl%B>G`xj^xYHjH9zHOn)1Nbe&?;89T<99KB zQ}7#y-$wk_%AY2D= zeYMp#Amo*{?tNRx+fpIqrFs84Ovqd796CYB+i?HNUwQUJT?>6eUbXMSKc#T1_tSVu zsdkloD23OxzZ5Gek*`b2Doz$l3KT1K-aIBLX?0R~(1T-570KDXPEs-*#tl+<-#$sn z@ve80LYsTNn-o@RV;$Mghw$uIwSZOTel3huK`6_v=*c=w3NV(IUFH46FD$h8joB(J zYzRC&j4Z0GBZXx(q!3$q3v(^RJa9-@c!!&Z5_4Y#F$Zj<5Wz4D)9`D?Z;HEf3Mo8& zfE32E?3z7f(J+>M2;9S2_A!vpLkNKopU2rm76p*9Yw-ZhSO%DV0#}XNm{9h)aGw1U zuBzeF6SEbmax@+(`~fL+mx-kCj_=%3vZzJNHJ2i{1e3#L(H;F7L(t~M5j@+4-(0P9 z!GLB1;R(@7XCXIzu$tYeU!~y6gxh#cyDET%RnU*{DrC`h{kjj=T`c?iAw2tUNNU4S zWKmyh!Zfnz2ju;Rywf$L0GpT#%8^b-%M8?!MW-vsqB^aG79pT&!BRE{3r!cH<^?$x zQrM?SUgX8-30YL8jlnKj8^z}c5}y+p&F75PjEhBC+L-O4wD}Wv@Hr_9CjY|c%+rD% z#Fa$yIm`Hn)e>e(4JlGw*-k00-0}PeQk>QC-V7aiSBh(Nq}ipo zCdUh2DX!VEzetL+i+oP0W5{+XE;ve^W783i=m_s?2&M)g_%sAllZIew-YJ>v8iL8E zA((gSAd;P$CSWkr=#!a>0&*7+qrjL zc&*8`t~I18xJN@=3KDSunbDxUlbJuDwUcY{5r+B>jo2*}BgwhwGr(~q?kKnzlja7PsS~)M`Or%NP zEgIcB0NSm=>Re2_;Hn>2eK-+K@}AePZMd$~Q}`G0;tho|rAV!+k-QkO^u{19IT4C4 zqS1kqPsGcpAI-MmnW5R3S(sac`kg@i-bMXrb`hQ#rE-;K0?h8${#IvS)l7idLG3Sh zwpaTTo^99ukY``k%4DZ&WwMjCGTA1rOm>V`COc9qlPziEp39rGanI$=9qT`*%k9CX zAJXMsU&cAbyIB8*mp75m{YYi?>ReCf3Wvh$+A6Op_N((FXYsk+MYKqr`?ZsvposW0 z_b1mEi|LyB&tn6M^K$Ff52<2o-8zGY*ShF&x;(2p?Oeqi6E9HI1mGU3(+kopj^#UdS1EXJdX#dJ)uq|_^x^ajO}1zIj>1)vpyRt#DRXr-X7 z1+5IUEudLJvw>CzS_5dPg9UZ4pbi$)*@8M-P-jaYX#Jqw0_`?vsNXi!ZyV~j4fWfG z`fWq~wk0DUb=!uzZA0C*p>Eqyw{0aylx=DFpq;m&owuQ#x1pW4p`EwcK(FGNw$>vQ@tNlS?qq+Vkg$&oe~xc?q45pP~=_FoMsUqK$prA8>WVQ!0C>UZxK%zNR!&a@SS7pT0mM`MkMb z4mZ+!PWyMz3fe>uT^51Uh55YuI#=CChfpnW>%50?I@7$&RY`W3Z1zU}m6n5Ldovb>92>6}oea-kn>@ zk=mf=p*y#%(o;ZhSzIHvn5(3gETj>}xCNcOB_@Q=``Ak!3%luH^L);t55jIXhx2)z zSfLz-kf!WJ*v$sl*r8!JZLNC`gx#$4j6W21<220Ru$x%vay#s1jH@pz>;{;>z?g8@ z4R2l84dP(8!up#JhV`G{7uJ7*&FgahN+yTYzFa%phbby)24{)d6-D%%-Za zZ&u^C5x*NCp-fl(hrMAp(yPO6Bzs~R8C3dJ_+sUg9HQpT)W%>QmIEOl(1$ZWA5~A& zd|wY8ubj2|=S`=N-PX>8iUfqY^*}MD%?mmYsyX*}FSta97`;mfWgA-eU8Stp8H4ze zr(H2gl)le$R&=iZgg&&oV_6-&H`(7Xjb`}#M>=SR-G4--8O{EN0-Dj}KXRC6H2Uk8 z(~JiH8?Vs}%)b<3uBLD`J|$H`;bJW)TVaYtFk@IhO(--XRUMpnQ~9zuAbdchS3&!d zZhC@f7+DpV-(Q z#gs2IRkDKToNA+AC|{PgzP?$xR_8wYPvsio1LnNvKfa!hroM+}&{22&CmyDwZhPMQ znJ&5&D12G@vLK%x;5p9j+<((Gi`#azEAN)(v#)th_g8~Um9JttC_3V;HY_8%uP=n> ze2p;(y0$?3z9(JaIX`uotaM8=#v|w!AI2c)7OViv`E5Qu$a6#TL&vGPO8chSG>jI| zH`LtN*6066!*BU3zgFyl!1Bix9*Ot6!qXPeB|LXLXVaC>Dm61dHzS(oKHoDtlZKnT z@BEd9Ww6{=L)6@I?e8FbKg-?iU&3j@YW;tqNXuM#8uJTe(`B5=M?;@wxivVVkA}X& za*sIXKTAWOV7YKuAETl1EcY#c{pWOOG9s|kp%FOan})*Y50$(hZxXy~aQD329vuq)Y2o`wy>_)?rjak=Cth9sVC_ zU4t_$Tw1r>7b8gP>YOpdq;-q$-SjcfZED|SA{U8`d{Ec%;xKaQuB+!Pxzt!qE|yo2 zi)9WWj9e_(Nj^XoJ&xgoe2{Wwax%G8?D0KHE-lW>+^*)fx4v#CAK(S`DfvLwqJ3Wa ziRFIgaZMqYvf8%YC6}Z{lPB}sYr$og$t5g5$o<{%G$S7v-5qzy2N8%1Lq4Fsx5LRL zcpNX1i}>C{E@rmY)sRaA3tB%`RiWdB2g#+fZuN6=G0p#`L@t>f38ToxVn@`U$VD6u z=_40&L=LwIvM#o``XHsx-Pu4c23ij;A{V^}$VKe)Ne*FZY_%{2^PN)?m^!vfNXcO8 zc#Dww1XCy2gh@YPX+VuI@k6H0L=n(tqevc87of;GrXrl%q~Do(ABy;yT8tu~J%S=d zpqJZ($)QXw0TD4e)u%vR%GAxEmVjylHI}IsP=6T2)e3d&Ij%NT2x*Y;QI(LIZV@J| zMVVijikNAWE)V0X15~hF2Nj)j0)*W}fiIZ)0}9kb$j>N{!SV*90Q&KSQc! 
z!SZrIjYD=mO0=@P$3gW0kAe!2KY_X!l#f{6GoUnsZvd26S>Ae3azJsgybYk(LAeWx zh2^~lO2J^Bw`C~Ld&4TE0ptQI10DUbJg);4$^t+c3jfaXwCc7%%oi;0XJ~O7qWVCM z1oaTh9|GzsP=8|i;X`=7R^?lunm|3n@+X422GrkKKK5s&+yx~al#f{cG*IBmriHNl zOyFS(dO{W`4wjz{N+l@6S$+;Ek)W7C>1O%)pww0iDaqAB>SCKPp^D`fR11@?Rtpo) zJHL5XNUOufH6g7Wconya1e_}6Op_(zrZ z`!VyQd|cXf??cMR-i|eolJBjp2W!dq5cE=8rHg!D)HWiQd|#ls)ZeFTF7@|u{x^q_ z?{oC;O#jjE$oD=E%OT$du4Cc$_r!m)ntbQg#+CD5!?^MqdL40wdHx=3c~bNLtqo(( ze-z`xYiI!P)-~)F-n(n4xt$=y+z&AK*HF!!SwrE@%wH|>`Rk+kd~L7g8mhT7Yv>jW z`3R#$Y2>41U!$8`Mr5ta`t{}HL_U940-t|6p3e`M`20I@d_mk;zMx1E3`e$M4tYDgrtlZBE zGPSLoD;pztK{leLl9fTtyIL@Kv_OhqFZ=n`_>ZJNBnx?Dpo%<-Ga?>cg@Z6+J`l%h4jPTr~OA-E~ zZ%Y^LHPQu`nipVdUXV2N_hOMnx(M_4qHg$J)D7Q@FnllShVKO!z87HlUeFES3%cQZ zK{tFa=!WkF7`_)^_!b~yUqHG5!}kIV--6rv6%5}Cy5V~PhVKR4@Vx-T_kwQtUV!0y z0fz4d7`qo@G-Lf@lV+@6gqM4CqaR^k@xDdbEZnJqXGBZ_w zh;*-ZcYR8_ZEp8v(v1hU8$k)X%PgcjriOGcW#(D9%tUsYg>=O-^V~f47&ogJk|140 zTh(%NvGwAM9{R$D!3wH_m_TQuRVvW56-U03G`58ka7 z9=y6&ST;~CEW33`Sk@1L!ZQ3_EUUyPSRpKXgPA`>vc)DWE7K-Vmz8qE$HKB=PQAi1 zGxoy?%d&7*v#=~hiyXSl$Z3eMOoq0faPqA1Kwktmd!hGLtb!39sDs|$;7n)XfvhlY z#(L4;g$Fi3@>*STi7pu(7LxH0Lvp$<*`!O3)Fo3${--WE2+4m$^Fo~N0m(Lj7slw4 z)5CaSye@f7m-$=ey{4{(QObMTy{AKqwV{upix1Hurag2>q?Hbl^fJwQ8O%nQ>9o7r z@YOU-Cr?z~)h3~pchfPEth}3oNoD2TxJ4lqyfF3au-E9ymg}M4)0N)q;`ek;%=Hit zU3pdid&za?SScxRU9pilM(^d3B)iOcbvDUHw8J^nbKwh;U59PkB>M?gIKwf>MY3B$ zcwx5dvv`ud+|?IHvT?jZVS)3T&q($by`T?&ha?+|GYTJo2+7(1KaySQz3@57PImT( zknAi+w1dov&_U95j(~H*TLdFL;G0v426;4s7e2F~?O9&)8g^g&y zi@pm($QnFgg`cVv7@Z0){AvU*{4tyt{?o%BC2O@(F(Dfk4H!`Av zqH-uO+{2+9ns9i48U==XJQzR$PYyMr$u|Zxq*ZJK1amC7?BH?k*e^Av+Tz04%fSX(R4fIYCc zJiP-UZT-4zVQX4`l5|FoF9WF_L$-&d`Yg3}^D67%zX`KitgG(I zSN=&iYXdbJwmE<=xRW6ArFO7F1(w>;kD{R>3CdDCiKzyX;6*oYHE?u5lCYU> zPM*a!rerqz@-0H$M-=4%aKbXokKY5OA z{)s|C9x-<}XV@>ri!t%yCp>A@do3!d&5vqj-fkj^Y*GIEq*J4I*CQjiY#_ z5*J6j;*JX?ULm|Kzv9T-^(#|q#mn%AdwIwpUPeFbW%$Lt+qd{TN zXn;l~H27NF;^j2{1i74meI4)l4~kIseX#GLNbZo(4|*A&A|2wTF|=Rer4G~)@lu^~ z98a*z!X#}_3g5egr`V-&7j4iR%3eG6ZYWX ziM)wv7h9Ydq2$d1-k!f%&PPe!H1UVUoAoo;7C67_CvUbJ$eSITkB(}$EDCUmXIp%& z%gaeFuEKhfHvs3PByaG6OFYSSf9w)Qj?_JHIY@Ffk6m6TM;bnTEr8_u1Qouc-Ll?^ zZ3(fx4|f3#U0gfl{?bY0NWMk*mgHvf6pvKVEwNph!6dKcUh@wmudREzk>uueb!3t} zRON$sKJ}A>j^-Cul0$*~W$aKSLQ$LuoaqQ!L_z+mWQl34&Czh2_YSZcIjD_ zLV8}6kZ!6G(wpg)Tc?Ed9=fINw2=OUZn+CD7DD=Xosc2a3mJ}Qg^Vd@g|tDW`!n6r zjdY!npdXft5Yl0`y5&B;>V@>O9{cA7I-snT;xUwz+B_+lq(5)>$rW z>(apM|6*HT*_3dIZOwFITQ3DV{m!=Dn8vm~3~a^zm(Q_nWIEe6S!UZ@w(d=5+vWzo zN7%N-;a=C+w(!rrA3FTw?v|s+*|r!yV!bVO)0w}rZDaQr9VHveKJ|S@Hcs@sB#`BI zyUh29|45hDCbGP465IBR9SgQI7ynz%bhLPP$eGSq0$$G4XzBATST;}JEN9|HI@s0h z)h=fSIoF~g~ zY-)bWLJ}tP4ug$mPm{x7-Tis5k;Q1U0?6Vi6=bn{CGpBNe}9)Oc4aQl6E6%MFLqS1 zkiQ#muIDkc$Om% zXecyKm2fmDo(Vh;o_Qvs>E?9LENHD&j;3+C=NeA;)In3^bWdka*VJ*kW&kPOI88Ji z6$EZgK-XNgOy4U9I3lJTkZOwS#mf7}l+jwVCpxi#2hE8SQ|0>^} z(%lv%e*h;Sp|kwSc5Uc<9@;*9|5&lyo`*;)_=pI~r8(Lyao;ktUVmB)%q*mohLoadXciI+Mg(B@ zA&xKN_#lo35!|8e3T-l%N#!=utis&0(7Yy)xQ6t7%lt^=VSg^=($oMdk}Gspjf&0n%^*GNGCW)dHxN zL)8S;dZ;!+wFP>u&})a@UFg}MHvqjM=#4;c40_|xo4BN!CtOy|bG)LOH|4x)p6dnG zJok&Ld71{*JfB9@Jl`hOJilhuyg=xM(a_^iO`@SDXoEiQ5Dh(r2u2!Ok2q&(=y^nI zLA0ARv=K3J+1^29T*Y}m)6n-XtL8gjQO%!qUNwIn4ehv~n(up2HQ%p6H9xRXH9w?D zH6JBqYgWxqyQG?*1?@a&=R><3swSw`L$wjAEl>rb7pfglwLx_Ns)JA+f$A7k$DumW zs9GR2sTQbd=s>e-f$JsJ0{6?R1)3|W1>WaX3w$rA7WiFMEeLE-EeL_C4ytMo@2=vCFHx9jtCe=crS+&sdl4_yz zWz|C0E2@QV=T!?e7gP&U5z2Pal=G@Z zDBDFS+eKb9^s@%lBA^!my$I+Gtrl_##eC$79FuDlnnyce##7p}Y)uDqA~ zWtEraiptCTyvob>g38P9qRI;u-U}7pO9wsBy+HQ@-3xTDLKUA3S8np35S<(URZfFlcdkCr{U7aneC1X$?hw?EZ zwbb#lYN_)T)lxKPOWiN1mTE4lmijcPmijiTmijfRmIgvE1bRB?>7keU0t;IQy#nZ! 
zL(c>~z?TBPv;}&t&})a@U8HA&-T?H5pf>`&G3bp$Z{m__8E)xij#pI6a7!=4ExpYB zqG}n=dl}Aq8P0nd&U+codl}Aq8P0nd&U+cods!Cr@}O4$y>jT8pjQvQM(DLb?*{a4 z)39w&wLx_NszXp6f$A7k$DxXg9(Gyf4Z1hz-k^Jf?hU#(=-!}vgYFHwH|XA=dxP!` zx;NwumOdIQiKg5C)9#-KM2y@@L-A97yh<9I>k z<9t!&gBs_98s~!==YtyOgBs_v7RS)E_@HU=(czdL$I@^NO^Xkj79TV%K4@BeP{Dn0 zd->q@^1n6yv)F}9rbaAUO)liR)cJz-9P4HFmUZ zh)FCgNegdJVf(qs!E8S*31ne~+%{#lyN&)P%yzcgQDL^D`GZA*>yVXvE4bdZxm^|J zC?T_3xFfXLWj&{!3UkWu)%;7C&ym|e-cxn|oGITE9; z!t4pN{UyP5l#y{^suvAA!)Hx|<+I7_L|uKUs6!t{yr^?m_OUjRZc3>bxu#N#)R@G` zSr{=8MH@y8M8_$Z86=9EX;=dvuNJFl*d+{$i;j_e$XYbguxln!)Y35Ylf;W+G!47O z$Gb(`-S~)JA&Ox%><%CG7SGTyMpzhTuuc)hYc%Xr>^xF--lJhZaH6B25CfF-E0qvO zB_vo0sk=YDS9I*?J#s^I9K0u_3K%{P`x~zCKl#KxFWl7s$2(ZPn(21R`&hkN{inUa zwu`Wh5j>{R?bEmEW7ziDL7}&`+ZP7MXxaAVUpfC#@YH~NlmBV0~Fa;kv+KVX{6gLHf~4Cm4WLYvh7X$ zkZ1np`Zl)x*5(uY+4jy&oIALQvz`uwtrNU-J;#d#FX#K^LxShCfw1L*S4hu^KEccJ ze(7bw^J)L`je@81y{0}v_p{}@1y4UEZu@#v!X4rUvRJ!g`e)0vf~UYKnhqu0O$lGzmwip}4D800 z(b(?cDrw64a{L9&c(-$)plQ7~b*iArv`*V4%$hK({wmB$P~>q%PAwOXr((@d;drL9 z%`F$o-7AIT0r>giSS9u~kUj9LYmx_V(;aJ?<)JkV^56=(;}wKd(;ZtI<-stzBNPWj z{4%kdM0dnObY*0&JOpdLp)&jeq5hEWNQIaSoea8TH$-=YHqjji@Q=|QhY)gg{gq(0 zW`&@uf8uUNLcM>r4I!Rj_>H}ZhJ{sF>7mPAKUS;YxN>wjdNel6=99g z+4DeHt+5Ui`b68 zTXLd>RTE!$L6$YbGiyEkMB zOZ1(+yX1>E)?rFd_#d5Fxc~Z@GqmBs5(|%U)`r7~#7$TUuX|r=!}kuo=p(EcnyL-2 za$w=tKJ&52?}R|7{eGEFSW?j0A;@o|xv^-&tsCr1S@=M}rtRABuln6~3a_^E#^}|S z{#Po6R~w22$Fy^TqhF06ju-{8yh0EY`e&>ZUJ5uPtZCO`$=vXNbf35^zuRD5-$))k zrs4l*ee^223cLUG9(g>vp>GL`pn*3JY9poxy#G%Y;kKb~g*IZb<)lA(TyOni6M1a% zf7{F=)^r`NA&;>jb2oXcw|+4|9!J`-h)oVGA|7M%ux zkRPh>O-wo53@B>hLlx31<=T(Gklc zo^lhiNN+9qJBv)w3JEOoG6o>EkzcWXV!ujLYQG9*C9C|{6leQYZKv&54k_X8mG-MD z&EH?QU!7$cZnj^YTWY^jqa1;woz++vaiyNoVf&S-2!~P4%KX#ztNm~!*nBv`ex=E5 zY_VTC&NWtf^ORQ(E3z+(ZM0tMh5xDnV0xC0x$8e z$L^g^AYZrRaKNSj1Jf;l)N$`@4fz_#cgfeO=w=~7H>ob(K!VVryhWLvu%(>`Z@Gbe z1Jz-u!Ix|u;XzwR5u{8=*!oOSNyEO<)4W9o36n6l`0;-2Ev`HdK^QCcCPCebCaUok zOheFRD0}FU-hycex_x{vXbkXSMH+(CuSgw`+G()|JPzam^+Ek9qr9u zMYav|=^eV`c1%~#r*~{orgsF1wPdT_NVfWc@wA3)okMk(Pmpa>sjdY_uoM~=s>NNn zO&LhqrVJ!)QwEZ@DFaE{l!2sesjv$n+nTBF9T%p%i6vUdwzfG;_c6a1Sipevhd64n zGC6E2`f+Og-j@We9o2nuL0GG%x-TvYYp2mD`vzg{JQ}sQQCPcASy3O}GxHEI@h##tM6Ma81tb7fH&L$-*8 zgMl#yZ4|ERW3iyoAQoiedWi)gkOCCR57HX^eE1MjfwIJNKEMCPd_GPf7HDwHeUdhc zUvaU(6?@Ju+98c`umS0yZ=9`RSegR}}UKUaRLJAmI z$`3~9TCP`6z=ygD$`M{^5)0?iXlloz#Zs}*nXlL$J-t>ectWG+8pVQ7X!OFq*O!U~ ze13>%;PXR7L)Huy?J2V8RW2-Ai((KBe13>%@V2}@O)N}@x4VL=frP4<8(k~>#Gyf;IS#(18xnYT$b4F)dKff#$ zPC>4YIBTQJ%y!cx!+`Bts#JhW^f###L;Jg>(kw<^kxDTy_$#SYj|(W3>dcxqq|y+S zx>OoaDHXLV)Vd9Q*I9JUh90EXXbq~C3TAco3sOP-y*-nq{8a1bDyi7fGICTZ>bE}9 zO2vXvD#GpafmAeZ5vru3QS6WKBSnK<-PMu-L;Gf_aM;!|BIU!9EBYP${%m<*l=9cE zfAlSj?yz((mh$Vm?|4e3KGp}XNJUjN`jZW?jEo+#um-6J76#EjeztP6RATya+DfUU zd|+FoRFZEy_l;CUFe{Kn|3^96svK=mjy5P4YK&hr!?5M+lTu-QXTPhIA4+5FtdC|( zCBC-S^HTBf#@Q+sm)~VE?v|9lO2uvFBrmDZM=6i~jW%Z%#<$nIS{C2SI*4y&idTZ}7`RxEjXI#;FwpI8S zi+TU?OuZ2JG}`47i@~c?w>GA?Ps|e*`}DpxDlDF5``_EbYfqK!u(BOhwuAQzR|>BU zaJ%8x+8!hi2%%#=by^{G2*WShm~XmVT!hf_^(0Cg^CN$3#{9=o8|(Oqi%AHohyArS zc2=P935%VtSX=A4nPB(JG}e=wV8?nZ#@8mr_`174n#rtVV28ILPjJ}eW*YknXG;*) zy0X{}JvDQMNH`*{5~AG{bM8Eta|??zVH++i4peNzBZ)RPRIv^3P;A4!v7gROv||q| z=GuD2P+R0?+_7bf8Fv5-yM@I%#mIY@8-G7PrWk*ZD8}D|ib?l~V$waJm~;DK!nlwh ztQd7`>{)D$V$|KL7dx#!WiKiFyG=JN|88EBlhV#=MMn1+uirr~$F zIe6SliaB_tVkRy?S!QlLN~A_H`d+J;d*4+i*qf{lm|*X=X~jF*xDCUOUBdRrja@5Q z+>WrdbF^{#VFw3cdl{Nq@@G@T=3^h2Zi89Zb=c>JfFqAJCDVgwcmTNxNn|galc!>`?Kgh z^amF2;Kt%z-C4ZvTx~qw>`g2_E%53+7Qb&()1Wr~C?99Lm&RX{?MJ#0Gn_Y9#eCjjuHjn;nhEoK2gvk=R78 z*o`}~pLFXDq&pI;o|0~UF-W&N_GjTY#7EUJ{f&opSCZ~vMXJYy9@-|SzC3-)d&jC4QN@H?@kfp0%{Rzd>S 
z(KlFp#|BrbjqhDY8(91|k7sQcB8C+mM@46?BD*Sb@CGWg_#XpY&uSAIdvhbmkK?_E zlE{yPaGoeU*KnGQKBfsDogmYHp$Xv5t%hwPbXphD^7k37=Jw z(GC+CJwy}!a*~Xu9Ver@V`Ma>gp6v6$*6k~nchPa{%s(mQ2wQyz~^+r??y5$jOy(x z$+X6^WO`l&nLbix?^|%n-nYEk{?*)j>tB$bQ9~`^eoj|&k_0w)&J!Tp=PT8sh-f=RR41wfzx9B@23fEripWE2(6}xYpVz~ z(8S$lgHn z_fN@Q{S=m@(o$?)Cb1+pv+o`lKA5MzL-r+DzF9~1j+p6sviGU74dEgCl!~{{k5|Wi z0g6tmaujbd&>X`ISF-mR|6JYY3O`U}pNI0eL1o4xse6Yih$VRjsKQv%E1RZ0(Iy3S zJ~tviGI3MCM|FxOUizVVR|$tV8ie27;c|^7ZC7ILRAOa+wlYEfa-e_G7xI^6SllH) z4jxu5lE17R7P{mw%ZBC*$d;C#iVcDsO_PrB22&2DNk#YUzLG8Fyj4xA7znEqHU$pa zKM>+mc?+9#@yqEm1WBW`uA0s%=Y;^%qv!u1e|6U?Y!aL%h&JhgB8@{Dc|4<4aCLvU zAw!7m{@kZd2=%)^>9!E+eecqJAymV=BtxC=9d8vvMa32#UjN@_Nk0XsTC_<&cax8W zmz_I_NqDJ^-!>ome!lc=Vbu`7c|I)rYRX;t=8$z-nEX+~){Qe*vi*9u-P&a5^};Qd z{DM}PtWDMg3fEZjO5TpY(r@Oj$J`b7V#Br*D*2l4y>kIV)YSl@Vab@uo4hetXkp1= zA!3U*IsOZeGU3&s4XS@=lQZtk4UzBE_ey&CPQ&KuLkz?1+T`-#$qvG+nV;D`l<(Re z&)gv29UWG^CA_+}Z+C+5l26xLUBa3M?)EeJl6eS_5=-&JvNL4a6!YBa4nuyH*l!(%d~LVhcNp@qHW?g-IM89p9mWX`L$21fXB>u{ zEt8@ghD7WA@5mA}O$oLR{z{e%cKQdCrOqmr5^iZICrfTvK6{HS>7gl!md}dFlE(F) zy{}D4?d(Y+%NpU@n=Gp{%k^Y&hyRlwwJ8TTJb06(l$if;iYy-Nvc5wWk9JxAOO|-| zGua9+EPS7{Xrn8i)`^JCM*N4qwHGLzmtNY*d4NUDxxzPE^YWjM2m+fUC z&b5o(6?C(d`5(BVSC#o6Pfy7gxc4P7O|fiDQ!E?P6wAgm#j-JtcSMP4m1d`TVwx}i zP)zf&?3*U0!S%*sF?EQ4E~X9`#MG{d^(lw{FhNM&Y`;;?b$9FIS?wc3_`^r zx@*oHw#%bbsA#6U0!%{1ZMrMimF?PID^v{Q|4tR#m5EPoq2MmwO@sn>yr&2S9(ZRF z3Vd*Ng@QGF`rO86R*%1n8}Z6u5jT#)S(v!71boj+o>+nl`slzXgCWl{IoY<~wE^ODGN{9dZk?wkkZ2Taz zUH_fKQl;tI)YMOQ z@aw4M&leniZG*=ehhID3uEyclemJ~w_;m#SZybJIQ|j>BEQ7;u^JwZph|x6lh{@qM zWahVaqr-25kbNNc<4`WNGH7Zc#OpM*1V?WGR)GN-r+i0r={c1>b7iTg4ML%#Q7D9? zzCul@Q0QwC3IiE&5e%-#{S_x|>U+Jp_vJ5Jtyrk)tFmG7E%~bkYwGKQ^SJHZ%|f;( zP5q$rUa!ONHrpJP!|wz44pWEU2iHy8!&2{Z3*OXD{@$7TP;37JOZ|?w?ZHF5wGYM| z)*C`_$Iy$Lgy8nxeOH9w%+I~CZ5aq(D+K5AUW8~fO?~RZQvZd{95H&(g{3_=gQZPD zYa>SIp=}YP)4JOaiqZPc2VaQMI?M21#OM&qrgkwp5G{fjJ&&f%wr;v9M*H+0DHo$P zoommC(XQ5WO=7gO<;fy3+R=LXS23D!R+O2hxq&dBv!Xcbk6^7uiz7zWqXiSA6iSpr ziBc$03MC4ZO`HzNB5s9gwu{% z!OYSo>{z;;?dso!(|%pHCBo^z?hoe*r$Z*Obb0;fuG;iDe5~teL4RzbaI}(}U>_~- zkIfd2^2L~hqb*8EtHjdRV$~UKdQkUAvxQ@g-G9m!jt!#D2*+BwR_zdu4fC5#`U9VduaMGCzf8-dGA+Y`E8nh$$Ira!t(1h{o2O$CE9eXs*!%5KWEb6yD|Mc zD`SFA?CSi7u(h>6DpAm-;`q-*n_+*i@Dssj0G{l%8Pk;)0iS#4XA6woEdx>Uo0Zmq>@T)tRtz1=J5h;2iruq2OrJ%||ZipmglxoF_znf&n~&*YB} zc_x2+$TRulL!QYWAM(sfbe738kuz*l+4KYQOr1e?@kZImGr1EHc_!+#x9l>8&;b1G zXa>44_sKKeYh{-kmGaCr_${XyRwWSMNxt$-N4(6+Gtr6EFT3awC(@i8DZ5NS!lMUa zHb@PMlm{sjzYt|_O(!0AT?LkhFNj^ami6gkSC(bdQn4%5>f|JL>3I{~g^xgs*cHql zj9tFGleNnmXC`)W6Hl?rjaQ>CSN>q^nu1Pxv2)z~{ZC@&D8{G7PPh`yu$k`|#Ln)M zVrT1Vv9kra7CVEvU);`mzJy^XOgDSQ&ax@mjL(Y2&b(T&a}9oTXvSA2u~UPnZai75 zlB`up*2gD$|_e=N5@45F!{Zqd2 znC@PtCG~9g$_KmDa;v+{cKbb^d04jT9?kiy+|$^%Ctd!+?ZGOm5UA&)ae=vLI~<cjQdJgnHK6iUIEp)BL;PD z|Gn^g>Vx^pu{3BUD3Uv*SzO9(fldQ+iBcc*MHu=OkVYP#3Yr(Gq%nr$gXYQ74j zw&xIf9MrkqGuWPDe)OP#IHUOCTM6-QAp9is)9_BD&i(&MJZEc_lx#S1w^Ds5k?yHS z?NJ~0#?EZI=X{O&(A9GF!Dltdi-*}_UVd*AcUup0e3sgSVd8Ki{3mF{eh^^;fLmp>{9l6GkK3=D)i3oG?X zkB~{}9ST{uB|^LBE9DDdMEL^9Jgd(6h3>&A=VZ`5Kbh1yKhiz_sZr;QRH}0x(>=eG zsB>&*)Hxj$>YSVOb-QYHPRj{(PGhY)$5gD&fs?bGf>L!(-Z6Df)^T-C5`EplsLs)q zsdGZg)j5Hu)j57u>Kxy5>KskII>)t6o#T8;o#S{?okPy5vwxwls}1Vx4EnlkQfL22 zUw5rhXM>#mn7;l(i8>qPY>=}-UQn&h200t#Y>=}-&IUOfb-O5>q~I&tM^W&udjHJu|U1I0S$$EFK;N+dwD~l-pd;b z^-J8~A{hynzpB z$s729mb`%vXvrJ+fR?-=)q!Py(Y^VUyrB+1)4irDdBar~mOawD|CzXL97lhi!LolB zS&o`N@N;JJ5t*&)XwE#`(&E;aY0i=vEN3;22fDDF5dN04mC~HVfY+w8oSo=r(&l8t zT3QTx#y<>##-KF7WJv8!EawTL{$&0kOANwsMCLHOp{BnD|P zm?;LWpgGUbpC<<4`@t**1x{f(zwxh*pd7k)G9UE{%HKHmIktD|ed87(XwZCgmVLa& zj_sXQYafxR?7185BOPgdgbk5oiXo 
z?Biahdl$Q~z01wBZrH~KLu(%0`-)j~w2wm<;sX0PJGxh^99ydd4nX6ovTvKj_68_P zv?==?ih3nlA^QmaaI%lUD=@QM&1O_+#*}ut>a_z+XW2hCUG1h&O%Ee<9xVP-J&SCMY#>mrtT6 zcROaVy>TMjn~t;E&)*O4@=5gMZvJ|v)9x({Xw71KFQFMC9RdD{(QNNi1E4bmslSwTticac z(>px)>*(D)7q-u)WFeBfx!?C0cXR)qGk0_Uo@jYu5Z`mRWBVpBo~zw&Z^!ndX0#5K zimf(2V7UKa$D=$m2)t6VAR-fnC z{)nk;Kf2WRXG~)I3tZU#laty0^Y(22TaIjhs}tLQ+aTVsqx-EU@rJ%syb)3>-k3-C z4;aN8Q|bQCkn|9g|1q5%aFy7BWsdBC-*fCh_#}29(T*M1J(V3e;=&G8%wz{n&teCz z$n3y-4(z~(Dt4fI4mtI0GoO}Aji1m1!(~#VtyF3>(*r-0NR7AYfqxcDjW_9m zXGK!ub$Z}8gVfkebDuLwP19(us!D2_Lvy8Csc9X}{ljUg>1CQb|CH3Ug66KMk(#_{ zuJ0MCDVpZKRw*^9Y3{m{Qj;Cc-FQN3{DtOjJ}x!>OmjnwQsWbv8*xr*%AmQiXQd{6 zz0?#^Cp86DOHJ;_q{gudsd2FToFugk*{)rX+H9Y!BU0N{WCHP$kcl5@?yh2~$v|@t z)Jo0B*_)@OX7CKBq-O9=mPt)#Y3?ax3D~n|q-HOg+lVYZrn#4qrB7&X%L%EehvvR> zTxv4Y+?z(J=_bu>FO{0AY3|48q~@35qNqe_GSb|Ea;d4A=6+QqH65b4PYhC14$U1q zD>eJnOU>?eQnM>^J$_7TYFlqNo#p;xgTpdyUNtJabf+DaUAogcgXLWmS>AOQmiK4W zH0e$rD!X(ivqWl`L-WigsUe5vJ*biz4$-_%ilv5WH1A2R)X+@x#!gEOXKCI)PDu^b zH1D?>sliAOK6gfHxJ?hLDy4=DdQdtkH6+o4Q%^_@(e$9paj7AU9(>*?HLRlt=axzh z^XNg3b5cVOJ^06RsbK{@_)?M7pr!}c8l(n0dT_&8siC7@YG|#K8d|EQhJs^KLtur} z;9Dj&cuZvngYKPsTl%2!zA;7mz;wU-SLuWDzA67BeNX`RU+iF<_3=}wy#)@zw1+4! zus=KU0{hbhFR(uia-BoX{ODmdzaSq^@C)+s_zZS9M`VYOy0F7WenCEN=NIJT8|S3= zUC&DIJD-u>cdVD*7wV*26SdN<@zc_+u~X8mks9gNP_=Yxph~)BGfB5PDy3WPC#74h zC!|{~$E911$D~{JM(LKRLb_F6F5N09lWygeO1H8~q+4ml(k*?F1Rn^}t>AOgt-!O= zEx$9;E#G?SmJc`@aNNOh1;-g2M{qC#esjVk-5jr!ZjPOlZjPLgZVnxnZVnuiZrY5} z&5jD`W_!7Gv$agR*-|RqY%Gy())z}RO-0hpa)Wd;e|;olhif-D*0RF`+=1HRfAcHy zCU>BweKSpw2e^D>iE?!um8&DnU`O_e>mg^P>w)#s zb-z04x^Jy?-RHD)U2{sh?p`BZM^(J;TqRw1G)dQmO6l6fN$J}73F+F{ap~H~G3nZn zQMxuzAzibTOV>Keq-&_~*IG-YYc0jnwZM4%5R-Jx2Wh$^O;@Dpj5HmQrcf@mjF(9*W2I8dNQu-kR3x?73{uNoevU`# z`8ggHK3lm;crA~+%|5yVjbbKuIwE#Ze`I?@OeZJ<+KY;$qKkdZ7@=rVQuS(D0 zU-_q<_*bQ8@UNA2?5N%(j$uyo*nsW5CE{3XtvJ?9kM1#wV^#EM7H@LL3V1U-I${t< zhg`Hr_nDXcDvsqTVK5aLt-~Z(aSWQ!Ny9$gh98S#kj7x1GN!}RMjYeMM{x{KPi$Qg zitZRlj*#GIa&&_COpXrlhJCb!_eqZ8@q^UHaSVPYj~-R_@JcaSrbu~^GHb3bbNoA3GPp!j{@^XzCfe;xT= zGl?BN@4}8^VG^_Wy(51)`CgdHj=n#I9sL-ymc{Qn?Ag%=JYU~6I%$s%8N~0>cs{bFs&b?qp*Iw%9#Z8O z8VyMm_=Npb$G)s6k~Ykz?WZc!_R~hy?C4Jq18$PEL2ee6>dwt_j^g2*28*s#A0tUE zs3ob5HaO)t_Tn&;sK858wASGB6 zrVgmI`53576zH)l0;QYs|W z8efuD2FWC8^H(b2t^D01tsP-^@q<5rboc{EPr!fp14xHIfb=;u|Fsh&V=B#GS4z_1 z4dcNctGO){u;r zdvBGIjJy0@C`0jbli`c_@kj?g`123&cc%=+%S}cu_eGPT_@c?EJS|2v(|p4x{_DgD zWy$-9T;8h?feLOBBb2VE2=~s1Rxw=ZfC|sF{7;k^;mTbzM>yMQ^UE#6c4CBJUQ!{3 zKUKC9X81M>FSD#YC58|1PO5Ml?;?i1El;J!QrjRo)?BpPu?Jr<2w{7y0IpJVy0Q(69P)Ez78#b>y>PbRbcaeJ2kiz6$rcVY#y zNvvor6)Wm%#R`~{Rg@dVihO%k;M#Tfft=q4X@MeLh2*11Hz2*LNOvIxDiV*eLy^iL z=@qFG(jG;sgOsO84I8H9v4UdDlcVza;hy7a`P^Mt3%6LXZsMd5Xu*%xX+^^HtUfGk zF+G)^+9^NYAe{0wh^LeWsRlP{%_u)ogT}6A5WfN5G}W~5CZmSG`HD3*gmz5Q7W_lm z!wp>1>Y^?99sl*bv8YLf#BT?&CQ{j=fvWLVBn>1t^OlYF+cfJvT3MlEzz6SX3th4L zf_)f&Yqk&T=#d@m!`l2`y{#==5Rh7}Ele|zd^B7ABtOAG3JtWde6#6Yo zF~Ai-VciK*c-2UXMmyn%sK|rAcojAqgyI&XP~2WC6x&LLVk}BuJi0OWuC{R4D3ts{ z3;%8qO2%u2QvT;?5=y5af({YV5Frl{umVzX9m4bo%R*QI!b}KjKv*lnIuJI1un~kk zMc4$Ab*vRi+)ITLENxV>7U>HJb4D0O7)mh0P!fQ!5QOOvrbk#B!ZHz-hcFYuS`gNO zut9_sAgmr?tq8LrY#3qX2x~-GJHoKMeaWanD0Rm|sGi;p93?VMe5Iu<*(xx)Qf#;Z^0QVMb+1)f+8Mu{0t z&0vP}yq=`kxND`IPYomm?e9*szdOn-KCh(?n-6!4I$T#8s0}1ZnIf8g+o{#%FyM&3}4tW!(U)OFTD8@ zHT>+%3=?Wr^!#*IwAevg^oqoa0vuQo=5-gP+Owh~7Q5*}UZ9EXz^g8XdMg8b4 z5pq}1qG3dWgD;Dai-`j(SkbfTtoS)KD|T_#7Q<)KfRO9w%!-#fu;N$L+TwsIta!Tx zD>UTl>{xMvd6u(q#7K)XFa#zX(%V{o7LG(hJxgN6$1UI77V>Ti|{5YK)H~c zHAP$eu7xZTa!ml;HVfUtp>c);54T~z3voXI>KnxRQN@b?3)>BC$s{!^k<7QB7xHk1 zC2ovd6%G&ZGd?_M+3^2_+z~eaTj8L)R%m7=7+PN`92zwVd1_knk`~StN;a6uZ^D~? 
[Unreadable base85-style encoded binary data; no human-readable content recoverable from this span, so it is collapsed to this placeholder.]
zAqSnm^>X~aKV}ez-=^#87mo06`S<7Pf1ud`H-dkE9xk~HZsos#d!q}k{Ab|aXHI&h z@Xs9&N4WStTz5y@uijrd@GA#?<-o5T_>}{{a^P1E{K|n}Iq)k7e&xWg9Qc(3zjEML z4*bf2UpeqA2Y%(iuN?T51NY*92Lji?EtQ1WhhG)k?LWaKm*%T?-ej7iwL1q22&D${^GgaL@J- z>PEPkJplu!lZAQ#+y%JbItz6I+&ge>a9%FJ0e1rKK3Ach0QVBypWzzdM)VTuUcH4n z04@&h5Zn#8p?!op7p@F0sjpC%z}58?x4wd_=qGNu^%u7y;a-8ugxdgD)L*Dh^cU){ z;N0AVYAoC{aFKA?aDRiVgZl)o6;8Mdl??X?oDbY&xIe;0!o38S3b)B!NPot!9j*cH z0$dwhxBG;u7n}#&6L6#8CcuTjMZhJ&rNZUG{ROTH?ljy*xEpXT1B7ZI+~aVQ;r;{{ z4Yvqx6Epn&EE1^&2Rp2?IeF+&s7>xYU6{y$;UhL7`5E zYlHifhftq|n>t9SkHSs*jkuKnH|imAYX=b#+#ZYu0ZnG3kE*qGVo;000t6gJs> z57>#*5{89^8I4}nxVV_Ou<)4K){#-x_;|})t9b$Ed>dluEK9sCJS;vT&K5m4D17oX zqzaF)#DyiqS!@aMLE&aI!hD&03ra)*=HYW}(X-9r4!XK9jX?Rp9^VK{eApaII2bf_ z@-(lop|;uPJ2SiumKbmSXJ9_InD{V|W*srtngGmQRCg6?XjoXZH7Ox1HqI7hH9N?- z9reE{kGC}rS)p2((tiij2up0N6}$OVnOe7tRL^na#|w&-|kTmrQ* zqcJS(#W+iBm@UB?7X${H&Hn*fyYNT-M=B)F8gF|E?d0F5Ya~!v;}-lU8n16y7#thGQ6b;m7oP-vu%v zG5TKvfm@Ctu&bZH8_C`6!aF`WKEWCl7T(2`AE(Cn6m5;e2zu{me4OzZO|7%TUbI?X zz(AZBonRau78YlXv|`BN&>thbSBkq>;~k!m9Bai${QcnSD2n&b@W~N=FXlL6?ke$! z@QApW=osVB+XW|{8jW+}VxqzlFyMwIm?Q6KYWD>9O^CS-J9Q$>1B_mrG&aT-oe(t5 zh&hy3n9uCQI7@;pCORky{l7Q+gC<29&2V1)hq(s*-I*JR!8F6s8F<~<3v|sa?kdbk zn)`$=NC*lKrvWBvR(t~L5k~eu-r=aVIP<(a;(WWl(-=&Z zco4g5@?^fN?uapt$<~kNEwPdJP7aiJ&AoW2{PC>qnXqTP!k#gPnK2|!8xe-?(U%!B zEy3s=Z%x2l>BX=JOZ4nWYg}-^f(6XZX$d2vV-ir&FH&KqCHP{{jj~09H(c^(o^{M- zhlWQEi-}DLn*4KK9)N|7y|_*#6-rtXpM7t z6ESau*D@<64g^@j!!cXOZ=@v-BwOObhsU7;W5^#LPG6GX@fnl!aPq!njf)ZJf@qO} z--y{Z;0lk7iAN;=I09m#=b-J0u-VqgF!Zfd<#>s-%$|)vN73hWrJ55SodCk;%!$NY z0KYMq*5U_)(QN(!;yLkF>kG&hI4z)gPJ$KLAChp-c0vNy6Xf!JP#$x?J zGbqQ)i(D%d{9>Zb0m#3$cuPV;vIyfZ-BH#^$_gQloT4n@5n%~ncUZhNat@lhV|9gJ zYZUq}$8S`u<2M$V5FQn?0NKKW590Q3SS&KK#zh-PAmjyRQB;(L(j`XoucOfPfnQ4;=+(Bua72H8``hLH!D!e4dI%*BMSIp?GLMhOq9{6H4uZK> z=s5>13H8pbpex5iS=Sf0iu0Cj>5 zbr_w9rsSxsL`MRQ#kOd$qpLZDO@C&AS6KAW;pqAui_>2=Me%YBkN>;GcMbLbqa6Qd zWZsq2FP!s?NQ#e5OyH%|okObe7bf>koP#dp$Mc6@m=rlvt(k+v|2G-?#z(}w7-fl0 zb}Y=fXN*mZvxY6O#U&)-)U@x#A-4yRd-l~vU(jGqLNE3-x2VZz^6?}$2>X!~B`6BR zBaMX9SV}R~I;U&s`0*HZ?<~CV(R2xMx)?Q)qE zkvJ%NPE6NG|4Wh%2SXgWK4+Vq5b?_?9y$^!9h4@;LiS{x9S~`m`{RQDl5#ud2~UTk z&p{UyZZ*&QG2?Hq+cC<#vhEZVT{-|5#=Nkw4Ze3$R3vh6El+q3*aR@!cx3)1 zX>E?g5RE%_5HJO%NuT+jAs9Mrwqv`*u_}VN<94ba=X)<1hY=eV7{X|6f(1hyiPf+u zg`6YIh9MNWCPXd>n-vq2fEg?1=-7x16FoGd#lC5fk5!q+u;?FBZFzyJtfhaXr0YrUDFt2oaM!^ z7?2E!nOPiD`I=&)@s=2#z)+JQ-;2i}|Dj4P>Q`W+QMxyq-b}Iys**#R>k6k zrQOtG-*?y_74IwscsIs{ou$NBaYls=n20bm0L)o_j6nPq$tupEAlULs?`9P1)kDRH z`e9;a&*7rZ)m!Z7GeR8fH&Ptz?;~Ct;4796REyMIfbSL?X-jZSzV5<#@3|Tl`erj8 z;;q!z7*^~m2$jZpkenr1=$ z4v)w4E;>XGDyjKzMC&whsoNb~7rV7;aXG!O5Lz!1x+icAgXg=|M)9U*Xq*)&c5VHO@bJ_EQisVIm#nkaIbLC(;g? 
z*onLwKJ7zP-G&Fs1>$uL#6;0^yK-g0h@;_|XU!64HEOXV4ZG2zErXto7qLSvvQtom zZs9Sp$q*cbLHdfkX$vMwe5K(&?YBizQAY#3mZlPI}>UiOWurqCpoWKE{9+0mqo zVwYjsQ{!xwxzXrRY~ko3xD>wAViM!RtxwI3qsHnz?WsxFFa7bSZ?jE?%+-g|a>K+d z9{+&x@sEqMBx6N&Tf#Ry)?$kbn#Yo~$N?-!S$r-S2%brd zjkNk;_aT@hdUxd~KI?(H&hL%_8F@uxjSjFyTFs6@#rqll=9AIZ3k*XMk5W2{NNB|N z9yACO{L^vMo*5T3*-^}{R7iBPhj$l%e>{36>ROo*T@d0FGQY42nRhdK-^oB)IL;EA zd)%>(1XpH>BY-c-fclBBXVJ5M2R-){LJ>ujop%-*(RfGSb*aaQ?_2kZo@%kClIo3n zC|c}|g;-x)?cwMxpR&Z#;#I7dBgJlK$2Wy>!a5f%n7hs&j}NDu)#B_s)QK6VTB#p@ zhV%>Y>3r1Fds0ly3yHB~6T@EsZ}4gTxPnEFj9%^yB*oZqwx{n@79&lxCY!}x84YFL zJnmErL!$Pa0+% z8suQqw1i~vK%8_=zE^OX3qIK0ly=8ws0Ya_h&PlcbC*mV>u`s60ksfpLktgyiJT2p z2lU6hG#6Pe-lUV^U@U)l=6OE{TjuH;fbNViJ{{-CC^!ZqJx{Yn{D7GNFw9L2{@Gls zT-74437Su0#~0MA{E(LTZWL=>Xb77#`}W*Hyxt3KdNp;eUR})y?aI-2UvV|Y-W8TC z3ITt1E%kIp94d~D3&?`O>bhP(G~N@vOro;)IPq<7^mYJ6kDO1>r7J0$A2$*-)}Id0 z02zioqM>u*eV>MmlmreKBN}?yX3e4syt8c^jZZtiRL8h@5Ls4aQ#MAk__)_l^S@4V z6-nM%MfBAZ8+hF4=uP8BZXX&q^7?>&#NZEY?Ao=f2OU9Eq(4*+7UH< zyL4Wxw+CD7the;FC0L?uiBaRMkbccYyTk|>93C7MY!mOQOc-leCb z0KVcgf}dv=`F($)$MY%T%f6tNUBrD;$JJ2yGpnh&n9#tUiQzs69l=j)Mzr_siY7?v zE4&@RVsAgNhH+SVPIa@dP256Qb2{PW(Z?ZwqG{=oemt}MA>cY7?O#zJf=x1UvY&$) zn6o>$Vf5wgov_)q_;9G^&~_4fDzFHGAn|s8wb*op%24t(6cV+V31jA!@P}std=g?H z?fQwRX=-rz)ZmE`VsHOvu+8xTeY6l}W5#R?H;$ZYoeQ5e?)xB+yV^Y*gF%AT(UJW; zcR-NHcc1N$#M5rz?b#HmT5WZ=#ZR)%wL}tOU6_JF21W|8)1AJ`u8%FAN1h)(r`_)O zq}Na<*?N>d?EmIkyZkv+97WM){;&ubLJZ+H18V}X1_wuA^PWB!ZwE_gbnW_< zO5KMI{Q-L3zkuFyA2)!c4AstnnBNnVS(}4X`vYyUSACfoshs>R?b| zs3|m_+g+*gCmm?P{Z`t_z+PQ~CDO5U#mEP2WvA(*Ky#TTKDqzSDaI_Z_yN>2qSfMq zGxQB(+n5xY!fa8oVY6&=(dZY56%UMvk3iNhgkkRv`*5^(ygj#27&@A&=2iuiN_tEaaWOV7xw_l^uee6 zVR9S!NsFlPxW|y%hE#d@kcHkT-tRV4G~ov8y3xaN(}K5Hy+$?SnK&D!#6nrC8Yx=W zseFP!7fB_=N9$F-;_7;=@rzo~yfr57rD*!`bYcSbe|T!VK)gNZIqaO$jPK4-OC0`9 zIAjKiQ7GS?-vmt`?_fQB6q_GH75m5kAsRb^|K#gQ7TX`9Pf@(}5I&eZYQ099P*1kL zh>W`?;!v`pQNuA)7nQ#mO7nO?{2`i>{v6zTV0>pSHO~ttxhHI(r#+iK>hF8%AL^+1 zq33J~5xnyKA@B@vT!!*C*6U*LAfsq_c&PYd@G#2u&c)FW36w{NgF{_oi8%HscC=1B zfeQXBl?>kymctK^6MG)U!1peKc;@hzhiT?e_b|ntKujxQK=j`buMUp0V*eTBLPkXU z>pUG6J0He`9N&*|1a&)4$IZ6JJ!Oe|fm-Mfizw(UierO`)Tz8(Kh0{L{R5#N4rZB@ zEgl=pkqIa{+QDp=2b>v2uCKeZTH| zzSDp8?XOSuqxw75hs~Sn?&|V^35gF%H0iE>P;3MRd04y>tGiF3qN4GEii(PgN)#0p z4Js-|jHpC0q7p@eiV+nRBfg2E-rsM`wfA0o?}eI5oxb;eU)2ikwdR;(jydL-V~*GK zQD7IKls@75ZMev1H?$6z3@~zTQhH=Q$>K*>uLc@))_zSVlltt>!b1D+LY@0_utA}W zDwc<{eopqU+uP|s#~J#&gSqiTP+X`~auO*YP8?yvUnd9;8#rjl`J3stxNLxT% z{tL_$d;FeDPoMY}VGU3Ze;BS>|3{=+`lBf9I;J&1%%3>anq-Sk&CinPgt(yv<1IEP)4QXPK1n0p&;}aPBj~d!7mn3Oa9Uh`lNuh#@dUQ7`z%A z>yAZuky{QS(Hi-rj5w$>G9QUy1*y5N5O$Ryfqk*6`7hs4P5OjCFe4)&-#s`RaUfY^ zahN&&KQw^BP!kyv>59xT6;IDE5Fm4YggLr`^cGbkb2M=5SaUbijmZWGeKP;yKN7X@ zM?$-|QZgnEHzV|6;^+X#2b+*3qbk4Tb2b4RZ|Yb>4M--$H}~B?8l63H7>t>g0mQ2P ze+jWI&5|-K7$3%nYdB&c{uL4LnT(7fC?waB{qXf)O30UL42@J3(Kx7G_&(l+yag3~ zOco-aKnkx=laaZ?{fVT<>k9Ev66(}nV(m>x$-*A}=7=#Sfrn|+uXP{O++v;lmrbjf{+_YDui^WFW3Mjsy97;q5{g4=BT?u$OccJWmJ6!DrpUU_`zQe=S7X zBEAbE7b6A?=-fdRs&?b4T8M~qV9q$`e=wpKsLlxQGPYlPYI!8I<90L~;6luqx)~8F z;V2X^SXKetj0103R2aT{NyG2F9@D~fmKF9ZMjQEFO+BVBP~}}M`}l+ zpP|-&ZR&+(@!Z6!xzRrPRvI0X3+*-sMu5OUsAZ5+eO>Ie3~~}O?F^}=1%cYAk1aWI zsxtJjF6Mb%j7cSXlb?BVL{;`x~UKeZ?g-lz0tq5&G#Y@t!769u-`t7cE_s1 zAHzu0_+yxJi~kCCu45eEYYZYHag6y38+B$=`s#OOV&9I@j>ovE!TdYc=a{OdTokMI)e; ztVBzg?C1P-Xb4aooCNgJ5XZ}I7!4QLrt~M;#3qOabhPXz;QyJ#r)LJA-JAa#2*Z)T zm7SY6IDU)(EKn6ciBO;aBvP&a+bFg0r?9!eTB4kT#UYuH2jjT-GpVu!SazF!BC3kz zWE8w+)70|6#ndk6`DOM}05+fvX`|0uP^l1tPHir}AeQ#D)qm^l74zO-x08R0A%M6H ziPmRq@WKorUMXgt+HFY2Rqc~G2&8HH^+hA*oS!mV5$(t2B*A24Lsihxchmr0P)|Dm 
zzP1<90^ftz?I)qCc3_yxRJccGr6i2c&Wlf@#ev2w&$w`s90~Usl0jFDiBf0(G5Wov zJcMFrk5kRR0JM2Pa6kw!q*W^?2Z6k1Ln66lApKxw52KTE@DJ}$9}bLGlLvv@zlnqV zTFcQsSi{i<%*MzpIfH4W0vN}wo(_bz9)$Z)Q%~olATi+VAh1mt=dQW|gZ~{@a#9uo zewpc-;wQS*g+Z|N3_)J&gcJm==VoUFej%(8q@VMEJUx~?DV(M$fbq|cjZkH=k?Q`N zjAI;*hz(zUEe*&B$bL{pr3oCL8%8u;Tk`K63gNYZ=+0h5bY)mpj7FvPORIoR($#^X z@j_gSv8dn1Zng7GFz{@43NsR{km|_Leh3c-k}EL+DG`|%`fW5TU(aURzOU-vget(W zc!a2Wrq_ajTKN`)19I_kAiKh6*D*9UT%lvLvtYx5U75bK0=4n281>{W_=G^b(8MTk zS0=qBnUgRC8=uNbFjf(=`(abgz`{2bvIlQTvVaM)4Bxb%Hz*26g~Z?sn7%U7U`9eO z;K1QJY252~2M114m2q&_j2sI8cz;})q3#28^EQBkNAJp@P?uW#cBGp3PPF=PM2spP ziBW#czJivIV5XN)$=l>1-&(Qbq<|^zpO2=#gRSEN2idz19S_hsD}G!_Sgw$P$&}n8 z-PO!@#-`*UB_>pzeJ3YH%^dDw?}Nj!nYMBOQNHyfCSkac;9$y?pee3m7+^w&AV$G- z{$NR-d?(tbjoLU2QujDP(h}Bxdyw2>NOh}9981AyGQYO{JpW$B2#?V4e-_~K5mI3LN04T})b0=Zhhw6NNDksF`;o|2T;`9#rmOqp-pWj<)U*%!@715y5_#mK z#gZ}DulJI4)n_^JUOa+UkQIfK37*;4yyoA9^~+78Az3~n{Kj2`FT%i|hTBF547`76 z-vC<2VM1$p%4bDsjTI*>?lE5zOb?#Oho#BYT0{!zegn^-VFa!7XYsB zJU|#`HI4r1Rq-BT3`7K|`oGJ06@khzbNsma4xOJ`{bm|F8Pc%Y%dLWj0SJPVGb-gt#U|w@vvujOo50 z*Q%dmPWaV;5LNthm-^!8Vfyn?T9!b7y!d&9`tTpHMjrcs>1fzw7@|-7912A|@;1d? zT!5RPpxWFa*OUP)hAXY)dump>Lzyp1vGfmbrkGAJGh(wq?LaRzL}0&gY3GRpvM1hQZJhX^$XA3nIt5BfOvZhFgj7J2|8vsA(I)h%Arfb^3v4pe}s(J{_s3R7y#Iwe~L)PStv70(zhfq!G9uE1Aw)xK;0@;y;4A` z`~MiK&i_-Gs!N1|w+6oraB=yL&Zaj_66Rs@N=ud^DHcUVwf<8i)M#Qj!vFIj{8%0N z=P3KvaR|u$AXa_;&(W&=pWjeblS>^8aXhhx|Eb}>L_fCyEG-jb)ffL9fFObsb5n-% zxJxnwjJO)(YTLmPm+$6?L$VME0fkP!mjDj$Ksk_r=9y}HqU$9C5x!J=e$mfhhw&_( zn@7O<%Z6XTWVDJWwA09V-)?g&5 zro4oL3JDU1N&SHl3GXDjVi49~{X*hG>7%?l7V6wR&WkxM{xjmm6>_!OPRj5@4VwRI(5h!S zsnj%Io;@6g3p>Q-)KEMR;CtkyHjne2F*B{2Efa>ldNXz8WjRhb*;j zoQKFRumR6xJZ=tAr^iFeRe))nF@A?Bo7t@xH&}f!4t>h!C}ksYx>g76t2yMvIK;yL z2JMaj&CV61Pv=b!6^@T56)xjV2Y$m=zMSEvu-MT~cYQnzmHeYPZBkNZhU(77I3;Sf zstF-0Z-gXZE|PM{Xg*{2+%y*g!6#y=@OQN*Jyb295N7^_5EIr}8-WmAM3hG5!MBeO z$+O8B`C}2wi0Efmg!wP8?A_-;-YfH;|flYtdAbZ^;aiDh)6I+kI{MSs-q!g$6+#xm9hZuN${*Cr{8x*J);`)SP=6tTa`a<%5-G3+pg2sst3E-S#qA z>8jZRE8P~>0?cr!K#>(-vb9>CX~y0OE9}EZbpb56E8C9+&*PJ(J!HY+szG>G^K%@h zO4CBswuutW?_=NgUt(o|byV{Qk!r(qM-=!z3-y@Py;+WEwPPOQ?mFio?yhg+An;C@ zUKR9VBGi!9j@Mdz*tlgW%ECgz3DXe@VqLBjADnsTgrz2z@bzWOE3X{H3h z%A1!1)+)plBD1gSmoD`@Csciv8>VLEhvU1U=*ai&*ZC3ZNq(fdQ4pmje-Q24&WB_t zd%VmIAM}Ia;SZoa5vu{3mH}w>=C~2!O9MVsLE7!z>L_ZIjZN?bU>2wr)}7#BBom-O zoyo=Y%pngp&)~Cl5ufFc(Z?I2AowtE$1#BM-O_1pEKHN0^bXaX$AXZS5!}n8Q37__ z@glZ}ihGnB85ycKFIPB}O0|}s*5xjni0fzh*;MEmRT`_F=V24L#aXF(z-Q-EK5G^d z&n8U^b9e#ND9V=sSO(vY$lI~xKa&#+)K~eiyyaOCEl^$gnE7b}eHNc>m{{~Dzw{Pk zk`1jmS;0IYVD)hUB6I1%^mZUqaG?MqV?G!7AekL+=bH=28W#M{;>!o<0t24OXC*pS zPU+5{iLMlQ$s3!YO{EA(L>9*T*bikM>pu+Y5NgTF_XB$T3=PUTC?|~@s`7o`?qxqg z>M2^(m!gG%(a=fh!S_p}#WY72wqSRV4P>(Mf^l8nhAXX`x8tL;l@J&URw)1u*r+UqNDsqcK*^{ufXb>lKG)-&(Y4Yijzx`2BMi8q5Z^ zAUkFd;_)gwaZwQtKH9_=z$q!sU7^;z$5@8);s5I3zl8pVE%$@G6(VTMIZO!&KJ>5D z`!4+}LxQj3eVGuW3>acHtR|C$V2$~t(XD@VsRREOs*3+tm|FMiaCP_B5o%qbBT_A! zi~#VuDUN6#-7SKtd@A;%k^_Ke1j)+1|K=|%AN`xZtnB+Y$ja>`T#IM`4EI zF)+m-xvnkjSn+G1XOU1K`LBJ15_~{^fbH8)Aaxu0Y}f)&S`Jx9c8jq51=F%Zkhgd< z8@257_V3qUMWF-D)K`TLukbJ>qDafRlL4XdAUg~Ym+gg)0O?iCZ#(MPTN`>qpPRgh z`r}nP+2JL+-y%Ln5WPFtxDcht13HDsJUKbA*XL|fwu>K&&^$ZY@k*(=eG1w%Hlq#1 zX3-Si&@FQF%PEeD>N-Dc71|f=WEbA;$%8fo$K@#w&+h7(57WyiR{)FkBX^yB=NH}tUzhbN&PIFLA(L%>ho;;oA_^pcH$!c^6=s7jWS zuceL$UY>qPa}sA&eetGSHs-f)4Q$Zjxq zmP^we;s`-)fMSQ~_`rTu%<19m1m$&e&Mb-WZeVX?YiKTZh>7nMhgoq2Hv_0X^bQ$z zQU1QfXUQ?@-aRu&1Z~@*gP}lOEH>#E3jtQ3ie>^v>oj~dAPiD7OLZ1I!sU+xGaV7? 
z&P+!X|1~fpEh%>-%0x&(1UFJ1(5zz#fy;p+mN6m#w|Ayq=AxTINM9lNinT5%?P39|DuVq!V`Q_#$Ks@tfx zFv~GIdjwQx0g4~45mwJecH1z8&*o>OM#&C9hXHWbOh>4CINK2xqJ5-*1KVgCY(6Pi zxPuYll{OX{IPr;9VP<^$?G3#QXH9$7TtT;;w>iEAk98`R60IbI@@8kN#{Qg@Vkn`{LTVZ zt(xud$TNJ6Ks@HJy44+iUAur^yVUIu9WvisuW&|W!1jrgq$a3aEhEYur7~^bkaBHZ zd^X@bC$SmkNUP^)d{6N6_GN@j!B?O@o8zD+S)i^6pfdzidy^o#SMcxR!_Zd-xKgCZ zo1U~Z^$?_%_(vHF4_5$iYOX^EHF|lNv(h%#q2pmk4}8=60>59JOM={I7Zr_Ed2shS zM2AZ~1?=1jX>>I|o$DB^X7D^+B}=CqPik*idoZ?Y*%GTJ&vTf@IPXte*{1UVbv+Mx zQJ}#wB0nR;|5)3`4=X_{sQ5xLSv~&WT%707V}+$GKz^yR60~o-!uB04h^+jcD|dWG z0w)St_1K|SmVkYy%y)#VujV@R4cv)%I@liaF>KhNI4sLkHb z`K;W;Pm=7=YoG7POCHVyvWXciz5$cMr1*CxO`4RR4mKxpt@Q-)U>BdwI|WeISlgj~ zw7~H}KUBCvNaec;qv;@EAnK^H#1T0*t$=3?bjX#L9zThS`1Xa4sC>MEOrQ}jByrLx zJVs4YbqgJts&OIIisuUfuJ#)VzoKuP)JFEHQz<2~#syhoDNDoi>0P7HMx=%6>P__p-@sC0k z5INZ7W04P-zcK9h+TZ8>pVRxg^lX`A}p+m#rV1LIDuy|#WsWax&COwIO0IR zBNJ@=WU7?yd!FKk9MvFDurMudZnbPN*QNEB!)(!e#mjk)(9+M?TB8*4f zF4HI$FU70kyJU-+eU$LLJH=Ah$={A0Y5K0=C7ip|5j`+JFM9}Toh9daqj1%^rKA~M zv5Y(}UFrx&k!t8t#M93t?#~$T_bhirfbiI^Uj6O*-5+XUpW_|Xw%fo0 zdGrI&QzD`1kaNM)&gI1DW;pdR{LnC1Z?key1}lf|cpWs&Pop3MwB_#NatC##VJRQL zB)q2_D7BpWdx|pG#3Y2Yn2Ho>5TUnHT~Ib47mKUOc$P#0QGL}gqg zM>3wvV>`x&u5iusbo%Lfd<%e?%iO{zU3zXaWE3@TBCNYR86%8M_omqO*R95(@ai8$2XZ#Uh_=lbd_o5;d5E2NW;q8wcIE3jOBHal&+!gL6 zrv6eLSO=K1X9Fhmx*8Cklj8?n!p4JeVuZ{3^$v!(XhPV`da0QC zN1#E+ZBnARos=*Y6IT>L=)f9J(ys2%1(zG*wR!goCVy~N9x9w5v?4q>cN+}%@p_M5 z^IHOVfmpY;5#I-xKjMqy7#1G>^9|q4<|qab`Qb@f2#w28N7gxpVc$1L0UJeu+P%@C zmz>DFNbR}O`>ns!v#w75?Fx_M*)}aT4Z#%H&Sd#oB#tJLZ!N|{)uxuR5lXM+%pCq@dQqTsVZEwD8-ahvwm-($`l{nNBZq#9XVnwN~Bur|#zy4+wOBg?;om=w-0FzR$W9vT?N(D#E!pab$&m7W{%`Du35)MmOj1MXU`$NK!NFiaghYE= z21~d8z;tegS(rrwb=%;E>i8CP)cY}g<8}iAiH>Y>h?ox^Es_fa&7E}8ci!f+v5}B$ zB*Ofy4$v#gUkFjGa}o%peLvfme1XBjWS#-KZ7@uj?v$TQDm z225Iz$a-C_z+iU(m6|<3;`<@2?IJ5&cGUj*qDmtlz%Ar5w^;q{sBB1Oc=T%=t{|2W z(Hh9}TuXK&!q1Vyp+jQh)79D?j!gO z4b+YujtI_?w|<5M?dz6Qq4(}^;BKiTkjFjN6YA)lbx(qbm$&t3V$}gMQ_E2<;-g-mBRWVju5Awu4Fc0SnH}fnx6EWU@QX|J5 zTG%Qgk=~q_^Z~3hGFJin2=-4o{<}haXC-~!aJekISFCz=0jO7XjsRMNan%kq>_A2O z;!cO~pX_Z`6{)w>r81~xZNWZ+v&JI;uBF$QMV zc(+=&7qEL{mF;sN zcGt67fuJ88a^|;Ix7fyIg@{8t0$cWB#H~9ir2_lb$1PZs$x;idgN;+XN| zny8StSNEg4nq6d{`lWof+@LUmEuK~nXx$!JVn-EqYrm(@R4tVyKM0?ih{2xBs?Sdv zK(Pttk@G6KtUXwBU6XL zNGG;CApMwoQui$|?|>m`<4_7vcH)4URrd~(gVXHiUEOJP#HAt<-?pvh3b%UL=oka% zqU9qvz8cYaeF;a@`$VRGG3YkHTzb6+DK-b<{<|E_!O-tePSlSks}DPLE}QU%4}pFq z5D#$Ra^II~{b4Q@-2?PsuL00@4QxIPj5{kx{m#=AM3tLBIz)#~J_IY$;v<-d>Z6V@ zJ%ZtB=qWclH7^mFoI(4<#yA2x!l#)#9%R;*1hcZ8b)#>E5NP(cU=E_b))p#EU_IVnJ?_(JARKY7 z#1f`^DRAqwbf#_{cR*kzPcWBy?!W?+Gmv~htz7%5(l+`wSpX+t&m|7Iu~~?ZgVMxN zXqd6;?r}_R&t*Wny9(B_+)H93dTl4?A}h2^>k2kX`ak2 zPs=UD%cc%Z%1q0cWD8!l1kfWaFM9&&48|iu1BQpOB5c&o79aBrM(i;TGvag+{;}W~ zpoQ^Gu;r-ZqyyH@tZ`5gI4SU11MA{-f-lxGlTyvyHi5e>!gNkL{B?-}ReB0-T2^qR zjkl;SAq6%WH`tDWEiofNhbU0XPC4G>6$rvJRQR%(AvqCOxYE&QhotG|v6$7>{L>y5 zSaBMi7o8)2mTr@Y(~v1;e6+O(>s8FulG6_1q4<0^DyI0_S;(oLvZwMr?5RCy4WP6M zu9VK;X*M5*SrBS+tHUGveDFR%C?-NHSSZ0>?6sWn46OZ(c{x1-BHVI;!VDROpb}CIZ4b6@OS#nk4z@V2 zwUMX?N0E2F=JeT*)(&OM+yE_T*7)c*6US%HIEJf^v%sWqKXIV`1HXI$wk_Y(A%^X& z!=ta5$KQB5bFCW^(wP9IcRTmLo|P1xG~|ghd6jzPD+Y&?lOus~P#F#2nvN+(O+kMD?DBviZ2(5urOF!BO$c zi`jeQA*sfglF~Zkc9W;BUT{RwWTD+MNf}zUST}kYJqKlv_1657CFWMQE(Cdw@ACNZ z;A#PlM3O8s}MCV=~>SVUm%A(P=ZBcmKb{-b(Lq8?4+T z4DyIf-2E9RzTqmjg&s*z@A;ViDe*T=y{IEEmS05MKK_n_l1Iyk8YSp7BvL&h(z*3qQ_2opvV z`f@Em)n}K)L(I1_49~uW9)ppsB%aMx|D7Z|^HV<8(I4-M7BfOZGJI6Yef|9nA|WCH$+H7!}X`v9H!qzo5oqUKk*9T zn(jv5t(<}S9h~jTGh`>^;maJ5izI@;b}%nxQGhM$beLh});Iutmi52yN(d7knCBnv z$@72Nbjn%@qO`ht6+04ub?i5R8LCvwUScRd-!ZR%V)<0K6?(K 
zJD6$~U&i8`-{sAQusN}ikz6j)*z|(K-PL;oUjL$-=%D8c@eZCx&4XMQ9*XGTB~G)r z2Ov4rit99WyxubZunWBv)e`Mcwf4G>yfit`pS^*uKTEjPwHuxlB6|aL_}AgNbk%*s z5v9lGy$K_ruipaKsCzdsw93yoFMXH!eDFD+75n(?=1fUw!t)yvnovrzSM2AzQn>a_ z-N3tby6EV$?Eg3d+;lKNAU-W6Ic3o3oRk#RCo{M6r<^(Swwkjy9q{HecsgA@;_K#5 z@OnUmx^UAGsqehe_oXGqc!A-5&s*diwfC0ZlTO}3!;U%%;nEsH$H@v;+U&UHC{V}v zu5>HkHKAuPWgVhY^wwe?xZq; z5t!=2EpNcaL|6l$8mY}a#O?k&q=&wPMeh%&1JSL6(4j#MNl9ig$%VJ^?d*BOL7Y%^ zyW0^qkuX%_Z8HjA^?hwd-m4pT`00aTIHQ4U5M_>XJq7BM+uWZgnBZ=HZDJFLLZ5nb zbGhWUpLe0JBDlgYfMNtxZ&y&#DSmq$>kHR$pm`-Wc365S;w@Cq*TA*y)No8p%Zy=o zRz4@*SSB#;nYcds+M#tC`8U8t4IeIqijk*m_jaAAvv=`Y0c zN;Pw|wU-IFbDBIK{W1P-JoOfrA$6gCB0yvV@Q-BzL1`?p%)7dH57Ozo0_nLA9IJ03 z5YnJ3Y1$z5Djlpk?>T&`u4zzHpFHsMp^I@#?SBk7b*KK2(bZwlc7=y^nXm#?_W%rX z^`RqTR0@J8v77CE1fT1XN0M`IyS zwWf*BcEO&3a4R0m_Vh8oXgJDu4XdG08?I=~Nq_;$dZbgkA6xPemh?IXgO4aif>_RT z>QE~fY}#GH;M*Q~)?XwIeN^xeS}xsP+efZG>y zZ>`+U?TKoSj$9<^M1SV~8mvQOk^J^TOBw11fRO9f@<{4YFSGh^vh-UQs`4q2E}TMD zAR;XnIdy4c#|-iKV}du-;1kqGPaJY7od%A;t8qHmfNf1*Di(HdyOr#?@*(60#O|gi zo-{yNLj4$iXY~12nf)g=(nihcLr2XJRFIDk&>hOe++K}EJY|ZLVo1#_a%!u#YMF~_ zMV-r?QCK0)Xtnc&Ge&JJM4Cb61qc;qxR~@4yv16{W`SEGhf04D4lOYMd%x9(*+YHs zkCi_IIz^oX_$F z13Vl+h@YC`ghJ0tEuT)ohwYnxin(ul`~y6@BvA)FiLxh=L0l(R9bm>(RHz=9g(zB` zvl96J1Uqj6G*m!TbEZ0Ji7|D5i%Y^E=F}pzsgO1(YdX~_7Zk?xIs^DQvvc}wx0mgD zfip&NU3q%MN7ds3x|O+l5S->J5tF3VZd^bgNaY+?{f8d5N_J z%0J9PA5{%Q0T+~1(`Px!)#Bfx=1r8W67#4qAt*jlF00!-r z92U5%LOY@f&u(`w`;bVOE2N(pMI}(@@RX$7e6uXVu#~KExa`BOr`KgMHERwR5!r|_ z(a6GOLN|{vqtdN2T*4(;_LzLww2IG$8a_+cU{(wuj+OT<=Vq~Vtn>7Q9s6Ioaz*{? ztXI`I7qVAGc|aK0(Yc(rjl{WeI%XWz9gY-1{w^qShN%OKoZ)KGVoRmS%Zr`n?vBJp z+{v*RnYtIs0HR_Y7d$8?*g}&=yw6Lf;q@_}G0&L+nF}vlc1}F65FU>b*I*-y)WW2w z*(E@ta6U%xAFvLd<=Eh0u1%uLI~$`rOqfc9+HAf zGI$v9MFr@Iuym}pE^(f}#0ycayeSj8fecX5Mc54ss4rK2$-j$5#D3e1*1e0^v$YR3 zb1~pI^l^?0mrxH4c?koH_f%Y}(D41h%vlg& z=E`EH$1-Ubbb$PSK&qZk7**2=BS`q$%m!yNG7Tk%{-^~MLOnCs z!E0(A*t5zc;mO~U;8tfvo@rdk@M_x(7RlDr{oxOjFMdaJ)d3t&w*QFf_&OCjoe`FrI7`Q`P_TQpX<-MHC)ncYc- znBhj9IDA)eyU5jwFW7CxDD)v$sm zvmhowd%oSU65Y*yfQ|;>o`tlWY`L~d9bVzYSrkOn<)ytBL&YuAPvj@BEK0^cn3$*QTbnm!1V(b6Nx@D>Nio+l0qty^o935d$Mf6E0)kJ?j6fI|_3ormhdL#L@?{aS!3E&2-p={U;Ct@@aRmrDbsSV0CDMYML zu~NcBeY{qPt-dCJ&K2MY=irGG|2NPcDqR%hwi){+kt&L`?#p|8nP^pHe;ED_R z4c;W#CjF#Fz=@Z>36CX3S4k;fkF`>@9ymOhi{7~|cwW%G!1S{YYd&eL*N}iA`1oFb zq)OPjt1AQ}sJq5zPYa(dUjd#0cu%k5xn{j{qPDo_j>H{*h$o|EKM3m6_0$+; zc!{I6I*NeKDiW)m;H!?1ST1#*1X{QOqGErwGdy&>#HS5Tfb^LNgO?jNr%asWcPFZdi|}atq`dL0$)Z#5P=9IzT6ffF zT)wE{WC0fs=IYap&ItTN7psBL)reuq#Uf@J7(Cbr0aduk`GzX1MXe-ox?t{g(dlGO zT!VOW15r>@Hln-Y9Yj`0gA-RlT6TV}$pHnbZ-Z0td^hL#>a0eOpR2j*;1{Q4<|&{sGd+8VwEi5GzQj2wn-od1D9U{xAk5EEWg05q_XpG;KdD- z+a#zhi$W5l2D#Pqt)|8-YKzZ-<=Kf{E{pKTB@Nr0aD(Cs0jz$U<5A0tEY-G+?xSr^ z&sBT=M|a=EYXJwv3qHeV+nq5`)e&isoexb2%&FgcHew4!;P;*SF-L|m;o0yLvfiDa zfNVNxOjoXyuuSklKN=U3nu9g2(4br!BBw5HbLPUTg6b+hs*u6_5{H9`sUB|!k|pT5%bZ00~@F1F5&C781C5mork+&7yPo4K=8rG@DRUT)Dw=OlP6DVfz@2Fxgy2 zAYG^f0!{To8Q#GfiwpB;X7rY4>!Qv2whF9O-K%Nc_rt;zY^b*SnMf^@~j!>OXBGgASTaCczCy6i zd>)t8Klj?r9}e*8X{w4SKe)wS+vXDS`XzjpPvf)cE`Kk*Mr<^Ng9bD#B{lEu{9Gx9 zGd6al#8^g>8$36-3Hj7vFr5%M2*4;_0Pz|SPLi=|4->7jFNs#+DWc^Yzo*jAO!8Nr znz5S$!+`~xuqPYgHD%c3-A+2m#7m@UN<#o-NSNs%^XdAg&BGe@1-&GW<}@57r|kg_ zeKSLWZSA%HNg`@)848hVP8Q+Dke#4_AwpY}tk4ilH*KQAd|I?X&vdH~_c$Xp1j%Ln zO+b|G1xg*4iHu9l-{TC8pq{65HMJ0+bD3;S>xNa_>)zBk0_f&w-r$N%GTrPXNp7B6 zxz~vZ#w64})xb2x)wO+`6X-#Rz9l=G6Cu>|^GN#ywcYL#k2ABnz0Wz3L0q6wYT7s? 
z6G@RhEeeCp0)ASn?$&KyT7Uzv|E&eQ^Bxxo-QVZTeLoGM=W_EMj^Lz7z)8#`#y9(* z6n30MF-XjFCpW#~=X|zI`x88?xBnPq*10EcY&gILeTKxPCgf%2$tl>4IvM_|{)>Ii zVd~%~pmEEWe}&db?vxD7{Ju||xDGPIc1i+a1a}i3&$Hgb{earE960?)9Wd0cY5CNt%N48kPqEqFJcuK$L{NU-=*0ETxn}JX zubjp<+TZdB<|~9;y@T>6YU3f80CpY%rd?$me>_v)(fPgxRGMT_?LdOvXu!7(PcbSZ zUL9DhXiA! z|Kw%kj!*ou@(}K^cMGNTB&bTVR@)lF?tQ^$>mH%+Sw1_ia4glh3NtByOP z^t6doE^Wa{Utw+@w6Mux7Mm@I(1yQcWwGoQEeKjWk*5bbw?o&?;UkUCHvGg z$c2eWJGWMzZ$Ae7Yxa>r%64#zz6M;Q(PZ z)esg>{4x;?595MC78UO09heXYVBJS8lvP#4reg;Mo?dw{zj%rl6sR@FB#ka5CvTFT zB3MHT)ETxZDPeyl%VhODLa$~Fwy(R`+(>g#N)S34es#>8Ep0k^lLvsbWUejC$O_bq z(ok}K^<7t5DL~BsaUk5jSxCN;2sc0Fv-4hz zhXH}y(&L~TU9<&i7r*bi6N7K6YBLf>>jYc zwhMoT{z^W@xUdMSPGb=iwK=14&&wbl*lh-N`8>vhBW!=~ub=-7_DZg!;OHNj%BY|- zt0=WLhDcNkCI^+=z+enF(Wh>clTUM{IlY=Y{0S3CQ>dOt&X}2gGRT*kx`OAV_pH-z z!ac5spf6#}I}3z6`#1-^6=eJnb^HvDn#WNh_k>R=m-U z%ZasFJhs$Rhuckae4vW`ts#AN0V8f)#u0lLl{F4iJ^GT8%x6@gCl(~McUG!Jm&Bp! zQ=)$Ru1&T45>RfNLT~Tv?O}Mf6$5=7B_UZT6ZFb5Sv2T;pqbPru_+P>-rr&p)apzA znvO{#C?XA)ovh;uU!Z6|U>7Jpk|eJ8a*M49o48Kf@yoRFQ_;;8qb`5tEP&#}D;B&O z(8wGHD%yOmu73q06|SK^<}$5qL(@=0&rje6sx8n=5GMeZ>(Al=wT96kYR_el36K2T z-`AvOptUhJ)}7{N#K@OKu)HT4^Vd9^b6NC+Hm9b;pN#-2{DpAKPq8RVnVD=d}2;rfGt9Q`lfuUZhr-1ybe;(veXqW zlr?1ljv3!pZxhMxiqu>ru(V%Z&AbXpvgj(Qb(A*PVmv**LN5PT1zMPhSeR}Ej)=bF zFGlL|QCS0iKom9aszuEFZ|vv#HH@ydmgDlWc7Sq!|0}1|W3l=(5)dbY<+kZMTKAT~ z;qvb`Cm&t6DpRkbj5^W<4BLPqHt5;3<-5Xj$=+DXB5S|BmJe?6&RRfNNxzJAyzAeM zK6N)S;I3I>N}kPW{r6fdpWOUb6Vq{vczzfPm7`7Dzk9E}WSjn0`@`*9mc?UP)(C$6 zHrvcpsw6Fkh?RGT)2AEJZW35k3Q;2J{w?O<=)F((CL4!&>I-s`>Rm-$CiJpmYF@?n zD0y9`RHNV(T_srk*UGw`Y3(IkY3GHZZqtvGIo8g}92?|h4wlmD`fYeswPY4+!T&D@ zvHC7kY2&j|#|a9JHkH~@P_4g<`KxQ=zEFKh=1<)Q0eIbn>>>2gc}1#_q9204=6~ zHM$?rEP40pEmMlV!)zR8>VnW$U}Q&(7ciFle$v0;q>N*+t*D#s7R^=Abv!>w>DOGea>9CL#sF$qWYTAs)>VUAJNUuWHlPBcQqDFA} z!abrz4uYHGAlG{ZkOL9#5O}1za2p zi{rXpl%gSO0ez|Ky#Ua0Uz*}|V5GWDzn49relu#EBE}uPl7Abks(Z0V1yNDs+{Z7A zx@Dp)Auj41=xOZnRJ=eQXx}*y+is1-BErTs-AiT*%FpntXK;e@5$2-3!iNBoeM$|%|#;Dhe_PL+!utb4}{3aswK~zDZjKu zopZM0xy6#~7re!{YoB8X-Dd{c%{iwU62%`swX9EgAm{}*&1kbg9ttks?*rhrRss$| zR940$n5X?luPXYSp=O9OJgq6?a21(nmsh)

MA39d5L)V~f|hwkhD5q)hPp$?#< z7s`5_${Mosa)(Dztu9&P`vr4D#$=(soDXmiSQ~=8Ivl=LXqN~+60PJ72l0_~0enmi z!^a0nj_DtOkFO5lgEOt+gS$fTF-Uw>)^n3J-#XbF;N98SY_j)h;QgA3oD2L|D>}OC zwsI||N1Q5GVzwS~8AEB?nSNo~3SGZJPJ+<&{vci7BIkto6?C;|XDB$ao07|OwKEiw zp!xma$e`=^p>gPRy$)EV9XUfma2m=ZovFby3^+F;oX4J|ovF}rIC`!pmuxldNxLmk zN|n%&d-SSW<-1U+Y9k(64-Vyg40-P4wZ_q3@!!-nq#mlD4e-AYciUGj-Vzsmb?8|2u{TJTHeHJAdFc2xslk3N%IemAy0&-u}{4xq4&wJjBy({s;B)v!HHSfEcaCieGQvSa{au>VeBg{ z_xovVF0o(4ehRg@M4#*x{i*w%9omv$dexLgm;yv@d?S#H6=VPwuh29r}jW zB}e6->Yp3_j;zy`F+XyES!^)*pRbPVm-#L+0I{>`iPNdfJ6SifFoyQ5?alnIl>Pp_ z!8If8d=O%xq5BuY?M;dG%A5&*zUZjLEKI~KBH>%^)_iy*IYy!PYf7$>;ytilVh0h# z4kEqerHUPa>g(t*pr4021J4dO-V>amW!xz$wo~|cQ)RsD%N`mp zvQ@@H?5g9RHJ%$TFrF_GFFYOxFbHksbMDzg+$p+hw%@9J({H~7`-O7&!2|cGjm?}N zsNIFKm|U;=Bu6!CNXt8UU(ScSOqv@mmAJ4nU{mCbjd)i)@>A!Xv-jbg+ZX!)xWCj& zKYg^3PCpCC?bm4kPv~nu^nic7$|CMJVGc_S`eJJdA{%5*|MqjGsdmJ+89nCO8AZV5 zL-zpkvXr`%h2l;QqL4p zPjY3gk3Ct+dS!LzSm>M%^OT zG&Gne>oBhX<`3vc;uH0P&q%>%f`(7IL+Z{Dye+~vt_N0aF8}LTcON(_&K&C&UMYZA zhH%RNb}b&6YhrvCt6`^mz{i~U` zoaLAMsTUKh_i|W0>BEGtC3?GyF?0#uxJT*P+X_8GhtRqDP4FK+AHJMy80Q+N$p-x# z4cX6q039~m8j`FZ{V9>TyHeSG=3$i_3GOe(Pd&(dGu1jeIu&J_<(9%c>uOnpRjMu< zQ}|FVbHu`b=52?(^Ifat;nNP?ohdqkSPyH}igyR+=*t;uw6Cw_hP34)PG+f9I>Ke7 z=(YBE^iAdrY=RFvnMa#j`LATmaJLnV(9YR6&DOu0i;x-e?)jULA)2l!JwtAZ6Io$V z$6n)1QmRAGa1$9d+*+reyQ_EuHHt@U;Rw!PJdRHF^*{3St@t)s#8pn9XSx4sK-O}P zZ{_U7CBz(+L>K<@<^Zv2U)i?2c9TC>&MXK7R8IiCD0dyV7gFx}_sg&D&J) z^W)hjPY>tzOMN4c=5lz&XveAeb0y|pPx+pe+piZ{DLPJMbcteiFT=hZ&bBnpjKgQ2 z^31mj^L})rWAG$;LhQ%-{e^k><5L>`yD%^35+%Oz>)2`VYvF4R%~JNwOO<#j4}PmU zfBsCf_^X-B^+s@6zry66M4cFHy&CG&zE<;ecrf*qOvd0$r4VZQSgRJ*_G>q+6 zn5HCz+k-Mk0y|`mnusUK97XRcCbxB8VV(zksC6dt6!*{mJre$dkKwO<+2mgR zJ3LpT-{EGIk@kA~q=f=y#=9Bp*G~deElup*Zj$;cJMRTrr!R77-&T)?_ zzjwOL8pYiW>3cO9?9^qj#A)V&|82xQwCCSH`}`)E2b`;3B0N*f{Rl6=w7iQwbWv*B zHL?bp!Q7Vrjm3?nUT^zyc~%Qud{dA-c4(uTWUK3j&fON}z^@S$dt zuh(qy)tXIC4&;di5gyG(uO55OUGVMM3~O+q_Ml>+(|ewC*X1H*DGS z$0Ot2dV#TK@J@J1KIht=|GReXki5Y5yyFbMQi%Q%-DOb<^WH*tS!WgIML7!d@|oAO z#uRC~OXlEy_99v`3aWka*m0t}W)}`x3bc7Oq`Nwk!*myIS!qk^Yr0D@nX>J2cU|h|noi8RMF0NaMolMv zX8PkS>^}X@m;>;koR1eSSKL`jf}vZtkI;2$)+qgZhu+RPaP(mBc-P(F*-#<&Kvcht z^{&~-IXmlU#Lm3^*qb$0<+UBmnUh(ZBbTY4--f(MkoV)rLkgJ2^&=Y>?x$=xe0OE( zXM9PSZL}d}$}GxjcOYZs8Lzb6K99WlhW`u%6ZyA9b&(}VDk!K;f0A~l|W z%ddZr3*l9xZ-ngtx*~keVRN4|=O)um z6ma&!1Ng)md@?63LHNXPlR4#pMh7XQ&&9JipA62Wyp$1GrA$lMTpXI$?8ldKGOveg zDzLePFcf4++?bnIsQm7qOj%y$oR)27bgOt%j$8gK5K z;pV)a+!pBAL%S7O#I&J>jrR@FN-lBs{tU)1y3iUkOUu{nWBsTIyvaF0@_UX(yM+E$ z>Py}sG}g|fw1B@!u@=^mqP2I%92FZ(=5)n8O-5Gk2Padiho8?zStoEMkb4;3hOCJg z^YhSi`ur66aELjo%>`wK`)TH_F>Yhbwf?{qV{YQxziPWL(cLF!h}z7at~lc3kzaLN zAC2&Y=n~z(+N;ke(|_yVg*WK_l?7cS=k7(t+~l{;kT_ISe-C*PZQwhzXNKE-v(jY1 za~JR!a2Rm4hQQlL-sfV<^kIJ*<+dIDo8et8{$S>A9nKa1sedQw@U9*XUg2}#T`ln5 zsl&Tk0e=NLysItnioo<{P3X~*AI>te#pEc z=c8H9f0TKj$vo~y4l?f5Hp-SmQ)>g~;YYIm)}VB}8Em@`9dWU?$#EOD?Me6+7iwGf z0!G)#`8Cm%g-6-bkM4+moP908B4f*`w)>wADYK4shvCZnf-<4DoK|1ed~r)BxvS85 zwE_-9{p#0wc8_TWHgCMQ4?gYT9C*zRzBkdmgP78ih_AXA?PrYx9+dr0?~c>&4&P7N z;o1xBKNt^K9TpGJ>AyWl|4#k;N3%439hUwwezLy3+_Y9gxf2IZ)?q%?~xeMHh8tRFake6-usJrO4DZU zC)}Jh(cS*VP(0*41wGcbQ@@L^@E!g8>x};m%65J{3;#F1KZCU|sYC2;Qy0&23Xpy9 zV*|V`HUxTYuuSDF?3AKki2v$++Tl0;GiMhjA=kxbr~{sVLEbf7UeIB+XlMQHk=$(2 zfltxKR{H88w{I)+p^;cNb3v1nG9Mb*%f0}7)!Pfio|#{7S387Pp}RJQy5H!qXZ=q9 zp1>GFVeL94ckyBG0E_Ias6`j8M;9%QAfFvO#2T40HcCw#OI&cQoOk0$aXb`Zo%PTn z#v}1>C-jo|r^LbPGL)wEjFG)+1K$}}^a~lIjIRwFN@Ba;i73F1(#!5lSDKCkry-M1 zd}Bwu(6=$G0ADbw(!?CIGshY)S3BaXZQ9%tU-tCfk*qHcp9gZxJd$&6M-b1}?*%zU zUP$OY0cqp0RT+~)j9Yxh^cc=YL(ZmKl&S*8(+n=J8>LKXW=wVXlQQ3A@7n?5AP3p= 
z_z~w2jld_nKT~PiW>uzf_RM^_k320#nKB+9kh7WQ%Q-UjrrM5iz@BbSu8bnK^<&mc ze)6!>-@!U+4{iK`dHE4`o(Y`Dxkfjdm9f&7tc9KJFU9*U`e_}ZOp|t{pX<&9v@^1t z&_|Cu$7^wC8OJ`>nLkIb;-{{*{A8S4@&N=6D{#np{{%D^R$#05neFzn$du*#;5n`A z{BdrHqdmmfwf|3XN0?a|ls#TU>r-ZJJ!-Y>IPXH%qqH0z>Ccv`J$Gj4YgF$YAAXIh zA?|!X*jO(y8Gdif!;q;xeh=M zBDt4-31@pQR@}#k2N6GcPx-a1y{x!i9WVD~nz3O`@TAI_O$zrQ*6wjS@PmmnYJ9Wp zu|3yuPspm#+xA?~ox(r8{c^XD{dz^{q1?**roT!4huD3E@X&|L)hdUj>~B)0NqZlz;C#^Ry{vDgHAeizaY|uc zZl;fPb7H^BTz#H8&LW0JVv!8oO3ALBepJ3VrKj+0n)us_MC7T7bwU>6_aU>x_w(VqqUe`sf} zgU8E8DpSg{3N-)UL|u6&`5a}p+Wh2FiA^!4oUu!8bP=$mvlpcnScD#&b+9m}@`35d z%gPMaPsnw3=i^`Wj(D~?y-KZ`sy^F14LpU?2%A@q6wjRZIQIY+>taUJ-&k-jAE zEqKWpMrGEq-+mx2;F3lUDgHYoIKQ~PG`aE3zQG_*H8gic#Roj| z@O;gk;tAnU>FT7 zV}NNau*D;L5FIT>4?qtOxsUm3 zQlH42bnfn}rS56;Vm|G>KT646K^~d>cUC?!J-tDxYNxzy+hcn?z(p)qlU<8BcSYbT zGmr1zPW}7o_g>1kG*TWJ(w_But&WX4$fLemV%+;&vphZN@YDB{rv2dJ066K%mOKp; zde`jD0A|kY7}WZ;At$u;Q++JG6CEA07UDFIcO{TJCH)Bh^s-mI#cb_g2Tst*c>=G< z7vIDihU7D`P7PMt7%b*oCds*rrmYrax=&)IYP`Lky(OV_oQbqedAT#mH%XW2oH;X? zKs}+!Y3z|f)~tR4A6oe@&+>_#9;H3mcY6T7=;1DMIjdnmbm2nrrqhg>J7`NKXA1ZS z7DJ07{?}9f99YO-427khz0bqJl0)6Z_TG3I zx@aG;i0l+U!l0|fBW0YzLkq9{Npp*7e1H9lpEk?+mA777-&|==RihenG?EM4 zt^&g#>xrS``ZxL!Smy&<=<{cT&;KRtxjXpW$@8P&Mq)$8Jl*o$3(wOY=0>^_ugUlO zfjcPQSBJ{?O!(r=!wZq23y`Iy$kh4B)?1M8H*>FYq$YE-sC%(9a{}h5-A9q5a^K*= ztHi=h%X@X(`a{JKqM*P!)S{W8s;eSFp75&3S?OBi*UAD>(N_E?6{c@?vMk z*Up%dQCli97g;;zxH-ixZKRWnzU>NR&<1i(ZZGJFO0>F)*&|cNd5|KrM6QQETMV8g zhSGbxs+BGI*KK*xfk3W9d{3cIupP-IwxIXNSKsIV>FL0xdUBQ}S2q)1{PQx#6fd^# zs_QqcOnR_B-7(!s9Q9W#6Z}r#ZeN)+z4d@;x61D))72&i?|g;Wv3#H4?#93p@+`jx zO@Z+(nJbKaAM#7i-0+R*w2u1?|(M&BYc z1$Ut^d>0s$(}Afz@FTj6HUEXDYQ}p`k5)FDz<)h;1crIpYLn4sJ^q)`&aoZl>SGJk z>SIThYAM&o*!DxG1KB3`!6mBu=~T7JH*wC84#*+*ySEJ68hL)2Z=>Hu;O-duvKJiZ z))7DMefa%k&PS7ed{2(dYhWzp$gI-`&`a`c<;XlI_Kwv`e2+f&(WlsE8RT=due^1- z6I;JNgL1%j;wI_@%L)(e12&_qZ)Qr}q7 z-|6`p2go;umjqYsJa0ex!F0*vXkEF`AHnncEXvfqE67z}!CCLbAcQC6o_d*Qaz3w* zGGaR%9jOfdHTy;zzB0Kgs{13sFYgbmE%1DNrK5v0s5PAV@LLSL3B3%wjl2ln!pAK4 zt;_uG0rqqywO{4V^+k+N=5_<~ryt(&b!+jT-Na0G7xy}jus*+V4tDudy)|np5@TJ< z_9^a9IluHh_1pg4s$+1pI@&cx9mCn4m-=@{M)*G^uTSDhEoyAPiaZ!a`$oUU_-FF0 z58YIbJSmCgtbFpxOJbG5V)(XvKIlLvCgD9J5Qyy@Rk24r4uKANb> zfh2NszDDj*3~fpdPm-3yljQaMD6*-O9G=bDQ$4N74k;&i6*(}Kc0XjTT;!ZKUgmte zIms`wROa7p*wdl(TuoaduQs!9QK77wp}4KcM+>nold5)S(w@jm3$o85`Bs#bJt_Oa zv$Q`m#zGG0IBx>ywHdf~For+TZVz)fT}kU-^!vcVz5H(zJ$OKuSt76g{7ht1@A?u? z_t9Y}qrz(@b}@9AS}acbmNvyV z6@POPZGN3kV5a1GspwP_x)tB&Va@aj_Zp@q;vaa?S!IO6b!X=NJsPpW2bJ z?JLJV*yg&*ljRHb9lz9X>6^D^aQ41 ze`x;Bc<=bhigu^-QRMP|KE`t!&$neMxxTI2v0Kp94MIV(Z+dS*KEdT+u{ z7hV`;(QN*9aFM(i8!!TUkQnSAS!c{Und%<01>2>>a7_)BWq`ft#?lr zbtcJN=FC`p(M7boh`YUfH#s_r;_!!D%HTE7xaWt&qgIagOJ6;4+h! 
zUSv+|?wdHDyzaFANmoDD{4I0|_Y`)MkJEqM)%DHNo)i1QA8)EY{pl#ru^rf2_wdP9 zt6PC@-+HqfpWS_63HH_w)qM?kY@b-=F#&`4TkDvkS~>79{PRE)=QW-RY-)e;`$79L z5uK9c{Wti3H~hR6xZ79G_ooAo2^t%9_XE4s{~q-PCSo=T7RHRvz2|dm9?!~K{M@k= z)KO{P2HNvbE=Nt%aN(g%1E!U-?nqyPvvrht3){f=L$O`cv@@-HU6Qk&=8`e@uK%CM zd^}lgl6KmFLHwbmOBC&#saF1{E6JK~bS3Zzy|KeITfL35dsE>dx%Xi+?<6L)g*o?& zl~;LW%wnInj8~f$eG-@=ykH@RuH8I#y2cYDb)Jx1+4M-$z>W|c@8_A|=@Rg?7rcby zc_w{@@`#M_)R&;^uNY&S>-!!V-<0t2^#&H6q`x9?+JbDAd2Lbsxx({4WH57Fd#2^- zUdh?v8euLu81_+C7DK zB(CxnZCK^}F!uHP(7!e2_oQ|ZGK*8acK{#H|hmjB_SpI-V=u&<=v zer%`TGF}Vq%5%=g(%P1C@v&*Hduj70oR8K!%F!WXk+qC|U<@C_f45H^$RZ{w=igmV+eJ6$xRlt0?xWasvevsFIo1X|`74zH zGxE!6O6(>kIk28~8c+OSO$lqRP7`t{e&+jad~X1cbHLZC0=Ww>%DafW@Z=6WIop2~ zpN-rdAvSM|DXPC8nD&9MuhMTN?JUFQj63@1norf#?$6Xw-FK_8u2@qX=aO5!Tj_Hp zJmq3<@lvtT5C34z2I{b9(KU(BuGsP2yVO+IF8ckH&u7@}+vvN8zIV~@r+h|_4fc(_ z-U>~Ya$mDFTZ?`bN%ocYPcp|1Y=vz6$Z3RDpnp96OHb?KD z*~>$`Bxi8e8Q+J>8Eb*+?# zJq3(n!}$E1K}4N)e0r&)$tLRD36FE$WYt{ep{#!mMi+KSP9giecfSdb`{D|@C)!e3 zh;H(k3kH*K&GY*xlY+fG7;VzZ?j^rV>{juCefUWF+J(iv57{TO;Po}wsMPrkIV9gd zB1d3v)(m%NFz=yO%NdyAUi?~wp8vN+{~mX}()45AjeqebY`6ma|519J>ss)=BZx2c zhBH0CQ_oDRd30^y30*g2)i~DnP~1y)Ap`3^+7 z-+xwV`s-H5V9&}3?2S?Gk0ox$oWK^+Y*N zIdtz(TtuV!2>&!+;5Wos_y!lJ7`HE98BD}4XRQu?OZ4snAHr|)TkOO;Q&hbo zYg6qSU*OXt`N(765#7HIuy@AvA~}lbWAbF2DN3^I%@>u;`NSvcgL_qKkptb(peJto zj9S_Al3h4h&wkUjtPx5ahO_4fi^yeplYFC-Z(Z*8!58)X|0Vy+|2%83ELy2LeG|C> zFDe}s_+7F#+5_*tL7Uom1_QZ0&pHNwF8{;Eb>CoMGqwe1UFqYwTE@e(K)Q)D+4*Ff z1_jQX&jY!8;m@sy@wtH~6qe4<1F~0Xwd7@Q-#Dnz?v7XW@4K&4nude38~n3}VlX}u zTiS#Tw3%3bi?T2eoPLfh4(I*4pXfZlzg7Rfm3A%geVT_Hn-bAs%9OR)80$3gw6TTf zGGDn%eN4NyuWDSB@Qs{XxYuN{{}fxg2K`*inwT@*F_>8Xh+iugnLbw5;vCF(=gsgJ za!UG@XTFKDrj%gs-O4)l)Rxp<`pr2ra#|<3VwRLN_TQTZof$GOEY){{XKg>RCet+9 zXnPhe9jP?s0Gp4xe}W#lpO4IK$KUjw3Tzf$X(x80wP(BJoc88lpgrWc5)%}B^{i0RMKqs-jk5F>(kvUOtqzDw4Dtt z$l-j?wmH$Ap3ePnjN$#3Q?7l~)1^7H z+Ll{Jvlmrp(vZ$R9{N()vkT0Y`HH)UJ{CjIo)X94w?95hi&HN;I^6@S*^3Q^s>{_Zf`V>*H@*;7#zT=}SxG4QHfJUwVPAlY(tF zKB_D{wnP~W)Qt87T=E^~2|S<-=As+o|6I+P;QE}DnB#HmhfS9{ren)%{NMXa>DOwn zXHEFn67G7yUlJSRNoXrPJ9uqGM_p8b=dT*?Ca|6x?-hAWj@F+0P6VF47M^Mo+qFaByZsNAE8SJA7NwyX+A$ zeIGeOKg3)I`*M8UCx2<`@clvV8n#rc(;X+zH7N&&y{$Q}d6(D#Mdaepg( zO~@RvG0MydmHY5bZ|0n(UURZN3!9V}B=> z#E*iX_Dw>+Q}(&l*f8+s0@czT`urK5OM9}f{y%x&@&NnM$9f$U`4G2qnr>u?~~$+%?R2+y5_?()3-P#~AR2g%6(JJrXb$iY{uA-`SVCI+pYJzj}!u7viJW20QL87oRT-?E~fvNryI4(!B{ z`-NX>qLcx-S5W>hB93LjULqGxr!Vz*UeTJwnKINVHxX00wxRisYo2S~kGz@9J%_!w zD}!szX)bH#$OZYaoLiK_eX{6Fk3khN8*c)_xk_!;y8j< z=i#SDO;~;wH{&(jsOY_xXk}mrI_-1XmH3C?PQz1l!6_@xo#Z5mj(nKBiL)@IdH(6@6O8h?>T!+%hfa7EMq7Qy4Ruk=g=pgA+@_j;SV~JXn+Yel)p$qph zavpS$HpKmoMvw9cU5?^k7qQ+3|LqZa9DP_B6n)iOqYRFwk8{Su{e|a`N7jhayzBqY z@r3f-NA#!JKIlN1&%&!R4{~+|a%KPYQJx!fzy6&-ZaL46F^4R`ehj@Qa^NU3!jJ>b zw*$G0C?kAvF`d_^GnmHGV^GFEbm zHGZ|}{3>zVbrO$ae}M3%>`|2bW#M6LRX6*O29O;KpMl3kR=j^Euo=GH^C{yu9<~8+uXZB@t`BLeO=b6wcPn-bzw_%_pOieIFU=`$femS$`mjpF)UBtN<6ED zd(Rxqcai19Z%gaJSuZwP3bG+5S>$dSa#yX&NI(xygFcMgmu@=Q z=YH2mp}p3=CZF`S<(-clc0cV%UU=WcTRjrLxt4u2z4Z4j{9B>bWhstRoSX3ec4ULB z^;;%7JiZsldNjHfOmzENl)?5z;C!vAT6}5AXX#sC?CHx+@)Wi5`~c7Kk7eyOy&f4- z_M1ElcMtmhs=AedcUd>Uzx*BJ!{+-O*?9t995)l5D`fqQ*qdS|SL7SDGqgMySu24z zv%%{@=I}&xhsezPKM!o$YD%jt#K#f4p=Tv)K0l0X`fKp_IDYqhgLb5^*N5+`mYCed z`ie37LWdfCr9rdnur*V`o8fCq&RP8f#97Of0m)@M)e;FWM!J)fRM$hCQNcQ5p4jG^ zzQl(Wcmp+wp1|vhCObM$1#;!Smm+XrtaX3#w7%ATYJH-o2mDG~dd^F#_aVj>+GeLe zkh??14PAx5WNqES+?_@_nKNSZi@fQ@f2fCl#qN(_-$|<$pS1RG@6-1_YB3>raUX4% z@p--i48k*Frvw5~o?hmV??q*3j#&R1m@0Eb=77j{)hB0JBIn4Fm3dm<5tv$Eru%da zlf5xw+zDpFoi|jEP^6Q9wI>DdFEd07xGyf~yoyEGX5-jISuj@!;ufzxVmznTo 
zFYk!w<>8aPYWqvNJ0sGuQHfMGI^_(X$JukiIWI;X5_hn561_FxMDp#OE95*MrJJ}} ze=oR_KJ_?fiud40I$Rd|ASSiRc%DLCt!*{MC2f}*=N?VqJep9rzmwrEL^tdSwv!0Y z5a8IHL3`GNkCj@-DAgLwcV@WRQ_5Wq!~+Con+~&9J_zrEF9GkC3&49A`V9+jrZT|T z&*FJ?hC2&*U!a{I0p~Q}oXvl0W)QdTFgPdZaJ~_#Y`PeI*U;V&ZD-)KgJ+GdTQb}x z_^Xlkf@2LA9T`JDwFUoO{7}ninIE(AIGZOvU)E$N-QwvtN4Rao28xIa^y+d{iw}tm zU1bjGkDL`JW7PUPo#DO%`LT@tM0aWW8+$~XkE;W@wcIhNjhAzjdJpR7C}}Woj*`F- zUZ?ZvK<+)%5n1^c&OrEkeWI^F71^|$cEWw4TX=5xM3uZ3ySv7eTzLn!+);C~{bD$u z8jACez&ZO7G#nhvau=c}uLynrHD#cId?5}0yR(PzZ`jchO)Y79#X{71(^N$E0X^LMh%B@RwsQ_&GUagFW!b zNqlyRdFt~x*=uKB7_ra#Zw7L=pF(yr{~zReMxnAvau6D_=X)C0Mz~$Xc15-s`@eg? zs_*|kjofoFuWQ(Mx8$qjB@^!`W?o5*k28UEp8hCnNG4@if!j*FsO_!mm9`bv%e=7k z>hNpli9w4_Y+MZ|?`6B+Vt%}JP$AA6@BL_Bn4APZ=zhKIc!@>(%1p%UVxu4z?QlK`Fn_ultI63{hn~!i&zV zH=Z+(ad<3apgDexYSCjWhA)$GB=GDwvtsAI^)dbE3(v+?rCm?Gtkdp_%g&*l$Rq{Y zEkcIMJxj-k9kgSMTG$({@nYR&?##OSW+yR4j1=ro3IGeY3q6x1pD{GBi~e>1idliXf=y#{Yp2)vq3N$~z3 za3nUAp#vqK=d2EtJ(%Rx2Ky2GlIwyF)SrJdh+oO66+8^-z)OfF8uGne$FB)lJBD_w zv|;_(<9j@j8;^Kay>rAmZ{Lw2otLS@Hyv6Sws#kC^e_Ja`yunxBoTN4>@~==3;U- z1;+F3r^NmOeBkvG zd;ndOloh2X;cp+$oZJUwX()a_y_$V=S3i@wojnmftS1OQT~n1N$=Q+h1X<5J`a11b z%Gwyd-&l)l(TMHsQm&42$mRYz=C4*J4Os=91vZfvuL4I8eM!EbVJ~iwXROWmu08wg z1%daM1v&uT+@(&%vh{Jz5?=wsPqMoPRwI`b%6! z{CL?~jgM4mWgl`iYtOgI zUTDU8>zR?czd=`sE<(>Fi%w0EInAD)9`suy>&nHPA=8YkeTLsfoJrV@ENW*zz4%;` zuX+jN=^;Lpxy!GwOTVXo`^h^v!SnA=QJVU+-{ZWjQ?EAa_WXauoqK##)wTHdnMvSG z!Yhym31|qYBtflUc%-6Cf{y^&UIIRACD3Xoplzd|MwC3znh>=NU}@9d^3b-KSS(oS zHNE%#2+&6;)}mDVE(unh5Urwu35xUkuD$0ZC&L76Z}0E-M?NQW&VH`7_S$Q&*JkaP z42Etv3#pWaCs^ML{q$peK~jm!HT5&0CwF~(A_`=$moc`omSk+$K6M@$dDnscKy0JX z_s(junPZPE+Goaec)xkhV@&s9GmR~0wVCf*e`TIUlnYSqL-a>7Uy)lg6YH|%Oeg0_ zZJ!()+4=i!BO?kN9a`J`a%5pDbXA57VD|54PfITsMd?-cZ)pQwu`CDv@@@1wA~!hg zw!g%k9^mSW4aiEoe}v}^{GD963qM~KtOLe&O<(>aauf4}1Q@E?^N zH1;_~A@007I@Ma;m~PcJvcEm6$C48>mT}}AdCIHDvh^ABeB)v>7Rg=W!v-m5jid|w zp(4&0x%huB;VC@xmcy>|1!umnIbW>JX-(1Rz3cccd3mgb#i4$CD|Q8`FZ=2>lo21A z(2xg9k!#n^-JK$`QRSTIIZ(^BoC9s-+T*|L%O!X%{_DMat(@Q3-;g8L=DeArWrOkX z{Asi;@}J~U5IX+v2TdLEn=GTeKI2%bP|F3_ab*7%9J>9f9bal6XKOvaw-_JiKxcfq zZuI=;^z($PpAKx8Tae{)0vDNa(f$a=HBdj;{;|oXH5Wrey6vc3@pEXbe=ZWq*@hjX z+ML_P-)x`W%S_G)C&$OJ4wZTk9w9oT3uv=WWHxL|b<9=9>y#57KTu9m$5*|{70)kz zwbTBL9*ni0x|o>$!K%k5E?=#{m)2V|Q}nyPL%tdf+&sXx6Fr`D=SC-dW`@LSqGLmD z*D?Os^pEaleGL}=MVn4s_iyXjZz(77RMLj`CdqwbvJ)A6X8P)s)S}EI-|T?(HYhB>Wo%_+pl3)WR0LxRVnreRVn^6gIGJQD*F{XCg-*v zeWu2<*F#J~0x=1(Io-{j#O)7zLM6bs>AkTt+MiItEx%MRIy&U08;uURnsIkNQLJ^y zN$8N1wf;D{D;fRq`S|5=AJhKMNh-{=EG5g7W!H1N0;J*S`34KfaN;ULd$9akU ztZOr6WIndSzen%`%?bU7p{FM9Q_y$IiS7)22XeXOK_;gh?V#_Fc4GUi*ZvDe{y)n# zHmAQW{nkAG@yuZq<)pt{8$V_*YX*JJ4UB0gbi8xvMM0sX(TuMxI=(h%eE2av2Ob0m z0uKidS4Q#R2M^&%@OtJs4?Kt-e-Y>NtHFcM8qzS=8nR#D?_)i3ch5`VPZfp2#M5?8 zvO*7m2jPo*X`0Vy9v!85Wo0z9GUm1@%(o!h zIKKNb=7ZGN@WzkHoe#fz(dE54{w44JKTDUMe*s+u~yT5=h#iQMJ+8T&<8Pm=$32tL(w_b9yL%Txb zT9?9@>X2Ra_){eZZEABdIQepP{DRY18lOXXDJOf;7hUBX8eh!!zJ1Nmm$+#B zt|XQ6KcVwSZZQ1je?sT`Uo~`|{R!wik2%O%h^6yez>loWGtqfw6b4PFrE`m%f#gq) z=E87tZf#+l)p714$l|X0>OrUGWuFZ{mwnyILn^s3>XA<+_tTm37`Jhrl03$;XWK2O z<}uc^;>u$@hWQgGS)kv$@))P_zARBqy%IY7XYv@=z?&7cD6+ih*)>hMau%Pg<4pU% zkh56p*W?}%bQF@a7+c<2`!qR=7qVB~3Vy^dBsOR9*(W0#jXW1WXK@L0Ytr|CoccX7 zTXgl=8QxnIf;HRVNS6Pvu`iPG;=`oG6{yIFjg#Qw{9v;be43YF|XF^<17 z^in?BZj$!29oXFM;V$E6@3tpMo+tE;+@Z0FeQ-ld@79`M&=gy z${r-}4vQ`zI(OEjo;z!2;X0XfI&;Q9xehuOSoiL2;3SOQSFbz0H<)`h-uP=DAKdbp z;*jj?B6C#%k0#1HXNu9RPs0~S_3i(hMQSNSe?gIBdiSqhyqh~f$R{_IJu)snwut>C zro0-0jxY*u{Y}$^ob3#(bGZLZU|a=^MNbc7VQ)11upT4(u*fK}^9e;|72%~J*SxhX z!(8@8zZXH9%lUrFxW@F+!Hq)0d%aWsgLU`8($ruoW60tj6`MWQPBG_LVl6{G_(OEe 
zR>3D&Z@N8A$9;40LW74nKcvB19J*=6k1O811tzi{QowC5UWY=z(6X$TTE=u2z>%28w(0SV zSN7k3PJHQd^3sqCEV>T~pN!o<-)FCscqk{1dM72%03eswmm`cYFE$D*jv(WWQA0K~?=y zbU!V9@{b~Wrsd1~J^U}vg*?x5!xtXhyJqfB4Hnba!^~ZL#6#HIb3H(do*!13C zW^k^p2+?QrkM2h=$6Z=cy<8>rn%?{T4Dk_Y_I|!wba&z#AUYGy0`s!bl@(0hlr@O{ zMaOcJtWD?MsXXuuooSk^OARie&aJe!{5*f*3FO+}vfnKp?=M`-_>c3ux1O9YkmIH> z-ug!$E1iR_Q0r&vQ-i(=_1qk{F7E;}2Wj&)+SovznBI0SfM+@V|133FOaE8U|DR&| z^3hk`6*Y0;66W5UF#@;89xbi^%=gSVe$F_2s-@pJCYy2mET-)e)`Xt_a|RFRYIy!- zMw{xM?xsVB_BOA+xqJ6RqVMgaQRmwyq01-fKOSA41zr9--yFJphjIKX>GI=0s0~|I zNz9oFj*Zgg)4Z#Wfk!M|cKrFDqsxQT`y_N(M*l*W&obX$x`;!UyJ&Cu6o27D%4@nz zs%|;L`2Q_t{Qodr(&jAMI1^oNqW`Heef=xx^6Z$lW9hO=!}IiXIp%*rmmeRBPnY)) zzxM&(9J-wQFQ&^Iz@fu0m+`;Qr_J+^^w~=farosWz~i6cmltR_o|#|%l>UV-hn;~g zGigug@@dK&y7adcGXCb6@&7Y)`7PQw6J7TFNp1LgOke*>y1X~0?O3|}1O3bXbUJ>y z;=zBOU*16f@%ZJ7+*3QBZw_5P2_F8H{PLOK8(I{fsXs;kU&6B>1|Cs(Gr z%hi&@$%FpGiynmhO)C@0Z*2vSqn|PMRwoCA)x4c?w8xP%^Po>TTZWM{cOz$h#CX2P z?^Dj=M*h+CBl>fZ1rsRi=p>zUxabs`D7#Rf2_%0+YV%aqRoh586QpYU)26f4(K(bs z&rmA)dK_6&%BbjB*xg=8p9jo6P14@$$OY1djlRN07E85K=iSSgh2EZLOm2E>bH*K| zx8G5Bd9A+)NSE!iTEPyqVNLuU0LyFO3ZC6w!JFDg$??JrNBo(}12 zK5hJ#JcYgOAWsU6kmq{oa0h)|71LMw0PUOPIqc>4}K7Co$>AT ziJtI4UQcSD&s+ySa~;()^|_GS6R6!kY+Gyp-du%H%d#c=7BKKv)mUDbNut`kM;ZM&Y>f;fR*m9XKejMVbK`dVCVtl)# zth-zZx;Xuta;iq*f3>^mI?B4sh;L01`MUz{ve*r!>^#b9xZ+EabGc>wH#VB*JyEzi zWAIb9Ejlh+`rB>Vko+r78&{x)XUwBA*#DyV(uf|d zE+%KnJ@oqlc|RQAB!`x_1f%{t-!jiE`VFJ6%nYH2-8;_c1<=oi$0Uef#CF?v#mR7X z!pSd;_ZNj z&#dn%@W4+y(J4vXRr<9mgfC|ccFYuFgT7_B>Qfj3O1o%j+WPBxBgHR zYKq3z9AeMrz7BM5CZF1{X#1k?6FqV7_*X^8|26Ym9UXscTW!Gf6>W!4Z=MDmWDE)) zmd%Vo^tj^t&`Nx%qkq?91pYF{I`K=3-r?9r{I~wieRs3aHExzW8$|awSUr1xEUeg5 z>(I?>SZz135X{k`OORWB z-mhI?8MQ-5yHeh1_c(MEJAd6SvP6^9?hh|CxTW16(e4$ob3@PBTlRc2x5JcuGCBsq zYj~)!xikOm#-}Sy`(%8ZGH#gGp+~J2Uq8veQuBf4&r2RO&*9+H4eu@QM;4BcHx^)Z~^-2q+t`gC0yCg{3^CSvhYPJD$!i_4oj;zreKFA16}YY4f^xZGw*wWlD-_dfT#*MTI6ikX!n# zOYk9OJDcjXPEmBrp`~j|^TJJ9x2)+I-LkXhz;`bX*z!fQc09M6=QDXq^MHZ<8Pl5c zqvI96AU=ZQSgStqt-wc6&zZFo7mltqo#~qMC~%|cL=|Yd8xJj|xW32YBkWqw#RJi^ zn^!&9$E!a49X_JN(Nn`io08SkpVD{W>4s7**FAKR=#49L(Zhnj9u2oBuX5_xNowjg z>ijdXF1pdccx#z?UjGST{Ts$Bd_SAq0<+mKmblLolY8~R2Xd~yD7Y2=vX}BVy2}8E zFmTuF12_o3{;3NFoMWd}G9NwvYc2}rGxsFqzr6Xxa?MvJmTRh!YiGx_TMiz|ft}{_ zz-}OaR0nqK+XdpASinAoKiArwo`T>Q_S~`1&QFQu8ih~UPbBW*b>rtH`OqZa-(!qd zV#`-fAhw)1-*2&JYQBrCb~a=Bz`aMY7IX0hlDcB=6yCUs`cg*LY|a<(>x%NmYBB(72RQvOiNM=L^OC+6&;veKzpS!bfft z_6S`T9$?D$@7v_}F>}0@F?Mn0a^@oaiGQ4#hvA)9_nSxmew&U*7Iq&OpV-mO55&x4 z2Qq`)gAn+PzXv&OyU1zCL)(!@o_&En9bO`RVyln4mZh$Bqp^EB`<0xT9UgWrW0SoZ zI$N9LTF2SEclNt@ZOEC~*_%Ggxb>Q2Zx&t^aNYUaW@OuUZ1&zN_O!*>YN6!$MizKS z_KuFvnmvPP7`=xOm$pIIkCyK^i=08Mqi4@j1^A`EbPV}u^Pr;cdit#LsII$es~b^3 z={P*jmi^Ay(KGw@9QMUx=;_ESoG;yFrV@7uELcO^^NzXJ&|=om;^-PGG;2uKa*0Px zt!M0W;O((CtEDmR23TX7H@<&Sa1U#DKYdFZx!82a5NjW19;rDh$a>noGks)`^|SUB z{3LItYz8*{bovq7rR1snFY1;?+t+*>ej@a2vo4?oZ8O_Y%sQK`j#_*xqpqwW;m6yk z>wNPg_xNe2pRJbtDwjIOPKsPeoz2vdGT*9Q)SbzmlSO+OVxwcsvX|M&@bc|jI=(Ek z`8;qTwq$#C)K{DJxepn)&-R+t+(4V~=F}6=gR#ew%d5(<@n&foZyo$W{9u~8BhRKs z$5~B3!)TwptMjD2+n~$gv2%cTgrjrNFvo{c_{W!{^AP?aZCkf8zNpPvrOabNB{p_6_7-XOC?M4vyW|jW3D+(Q6LB zdBM|c#_C?97LJ$B_%EPAhWj9Sl_9#HpX0zj&D?h!iUM0e2Hly_s@D| z7wdf|>-{cheUoRDzU%1`cOTe((IrI3F-6C?zxA@(FcFyOHGb{nzL-rXKKCf`xkqb_ z9_XZ5OZT|jWPOygH^f`>GWHVg%#?92b=P4W5>q93XOG=J_u47_Vk$HpoNM}O_eST) z{@t;69C;;;&sHpsa?%WLYR8)Av?z{7{h>I7KajqsA%l53KCM4u-U282CURae;}bus z#WDW=b@1_*^*)-`{FhZu{6w0LpK$6+Tu|(Iwu~_2@QgOkkq*s(D_ z*V1-7cn#~A0h6zyfp-$KS3d&XT*_>Gc_kkrdjxbI8SD>AY)Wk0Ky2LJRN6pqiGM_Y z-Sr^mEBcOJ8ZGDig$$x;^!_QVAM)O!C%hFn^~TxYQN+ z*>{+>^|*mwF7k|q@tX$5weYqfz_=F=vPPXU>SDb{6Wrf{Q`q^QdvS0N^*{Qpt3I$P 
z;XY;E$K@AmKL0-7dvORnnw;+oFV=WI%sT9%j4kuWkH}`d5-(7yeO-w2$;X$Z)9cG8 z#-PGRU!M)HZDq|&VD3$G!LFYGlvhz!@FZ9k2 zeuS=5=D#*4!OX9ix%`;<2_A*F^!n;w0c}t=s?*s%_b-}8EB(=&3Uxx;bBrH-CUey= zeU>&E+i&hq?1L$NN=(SUs85QfDdYg5srTJ$g8rI}4S-xOGPcy_Vsu!KM90QHB{?F4 z|JDQFkLHQ#)Olha?~VD9I@OdHdJ+GrTJYdQ#&P>k_0kf$8j)pv&1)Q=ob7Y-3i{~> z1Ri$O7e@9*@w>Vt+P`!A_A&28x9h%pMd+Xop4xjK+3K^qk*T*kahgr&9rbT?-=nsq1*_p{)yQ%Z%f%Bt zCNRSfQTE(+WFV16wXA?He)q#l%jv~`GS3a?YI%LT=-r*RTFD(2p`OTl;R)b8Do2TI z*THYWv9$fZ{Li1*L29`VSoRD(24p717@MGLFKyKElyW}Cm0KC;mby)=Z#iY(*g`$W z*Vu_`+$QkZd!`)w=B}D~NAXoH_V?7UA$O9c3O+z*oW!2QdhNZp`$$YaFu6Y{j2=9~ z*`|a3#il3q-0u~>!pnjGhT2PmCy+xc`Tx2X4g5;j4}EjEC!94tCdc1%|8QS}BT=NOYGO7>})lZ6~J&>rl>@JVcd*dfNL zale&6`9Jp}Z1KR*=82xR!sf2KJpK|==G@m#PD?2xaS`&=tR_zI@n}rt1 zUl@-T$rbIUMZMOxvhkeXi!H9)H19scJGZ^<$jZdjxa0Y} zYdqt;LG{&tXgu5PYfb&YXBg98_Y?*^-O~fN>?v&GIna0~#u@MUnDPD}##>hE@ABl|3!Flk1-U%N&@g)zj5?*{Y67bc)Ph;hv&`Ip}5+6}ANbPj|HIC~$LoED* zRkL4tg0HT}mIW{IVu$NMb`c+k!}t(?R;dlU2NPq1oy{iqf=$i^_=>hi+OJ)XjU~NH zVwj>j8|>T@gGGCe>pkG`&&+ewIZ-=zw7leL2#uc~YF?KRe5$s8J|!PEU$NJ91KVWy znb_;(PNmU|&9T?T<2Mekk$RFdB3tq?z|TCiu}G!s+zg`Al6ZhunVaOdd^SQ(4%(Hn zWzeBbS#oXd&inJzgNu2u`OcH+!5sKV z(%IxFA0R&H7=GCTe~DR;y*R1F=m+aOt}H<{7=Av@%I-UPHhq(hv#w0lcEqJh5FPB4Ov!S(OXs$1Ar*5R~cYfVS z)#8ku{keDS*O3?gW#B9R!SUjH-UBug^Ch%79=a{&Oj1W&Pp%c1R_5^pYq;O60GI{X zhkem}!RMruoX8=@LB3auj|g{Mh4&hIMvmy|NS+<~4*2d!RMUUVd7|kSe{UYSP`<>K zKi*g@Ipki9@ky%SJVCn-{>3M0E9b&P(Q=|Ys=1}8H$F)A#5&6VPM?`Gnn!?_Hsok` z+$8(n{%qO}IPF3|PP_YPcN%b)`TC-Hs5>Ypc#t?N_j{Rd;5=iO<}RUy<@8e^cbn%M zya)1C%XP!>pBQlMjyh%SE`HLiS+7}4JwK;eyCM_G+C9!1+z*Y(+Wi7+ z_m%PPwcF<>?vACCIvK26V&1yiS+{$rzlb|~Wq&p6Hnn*<_>;DzOe^cPjWW)9eHVJ- z9J)4ME`AMva`Cq!d&lBx-@77bn;7qQ)~-dMHa?xT@{51DgWtqOWLS1jHGt-~X(sAS&-|M6(ymA3~^`@7UZ z-q3YwEnR1Z)S|p z+t^`vvUMM^#n`;EROUwQTV`DCRk}dl?y5tV6?@wp{N@M@RKau&# zS%^pQ~`*YWP^@+C7ybN7Ml zYn!;|U9so2c(Aa>V8LwUcVv0dD=DzUhE&|B_MoBSy3iO9@S|G_N( zeOtg?F7VCva;M_O-zLA3(0f+%r(@d5CD*^`N2*yPoyDdtA8qB)Rvu^8PH$fRti;Sa zMNd)Ans|!6eG%p7O#OD_GV*%Qv3&dYpQl10=t1t2%pm&}I+?k#5zVfYu#2l?u#x|7dbKcExilpdXk>3E<~T>?YMt>je~fI@zjJegk~@I?EBM-a6@2{* zD)`21S)=%%LC-S=EAIXwmeG|L%rCZQfzRA1eCC^H$2e0~;jb&_0vYS&^b<%gsG-#tG% zzlG-cb=tcdyt{G!oXqXhR*%N})|tV3F0*(V zwG7S^@`JTPH;f6|f5*tXUQ>Ln=T3>|YmJ}i8}WUu_g|!*U2dT#sz;|qu0>xxe$?>afX503 zm}B7h>`FWFtSN!x-b(3H*45Jj&-|jE^7D!~Q=5A+%EtT9eWo{m2YF~akK7MaHlBM8!DIeu@R&2v;IZ~X^ZZ5j zsk|mn@F@KX9_e$%*5N@73*Y&{dT3qnsChEHI5Lao+u^_ z-|_b6$1f&;yR9jORhM~szA*wnl)1${ZN=DkKJAh9y7q(HihIKH`#5+l_JG%sLDuft z(N7ik_@JNiD|uJyF>Cd}Btu*9;=e&(ukrnLuL^$ZG<>+|Yy*?6=bC3#9M~NGJ+OKG zDg&F>2Lqdt1~%tj5S;T*z$WK>1Cu=$n&*-MV6#TUCbh8YyB-6RMvsBTb)KHetpba* zpoYQs`TvAS?QR(PhEFD~$bYFFxm)K`o0c28ZMMeWBOj@SO{S zhvBz>pg&E2DHjF9Yr1rwch_>$Bt&{rT-s)~WE@^6$=WskaAMr?1Cf zr`#84*6CLA9?Lr2>TNks_-$=^Fu)uGx6L#B_O|5Sby~_=8i=RvPBJhaKhiv3MVBXK z-TVHP8rHNcYf-l==cK;;M%|09LKeId`H*};driI|C%$IS&c67X0^)0a$ejU8Zz^t) zT)RsSa33!^yQQ~yTGA`?x@*a2wX|`z{ThrHO<|Wie*J$VYg)*fcC}tjLl*Lbg^M;UT`_IP&Q~b zv>^43jpE$9(V-tN?g<>u=(SsnD}Lqa`N0aYIj057_7s0&+sotd zp`GxjCgD%4$#T|X*)yNK{&MP+eT})+N*y!**BD3H-7^1Cz4LG9o$do3SPuUyEBy!h zpibF&v^T!5kLRV2bm@aT^rHR#l6TB!`%cd#!4>SGXflJ1!QlE zw)u?o^Hbh=hV}Ndjd#nhqwLpsU)xt!;9qt(?+z#Tmfgy`7074h$Yy1w|G-$NQ--_~ z5P2s$maj|uSuz%1Z~JR_r{}z9dT<4DZ#i;r*>O2HelpwJa*>ri-1QXg<8JBW|M-FR zpx!f(ar0pfNUF_{``zGEQU3oEUw1{)z-lk*A6H{Z? 
zY%aFXF#1zR_bonS_>~;2)93!QK3}&?Y~-B%MDMc`nwIs`!FoBlvafwv)AIKx`Z?3N zRQhWl>y=#nv~)R=2_?iT(MX0p+lS# z)d`<2{;Ci@y+-uH_dTq-yU7JK zj{5g_$L{a>c=q~S&iuZU4;-wYu6A~QT)bZV#d5hjA_APoO#VjWhhn>mJaDjS8fR1P zlmF)OMD5G!p$+NN9-i5d$vy5LA|H~s3O+Wv`2>BKy9zU!v%%;6QQOOE>Wb~$iTe_L zTY&L7I%$aoTa0g@_$bI+r2P*$>ju1|_h%QWqg~9w;$HVBrWlw?U!t#^NFVsu5(kIQ z#|K>?n}e4EX+!M(E19GC302{vT0Ih1e@8X`XI0pW-j1wZS8aXv#Ilt2jlW^v`k^%}`B7Khzk7!_>-y+x zL`N?^+Tq#QZC0GUATLFwDgPv!!m9c+pWcK>HC~bt! zQ#*Z;KHBJnzOuc(E*~_OI=Q~_eehfN@dF3z&Lw9%_oEC4&!WE-8WDP&IectG!SK|E z$Du!?r!v0&LQ9qJs?hRbs^@p_N6_(RG@rODal;eLTVQpUr=c<4+-Eb+oO`nuw7;r? z6&89k@Nop($efbF$+B}x=1R_f!O6u@oMa15@MCl1q!s!*9ZsCOf)ibzbC2M}8Qb^y zKa76vL-ET*H#Uwt?7qQyQ2fqsgf72{UTO4yUPx|6#sw_YU~|8g@4cdK-%-oNl>EE# zKm1_4=%dGUDemnFGY`o#DRWv1y~+Gy=k;mkbv5nExZ2U(x#v|)99aK(HTI)5r>--v zc4$N9rR|i!^zdF4d_|c%qZb_Xhs6KR&hZz@`cyvueVbXMwJ#W-GT|4k)W@&ApZ~nb zN^*u9FS63F&|gsgLuevA23tc#U*6zNtjSL5%l)#Fr|f0QN;!p{a4>EBS=$B2HYY(V z0mh-oUxx2gPax`dE;%C6@#^}V#Si3cHn9nNX-E1>8|v@5KHB#&WIxk?Q791gt(X4u zSZhxIJ6-*Y4`0buChko7#x5midT?XfEsD0g0{^V?Crk{Otb2ilE{~2{%1fR#XCA+Y zzTNXUhF_3-E+x>Uj785S>es!NF$wI2&WaeTPsSCEhjWk1DU$;{vZDCWK6&55kMvE+ z?T77~Gspe3_eJn6{Wv(fhj+)`_T&4U+BJ^-QSdJR`@13w%eXtmR?J`TrN*CHz7=ui zFO9B;B5y-ug{%i?wj{yO?8VrrQae{3JO-T#J=|5xy(_icX{-7EqHfz-{*3S@Kkbwe zm!Obg%RH*5=T7d|iMC}?za9D)K0g*a(h9woq&2@onKPXqgr22ur~VM?PbKbGXgvWu z*o~2mvhV-p&i+0_GFQ3lCXZ(UPw|bfowB~Mm~%i5`-bB03)F}13+(%|M98J`9p@{BdY-;xiXp9`O#1fO4IB`-S+F4y3fAbR>d z1xa{2bnk9)C@ezMmvzO%ivHMEQI@uV@ihaZBJp9BxRoqa8tHok5pCzYfPZx}{jv#eoV)!4v>0N0CH*Ke_x9iq?8 z;ByoFy4AYo#3LznjVW>mhBYF2xU23v-W2vV>ccxEH@}p(tw$Pv!~e0eot($gfCWxP zaxR{4CzdqZKK@ep%PDF0o&2|h;>YV_f1AafBEMyE1O}iQv4>s&#g1a>I~n6?`jFv+nQSWOQ9;cR!k# z(ES4b(OaNTaH08}FV)v@gfdBqo^H`Q?&jOeK2=bcp4QL_JdOa*^P%WYui1i@ngtovRC1Qd&(Jo zt&LxxtyE98++BdJDbH3TZ7XHuoP;qK3r*Oms?zdXL2F1tx7OwCsh(N3r-r}UnN^pm zmT(d=Iy;L`mYt_g1`eZN-kKQF?^<{_O`WW|Q_J)RY)jvJVlTuOWW(W0#pX0EaNZp4 zcd!JyDIXv8J3w|6pP|59)#5(4Fn)a>gV-s>&tOsoe4nzydyq|a`yF1T*W~kj6I$)? 
zve$kTDbU}sub_iJkhdaD=R)@7c|-hOn}?ms_dr5O{K~{$gzW_%vP*+oRvDSt^ELCV z&C=(MxVcisG5#Xz<+NWKd=Vch-G;g}xQ%hh7=FPRwH}|fa%#>Le45!G4x#^jihFOj zq5nk(to6V9;mxsn{7%|(^f*hP87EJQ=y4>!tmLkd+|%y-=3++`KQGbAzK%{7I5mIo zrN~C%3x4vVglBQLJ$H^ho~0_QZ-T$WmwWdo%F9{L;9@rI7ITIy;?4j|-0eR~ao}PV=gXFSCwu2#@@9cZ^amRn=e1WyDc8*96iQ62eH#e|MzYFc}Vsk z#dE+r2OXVSy$ z0qm_QOTVmIEOfC0mH(t`zUjr&ta)O#Kk1rZ&zg65YAoOESZMfV@y+I0klDvKeOvx5 zk3D4``iY9w$XmLNXx^q?-dhdt?fF>y?~cDFqMs9-Hi+Tyr4{M80AHF_S;pB~bQ^bU zjBG3ehp)e7_)r6MD*LF=U+lUMNPZ#fl7ET^>AG?J=xgXNmIvL>yV$k(|6(k+#2L$t zF=ILQmH&S{sON=KdC-$@4$Omo#aP`u==ra(?+XsaUqUIJ<4f@DymRxQ|D^8ec~GB^ z+z0SP&4ZvDIb%Od?sDB`M}h7q+b_DfjOO*g@-Vzga4Y`IGuTI4J*}6PB~@R#6B-N@ z6CZB)*IN70S^fMN^~fwf02kDY9c(zqw>KCo&hsUH3AVurrp#G4M_7%u&AC#vC~(6_$HE zg?YdU+iD@YzrsMVr_kZ4-=>{dp4yI1BA&n4L*uczosOTvi$6I(jqeNS@KX=+Qh$CL zzx*fVrwb39hM$tt>67x)1^6M|MeJ1|8uyO;xLxijT>PTpgNM&Ce6S-!Z9snBUiS>~ zQ@e|MN{FZLBnBY1-j?4)`R4>v?;!Q0j_>EiJ@&q0WCjz%EPK75^|TsalTn+~f*t$B zAJx$IQ=DPt`wYJSiT6&t;~wzHIk8{7qwrvf6CC;r?DsKqDB5qvvgd3wmhI88jAAUQ zj784&t?YAqqJ5X)_sss=3p3dJ706TC&qnTc;#S&X z;#M-Q&e3rMW!Na->p|g}5}%dJ`P$A*T!4Q$?aJdkiF2L8J6hDVVeI`}lCx^f9K_AZlTVvLQnp9odLjSVo+vqn$x1p5U zrj)hK9>+Wd|2>zVROhIZHfJY$++af!DOa3SeswatPPN#r3A|4+@6&j{miM_QLmAG0J*=@a&`*42l|25#L7dq!^@yVyx* zDb-cY+H~%ev*Ag3{J}4}s+r#`Ym|vwEe4IVw7xxcZaLVtcUz#!5GbmraS9k_DyoMb!V$mP|0>sC_ok4C7V*u|ezYHCFJmMw(iy+72RF0=``EELd!P?k&W(ZnZ<5qIZ+Dpa9^ik67Gv8u<@W&dJvE z^Nbw$zHJe=3Qm`@R^)6V`xmUL*|kFvbUmrqRI8(J9%ivtVC<_n{vL1{{M*lf7HCc!_}v>lT_oK zzi7Uys3Q(E?<_TPKYcE<@`6K7Mn>ELEj;2I+E7iOvuZpQvpKieW3lVF`kVEDN7rpS zE9QNXr(!Q~wa<=uf2pU!=6zkvd-tB##u&t&!&)n}%R@6p0kd2sFdNm?938)VFO|K7 z9F7}_fvRZtuJ318b=tD$w9in;bOP({MgN7o)!(*|COp@J-V=W0?0K&+j#u#AZ1VUE zf48)0M#muR_MSA2yDs1^e!$!@wy_;ubdx6opCJ`|+@o~vkj21i9JqD%9eCRjE=RK99qZGIUAjb)%-`sd(#tnL9walPE_~q@{H~p&l>+v z#$U4RKWB`+)W6R5%J|22T|?a(V(?@ROPGU7!e-65Hf6vIpubLhE}o~oN3P8au7Um< z%I}{s*O%PjhZX`~%32UQ+dtz&c$007NtXZZTeB8?(%;RL_v!LU%{zfwmh~w6J$J=`JJhj_^)w2OoXKLbi{eQ$lPfg4dg( zcs-1*z>}pGI(Wrbs&o^y9p%x7hO+0sHazC@sc5FZA$NVulsI<3bNr=?*=N zGW2kVLl1ZL(nH&Ah8`pi)V9)l>EV<9cIe@_*P(~>$e0cWCW)<{jK zbEEw>F@~{}6S_O}lD8rUf1>Q4v;W{9ve=Tu1QdVQuoI(+zj zb?G1B;hQ-l%6#i6FZcBHE1%fE{H^r6`QymcFlUqisWH)+=I^f@PeYyBJ#Yog7-R) zXFqu+UkuaEEc}MP08QkcEIKIq=lq_kbtMT&4fAXjd=vh6N5`Ujzsy-{o3HhzZR~w7 z@O**i9Ua;Ce)$S@?<3R~{V}|+3;2nzS7jb$@+jjwSKXU!;VX^r(P3ixN*H73Au~of zmzB_m#QNm`i!H}j{aADl9{&H2fYE_)wy!8B&O48Ll{&$h+yyFkeoi0HI0cXAFE z{9VgEx`Lbcfp0ZBIMJbm6S}99E4r}i7F8&|PI6zAiLXC3*Pg`U5A;oNFLyjGg?45! 
zra6qqvV8lU@is7C`5$eyTOeKdPUJ3f?~PvC2e?&FYdU-AN+I_@*=btICFa8L)@AG8r4-Kk+>PdXk zZG5i9mhY>nZ#?or)icTJopssZe&*zc#`~eoqh8hXos{nX=%(N5533ITgt3dC?5p^6 z9%XHhPDp966UH{66Ydee|3zMJS1YtIc5qt5k`qZP(!k53N5d_QZActJ?>*fAWmoA7Flx7w-;e{jLPN z;U@2>t}@oBl3urR<>^ zb@U15^h4I~qpaVj-@1R@^F37umsvv^hVVDMNChwA_lQ9$4I?-cj6lbC)H|$MeXVL+ z`$My~y^UUe1a_dw#U_{fE_@$1Grv2~yFAMJm3iRjzVH#|E_fDPsYKs?iJK7p{)@m_ zc+3)XC)Iot+;s4Mzoyx-XWZkirrf`M9XrD6Q*-&wO7Op{fXV6Bt?U5O1pKQq)oQc+CR^KJdU6;eoy@)N{iBYWnzJOF#aHt|}h?yXQ;)IRATqdvzWDC;P7O zKikFs&V{D_Cp@l{F*!W$2LAt7@i_lc!{ffM+y58vxDQtK<8jOS@wj4mTyY(bj7^5$9beVU?>^G=@8fqL@;jE_9p~LZ{O&rw_2+lR zef-WoGrv1T{ZGvAj<0g@yW&&%9XROacgR_B`Q45<{4N=Om)sv#1M|D$D8GAP-Sfv* z_2+l|9+7kgem6db-=W)kl=T{y-}(CZ-Q&P19>4oK?}hiBNv0n`xzxuZ8*|=Yb#O0n zz9Iw0=JCxHJC!v{=VU}Tw{x7tD63h)no484+Zy$u{V}<7b=i)~g4v14<|hx?P2_oX z{6U<2Sh{}uWx+q+hW}P1QXprTCgfr+)_7;bp38!fMdTD%1TQ+s9q{Ce2uL1=3#JCk z$)ggUs6y+vVjJRJ&Yuf{Tal$-UEM#%U--fjV%)2<1{wdd7v9oO*;oIIKC8#$W6U#l zT=D$rQ_EB^ZM47VT4G=V_+iTVU*e7B44NF>b0s#io!{w;)r+D>ERRH{shV55AE%FQ z_QtX-f6t1uRZo!Li^=0Aa@}Pfd?@f0M~2sVqzbms&ujM`l=H;~@^|d?SVzaP*VNU1 z{yFaMnH+#;*TF+`X}cP^Y!z)Cr)_D&qMnC(B43yB&4aycljJdGzHa|;ja&A$cz3Mk z-fZ#|<|i1LQsmn35NwLc%AUZ3(~fO|8~^3q#JrbY#}|k&`x}k9n1L%w09pPw{b6@;?X(b)4H6^8EyZg zkCocgDIvC?jk52wo{C4#y}j^r@Hin39z{;(Y_{+ivb3D9LkN3&ZGjheTZia3@9!o*xNAGsj zrkw8c&xR()Ll5szb|6@vjz;2*MPT3~epBG&z~J}MvAAho=ATEorWYP9t^3TDJMDL@ z=QRKPcT|3f#8D=wZJMSZMfU*Q1{&WkkFRUzl={XE;-lrMUKftM{jQ85oP*y8cQifk#oLDy(Z2C&Rqv{ zZ;jq{@Px#C^uO!i6~4(`2W3litOL1g;ic)2xRm!ie;mJ?C>b~tA(X8tOZ_&M-9d8{u{K3T!=euY_@?yx|a2P%<^z#wt z4UEJ+kMaJWqC@&~)8RB%Up8&J*XdGV9e16Qm$sLGo@3VOR(RXRvZukr>~o=4xgP=t zNhb%Yrdx8LO6;UVXG7`FO)vlFiv!b((2|>8Rse@jLN8Cy=9%cF<2!~1Ew6wZYk=URv$slum2@C3$J|dtm=O?+4-oZo2uh(9IB& zkI$}Fg#*=*FcBQ+L$pJ@-0f9l1Y5_U=R2 zGr|)$Wo3KQyHw7mo5IM7a*nlkY;4@{%*MtH;`pXBht9wH3v;=zN2Po@uYVhp7M>xJz9^1J(V`#Z3Wfn?zG(kf4^dQos8e;3;SZX9DAzUb}V{{ zGsVtsMF$ZtcGk!HdVHz!;8XSZ9*SOp7^cb9^dA#D%Qs?5#n-N;W~QpCp6OH7>wLsi zWK3x*PlmYSY?I@}*}mqAvwe*?+tI|?Dl0gYIhr`zvClo4Xg_yKob6~|1Mx~DlM)lq z^_V!@#|Nu|pRhMg=Y2Qy_N}b62BDqN*eN6@eKoSWjcw35XNX=|Vt$7_kUnOT-pht0 z&0#OQ0DNb&mu-P=a|OSvTW4CMcF)X7shK$r9IMrPX4=G@vo3nZ)zr?+t^(JRJJ02R zXGdBKOFoKBv!4*3FMrR~Ur5~WS70qTl)X{*Mf61)f19oA!C$@J7x%F*UPxTDGY;7o zWh|}WU+^b*%L8xNc`~-Ve&0b^3vv!}&Uo(J@iz>K6KDJ{gm=6?nR_72_$BwO7dw3d zz8Z=6hz!CvcQF3dcIc$ngMW1*v3?1}`g!rWF!v6~+~@~CZ|v{rCXo+%V~{2Om~+|` zi9OE5H}?c(Zyavok5w!5)C%utg^p(8=NyecR^q4IZUybli6Q-t_+wRoKkipm%Wr-W z*(ka$C*GKR{tL^;`+F`CAJDl`pK0Qat5(^e5{WHZQLOJ;k@;7x@`dy`txxH3&f$Ar zOELKb+3y&4C;T<{(WLJ3CBnN#H#dMcv3VIeXms;MKaXq-$HXJHM&pH=Breev>k}w8 zv5&|(r^G&fpYu<=*r7$gGqI0#tj$2NjA2yskM=lz!2NUGpZL<{pabgupVTH4(>}-JNIF0l6V=(;naz)H3_}yAz|rNbJ@Cp}(|aama_b`- z&*9HcS$WrguUE}(I=2kuUo!A?FJ6B#g<^LjI~ zOLA4xn0#cBImiH$kfD;xm!TUUy^L6ug?6T|LDe9Or0kyAv3~VTxs%1(B0Mj>`L|_> z8{~d?&fKL{$QFLeW0y?HvxZ1cnBKPD^bT#7_BI1E(W{E=+P<0?5#-kJ1Z>_Ps}N_$ z9(cxlALmr7BJ^$1ef$^g3!iWIRA1UgUi~mO-|!-%OFe@Aa~3ka=u+?X*!#WEM=xz) zqicgVY1(L-7~IPEz6g$3qp2ZutLO|1JNJ60h#akThG|B3Cp;pXHaoemu#5h4b~NGvX%k*Dk;=`&7rK1zI?z|`tosix-a3$J5TA4y90H9PZe=@&Boro z3Z;JZZbT0-^|8nJM)bdP(B{rT@g*oi4pWqudk8fTCl=xb;3jhk|2?90GQZ+C_pW4| zTzNHXYaVNCE^BQLYp#U7>?-y$ujYBeKkUm{6D57|VgH*v?@7jWI-d6t-(z{+2fX`# z$@5m8VT=dkj8Wk49^=xo#MIs#67aqHUB(vlMc((u-J#RGje2@dK{j&aIVX0{{XUny zMsRgHpWV~NExTbZ`<9cpM$3%Ii&@BvBJ15Zu)Mfd^3}x1i))ojUYz~2$i|Oo!;u%4 zpkFCxZbYCzJF??}ylu#i?EAg4<4|PBTa4Uh z?j0YE4@w62tE4x-25)>F-iVB(aZ=do$aQ`GHL>?V7@1D&vsqfETfE4~bO+8L)5*Ot zBE$9hMVnZ0@wt&$@t47`k!vLeDYN-LWK7P6mG$J=uw60Yb>!BlD~8{pA4ZPd9-G7C z{hzw!Sk7X|Nn#(`_FHqerT83$J=htFvBSXo>VYx(gu;5_%lGjExtd`c2~84V<)IqT1S~hy!H5YF0yQXWHnsvhU}- 
zb(}@=^TO58Ftr}qspHTDEeyv8GvaR#?9|zSgUcMd=Fvw1Gd?0{9;iZhXb*hlrC2gK zLH_$H*1dg6CAojHQ$ihkq!Y8gMGK>y`!XY&_Or)3E{v2YH*eoCW7GcAsJpP#DA_ZJ zyvwn&UxqdRf;#}TzjnX#t%OGdo1rNS8|H?iNzRLMn!gIp9QtzBZCdI!f(Tvz;tZUwczI(@N_l!x{BHhS7OIx*go91{<*JOz0R3SX~di_J_B8i zo5MY|g&Df{ShUsCFBSl%kGL=DUfOKzKQO-fbLRY~FI<(Uw%)$!LJxx0YYOqdr0YYz zSJjkAS4+a?2dCQ??17 zT9Txu0(gGEc#d=TVsaYKYXgZS>nr-w+Il{Q;q=*lKDl0uBN|x?%}g+@nX9Lo_8EyE zr`o$OfFtQKoxO1ROb=ZC$cM|_S)aG_zt->!{Jby!l4!1dTDMO}_asD+N0+ePZPWnE zH-_7`@3Z{h#GO0o=*C&2p0zu?njK%@yVU*ajSW9e>)RTov2EJv-p8r(!uDk~vS&EY zJji*3d;zM1`P<8kl`Wi2Mp-j!q+`p=S3!M-3BTr@fn4+P*B-vET<}8ec02DzvR>_F zoAb>^@%ffBjA5R9__6!04P2@1e*m*nzs1SBDW@-I4&AcaJ~QZX28W-IImlQ2bRTfz zmz?nUN#JfrpL~Wkf<^HDG6j~ij>FQ8&(t;X;8<`1IPN?N9KQpOdwg(+X8bq_rh!8^ zvG?;*ou38aKh%p|)Q>Yt)r`$RPjc$UR%TF(9sVL2lE0CB3jY4ek1|%`ujRbnu=S(a zpEFh#vOjfR)>MF=N&TDMS$CZ zuD)4o=K9fem#KWpX5&BVQUvyjO(5>bo+O6Up{mj1-y|6Yw9UlCW zg7MFMYttvW!);p^Pr=u-dJ~GgM|Q6s?!iZVNCQZabard+o9wj+A6ih$OCNj9S{wFu$8uZ`L)O%k%RioK^6w zm}mbm7dK>0G!ofmW+HPjy8R=D9s9bPb@0w1pF;=KIpk|U?0pW|L*G6Znll6S*11Mk zuJOW9#hGjj81)>UY(L1^tx=Gh$2ql*ikc|IDkjJl!G&-KjL zOn3gMyF_}-AAQd%cFsgZn~LKaVOZQDl55w%z7HK$G$lXsV~m>{%-!zb!mfK-(ZzhK z*f04R-1h*COZk5|^DN=_H<_pNtrJu7ifo8J=>I-5Lm?~Ph<&bfV7WT7=MJTyNP zv2)IC-sF?1e1}`0_jhgg)Ny|A)Qc?i!d zSmWr>vXZJn+;c=ubP>3T4t4lfYI|qCJ>wOUL1WX-AHWyTby|m@>+ilGZXMfYG47-m z{x>2Q#XVZ|Y~h_ezh}<{whkK-jy_3F*iFsWLgW1Sg1M%BkC5cP_^{-}waB)Dza|Ig z7X;s4^3>b*Z^co4q;a90kNMr#Sbm(?@R=9d@U?pIwM+oMZrN?a7P!!cZIAi~Hr;C= zc*=l9v@_O)iyD)ws&+YWy|SVkt{pC1Z+md1jA0Z|SLp{CVT<3Y-66d8*1?=Jw=ivD zKZ+y*@Q_k!^{<6i7teM4Z-OJt7(3BTVrTp7{ts)u1iZ#n$4z!1GTd7O&sNpgmZ^Oi ztUn(fCK&Vs&Dn8I@SZLcdMq&V-!YN9dgsUGartwCbJRK)4}Sc9+CJ_|>Er9*tv9aR zdKH=zaC`hF1}~5;m}v~F=WM&)G(-3ha(dY02i7_35c;xH^G)Y)m9kxkKl<;?d0Gm* zQ`5lf`KN}Zz}p182^-$94evbu4ghl{>mES%&9T>gn0-fue?6aM4*vh`bwuvycQTkk z>}VD?XEwIyKy<)C@G9)|t-<3qdaLBOSND49s7?zXTProJWrO8lgB?Py;|E_+utjHz z*kD=MTI;aEM%nt;8@3I`xg&AnUCB#%moyW!>8g{yrS%x78pqu8g1!}FPJ6J$a+%Zg zT+TkR(eshJRh(ZH4KXI}#xByHzJ~mWpL1`f@~xkl8!4%F{bcg#`h96pba=HNP3>l{ zjo)YN$+W}7%1btQemdoBE8jYaEo>q`pOC#*YT~nW?qL@1*~d3to89=M@QsDNIb|>> z+4|dLgZgS6mvK+0d;<=k_zc@!Du^S`!cV@AK}R!0?(z^6XC)|Gzp3 zj8CV)n2LY=(Y^4$jBot-FQ(1U!P}|A#DU=n@izY?FbQv|G;v!BOn$trL?lSKT&E9e@Qip*U1N)g&+Ske0s!d6)X1n zSlN@sUA0eKiu}mu&Y$J*pX%g8JEwEbu6R0=*f+(Ee-~UdQj5RsJu@L4N^>n155dj} z8EaH0qHFaTM%R7BbL!lu4%}cv3|Cu!yL|zt-Qj-5#W$J-tN}4 z47iH8PcJsc=$cKxJclJ&9W%kFcycB=#+t*7jj@L5;Cu!+zk&O%6nip`c{M5aEDt@$ zom+yr6_-tl?>omxC=N|%Bip9DMr|@^O_KnH|(rnq@v5 z=^V?mTa8fttw+DKvvvZt6QJeW3b!__HnR6G1~1Ov1NS#VGwQ1_3huJadSb4-#_(;W zaau+hW|C@I^unHL4)N+tl5EFoZFo^J})m8m@%~cbVBW zGjf(U?4s^%W?*?kJNbFL%z+*Fjb|+f-^=K?n0}Yhul84LDt?mk2G!)=#2I{76x=E% zx#`M(Qhm-%u`#mo22ZMHe;sz$w~2MNSXcD1#Lsyy{V0AqK1}GKmNQuC_z&^hrjvdl zn}>acb#K8R(e_6(p*?#Zb9{!mPwq-iFyQH@i!LrSx(wRn4`5$_Gs{?8vG3cF&T;M> z*MQGZ*qDr7ae#X&X5j-XK^Yvb{$hKQ6Nm8kYH+xL->R9uZ*0b$NAK!~9O{oe8h~7~ zkWbyVw3q8>>yzgjn}#=XHWV@9qd6lASHZtNq`n08JK8Ia#3trkGHp`4D8o4IAiC#n z^ic7yL!;1TxnE2*U4#eI_-Vync;P$bcHXDD{p>mJnV4`0zW_XO$KiDj|88fzL&$Hn ziGH0p)u6TgF#2vO{1d;_Eba{o+ux$2F8r#IXy@LDkNHtM@v-y11z$7CYdB!eKXb?p z4>Uex%*q3X2Lha1^IZKFtuU_3pJUFCjWA}_pkLQaA7r1u&Lh^uJAc&}t?0!~tZhB> zvBw+*4^4W- zc^%{)5@Nr*a!Mjyze0A5EF7OYbba5`)p(?vbSU0Z1Vr$?}=&M@Ok43&KCpDykj8;w&{W!SgbYVhGmYp zSmPFjeK@)56mW8-i<6n%V`{9gFqY^fk39QFefYVPeAsgK0eh~iEZ9;~^jp@h(3syu zeA^Z7y2K*gFw87HISk`I=fGeQpD4NJtxE@a81}kAYtLgBl?ORr-@${z zcgBx#_w!fZy4UO|2ocw zp{v$wd*XK)W5|vxa(Q3=8hBe7vXc2fg-#UX-9CQM8RGkSmTCN;B6DGJr(hA?g{F%~ z=U*6_jx4>7zmfN$>l}V9UQq)NsMXr-;@&(nQgUU&xNfzZYgT#8ty}f3Iln42^~loy zpw?}_NJ$OrTg&=d{BJR?)&jq`P1`oEeQWCRn(5@}m3DD3S 
zX!b|YSt)aq9bd}a=6uGOH4i*gf`{$w9dw8{^&uH;#_BqZ0m+jhxJ+tz_e}{>H?KjH|iof6ZlWz_Md7Ok}X; zdyD4Fd(mEq9Pd2ZEMvanO*Mm0&YNbCyDr{TJgDb<7r5j0#Ia;cZgZ)N+WugM3mGxOmM(3k!Hi=2C$!T&Dp zOoyh5!Cfgd)kIw9@wMP@twoTrZJhJ{qqJL1JMGt6*5grrr>;c}YjG>>v<_MiZ!LuP z>723F0(&O9SfvrH`3Ld_pJcc%TJrX%^m==L;%oBL60vXLU_o8QEBqF&zM%j4ZAm^6 z3n@OWaefTFZsMJPFQ40p9&?7Xm!D=YTj*m=0js&zZRgo+V`x|4wp-=4<1W``$*qrw zVOQLUZAU8@hj-&8y0xcfq}le4Wf_b>|H9G|?-*&}J)I+$kRC z?ftA%u*3G_uV@0+rktLzu0mdE9IdzP*ATjV=>8%b20gd3?V7=z#f)EUMt-LH^8T?q zF6tXko(amM!J>Br!yZfN)VH}${uXIfJ~ezcxivl-xcAAXqv6Bu1v z{yDzqEMY^{f0&5};N{w<{{C0;-Z=e4vSQtFS;0Q5ki0O?iX1Z^ZW>e&Hcl%zGyCht zS;n#Z(7(%l`&;z?RvE*<+9;XV=`6t5T4{bb(zi6$-fiJ<< z^m=k)%!O^P`z*Gz``NSZv(|s_euh0g+>`ZZGH&DO?s3(pXt7(?zgUrZza#6*JChTo z8`(J7+uh#?_gVW7yPuuQ{5;%w@QF`(daPxvuZY24KKf_n;`OFSPdD2R4=qRE%l{!h z1^$+f{W`yG8HS!+bQL@en4{D)u=fzKOLo_z1N6mjy*)IKezb>*u;B!wr*G!4?xL|m z>LWIAPO33Ei*;z@D0(9_=hc+r@N|~$Avhv`CUVrn>mU9Jzb5w-=8peiT2~LHf}8H@o=NGyNZhW-T9$s=eAay6swhYct+` z*2H>xYx4?g=%>ej0{*&z4qb@9Mze0W`9?ZfAYIn4zZNtnXKki7pnO>|a59`ayY1lF zBDP6m?1uk-Tm56-OUG3I@;BJ)$UHolL@ydI#y479gL{;$frKa&8u7LCQBEH=KUM$2 z7j#gR;nIPt!*!ejAAb#;+k6~&{B&RelOHEH31(_PczSU(I5#aX6^|4CLkk_7xfFbR zbRn1x=)&t`tUK?fmECKx_GtROqkDd})|zvwZ27ad_t$jy6Y|y>d%?r0>SCnBbGp8# zYYxWG?w0J@O8`smKn#cjJ^x~^nCbS%4n4u4xNPbU6wQ`4dW zliL4SmT1S1VE6T4a^l_ z*M{$BUgh}vLeZ!#mzx8b+l#r+BNQ#_)Vw{I>ZLC-hjji~Pmi0Sm|S3L#kTXqG&+Tz zUj07wB%dU-YwKpv?^~>oM=QVPeS!O4dqgpq!jt7&&sR^dp4vOddc&zwiy1k{iId7gznP($=uN{76n2yb< zcxS=J8Xc!y@dMGRgj|hH890HTd!upodfvPoF6s39YpIc+Z1(%YQbSdnZOWTi`)+ z_HZ>BTvgqIKK7=Wh<*h>9Pj2~&q+70$(4_Po$ceVh6fd(XBFcAGap`aT-Pc<*V3Fc zm+|RoGn#52Wh+2bK+qB;&!E0u$k5a)(Zv?3pq!-a@zrB*6 z)(P>3a!C-J-}|%AImsjhwBP4>Uf=KU_xt`auXARfeOY_$wbx#2?X}mIO%9qiwu05{ zIV zt~3VwWF>OUd*GpTXqvH4n*7L3W#eX=vpXGAkFWnPG`7j)j#s7-%Z>H}lY811-i3d` z@$cv#-QxosW_(D$+l3GL$&4km5Jt~=lKijWd(>YU4;Lv5cl)rl-RSg3HqK^m>U0D9 z5NoQ7yLs4Gj>n6Nm}@$_k~7JkYgwoEz%H1VNMD2Yr58k4dwSXX(rtU+UpS+E^VL@9 zV&I6Kk3~O%N1tNPhe-T4`<)M>qgdJreh@9f4|bo{H^HCmA(pYt12(Dd^~g3~q23aB zg65fIBNsnfPh6aql2b4wm%PFVdkoM zQ*$5QB;Fj0gORk?gzZ2)DdJt+FAfYXA`iKI!RSuR4Z}yV)p=MmT|O$8vjQ6lTSRd8ek}1 zh91Z>9(_&v8-6-P0r;OZc!~VWW#*+p@<+kY)N?M@8#BbHKK-y@y}8Y;2KV zN6lQG!W=NR>TR+y^e>mXve=pzqh{@>-5Tvh|3iUKaBu&ettW`T5J%ah)wcGec_f>X zkGp-0Os0GvIceM9=}E&!pjUqr=x+vVsQP;|a)I>gSpMPTdlR~*k!c*BCi_DH#tj6)e~rfd!Bzj*0(VzYPiQt^o6;ND$(BnKN_8lw{kej^7vbrRdK z$F=}JnoF56eQ@c+h5q_m>e>^i!`Ks`*L-IV$PSfHe>DfhZ!`x!O$lyp zh{3NvhIaMsL6;T|_l)r_XWacS_oPiC-vM-v3eGKRUCLsPwVsX z#aMljwdZE=5k#NvpLXnhDy==o+oRIh%BN;4_z;|;ukA5w!_k!|jzy|6wU4luFCCw8 zjKttbx?F!}0{p|#;gDTo&yq>!@Lx4E6qVk40($zok!4al_KG**cLne1p3BXN*0b_+ zC>>{o`mWJA;HiZ@s**jPP{=zfEZHGeZwMf_C$G4*v+NX&QBntrM^V{)`+cw8bL@B4 z*CJxknmpxtKh4Rn_e-7pdOy?2j~rZfg_B?J)13TzzuL*KcYHb2ZVvYUvc*n*y?@2Y zulH-5{Cc11AUA=Rnlmotq{-(` zb0}+Xbn1NGk1YKedbA5}W>y$p&nsV#O!LI#R%*-VmF{Y?7Titk4AXR}^UzBB2E;(68(tIJB1 zuX%DfN8jgK8`b|-&KT_`7Wz|j?EdP#?Q8H8WV-@<0W==|(jL9AZpr7%*5!+B*?P{R z9)7eDezXpM%bU^9;c2Gc+28D`cj7lbzutJ-mcH5EyWR)fdWTc*d%lDm7oYKE*X6Ru zlM^UAH8*Plej=WPge=CS47refWh1|L(AQhBciaM>$A|L)`Go!jzx6A8N%dFp{88h0 zouAYm-sT9gc$Qykh1$VkgrBJc+3hrd8xzWPl+)s|#yH-BwE`Tf+JPQT`1Gx^jv zcIRPi1+(#svGC*cb4HW%77zG6$j1Sz&VO!jv*k%0fjl;~4wyX|RfWHMn7Ny1O}PR) zhWy1!2L`){%f8F+8G~ML?fWtMQ#E;oPiSpjrO&E(8rTF=*2mGQ2Z8^)@D_iZm6$!A zb1mrj73smX7vQ(L8d{lCHZeCFTFLf|Nyvs)s5>zodz9q+GU%p;vgd%Kl?F#+J{L!y zf}^q%;Hb0@j+CFi7#vBrETf-&b&>0#7r(2EAXkZ3R5F%rz&KIwj7K%^8c*T(D(drr zlLlfg{tQ32R?^#`n>~D6@O@+^{A-mqkN0$IY{yp0){YD25i`lqeeBr#fmh@2;;w_( z{e5wl#xO2>BR8yuUjY^=$b zHPp#hbW(RdZ*0DWCz*WM6Ycu0>&e#%&33nYy`8Vr&F4S4yS@)%^94>e_0_UY8Tc}K z^1U6KFVoJ~%>4@{-^TRr`u-A|Z&SLdZ#(Ou$(Nhao$uAye0dorUxc{wCf}=>-T7XQ 
z&9^JldE($*nI2leE2=t?S5%|cYV#V`Mx~f)K^M8 zEY0b|?0LF;T6F5tfoSww&;|VDBY)Sg0I?`hPzZ z2rn!11Pm{mgMY&g^z|_Td)W|il12CoE_hl#lDMVD)|J9M_e~Tv2DhL z&tnW$`mDOr^kA+ZpQb6~Q~Fm(uf^BiL;g&Bi!<0a8bMmg(dZ_<2hmBIzek*4+7K?x z7_rv6>xk-@6jL|xiLgQUtj}MRZw$faVfY4X*?MdnF5f8NJsW;4TTT)>sccr;Z?9ZC zvD&J?;LcgE-|I>G;dP#qJFgoM*EyGb>7Muw`F^Bx23z|R*~$L8c=F99U(Nt;Cw!N< z&Hj!>gVb2ZEs%zhS``SPz{H_D@Sn~&(_*y84mCq^=B zeZ;zLy5&h4p_nH+U*)kLDBML}%1_b%Kl0vqcSc2%6aUK2b9dqPm^{CC@>Fr}gWy)1 zDr52A#bx~1;M$3tWhoUtIfr7)hL%~gUItEgY$D*a@7M@e@tkiAOqy9XJ2|0l2lG!f z<<7qgyXW7;cz6C)c;e?xM3&Gz<$gYMp21&j?MnBI?pVbBlfi$o@s(W=NVYa#1Khcs zaWd(QNB#h7Sp{o8W0)ve^v9mTthp0&bD#s{TQj$z%LRoNUovR(w{rLwniQ0@0qq1@ML8 z2SdOC=TZtM((b9WH|NgagT?rOFG0>beL&owJwKiQ@~Ri+ z{~kPT@eJ))&)o9d^Vt0H_dGtoWmVJsJ*yre|0DA=S3Sye{e0_%4fBUur<@FqJ4%pG z^9S7dF@8-&ANq!bC+)j)t@`ZYxAmmB`F9WT3|0R>NW63HH_E^5_1N&Gt9V+g9;5!p=YNPV&lbn@)c=0pP^U|`Vxlw5O_XF-v zd)myWA%T0+)W!;jgC;lVa$Hxwh}LZ#OoG zr$*^x6XjhR^3u`fuY7TCrIlFz7IAL8 zrHgZmEl>F#bos@WckaxigQjsdt$sIsT~A*> z{_gvDnW@sN$ zFwLgFwb*`kl$Yj~4YZ~hz8A9w1UNT&FS3O@PSV?sH%_i!P&~90VVwLi{)V;A*)-W~ ziT#;m?`3he7k`1iZI)p}UrL*?ecuS&EwX>(pWEz>JF~z20Jk6Q_zlS~Bxhl8GyWop z6Bk;WUn$SZ?c2_dSEBz*>l zCcJ+vF%Y27sbg*WoE_)TC-b{m^1SQ=4t*z($Cfhp$CUvBv1U3 z|5eU7xA3m@)RiNi=YL;X_!()U1^Jtb7Fa{NX@Sl7si#xk9rubaFz$1mai4vhaUVf_ z%dZHABGavU*75q%Q`pysFNAHaCz^7xWNc)|(P@MoH&qQo>8Sq7NIMgwKQMZki{>?D-~~fcCogiI5Go zPvnm|ud^DSqqE{=(3yD73*bv*5B_3* z>n%syzWm7kQ-7y2YUjhAc5aWU=Vam0sYmOr^0HQT@B7EXEZF+uOa3eWN|`y(lIv@{ z65MZsCfm>{r;)BZI=jc`AJHlAvd(OA{L4kFqwxP0P9vOG7c5($xewMN=gWTzJorBc zme@J&XO4T2N#$SuEOnSMr+(3jU~%Cn`%84nOTf|Je61n&4tH0W^VQ=h^A>W3VyJ4Y z{C{%&HtadvAwQhYk?&rCr%Arsj*o;(GqH1c0{y!c_>e36z@syNDdA$^QQKLZ3lXhO zarnnP&hmX;+d23os_m?pwqwV>jJ6~F(D7ZsBL1-YIQ(Jz?}3GJZvPzqFi*Peaq#2% zR^%Wf)Cxbx;Z>IcV_*7|PrDi06U^s%*gZbCZT4lL>wO@`AhdF8_GWB@{UZE(VWMR zPx|)1GJnM*`S0WFWdjqv$!>|f%l?=(vaf7*_}Z6@?P1!mg3j+jE4rgmwn)X~{%gIvuQMwA z2Jh(>el4^mn@+no)%5kZ;6~>V`r<}&_4~*Lsrb3ywwQA_#48H;>@q_%2kA$?a6(r^FQiS zd|nTFf)_mjU16K<5M}Sbs7FTjf5{59#mLAX6Wi)$?EkTT8`NXR&-Hn!ZvgcTP*LXb3JoF=eXNJ8m^Q!g0=_YLjX)X31 z%&S(`87A#!(pufLU)yO*Nn`IXyOK2dIGFl=Y^PmH8ouXtTGkYkZvkoe5ZGzNTvR_UAg$HjZ+X?aV5&(wpR_7B z?O8i*CTSUN+I7=RzH>=KPPFTL&rZuFjWh0cTGMos?<~?R(%vA3<)#}))SnBz`EN@Z zL2S5JvpuKQXD_mL<=j{Bg8V7YOPqcCdTc5sR&e@M_9)IIZ#jBICVL$Ru4E16H05r* zEXlrEJSD5l=Be{Fr99(#*7B6x(9Bc1!FHaqEk}6D@6=EHu5o_Sn4z96sEJAC><=>HIU*pK|yh}$^(lAb&>wSuP55&~F4c;mJdb>9m`UL-PyS#XbG$F#BHcv*K9m@EPDNQ4#0J@Shyn zfvjD${NvrZnj6W)W;>Lwy}}f8PFr^fyag<)NPh>q`!+rrvKg>{_1a;ct>7a9-ZYji z_~@_X>`t+7bjNMp?>BJw-ZG_a$-&l38<9u2TOlcRdH^{6{H~(TN}qRG6?rPTM=q1~ zXD@m7TB+083z2#8OXy-POUG9H3gr`l?+Eq-H%&fdtka3TV8t)uZTtt`;duqH3Pxf$ zEl`}N?a)NyAg}RxQM;YkZ)GQ%0UUx==fNaz>RsR9-9NCl7nUUl>-&J}#{9P#@!OU~PkkG$we>SpALm z#d`%iGH*BRz46c+_9pNkek(Zt;M6Nv*W=?;o8mS5T`J!Sec5*{LR&H8^#L&`G+vQU zW`>Agu{RTWuF^ZUqZQm8LzAZ5m~a>K;vL3nw^Q~e=7!-T4oy~b57Zd?5=)axi}(ui zws#Dg9+}TR9^dcar@n6G+g`rC^4_rN#qfmA>74)O%-pB!jUT{vz8pQZbS1iX0k-G2 zwCA2`{Bjg4UYZ}OH|s0b zltUBXli+A1=f4iAy$|kq-72u^KLn?T$lp4_>S{XM+Gy4$eBl=Jy)B)1czjnKvlah- zPyzmP!>6|agWYy&ciXMBEkFL7X7TMD<}U5CFjFE9cgRfY(aTkd7?Yi;MwVX9NdS&rlwH&m`@8;=hcwVxWn>4#WRk zJ}e$w#d)>9{MR~k*Xsx1!S@JnvlyRA*2YWm1@l{}b%9SSUymowUu0q+R10ja=fgAe z@fE;MCx3+2ONj9X9c57V(|0`~tuvBWrnpkaE!!|!p4Nnaa*Ag6hO*!%W)bJw8rZKMK>stTd57=c} z^0Yo^E`37&#c6gsx?B1p@*}@3`8+IpBtKnpJ#FAuP@IRPcb~7XSsMj z``7qf8=i0Yy?8tAX`W_%HfVZjpLv=IFV{RR|7_SatqCeqN12gY4}h<{;BSMMewDUOUyzl~f^SzIvj&{ii*L7duK~p6YpN>z z?tS(ekPrX+&#nQ2BSW@UU{c+;QFq@pKzS_s@I_&@*8m&VZ%`j&JXNp?AF*qI=1?Vd z%Qo-waQ`tpya&hZ->&ml+?;y^xuq0aEybPYti3Zrw*~yNGLDe4lhUqQ^MSYF@m?cw#pSqbw{ zc>1EeKHrznA>H73yk6hM>nA}IXTc+X#UA#9=!d`1UN&R$?!CeZvIS3lf8OkI!yoyt 
zd_QsW{oTp;S0`Tv^~LrhcFj_~LG;O2oicj=9q%Kl|KCmhso@>`SN+m!W9x~!{n-Rx8ZeM$feSebf#cD-Q@Sj0kd*ZEf z)(QQ|yc;s~8eQVqw8zd7gXPuP3Y}>I$I_rurypla|$#SE$j*NS~fiv%% z^B)r~W&UdoZNazbudFHQ@D{zNJL^TWbc7Mdt`}A0(R$%weuv5P3OYh4X1xf*w`3D= z*8=GVDMmNwfh|_nyu^WznCuIptOtE{gG2Pq)eQurbc6G$U+?;c&7~-rbwPd@SvJpk z*47PdKa68_1N;}7Sc@5(7IcI36R!(SUA_5NU!E^mf-&pD$92c93(Awhy6{C|wbun3 z)_+i+)&;>TylY*k2G7zBI^lcDTj}6N|K0UL_-Mr!%%zD~ydOaJG%`N2S}`(yp6`1N z#lF;zAoz_yOQMZd){6s_kEJWS4tpJV+o?l#fKKV+>_wCJs)hpl|u%eU|c!=@9fx_j+VA9{~vZ~Qm0#>?iZaeNruR1t$oV-jQy5l*)8treLu z)&|sBd>->|7WNd*2Rink&%5Vc^N@4Z9eYo)iaz0Pvz~j1tZ;3dwM({!VZ?+SV?9;) z-h>|Y#y?ycu=utY(+e=?#}aEcvyOO24kzUEP9_y zzRNwM>vC4C=$uC$$<&MBsnd~r113J+;JWOiSH7N&Ej@oK@vS+tGWXNy@SViXbob0P zhJP`5(>xu;8Y`Vp@fZy{nfV=zKsm;1M2F^v-kGBNh+X z1AKOwQQdWIIYC`7ICZsRhnS7dB0TLyXAOa)Y;eTclDcekqP^&EL#+QOOTK zzq0k6V7{PBYa+00Iai#twG-*TtEAn`;`hzsV*e>W~-Y1se%9k0(R z%j(qrzHsB>$)%O^fm?PdJAHI7S_$%P9OEi_DM0QQ?fV&5|BBl>)rQ~auS;VruoJQG z>(b$9hj!A!XBeD}?r23<8OvUoTRuQMGeaNLk?lz%z8P^KR4=rj?it(hb@o@z9rP`P*NfgXB2~KaovUuzd=R9D(2I*^@ubfpc_tpvjNz0l8wgcZ~7j+wHaX zRr`L}uoyn8GB&=jmzcb1;S=$Z{p>IH#z{|G9h|Wj|7kV$aTjMj-+$xOFa8wEo7CPv zs8_bx&$}c0M1D)U$5S?N@yYm1?ydF>#6M|3xD$T)DRxTjIZ00_Wvs_CCJifBbj}(1 z&9!@p*`MQyubT^BDxPEQi!gsD!8f1&b7DQp`0yX&XI>E)TvviW&pF^c5^Y@Tw>E!_ zEwau}8Sb##*QWK&=rAeJnQ%G=o|KA?GAjH}Xp=qOhl$I$X?8_$?Pz#MKI_{~=Knt! zM+H_fIy`zZx$vFA(;bm{6yp6LDe!IfM zXx67%`mFUxzWm~~R|>wE{s=zqd6*_Ys3@_&UIga7^rsjY1;1dd9gsR*@Rt$aK7uTq zh7WDT*Et$#u zb&ly(r)Xz8zP0noXPsn?kQ}mnH99o3u5W(Mh7==@v>=fY_yOUGU^y6ld%s)x0hbp@SOb7vd+G_k9vaG7Ygv_lCfjI6_;SA5c zx22D`@3xaie88G83^>n#{|#r4Il$OAR!y#0=JBoB5SJK!e0Y5Ww*Pu!?Y*vg!Ew># z6^lAg8S%}vFIz+FUgo}(myv^a&$jkSZ=3L0boj{fYjWT9kF0CuPO=SF!kWja=R;t7 z!n_X*>$xjqSHqXR1H$d3ZyDfSCZEp6fOpv*J=vG(Zoj3zq18$IEqr?*d$E(tgKKvs zCDxq+USBRP&z%g<$oCE`Z$XYzIo0L1bMqL#Jq z*Z$z)j4saoQeTH}VmQ-Es1uHUO?lwnD_!E0A)dOI!P#%P<0Z-+FCNBi0=U$EjN69b zImPR#J7vW+oiCGbx9)<0hrcYilf26`K8>l~Wf~uqeVMyseq#-)`=@1>d7Uzq-eKk} z#^iCoF?e~Dyyz%j*j6BWR%#Y>HU%420=|0K4I+cAu2{RlpJQy*5pd_)r@vVnw@&oo zub7hCN_vm3*khZ%A02%RH0J7eEyM}?8@wzZ{Z8-s@Xt(eqk457ZU;7G|AqE`&T{mz zM)GZCJ(8}Mbh z&z7DMTbFFl$dHMB`=UFfWxKqRzOY}tpqPC~*`Buy#Li4Tf>E~TskE(rwW0fdjk@(N z8^hVy7>c9aHiidm8^hV%>q77S$27ycKl+YsW5{PMknLG*1bJ5V-hXuJ>aF88QpVkX zOip|#O>hKbYz%*A5kAiP z-#p6RNA#2Tc(}H}wZGz~T;dBXTZ!B&`bakW8?rG=H%JbTgZ{K<5y6)8>k+dL%T_2` zu6@>CdwR*?bn>XIy*FZ)O(U&$yY7B7Ww1+VUF$+mbZNGFFY>)JwtA01i?#2u??}CU zX?4+i(V~USL3f=8fV+rMIuc;W9gN_zx8z}PSTOY9MbkDSOJ@!G#`k1%!qA8L9#&&}ScC1M1KY!3Y!B~a zJ7&M^+25}oSs#y%v9xqi?z8B|i35Xqr+bIx3>#?WDPDo>5L>u2wFw)F$AV{-$hM?#uo`buHK-o}s>vJi)wuoJl#uJ3Qwj z{IEKZ$xS=h{|3_5%T{8}K-&5cTjhryPu;VnOLK>N6LX#|Jw4ahAgWHU$iN1nvlYeI zEM#xcepxKry|m|*A{&RO^mGZhPAc=>v43_Hks zJUGsG^O}O|U>Kk^AmXD)F4SL76QNsJfysHh{9x||Qmo=m=@%Ck%6R0aH z7_j%gO4}RgkH+;G`m%;H`!D(x=Se)FqxjUs`rc;(^GI;_31_KFiGSq!-!H;efWEl7 zl6Z6_^fAc3$JMLgE!f)iex=^Ajq3gDdOy>CU&y=cc1KqhaE9f@<^`+blH7OEW&}E% zNnGJMW?$D6-a&cYYZcGkv)VTWR@?ybJFu)g;xFW4yPDeW%M8OK>r`InZF--z@rSLr zs=~k5&@Q%^TJ8|HuHSlYmVB4nbJ^djo?&@fIMz{huAeg-e!ta~USj37jknJ{4k1Q$ zq_QQi+3K9`p2u;|;8S8>Bl?g6`hs5Yd`(dUQDgx0Cb<#8Pl0{6}}(n=h<1o z(9{jp4SwwDmvKk=^WK!YXc}?lusOW&?RyJzFZhoeFaO~F!o1%-ePaZBr1r49qpe-* ziHmp|xNRbyvc}KvNu3)wBltqLXY^d`?>XDijV-zi6{@tEYHe7znS)AGKhRYvZ(HE{i0hj722d-~AaFwu+;Q_AvIBSE&y9-n2 z-Er$O8NbZ!XB#;8@ZW`TF>T3q9}DAo)VIm&CvS3iCf_;lvNjU^@Z)sL-@$zsMUu(W zud?<%0v-<1uhu2l;;m72(|p#ZF)?M&>Qi<$d+l!77nvuqWiO4hHhqb*+wn(EdH;^J z+IyM-E+gK8pBx3?JKg_QpG~#pqnoUf@}@f%~M1 zRX7fs5FVQ8b6%hNzjT87pN^?NK;8M&+fKcEpD73xY*=mTUhLFePu-C%E$qQ`PHP1Q zKV{wd5&`bg49*MPWUUZQv{A>mZjL^DU@7qdD!1mQ^hy^YHQy(=E#9!=)`B$kFt{MHnHC_jIoGT 
zTA?U@UGR*gX2yM#8TV1)Ujg@a#{IyfR){iPD&wAWoJ;%cN$2ecmTK={eDyW=tolim z(Q_;F`m4NG8~U~CC-81)KO)+<>c{gxQXI_d9@B}zQ09h*8fI!Nmss_$ui*SPPx0Jo zCU1)wBZJq2(P=tA)QX?PclC`i-+mx!#=MUIvXwu^^T+(6HP$Y7eW+#r%jYJ8IcQyw zSHFIwwQCQs_^?gK@mo%xnzt3d5V+yq!q7{P-pHP0#ruqX_V%@ff&EXI|4+lu%3k{) zg*&=_Z2nh)-#HCcg`r5c8?YGCYA5_0?sV@vv*!?zT9y|LOm`!@zu`rnQJC2jydTiS!e-&fEn8f15q!%4q zPjr@#^)Z39elRg>e4HJ%*T-bWxXCxRv+w%YRP&>L*T;>ITDzL~t!KSG4_is|O@Gf7 zzi9GZ)p?|v@&DxNT$d(8Pu&^H*s!jl0GizY7Cr?07T*+o*zkf&;03J9t+U`I7j1@|A7B0Z+P3pVAn3?t7)0hno@PHY~PWyfW`OXjMkcaX6THA&Ly~Q zU*ohr`v`M{vM=&qFyO~Et%);`zv6#iSR{`ed;ZrOW}b7$6SyA$PZ(r{*wf;C9sV1{ zfO(6#LM)hlk3zf6?C)5}0<*z~kr|L(+1EDv9f~d7iY}MH91bw=khPLL@YjvN0so!S zjBJ_SZtvO8fj)Biu6o;<1M>NAM*dt)Tpp`>&h72Q`AYw#wJc+F@OF#uA(ds`t_K$7 z4aeJggHGOJ^1>&JLfm7m?^)FEeWM>2jV0Ue-V28 z-C%PUQGf7=ZqsjIocsm13+D7+=nnVQX&PJPRqnnF;`1Ho&>n6!Kg%=ca4RtByXeWK zB`JshJ@57oU@0bV3HS*z-m0tElTgtHohh&C(_AZY=B?`0 zJZop3>H8O%x6fZ3eRweanu}bdwk_tK%GMefX5SuhVF3QwmNlL4Yeh@j(M8Ik=L%>y zfbM}$DEbfgbyK#Ke(R2m4Ei*$?B-m-wVpI$((EdVMmNRNp5Bva)8qqYlMj3Z(P2$L zcrq`T|M4B?1%ihy>XJ^A1bngSs&_Q$``NFKO-D~C(s`b~>Ede>q2UVXQtdR+uj}Z8 z#-fV(yp8|Jj2l`xt6e8Q8|DddXp5GUrPn}s40!q$ zc>JTP!x>whBZHr<-O`(nKH%`tXJh$j6Jw$B+kjCpRP)>m4zQyqSa53~HUoQit1XZS^SET;kSZwWcQ6#@ajeUzFCFLBfXgU z7Hfm3g?8l+CwywI$9{{>Qv8j9$Q#7oSm{TG^42R3ob&~)ACYay!N^0c-zj){PfE+V ztnE+lNkFzhme9FPoiESuEIf=%Q?wu5sBx&ZZ@lK?cw!lX|6*{?+Fj%aCf4d@+~;TR zam(bH#oUJ0D>UCEw~Nj>EAyKCVU<7Nlo5aZBm7l$W^*53Hnf+|_-l=qEkJWu{6Xt- z5Lh+#f~%Tx5p+-8A(WwYM0AmWya8P9dfiJ-pbp7Q#MZ3af^Xf%Giw{fCrmlo7Co&o z^fdSOz34GtN}PN9I{eN?dKw!^Ab0^}!CmI*NQ@~v%`1bKF zpLr=-!Cwq~2PzJMH^Hks!n5)T$8UmT)#LY!s`G1pgJX>icc2*@%XTIlzYBaej+0|> z4DSPfmEgElIJS<1W8i56$46Jr0SB|e#rf#4;HbvN(R9{b4>SM`zjnMlYQybs-|^VW zcvy_b@yhm=kL)>>k3VlE=iExqtZt|xcElpDe`o9*Igy`oblbpP@=HF1Uvj7MOSbDo zwtcnR9_CByY+mBsmAuHR-&8ZI6M6RmYsHJ_M$lI zLhqrszsQ(!&bQECW2et#J^2dbK4%s38gphVW09WBx;>s}B5SUmL+mqHmm??qd4gi2 zj1Gq?*-4?GtiR^8|_D-_|Zx_qV% zue3J1Fl>W{0^hOQU+=Wvl3}-Rp0wWv+@pQLuDmbV?XRYNot4zuwjNyREa~NHd&y0q z!?Y2pWS#Q)D;5KPg!cUl&aNB1sCWC3yKNjSM)uh5yn9*W4wC<8gSqeP4+Wt^&~3yy zzg7Ya@?rn`UFcIw;2{=qD@psa{#WWhXH)q92LGFUmvnL+s`tC_Cbpby1F_|Nm3eR;^I#V9pmz-G_Ivu^3EyOmQ;VJso;s07a>ocyso{L^ zB%eLusSO&Ejcyot>fC)%FFcLIH{Hck>c92IlZ%^K>;vrR)4qjYv)VULr~Nfk_?P4=C8(v_>3>~gT>4T`0+#V=E|Yc18dWW*UBB*!LI4}{K|gy7kFDO zx`6EJ_FXG!;U6(J%H!v3N9{o3O=9CZG}GEAIml1^F15Xy{$(ROY0b%~UU<9oJMq{c z@@cDCbBOU(H!CE$)X5#Cc&wkS* zhQ1VcXmofy{k!l^>~_Af9o$h@^jGXuJAf;jwfz;^pX;onTEoPvmr%A?vKH;vA{RAh z*fz-Rd@Yo#_z!HU`X@Hol9Hai58vCH$VNnyfhfX zr`x>Gfyd(Ho zr-$#I+*>(^XM{e*=KH#n?^d4d6Q%r#IKGPcAw0$A)4qi2yM^avd{=A)!TLL$kKrAE z&J@*egHh6{)TkD_ifKS4YID6CY_g&8S%;S{b;e5~Id*8ZW zlbyCFw$`#wZMi}F0qN3}9Jx_47yNAB65vpbk9p90 zr6b2k1{BRd3QW>h7V(^e9D@#DA=yp-qdH&RIz~L1vv$-mk~Eczcgjk(+0Oho@k5x) zbD7K1AB?TW6Q6@!U|$5A#aSMIyjzcW_toIJx#p^4cy~2z*m}Ul;Qk`UU_R$RFGS9Z zGxj>k?y>Y$^+r_Cpu)H-Aydk<2&{PALX>s*!Wa+2+3rvslG|A@|4>UCs3@ZQu%r<0B+ zc@cUrI^Cg%8`H9Y&#{m7tJf*6YW9U+Ot))4fo_+m{ndfqg9Oz5B;Rm-(1g{v`?-yF=v*kZ(AIC zlWe^NdMkD8jvn62up6rHL4H~XWYds*Et^Iuw!qyZ(0`D#)J|U=Lo_vqJ)knj7Ac)Y z`O5pTMOu7oC(e()e;%%90Za7-_@_GhGkeLHCT(2x2Lt$?roC3h*<`P&qZT?Ug9h{- z0Cvtw?K%}3MkD=_J!U^~GaKovg}&Y4O^Db3r-<2tUT*GG(zuK|5Z$CZZ%Qe<5t>@f zGeLF`>JXk~lhB;bppL!XcJV-DsRNkTG8a4AnTy0#$PvG{lB=NW-uqo{ z40P#Jhp!!g+;pr?B^_3K@1k>LuV>wTw08FCMzaRB&$8;L!xIl7C%5~pk1~+QTWhLk zWIM94_(-6dyMEw_`F(g|Jm*^GPE9iYdlz7jaL*GaVgFk<+~$dYV@``FO0R9BE!on3 zyj*;-Ae6;fRb-oRJ9k3lcH1;k!-{Jvn?_fK75W)CoJGDYV*D6g4BRzAKf4`WEgihB z=YP7-*Ks$rcPi~5gRI>hLoc;Mbg!DRT|W?ObD0ILsGe=mvr9Aot2UPc>QI}-_&4LRct`d7<*NvF@qCq<>mMq2YEpESd7%2=acL8g;@!u;c`6S4OxXYvvA z*_ELdA^V9JONKgfd)&G=;Pa*3a-JP~bsS@1>Y$wJkbdgQ^Y5%=T}6+Mr4iYS3y`(? 
z>X3)8{c5+KiVfkoJib;ueqk_lkHh0v@r;G@HQ*FHiZxJ0`_Y<#&~Y+!Jc_j>g*7FW z^?Wq8fr0g`<xFwqv1mM{BpBM_XH21mbYF7E5^$KTnC$Sn zcINtCd<8^{@^^x!&3Sf+9g{GVpgPd*KTi zqk(1}-r^nAA^LoXwIG8w^nWXDsD5|9`rm0ow$unbz*~Ff|o}xwh*w}K!=nnDx3uq(bIDN2? z_oL9m!OMP0Kk!#tNnG6;?)I{X%K{H_?S5^Han`DWPQ`1AN%6kBO6R!}t&DI}sJl?tr=4lBsRVH(5rpL4MeEO=g+rgLS+C%*M zwk?@Z^L04=j%{}_bQ3#I{O0u>Hk@{{+T?z3P=inBTrW^LuQkKaO!gpVyoYAV*TCxkGF(yij{2 z`Gb)ckXJO9b3B&TC(c6?CzZK8QF1f$GDzPwpS50h!ejR8TpMT7mk={OfjgmcJPCED z5+l3nUGK2>srLi+)Fiv#n`zbmo_x}Ot9KP2$-Fn?w9{vJh7R*u(@&q_8+y8V4j6io zc@7%7)I0-2ziXZeL!U9vAw&OQp2LQImwT0Trt!XA>A9x-XM?8Q*QWOYLx<^opWX)z z{g8b3(U;R_1cv@W@Ay%kK7;!79?<&`Q`db%^ge9p9}bhAXywTt5}C`$g85ynhlz|S z^eaWfBT;eHPl-ib$*N&L>$1_Jn{{emnS?k)7g)?Z& z^}D*3yzRh}g$*}C`CHJ94*3DVaew6r%TX_oZWYT4C5pHBRm+UhQxZQqg zu6~(5s!SDolv_Ci$C_zuDLO|tmp1#WrwLr$!g%cFod4alW%HNhjy=3rGN;_Rt~%7l zVA_y=J(#vPfrB>sR>Jt|`xM&HnatlOJ z`UzjySIrY1VPd~$@}3C}W8380idSCxb|Gs-h313&zB1wGv3Uyml=(b3?*z`sgo029 zJY^?vmIG%3{Ny^|?2w+|!igV2iQv?JHqQjny}t2|ZtK|hd+@{e6~6^m?gp$7-_p94 zGkFcR{CLJN9(#OGUithSyeyvaiS_$2c8?fY>7-84vuy6nI2M1#zJ{DHS`y8~+Sk%VXV}+X$RJLe z{LTJ^AJ2HLNh8D94$YZ7+03;6qu7SFESl1>Nb?XL+PBR6++SvJZ)|AxK1U;XaAfWW zT-{kR_j>5F#XBc~3ldku52+vg$ww^J7}3kp}@ z|GmZ0wMVljfv)|kbnSqAcG;Ua8$YoK>%Z(TTiBDB;i*k~9sVZ&Ypr4DP>1%WBg9lj zK1pi7s9>}9(6mmrKWgux%^^?9m)RegSFqX7KAI^r&$C(OvhdL@q@1x6)3-L{1=*l; zZ)GhezhYywE)h*AuG;9Z4_tSiZ`+6tJyj5z=c&TiA)%uPABQ=fq@59OQeEUpPe^Im zz@ReS#7hmPeJF%p^M6L`TE*%7|)>GQtobQrMw-tU{rWN%rMaa^RB;_4B^4q3a5^HqixbYj0n7G_+Hv*#l-A+90}BOe^RTS$A~! zN0e=)ZTU{5BSQp9f0#69U$ZxVKo1emiq-^9xVO2N`YuTf)Lm9!)weTGWRulAxg1|L z(U1*mvVrvw`*Moe*Mbgr@KW1O)D8}0E2@PbwF7S+x?g0%tPu00Uj1`rU9-;?gP)tg z&s5^ubdMdlk$m|B`tz3NOibQV(FizG+#h7_LNgwp44TebK`f8dj!bmPpYd%NWur9% zkV!_u6O&jgl36PTbo+Jm;!icmCHQ<8xn!P^OP;lLkkvEVNPpu7{D{TR9-k$f)C1Wk zoyXk^*nksj8;~{d7s!88PsPYvImlWkzAfL8)_EEmTgEs;V`Iw}y~f4^&Gq=FECkjf z`1d8qwgZfe@pt;`Kci~1hxCiaePR>ZGn# ze#kL}C*#Z0gl#Ex33IbpvOD`$cUR45?Av?tqyMOz`t^yo3k-L{Dj$#*k)%-!f`u|mBGQa5#6UtJy~T_bN55nu6`!I z@;H4fEo5KYX(NfaSWVQUJvZ@#2Mzq@%*rt|2d%&hdfV`iEPctW??KA-wS5_zSB-V1 z>?s-0h($lF8_ub}m%NgL7@JKd?cQ_hzvHBpupXMUADvTwx0BX_-Or>wdrtkGZW=m^ zG1L4y zz`R)m4s9JEIeaF0TY>kh)Dz*mY;yVJ-D7yGe8p`3qxLQkj;U)sG*HENjfHgk5!4~w zHMZWdq*>ghDHu%q&_Nrv)Mj*j+1>iu-2Mo?m3G_3li_(u;j5@aW##YIUm0U-|FU8U zdQHqc7e9hE(}C5{3oshFXV*FPT8{%B))#nsAk)e-^;1WG_4~o4=8>20HeS3PZ(-k% zAHhGdZ|go9A8mb!p9lO!eFKoohhpO$22UCe?@EM+jey^d3?1DTN37t1#DpBc{$?C} z$-YDEF#Jh4>cyXs5f=cz_CU0^xf?ldE4CTk46faDkskUr@*vw zh&4rW=44(eCJ2x zS6yyBZR{mThJBv9ymRSS9{q~d7q4*o()9(#KyZ8Ws|ugteiHdd{ggY*g!k8JQ+VHs z3>5tuXZ3gz57pcy)XW$}?kZU;S`baV>&4LRMfeHK$0lU2x2pdG`lxYK>~zt+@D|(84EmX2W&H7k zdXUe*OFUEf>r3ZT(QkhW?tWS`;sAT z)e)bs{_G`I{m*KK_pIZCs?M$Qv5r$lK3=zcEBbH>d&&nc85~N2FE*nOG_rU8DEKsX zSDww<^Tr0NIP1;qck5ky=Cgoh6!>ihMtmDrs1CD^-^4mTp5N=%3c-cz&S;rd=L?5Dh4ZjoT`OK~;oFVx z1U@G<@Z|O-J-*7~;W_x1%w@l-n7aJ04-j^x=H&%E>ybx*^rkAKkIl_w{^l9n^AeRSkZfz&MsUd}Ik{K=DPb z#TPZi9twAZ?hI#C748Ke9q0+4J`)J-1uqAS2Zi>6o6dLRLVIhh7415MOC5W_(Ocl? 
zZE*BXjVD&>fb^CGXt*Pc>rhJ&yC{Jq0gOpZ|n1 z{e_f0F`W*APFb(0yIWtcN$Z54bg@pqY6Z`!e~>zY_!VhwHhgq~H%~UsGI%Wat{Ywz z8Sl+&eblCbhoEEPp-y>Wn{SuN{~b@>YUt18AMfO^@#Ho0UHf{QeTlaJt$d)6M=fyQ z>hsjE=lj9y!4vXg8!(g%v!;yXnFep#ag!}~{D!CO)8YZe3DB=Mcz8DBnCxjtYw|4s zhxUHo#gv;GM?7BVtWG56jBzX+l4ni(%rCY7rqljbwa7t=9**aq?$P`I*a-w^Bv!}^=9NI`H4!dQT^J#NTiLX zBhgKp1PlF6pKPC@DSIRsN>;2;N4*B!I2x|*1ZQ0l}zy%b&RBr zD;#+HgH`%u6Wt7bjNb*zG%_?)OVQgoqRv9?|0%@Ji+&Ob*T8*;{rM}r3cUUKD;%U9jh&x;8I6aD+d1o4o42cf^`1}1 z*u5Wc_qZ6h`g5VN>aSQM+G#`U_*Q7)a^4NTzlt0L?|cqk9w1F~P~+U{6Th%^%O8*~ z-Lfx#@KDYjkN)h}LCR@74$^1U>(*J#IS;L{t-uvOtp)-kYs)z{^8IKLY zXLS{DU+4nh5}umWKhponf5C72q4YnlSJ1~^@;2opZ&y*)9#>o5ewKWiOMYBCF$^@M*JTfS{uaK_%YE&S>_tpirQ_z8P+ zQ~pZb*y%T8r`o48A>ZY{`gR}BMr1eH`cgb8<+H&tu|gYc8-uOyrBDa7yD5szM)^v; zR^A;>dF`q7rH^W40C(*>y7E%`v5@{0As1nP*yhMX3BeWqI|pClKi5hO46#P!PvYJd z=+i$WxR!l}2eJ#2>!+_MeL*tzbY$TYr#@pwSGUzx+*_!KYrfO$c9e*FuN3w)aQ)BEMwns z#-Mga`XH}uPvpC;_oW&=^Y_c*)?qW<7@08J@lTg6IyJnXK19wJA4si>oNsj*x#$yY z8;|n41m6?td20!4{Wc4k$X@IBVDBsX1fTm^!7c>$jh&Q_blmZibf4eoxaq~#zQxRu z&MoLClpW3eb)D$Wx_dJk9T19YjlH@hZ??B{nqolsXW1Rw%tl{o6c`$QD%AxakH&wN64o5^)psC9437eT|b7l;jdFFueS2AN#^yp4ivy+qq8!O;jg9+V7^gf z^cnYs@%|w1hBmQXJOY0Z?=f*}z|YOJv79}Lo1snF(`sk3muu)SIsEUy(M1!`rtU4rfzg6TJVG5|ezAT$e&}FiQjjt8hp*(D|4#gbFYy1c zw_o+K>u}$7sn5Q1ul4ueDxwcRM&0h3a=Ksu_P*=v7Sj8}@8_+v*HBLD>{zGXzU%Ba zNblcP-FGcH#!lU9EtyC^1h>|cUr}cUba6gUL(`l=D_{(fBc{(JjlImx6BxsY`1_Eg z5$TD+Wlelf&kWkWIoZKwTh02A6n>xl!j0z0Suyo(MNW7Qf1tN+&|TNuHxK`fwRzt} z-v02_#@t07%hBHG(Ur&Ayr)plE~ienKE+6VpLsHxI*%;v?Uxs=8OK~X7FYYBJ;wE zj&>ZmqQ=>?;4V|_OAalpnV~)#c>}#zvB%oT_gJ|@8+;S&P5f8?Bp2TX9yGo|#`^|f z=%#()%dsU}JSxDKdM>oR`suOXCf8A+II8!@q z@YQ!2<0jJkn-AJkIc8qZ>a}mvYd%~{9}QjN-`|M6YrO;8dg$vx+Aw{FH)_5JC$s4z zb6{%soM0}%TZ>L};Cv4G#-+!tq>Bd@iRLLQpNYGiIwhB$?36J)BxWDzKS=M7rh4;` zW9P&|$_Zx)PQ5N1uB?6u>F$1g0Wdwwp1t-gTag3ZeS7pjXloQUuN2ncRK{gAX4<^8XGKo_4&v=)6S}G_$zxK`Mxn7XHS=G=Cl3(u(L$u^J~sR-8*9%vXa&! 
z`H|htJz=|vi}fP7Js9=o9eUJTuYG%L6wk^>u$=z4436;;ycXIgP4Yw(xp^`Ef3n5I z`Uoyu%361Au5qvg|bBheT0 z+Ap(tN(6W^kVWV74d1hkrd?o&R3OLr(srhMQXCn@g(1brnNw&>c-R5X*6U9D2^Tm% zd@13}sY~!7@7EooF6lyVnmd3;g+qKZx~%aJX0L>CaP0@K?tKzkj4^mE8GM!@ z3)#Mj_8v!&dX(o7?YO$FvHQ)!&WBAZQtY>Fe=#!d9$#-6SM)FYB&5$yJfO(fnK)^ zep6IQ+zRV$YbmnQ(}w1&XC5sXWgV?NlX#cq){4rLt*(WJx7hm}eRbn(V!Y(!1~*S6 z&SB{!YeWS)S_L}Vb?9h~=xD4tMHT31?=he5MMtY!QSpLw2h-+@)yV(W(s_4qYL)d!zaS`PNhGS1_xK122VJh7&T`wj+4(|Z2ZBaOJ`c4vK()A$x!R)GWu09 zZb0^R^y7N^@h$rC*KgT)FghXQtvT4YKhbSoVulfWVP!mewBj##$j2TPxYJX zj;@@`*vvu4uwxVS9fSSsHR;?QJOvwttw;LqutMJ9*5+dTwzK_Jh1mmEra{lH%r&$ryBh1m1`B$43iug6Y`KzGdgJCpzGd z@-8dH_#(3&n;$upSfTjPLfalsuIr*}q{;K$6p zzmb|CI-WHhzp~A;-Rt~j6dR}PthDfHBb-FzNT2 z7rpx;JBhD;B=+D>DuB6r9k_2sah#*0^IbBdc(80t?_BTr{7kse(a~+cngqr#^5liw z1%+SDQuZ?81JWV=#AO_;-yL99Qj{>Tg8eQh0RU~NzZf@?)yD8mI(i9xH+8aRsXxg@lF^dG9emeZNyq29mAT1T>3tsH53#?H zk1w+Y-%Eo21LTw5lSM4qfxJt0`UNmC2f{P>?&cT2UwwsLR{D+R?4RK?f<-pVGs#=R zn1rAS?JXa8)EBBPY8so~xeGdehxA`og}J{G#8snytWi_l)E9V-fJT-t=OxK3quI=9;UI;W=%Llg3f} z+O@5?_LO?cL~A7Hrm}92hJMqa<1x_lSoqLM*qR2hwivrgkF1ODK`rAOVb89WGXqIC zzzdi7Y=1ELM>j9>-Oqf5kMiwe=Cr{PzF>wY;v2>qu`J7jPuO^m@d-oTnQ|@gjDq%d zW6$kG79YcZ(er4*nQm>?z9oBC+#gr<+E{+nVQjpkJ*nlhNgtK?y#|A0=%*7N(YjRg z+19~+NFAjw2J@b0KgjQ~&MbY=%DaWJISe1r`ME~-p23MpaCmdtTW$wt9 z>(Xu>v@0J^|LF7)s_zc!!?$tst>e7e+8fgvzmhe6#o__kvTZs0lD+uJR_5T*mH!6L zulj$8d;j<-t1I#UnHeCHA0&Vvzbqtb3mGN>EvP7!3EC<{0FBsIyIT{q1(FE?`mq&V zOQKdGnVFDoTa|uyfwsGnVP-%#tft$o(7G)SGXbWb?QZ+o{g_y{kYtj;vsDvY&F6jY z^E{bJ4C=PKukRmuC7HSR+;h)8zwf#CLi;bU2KTZS_uxZ)p1L-}hguFF|AGADJ1ady zor^ux`__Nz_xOxwd>IFY%{N& z>}jZ&Y@6)5k9!!LQv-AE<6f2B)C%86?qD;&8{K(#aZlm2)f2N_+(W`%W423dtg1Js zc4LHJIJ?}%=l{fgzvAD%!Q4c*vHs|4$yv_`J;-49_IfowsZ}_L&tCw*W(z+8tWU1?uTFY12!#+U13EkXs-DG6rN_3>~{xJWG4O_x*(KE`9(?=USFZ?JqFMNiv zwRF~b_{{m?b6>KZjGwL5Ph%ay)ft*=*QWoW?nkVJjWyx?sdcvL@o93W-O+TMe#PL* z3FLq3jeA25p&N&5()qYs;0y7kxu;XDjmitL4)Exi%st4SFHwWQ+P#fUI#gI+amX>_ zAvsTWXu&6258OgsCOjhhE8K&la*P6VhWHn(t;!LRNBJ7`TF7{ZXfL(2$GAJo8t1^{ zf4M^LiLxUL(6r^BX}I{9fip2lFyh8oh}9Iujv0lH7>GWE&EGSTQ$e$ zXh=ar)g(O_9cX@J60#cIZ5~Jxe}l<>S;fQO~;D&Kn2~ z+(*LSuHHw!x!a!8RD7A|n+N%Q7@GLGTf$$awe)drukstaBK6-`8L4UDbABT9lYH7r zM^3NQbjY2I)_H?WeD{N|)JqFKPV-Etk-M51dIfy4xy#c@PD$`yYP+K8%fNABOJ3() zYJt~7-P>3iGy*dReRtA6^VBYCmg_l>oQv#l;Vj_rm8?7bl!DL8fF)xYJUuJv2NH4TXLk{0x{h z9JWvHm3;T`q(^lRxcBRi@}7KtQyxEcj>$~})Oid5bG@UW$qNqE%=54<|5WEUJb&1N~kI184F+Yu*({VA^{kF?{rH#;SHT2Gg-gfR-s;B*NyIt|f z{mfhFd4SJEA6V=3=X@^zXUfjlyXc00D)!KUjhKLqU?267N%%rG<=@Wa{fW1Ef1+N+ zPqxrX{Dj@azY>$}M^5d``FG|!6qmDC_bp^qVv*rt&d$R7Xr7@N;84i>6uXe&T^q5_ z+}(d@(%^MB(oXjNthWb*-$Z_YXTCWh@Qis(4=nA9bF%uEqK8YtEAn?K*V- zQD^Jgc4F0Q3ek^S@QsS`dA3}sk*|EezjH5e#m-FOcc%Z*nKQ<(8s&c&c`3`U@-U`$ z{JNGhU{Jff^l$jB@Hlu~i}nreCGjyn#b?C2Gsn(B>=QBLG-^GI#kZ4uM4q5%uOmA@?uHXRGshzabb^pB0Y2t)muG+jB3|16R`rDB<#S^bk_?&0& zb1WT1FT$Tq+0WniQgtd&(byN+q2O_@+?!8c0oa^}k-OSdjCCpeMcyv;-B{j}b)3Zd z)e}1_er+)2Xo7ZK;Z*j9j?Zto3*I3nNUo)w@GAMr_SfN4`Aqy&FP}@^qmsM26pvJ{ zXsP2f4>d5`%zd%{WL<@aSMd4sz_Vbld<#{a#%Zl zdnJeT@tp6952ogF^$Ha?slRW?_`BTr`~EExh#P)Lygh!M&9Ecq?AvA@R`yfn%!BBx zcdl~C{$Dz-ASZ&qh&;^=b#ZP)eC;FPeH?q@)aI&poS^T9N4RIyX})!|A3yNW9PTR; z`@*|Lzk6-s^0ywJ*z%9?`FVajvB~h^(D}d9hE7U;SI+vrsJ;7kf!6kK(`GiE2M39v zt(c*cn)YRd^%cLzZc4rXOH;M$e~I0B&3;91J@TVxo^$5d(@}}dX)Leex0iAA_pyT(D}!0JAZ#y zPV1YWqz?W! 
zYM+WYjQ#q};KLrp@=oA@Z^79O&fVWHS9#VLTxy%R*UWrdD5rJ!3)+zE6Wx9b zwMA+_bZ+Q5aM;XV=sVxfY3&4e>0_LYofG;bG}}dunefl=#U5=W?w5ET``WpPe397Q zFBo>W6THO6AE$;l1x&xlfavhQSkdwh|96736{$fS+u z@mGx+ynirgGagR$)I|L`eZRzyWfPtJEwm9`XW2ySJKH$lW!lTTrfPe?vg(GG&F{T- z@^)yw3%P+_;-{Qkh^(V;_s*Yj8}VQI&PsIFUd?ku%ZdG((qkLHXZ(I^irmkg(f_^S zW2)_e8(Iz?HQGG((0SFTm$u`<9=mrZZJbp%wtR)Ple&u4{9N{K_wJR{E=hjuZoWTu z<&7~hb|%3r=rPF#G!`S0%Et@@t0vE{ONf47@FxoWTG``C>w*frH&;@i!c?Tffu z=!3?3{R75oxas|4t-9&bV|9JVSl_$p((@`?@&2*wD=t0O!4>1q%L5G+oot^Pfmeu) zw``qt51FF##ub;wC!>E6xd27ypNx;sSlDt$?mL_E@U!24PcHUnZ#u6vcd#?d-m38J z`s(sSmy6%%2z8R&(+cOd^l!r6UalSM;y%TqS5~x0UjB7#r31N`dA)I`f^CMj;n+`Y zXK$h{d;K>!(GQ0npL4^@j^(Wij(={;zwuom^$6le@Suy5ExB(_d;{@k1)iJpUef<0 z`>U;A`SlHZ)EiXy41U3c+kI8p^6UxP^`iF#erLh)PpSR7tJtr@j>;WWavyOO<8*RQ zgyhlcddRmurL~?A8_OIt<{;yWuieEwMEB=2?g8+ZJM6x}zTSDpL2j=Bk_8riq|g-3H>`6$GX4jI{ue> zLiP3+_AvQngZB=>JBQ#M`TT(6p24{KT-8+X&CEqgK3@F7cOOaLV?21@TKu{4k-Lol zmA;+)Wa@VDo2~l^Rh=AmSon1dwco<8mFVgt^bs4hho4q+ZN*;J^WNcZ>WCyZ7JF^I zcWC&#Xjm{nK^RYT#d{1KCA!>@-W>6O~HlMf= zdRY0yW#lNP!!PMP<>*?B%)G$E;PzoT#R;s{Yj(*`j)t>m3~j&4|3Y7h$qjvUy|S6N zvmYvU=zpmgK0WtghtwlylT)CUIi~0Y8@BR{h7W{~kG(ng z?jvg$&r1%2+8BqiZzK7k?yGFI?`*Q8Z{MFMQgf+VuIA!2?f+#Zw+-!~o}?3*l5sk} zG~ru@J-EK*(0i}#b`nQ6@V)0_#`ph+?|Ybsvi+635%`#4^Iw15_?^l(Y`?($3jHea zTQ{#l#<)8NTil)r`*yyQG1~7i{D&c81>&Nk3#)EL2eFu40^yjz0{ox0-1s7wj z%=qX7+U_%Ps5f!=!3VVcq1pC;+4k8FX!~ok?Qyeh@B`Y;G3Tb4b9?*)+TLrn^_XoR z`hd0%n{6x2wzVJB*2K5c#CPKdwB2XItvBI*?gQG&I%uqitl3)HJ#&fmz0q8=LbL6v z4}kfOiEF2c>-Cv!zi0T(yOG;b{C4+C-+c@|Z8vd0!0%%EJ^PqV@#a1MKCFCVtB=^Y zg~wJY{uW;SwTa(x6Tdl`V~*kL7tOW@%(exzecGJ&Gt@h3zZsFWJ4M#U<}u&dw5k=~ zrPj)K8ouETyy4L~c#$^^thmCHU0<;kIk*1Lg&wMH(Ty$H#PlxycfH%Ydlz}21MG>6 zeBWSxZ+>^2-0sMCUGMLEoEU86`?8x1*k<2Oa&jZz9W=ko7#Dq&{@%7?R9`LgyDIa$ zjJ_H+W(4+$(S4;RNSmm&N<6ucI74Et5VTQo>lM6J&*$f|hMyLl|A)Fs*y${EtrI;w zTK`*bccCYbYMqW2TOqOR0~5Y=V6L$r_%gL44(;1>6kQvxZ_y_5MmIj%mhY9dQe$vx zWtsirLhw}n?u1V%`*7tn&H(;s`04%Jm4UOmA%-4a4ZmfV*_U_YQx;GBR7>H+TO!&- zN2IWbbInEdpDLVK#{afRk!@APX0!9#!*4AjZ|ONdZEunH34M{P-K4F7GYwb=t~?=33uP zEHl2N_QDAbvZ?pm!aNi$s0rqNi0>jw9oDq5yI1<0S39JVte!*|ap`YYCtbVW24}a3^XVl2o(NFS7|IAnx ztiOTYiqF_v6B@mR|77eFvHv&n-Q`WfgM#1vd^SR(#jL+A`xLY~<9Zf)=X!YS&fY5b zidl3z0-Y@S{AR%Llgf~X`QT#lfbGIC@ow5z8ocjzWcD^{lX&mU^qWTe)%K9+hGO!$ z#mHqLx?o|KUFp0WQ#VziBZ?2&Tb41WblV$S7Q!D^8%y^oIOsn4PW0x;_onVs-=q67 z`&znBeTVKF+t<>4>U(rwW?xJ9sqfHzWBXdVPkoQ>%j{c(&TVuUzL<-5X(b<%jSj3v zFa4d=8nYJcY`5gHP9ht(%N-J}x9~oi2lJ<5&5GaT+OGWp{j8i);{y|>TYpPVTg9iQ z9{u(tsqZ%Koc!L8jk|3`-?pM}haVsRu5R(mm$EkB+IW4-ZtUYpVmGPta87hKNj!)S zo5r4dlkLRB7OkPIUF6`?;Je%Y_Djb$N-mi@qKNB=W2SK@M0*$e2p;g2b~n>*8L*Gx zxBor9`FHS<-zE_Z8PMc6S7|M8Tqb9EHE>wP{;E;?Rge5TiT!L-IY*$)|E2K5 zutVMtabQCyU_&Po)3B%g2=8G2)JBcp1nM}_HRLMhouchj_Moa*?JuDJhn&-)yKU+; z=X7o7ERU^)SRiym!GuAnqtuSquh5P+@blP;`75;claf!u|I)73-gS1`TAT+awyMwC z`E057?)+_9``bTz@9B3$-+f}WdQ2RD1oSjNk_jnCMZD;Ze5O4*}b zS2|nO|67dziHA+w<ow>tL8@%`# zb?#vlFCMqCj{q;;0xv?B|9~H5KIF0BN#hJ&`o3c@V)HTU0%+Meb2sJ$wtE> z#cw10ST&X(7pM90Z{Wv!Gx#x(JGiA<_)&a{p97Gt(n0&o9Urx&`-Yej_}8& z_fHJ|m;)T{zGbao%K02;y2l-F};Cy&aAS*Igfh2Ie)hCw$c2Lakm_HSJ|nf+;6IQdroKwoC>ed zmfkm)yOQZ|*s=8Y7x{jnoRj_MrMw5DZJ%X(X5S&^GRL-W-}#$4Z^9m~oJ+0Yo`jmN z`yO(xrgkrfcf9b~_wa7E;PsVhyZ3!-+Ma#fi^6@%^~r~LM{`!zjV;oDx71LxuYJ4i z{NLYKz}Xgj0H?#VvpW)yIt_1jE;S_CM$N`a_ImMkDpp=a{l(lIYFKBx$d~Oc?wV`7 z4Y+gjLv(2J*yjH-pK(Or@4k*Vyq9YiH!tVC8rpiN&NJT3qOJ92)@Jy=I2=gsNrm<7 zXouci)2B0bY)ge_u(7WxSse-M-j0MG^7rZ9kBi>UiSd01*vB5VY18+UPd`45I)G1r z|I8d}NO<$c9Y|8YqHpnc=*5YC1KyR?sN~^WoBgRHx%eiv`r$jY=`yd~)4=r_?V@u= z&IUEFvsb)tUcheO(-)}do@~_aG+dUm;m?Y+>G#dlE*`jy{cdQ;y4TJ+Y)O`cK06ASNA9M)_ikr(5fdAOy-of>ajpv59JN$uR|Nb%X5W+ 
zS3Ubvb3UdWdkMTA0+y{$6|$J zb}!N2201fvmbr`nD>`e><=WYmTeORZ?lsqX`Nzz)-jcc2?Ztj-iTcdiBI<@>8FfRu zKX23py&CaX6jNJN{2wc)3Lo-z`SM0%?NrUv?e}WSaIUBCM-Drw{opj^#Y%Dr`(4__ zu1N-+UWFd5fvBFnwnJ}qrwksL=GockqbAAXp@})iQia;|8}N;0J(b125IzBNvH9Ha zvQJ|t%BWE(Dx9orx8(XJR~a>8!Z-8anUG3sqA9nkbb4tVd zz3zm*ucS|ZrnEUZi*+7s>d3uQ! zinoNqq*izSIbaxZhcV_k=S6nRm4f_~7Ld*i3Xs?iHg25n=|Hq?6O z#I`(O4WqN=EYC&YT=~|czi`^@Ipv!6_dyqYhpaT*IdMY{V_R@)fD-^tjcqb;azjpA zjx7KVu!o9)wHjF2z}f>Wcr3PM2fC&vn2bTwp4<+-+SjjJ^0bhg%|dkf zZgja5opJtDpKj2{8LOt>DP;0+-(kI`Kd$Gl>(^ymq4^eK3uz~K3j7%DLbQv~E{Ap& z{PUBvW7*KW-=V#5oy;|Jtls{SxfaKfeIIiLzt(bdt}l27YtU!8;Jv2*lzupDjL)1{ zs~l>_wHL%Dlyc9f)M3sW3;$GV1pb~9e{xU%VZFOQp|9TBXV8*8&E8&ra!r3kKb+s8 z_tqHjYpyLDOfjE4+WSfqdijx*USrEs^j@v_Bs;WuXt>R?nJ3}x`S3RP{%??+jkoCJ zzEkjHF1*d#3_YFHkoJJ<4e<3d%;od&^-K8%U&~va@LrDG>o0q|2JL6Xgg5<)H}e#4 z3SX|X1^3;EUbqpxa3gx*M)bnd=z*EQd>cJ*eJZ6d^7)gCqEUU(+7A6NdLU@mUKHLh zBX2ha+^AQmn8(jVeu(=j#LwBnSdEM&YgWygdTp~}@LEXT0QH(OA!EPbK#tJc@A3Jo zw#sjk3p_>*pwfNSrGbhbbQ&{S~gh!^RlbdR|I31uNXC@D3SA;6kJA!V{(7`f>U^ z2~3xEJo!EBg2=|6u*g$HFD{42^4=Jcjht74NmqGPca_Ak#cAFZ-V%Kx_k%aDGiA3t zSh2dFd7db^Q}p*1=GquQ?#(*d-Dk3w-O6VF$-Y4HP)9_+9e&##K&DKY?q1|iz8VPY z-D?thaoFHP;pdYb*~hMdm5_%}~qNxL0JMbm@Bg`S8cP}#ih>hq#Xdi?2J<#42#y2wYb^aem+a>2mzfj&2 z$wuhz?Z;2^r}<;yiR@#q&NFBucxLxS^esVjb#Jet-2wbcfx9Jzugn}Qn#5BR z$YA%{xL)jM4VGzV$7_F*xJ=qBp2;_5)}QtX1@@KZ_*>A4t?24KZg>Q}s^GzwE`PA1 zI#1~&#g}61)Owihx8x%OJ`<+2PoQ6F(|mkL@@Ezfsfhk+ZHK<6G~GTH2v+p;Dt%;b%51#F2Sa+#OR zImY<+nd8S8U(Q6!_zM|-KI5NfeCl(QE$TAuTqEN)mPGW>Dlmlt55jJTzhygMq1&O=GVmP>N8~tEf*hB`^`3AV&Z2lAxhNIZ5BuYKFY;`xc}{3IbASG0 z+H?iy#xy>SCH|yq0J@=TdQQol!3RCndkSBzJgV2ACwkEv790zo2Tl4HF)vv&%NJh6 zIyCh~^cwU(h?i!tajk$i^OT#xH0p2~;fdMR)MVp;vfR;B*&i%f1LLWy~|;uYEH! z7CAFsEVA60ZNKZ9L?GD`4eNKV>(HM^w~UTOroN9~i{Ep!<>LFzpJ(_t=i#4;#ok{O z+a~6$dC#Hv-}atE$*hm>{I;pFTKumdv=%*$9mn=nzOWzPW09?C-<#M`iLb`Sd*_L9 zW)Lf>*lvb)@%A2v5!)62_r-Rb&pA|VceB`kH!;e(sJ<`~*Egf1#BN{(A@)P=6_}?GE_h{Da>AL@1Fx@uUTw`ngHF^8>;(En&u1n(C`L;@Z0J#0O{G8X!7(-wu=(CkRabR1}Pb_~s*D({SM#s6(CX|4c2FaNvN;@j_L+ywgc|4_cSrm6tv7pG1&1g;O{qVu2g^J3sIOlK-&h?sa2s+VwDsHaLw;xwe%f*D z5b*!u3T^siVvxRXIgW`gI`}Pz;5QWQ2qc3g5q&Fs8Y~&mU$*7<+P>r1AUH+oBRn(7 z)B~cU#WuIjtJK9V;s@p22aRmZwFX#%t2+_F*G}khbQ$lQ_8!>d*x-Wh@_in$X*s;F ze5DTfhFqJpso(A3EmXtSA|uMyRvCQRehOXHlrOm-bRD`XZ*8z5)fe3%`i{?Bhn{pS z%>lm1!J4Q)Srd%vYf2M(4Ll~f#sl9#cbMOg(%VtdwS51CV?#IkSoG9EW6Yr%;D~*% zL7o;Nql}~MySqT?S>y>BySMAh*&BF=bNZey zW*=L+1|9!p?c(dK$CmFoj+K4Sq2fS^pTsWt@>!E|R z8?nt%F+>Qy#s?S`3th>2@2@ayR}{Gxo+NLRBzK9viR;}>eLA+iRpOGKT7R-yavI?d zJyx628?m$KkoExaPG2IjL&g*M!b1{s?YUZOBQDlc;O@6Ma`u}(RxLap0r%~0e{yOm zw#1##TT1%S8B=51(UtYYZ`athSOor#*rtZoUP;vi{L=@V2sx~mMQ*{f?lYd&0pL`E zZz1>|0N+k%Abl&Dx2DaBRY8lZp+%840bMXPG*z>O} z{Sp27tsVL`iGICulBad5HZ^vjBap1`i|CcUgx*OWMf0pUTc~NVD%~^qN;sH&r8cZT z0bjn6=+k8^a0p#vw}rM}VT^N;ajvoFTgUKcd3g2pGH@d;?$ zpxI&%K;tK&@o%B=6WXMh#vC+lLa2VW))t5N1xHQGjXeN8CqU0zq309OQ}B2IdOis~ zGy5Iq3}OlO$|L=t-__9X2hi^g=%@NYzbByIm!aPR==U=%OO4y0O=4_Y=m4=OZ=^3h z0KF=q*8%8N2)(R%9E4sC(5ni1bwRHp=%wZYy?zG0eg?gsgkHaeUYWRcPSe`93vS>v zvZjA5eWn=n!LMlb!zWeH#|C}MpicwzVIHw6=<@{hkvaQq8amY$it{e2(8qV3(Qgzk z&?jOWpTFS$&}3*Mzr)au0nhxP9V>Xt5z8t9AI(&tsFOmW1`Qfw zqb*NxqV3wKKUoru>LqJC^oXB%ecE}J@nW^~KjQZ%2Ns3()BXp*p2A&&uVc_|Jhi-}ac6bH@;IlRy))OiG6!g7}BYG!g z`sP7$J(64R2Bs7LLgs4CQO1+lM!u6=tn5!+5E|f%|AF77=t3KEim$1Ba>?JhOT;J7 z^P=;X*I|cQ4=?|Z_~epjTS5B{yY~0U@^tZGhB9E`+YbRJ>Fx{b@xD`fyfme&xm@cR z?28AIeW#$CKcPp=xhyyJXg2&9#P9eLvM%v~!llo|1)Dh#4^$lShta(B?9WbMEM9=X`$Pd@L9>MiR!^o#f_ z7OXsHCafuHPO?7MoJL?RGI6Q!44&-^B(F(C)SS-3uh;aiJZr)8lTX=?5B7WJwFO&W zMqZ+rH6U)%i`So4Id>njDtVw3{&#Av*jRXQT|c;f<%JYHDE`=4_QeFI@LjRUX9n-d 
z8@@Ay4@~+M$lv&8V#kV!`viZ*6O0>YP5h;Ce5VxqKY6{LEe_t6tFMsyqfhYL!d3CA z2}9y`IqR7smyqI~qp0|%_&FlCLKi=I>$)pF%UhJsC~{vA^5fT9eS-LVuZ`GeyP~P= z;VYlCCQO`=(EZR&?5xapJ^o!CYj!#s*PF?uFSZxNwqqwe$Y`e<+ZgQ74jmJT_yhjh?hB0Ymo=Ae=3mNO`mRU5gN9s3 zv+!lenRD+=MfX~Na&;~F#zo{=x2E)wb?qg$=ML*WWU)M&(q)dP!~W#@D08fh>qnU5 zc6(W?%=7sUcqty$uW^g7lG4Q|sC=Pp@IxcbOBrg=_NYcBrKA4>y)nTeYQL?Zr}JtDcQ9#61`I#YuKd6YU*OKL^;)~= zgaL9?b?Ah}@q|90_`#$2;Tq`rJa+a@?CdpxK7${`XJXA>;q$Bg3H|C4;Rk#$WMLDs z@B}^-x&j|`g7QI!p{>{`!EyIiEsY~FnT6x=TDd1G6US_YW8Y0$dlmIKABNxa3Gy`H zt@t$oSX<$jFf{RXpg+OI(kG{* zerOxj2N-u?P0HZ!7d(UY$b+Z@+NB;4pU#Nk2W*oREyN~C+%Id= z!u-zCZ@b&DVcXFeh5mqwCxrF`8`1mF-B|CrhTl*BYka}vcI5s}bl63Fyh{9Wp;z%ozU0I<8g@m~=H_r$rHWM^nCEGI0(dh^{K?NT&ePc6 zueno-reA|^W|jJrpLa*}XQ1^n$m84gSuw^7J)zAE{osrElGy&m;8;iB_24)Fjsmk; zD~P=Tj+?;odT=z>Ncbx&RPg6oTOslaZHSYm#wIZKt@M9_u^(V;#hZ+s#n`to_EU`g z6k{8FssUp{==8m5dRtd~g2JRllst5P_zCK;Xk=SMv z_8@azw%={QG2*QmD&7*GV>|p|;7gnsAx>Om#)5k;s}S?D*HT{+sHiVJBJj||Q$v22qiyES`O^wGKbl95AnZejmv`uK=xNE% z0C?B=!ukQ1%!)S8fd^I~PUtgGs^xae&ly&wpp6mE$w58HdWN(5tF=szA zCiqIf1qpw}a~+W#=ia!A?U5|+Cj8^Uc%b4X{Nq{Y7TkGGn+1$4*MVCN81*@Ky(4f= zz|Wiff#iaCL_Y_Q-4^cGANsPVHOr9|yCFcGLTN<**S=Hw4bb2Q$E?^+^wf=sK=K`Y z=6xm99{Bq8<&HTq;D^p>v(bZ|_NU4;HNSJfKc`)hGYpBncT=T>SEr-%uiO}g#JTh#F(ow&)aU)+7{!3h~DcGSm3+>eZR>a*ME+z?yUB-E`W}| zDhVWay2JXj(D9x~zkZKncI;vF&yObulkYK?o6F<+Uq@2exg|@_j*KaDIu+4}0v-C$8goqDGx)`* zKl#_CVf|+2`eWuZ-k9fp;%w{rB>P~<;D?UE`0b3p+}&r)3;y^hV{BT&85Q7HLbJ}p zQT=#rLO%}8Ho<$H=(yWfHB|hNwSEWs+{;@3nzg>okrfhp3J;FA-l9)6lLsqHv^E|~ zUW#lCZORKKpTn*`4ZC>u4|cUiqDL-V?`bcxPs!n~U6nr;IGf<1pTk2>F^``I`t_Ce ztlr9n+#`f6kJZ=0k2}M`+5SvCj^* zykPL5#P$2P8h9^vOwEz^z`NkZhadK|dK^_g(2rs$$h^A zjS8@RUHIdwPa;sUi8}+&z2TTTU(2e|4m;J_i~hav1y6gHMjjS^=`wWr(6iY5_Kt|Y zGmm+&M(}iOXN6~Q9yJqROhxn`cO>+`_Q^cv#f0BIt&&4HQn5Ar8 z-VdC-MPk@GtFOn6ef0GaS7^Q3_njMfk5bt}>wA@(?ltY_wVuJJBL3u40-H736qdOe zu-yj@{xWQ$5fe9<^w-?A49?Po!QtWe>8qrcF!XMxCaj`F3- zL@(!K-^xW##^J}*h<$S*->&ko?g~%`gk0X>$cgR5pS&4=QfO=0wA=naXB)Ir^BRkb z--a&9z=i)q@qNxVkK_dzH*=3d_%u(;i;W$(Q_E3)>nEzK|A*f7{vi%D0^| zpY}TV_lVobp^*=JsW2X>IFKLRLGIN{dl&1vUE(l_*Tg1>T#BxoPwZBV+#kJ__}ldR zbBM*=?BhzDYs4Apn63Vs!yAtMVR)wGhAiJJg^wt{o~2JL*|Fl+g@5TBH#V&Nrc=d+ z`3@u2Isr}U!sJbu`@kvURdl2H&uacrf3h+h)ji0*?2*)q|BU^34qYTXF1GrqfaKyO zMm2nY(aTnRdMIQc7l+PP`YoRK6}Och*)bjt>6mYh5woFF zSnu}A6n=9cyaQZoj&pv2buweSd8%C*vSYP75Rv@1p$pzX7m2^l|E_{*`0FG6@YBFw z`iahZ0Qj%eqDN@UKFso`j5r=WH^IgEc-2p2P17bSIjf@I1EL?P$J&&V{TkB;WxR(( z_u=pHnd^akZRvVsNpQ2qAl@1^2LHPjPe!ks?K**32~0HycvR-_05Gk2oaVjs{a-iU z%^=UBa+)=tcD9II^hypt6-b`0jgs$)BOCpOTwoVoDIsT35>fkCSNq|C71~)~#va&F zHux0Z-L^Q4&BlhoPqH>O$WMVbfttC3Q16fIDxQ}8;u_oRoL$ha3mV9N@W^=U$oFrR z+BqZFTp1(}fgMtLh_(GX^HDH!CmHdUz_R)ti6->L<%y8I(C*wtF1MFlX1?T3$m5=1t`bi*`^f`F!g@ggctbCli{y3dRbE%(#R65Y zAoUenC8qoD8WWy4GWny8De`IXfU!;u;11sn)rb8_4IO>J7uO5@##*D3c0Q+Uv7u`U z8f7E0P4Tqo?oN1m zzUh<5e4b*g1&sA`#@fVKw2u*E4i3cw$suH`o!D`C!thlE*5Ze4ZSD!+`XsRtc0zEK z9Qt~HLgkmm&mRESO{M3^kZ)g}!2f|??}_&7Utd+$`YU3lAH&E0 zH$c4P@6d1V>(i&$=f%GMm}mJ3V#pH?8~Qsdw8SxmxyX+#RPfdGeBNUp{xy76v7cjc z*)|Efkq6!jE=vLt{Q|l7CFp}C_>@)H&!=jG?32ox8F1Hs#eiF%v-%y0aji8!3N4nD zhxNS?d~5b&Xdl}Kzn-9uZHis+n-Z$qZfol_*Ie44!JoThYg|8?>NVO!)2_9_WK}q< zcai_DLI+rVo9h^Nl_}>}Y3(U|Op%qbaH^oK;NGy+pDe@1HGof1oV{ph1gy}}r|oTD z$3KyNN2AR5a9p3#%pSJc??h=JSp}bzxsCneq90w>B7Jv4i@(O_65mMROd&2f^A$r6 z${ylu`xPo4CFT7*1R^VlwT#L8am)>LAhNFye2V+26FLhVT%NRrY@xNh^i^Z{sBP=HMl{wWD0x49$w-u_eik_5F4UoKPYP=($vibwPYc)SSNR@Tey?lMlA)lM&z~(xf;Q5tJH{7 zOMkObvl3#DxN#lzm(9SgP3b+g%@}`%^;iZS1ie zWIk2Yz#m*2*Si9u6Kuvf;NSB_yOK4*^R(NaYz{>9=KO@dzF%r$4V%1|HJGwCs$Yn7 z=oj(~nH4;9{yYOuVtNZtn>lZRR~L-xb#C?q8TUS0L2u<3>>Hkf-_&@1LpKY5oQ{}u 
zK^GiFR|&mkjGlXtL!*8BX@8(%u|K+l&%NS@?+pekj>ZjsiVg27RE<>x|kdXcGAoLuFz-gOU2#@ey37_{NF zMRImM+f|Bvmc4nwe~szWG?=tZ#RJJOyy8xX91Vf_&8$o!95ha!>(+l;NA-^ zi^`*VQc@5ke`QR0?e|3K1g&=l`@hpAi&33jG3$GB*2)|i; ztnAym4n35gKIh^`_A_~RKIdvInXq^_Q)WZZtOviYIvUr#9ch^jms9gVK3?k8GiCM+ zYd%V5C!}d%*^x3sCfdXJx~WCRSxT`V@LyUl*_})O=sDe)o|Di-<|na}=s0R@mAp5Y za(Sc{TVF%G4xJ=^R%?sx+zXv7S@gb_)h4+Mr6ci$pr7J%p|$XCxWu3IuOmk``Lv!Q z=d0H1TF>D6(m=9c;INXXf@pfZV!zL7+heY^^mWrl>VVuOGCpg~_-5@Ja)$1EC#$WS z_Oh>RcRWe*LAc#@udP4@!BXheCC~u!_|Z|HEr-8Gbzp|d9|+M7JuUd{M1 z#`iwznd&WGI92Ix_`v8Mhtz^$YfWC-8F%{?-eP0dXV_T!Ez9ip z|L53Z_XK}0eo>~MBKhOgTI@JF$LBWr5WlE7pkz|^eL!9s8<1C1#%HCb?ajpDpeaWm1qUx{FdD{(k9T~;*3U8c4?K{i_E;b92Zhl; z6RGjz4jk>ZtPR|sqx3}&di!cyj@tLOfuEASNHAF)#D6J`lf&=R>C;>J?-_m24Q}aU z`hU_VhJNW`zqbc}vpQw_a?er^bY(V>i%_Bo8VBNbBBuePcZhYz~}}>H!!vU zV|cR&;Y?!ic*B#-4vSV8}Z4jESR0a5@~;z39B2wI)tG&{yO_np4=9lDNJ& zVZ<#0Z}l$?c(P{Qf(v@zIwv*2KJjO<<*!k5uY5e-d>pa2v>nL>GLEvhx@WKqyV(#4 z>qWi}{UCGCw7WgYthUb*2e`(=^MBzUN+iPi%*m|6SGbtfjJ^Lc6hk zN{sdENko*d*#kdWbX$Bnm^|Xc7wVVzksPYTpZF;agJhEAgk2iB0oElm(_n*I*DU`fLA z#|uKCf68i`VA?QgUnhJMNyi8WUNPiC;NNDOI8v7(I)U~T%_4U~FKo`ya(~6q{_qZq zX0e}SE!||m9;dCus_C}XC$g4)Cj*axCh&%@n%AZAzEILUV#|A8&T6y#EWx`u9Mubg z_=B~E3`(8$(fnX?ZwdR>Zt@n;%<=Fb z|D!Zb#^xhJ_)|U96UAyf^hUw`-*ZRg=twk>JQ4>My7_e2uwNdBK!pz-Q&N;-0X$PUn;#z8U`$ab-^n$HMeq8K( zBl=kEfUL9FM9U6HEXnsuPO41(9*5onJ8vs7@6Ub<&GLF1(8Zbk$?3ni|6ytx$v2^& zlRddn?m$TBF?ZjHuc3TTXcdQ6erVz`*R9@^1w34;kwz2FIw2~U) zDIuAM>qu7Hzi_s1*e<&Fu6LyNKw>3@1G>T$Pw0)Q3>-=V$?_xk3r+ZK?g1laMTbbv zYi~(dUot>U5H!XY_#$&T;jA`+FSN4g)5AOHqDzWQ8jW|pPsP-sEZ+4r`U)LWHTYWK zp7c4gvb&*{0$v=bJ6 znxM~Gd_8w+1hyaB<16LdTUhd_rcFXOUqFwZDPga$Qjd~SIgPjQ?gG+Jv((S zUA=+i335keN8)-}KWC>`$osgXeFN9#^cbJFO;xqi4dxi+?qWT%7L0M4F`5};fH4Nv z46shJ$5u9Yficc7#vP1t2V;n?kr=koX6$hmf60KaV&l(w+OM%0=fw}QUJ@Jkto0|o zwZ!Xd;(E{I0VDrU9j==g#jkv5Bk$k6k(?ZDSJSqewyR6~^asp1MApdQm!{cYcXD6?gLX05 zcXRuKidTKX4$fvuoMh18)qAtrc++R;2pyW(xAu{rU|xcgtB#?wKkns@u36#ZuV&y zQ}nBa>z2E-+PDjL6t2bevv3Xh{K-b-F%;pxwY3@aV-1dkBl`Mqoc&-U?irz91@xOa z4*d>uA7!0MKkP+b9eaIwQS?^=y<+Guf${1cS#4jNNRHx%+}k@*+b}{$_MTSv_l@Az z*iX(k710~}xC<(3;O369AGH=)_7SI1-&aT7irA8Zf}q;NsdE#%_mAWbh6*Erq!#23 zfiQ9%H)V3;s=zwJMG~Qy^^z>Xu?_cKi z1(JSbFYd1VQGWJE0&r88dp&P;-&;q^;M@(4aFW{q4 z=Ryq4o@6TRFVvF@H|#;$UkKgEILJVcIVaXE$(re@UUSbAN z*Gr99qfsL^LKF7%LhcN{EhVl8wvSv(-a3On1}39J!Cmc(Hl_4!oL81S+R->QTnY45 zphI8MhfGliRk_Nu^F*x`^QP(KrS8ppjiG}+%Glz2DBlnreiZ$C0blZHK6ewBkLJx# zsS#I-_pNc`BGwo? zkz@KuBWn!pQp{U)tlSlG8W|cuhMJL~x~&4!^MYmbs*5w$nVR`>{F?n{UBD>Z;TbCr z+RR#zw^DW1i^FD~c9m!O0afFc-h*!ZL>ji_SgpNf@f$uIHaXy0GmjyA!Pt3n(>sc^ zM_(9~XT8d>btCX<;G1ETkmC$@!3Z7?u_m;*-)=~Kf zAH!C{w?ClI1N3>Lm;C|cwAz$YuWf=+f6(h!`Ld(s@G$rpGJ}6HH-o0^*8_7@E>HEZ zK<4nlFZikJTN_t(j3fPLXBqqFDjzc;wzrQOEjO}>4ci+w#*v)%arPT4!;;fZuV>Dz z%vf@C-zo5$;#FcvYMf18HEI#OmqRmC=UMdN?DNEcOq~x-H_V0#~Sjcq&7`jv9{|4K*J6Rq1T_=?r|ZH=16@&}7qg!3!aH zp&DKoRTHrmnGKLzW?c$OOc{f=#{;8jTb&DSiR&5-{%Hq~ie~C<7~d;7{4>nqC~`Ap zm1in*R=CHqvzE$vRYj+SsYCmu_QK>}=(GfR+e<%-PA+Fw+r_?M@@#~2;#)iPso*5K zC(XB0V^Z_Cg!x>^h3^6heXq~biPXJJBF>R972i#bO<_;#jwpN=?VwJ>>tp69K5P+e;i(+;YmvOepI|%?|ne~aqtTLVUj^F6^l%b z6+o}`(5o4Gt%qJJu7Y0Hop@riS@TfOJMSMyY`|jHWdM3DhF(XY*NFZfnlzGYDYs1? 
zIfo2yv=>lISm!f&iT(ehepN$a@c-MBmdg6~n0!Rr!mLmOU_OX*PIm0k3YxD`q_5c@=jto{RA;e64z1vHg}mpLtjJm#*i01$Aa!%*Kiu#k3@_ZTI8VT55|2-YCq?5>0FKE5=P*`lVNOY+=7SB<4%DJGY7`{ z;{(5wh6AsT)DH-Z0&qG4j3dA}tzek7XC3f+>M(m0_~^m3{__rMBm0|qLGnGl%%_w) zZ-VB0u-S|8{hGtbc?!M!mPeK4W$* zbsC)CvF;)|Yu0sKZL_JpuM^OHHGKAJfSgPup%~#{pg?4Y#ZR#mxf4^rtRb)1 zn*%9Cuf!N#)F%CO-lOoa$xx*VcqQ`Uz6(9*RVe(d|oE=q83v4dJ1=+ zRAT3)roixTWFM3m<+o|L9-Cd&Xi7Y7^{dJcB#X+U)LT#kj~^;Nauxnl32}8vc~~zA zB>3Unfi^j|5&!8b&Y7;#9Ps&+&=SqAbe-!r=`s4)d;A>sV3_)YqKt96!v5q5=1^9e z(5sk(HBJ#{a5oaCJPzJt;Z}EMz$KTw3b^-~=ZZv@j&=m7wPj!5m(VYq5*icpU^`BP zgUJ)M=^TjR_fymV?^@d$S=$Vn$r&9PyU3iAm$N^if799yNNi-b`>)XGerWV@Xmr0e z(WH?>(J1HFTHEy*Fh7VM_d}0wfYVqQJ^!M$@xGJtzy7Op&-vv?n|thk|M$<`mHgA^ zp2I7!f9%{{9UpD(J@L_VuS7OD4?l88s)`}Tkqr&mkULXFHY`4x!kv~PBi`8PxpyK5 z$B_dMa!{${z`7T6R;ArW^vU29jJ*=iKO^GffUhtnS7&$L;z%Txn=B2Nz@h55Bm?DPfGb7$a zCwS3kuAulcM!lMxHC5*Zh&@x#I~D2B^ZI*rIq%N7$Mz~>$}(zL4ZS8lgBEIh*}zTq zHwvRsVm!HLqhFU=54np{=^o_+b3bei_s!)+)qQhC0r7#+aW8lV9i{%{E$)c^cu7Kk z9G}FXyTG@F8h@0YyXapxW4IYZ?$f-De5uqY$QYr#=43VcJhZh#cO^Inm+xtHaVB5R z7B!%oJR!yAH+(Pvid zL3ra?biy(FY}LO?o1yUZzL>_dk9^xL=*V3rZq@Lw3wnn9alMCFw(7G6ZtA|=+Nj>O zhI&1BzkU#%|6tNHcue`Pg4>KxW1H5tLiCl1+ndmE88lp09@m#a!y;`)>_KQa8yemN z4WEUE&q6~BKY>yGg4VVG81JKziN{zPDLkN2CLZ3SqiOUeG@1d8?uJI+Rx|<+)rb3q zpwV}s(Q?OJMWch9F%(+6qFUQkGM7Jr9y#GZogO`{e>y#4&;9B22>s2UPLIa#|LOFo zK6nXw%o|^Zx{x96O&K~jB12gr@BY#B=!8Es_(N)p>fw(<&M%8UBYp;Fj(2_={x}Ek ziVi8{?hpCx`m6EjA$L9JAj_S#oQXzWI+2$H$cuuBytow}8Zh!gIp5XVwwZpT=@^ z`%uJM(Uj7cf=lQ-X?vl3NBWl2SMK}Z`lH&II?JMLP6)ed;oIs@rX~}^p%bO%#G*%iUm#iOquveuX5D8a^ziz5qt04a*2?Ek*B#Vj zm!RPe%jWeo8a7YPb`N5M%(!F7$NGu??(-94#mzCV^eivLU$AJGvnPXg*L&K(0PS{h z=G06bC;kJR(IHD2R4 zGR9k+w$Cay7^NrE{8r97`1>Nd&zB%xH0iCs-`~!pz4tNXlRc~IeDak(&Ts`e=i*Pp zbSL0p;$D9rdq3D#wT8%RSFRD4G=$^&i4tQUQ~aZ!_xbU$vJ|i8DIbd(i(Oiq$VoLZ zt}D*Is+;}kK5`N!pTr}qd6VP;*w5=v%SrX48EZ@K*-vZ;-Mn&chC7HVxZh}GZFByD zJ4O0~$-+cfuV*i*(ATd&XwqJ6xA5@^e=ym#D6Chp1}AET)=~=reh+JH|6#2YFx1>!DXwDxseUNIuT+WzJ!r&n37E zAsW}Snk8N{Xr%1lS2NZ+W3T3f@Rn)c7XWt?aGxvh&~NJ(`_3H=t31m;N3KQg$25GF ztWfMLTHCEM7qed>{VM5qyo7rbBPpdDMW;6INRN4|jXRm}Ni2C7MMro+=qLAEG_14a zmj2t){rgld#yGQl(3FQv7#^idj5`*K0{)7wu;nYFo40Ch-xnPM-CfYVk+t*E-qP)b z0e`YH%$laSBa`*8bo+0cg2_)5MD@S*$MwIBr=Z6qXdt?t_^?K6d&XSP5j?iaT^%CZ z;DKy6u(xW+0rsUjty4$ZgGXh`Y>%9ZkJ`ohHL!j**3ate-K@3E5d4XEGy5K}<}7@N zKJGe9Y`v~S?^?^+cRI{E9Ps_JsY5g7AIg}&?2~xV8)7djqo!>`hdvpQdn?j&&fyN( zv2=7L`bOwj?*>jZY}`-7nGTDNPxdz@Kf~u=j&|rz!mpVytE;rOC&tndx*RVN{blgP z!HC>HCUT^OdhQ#aj>q9U)n4d`T|1a!e-0V6q3^7|Iot

B>wxcCIt|Px|%-lVzo0 z-IkAE42`V5u0I>U??)Ned++#tKgzh(pMU?_^{h>e(A-n||JZx?;JB(Q;kR3|jUSS1 zPZAqU5|A+l%iYam!oy4^NQUr`B?J1nWZ-^xu>1n7haYvzjv;q~Y-7MWr;Tq-ncz?Z z2HV7~K5atrQfBHS=3z*?yB&J!l4NGSFUU`@y4$d;V#_2G-{0E%obJ;t*<`*+)mL@v z{y_=JXRp2XdhNCMT6-;Ix6kB&G{%kYwZpqx*#G==Zn$S;90LBePmr}cYHtMl9|SA$eEeRnPZ% zlrv-_??m98t~Iya()!=b?+u&o41CMu7vCYHaeRY}uEXW=x(C^6w;VN)=Dkp7iC;81 zD%#se6ZmdXaPG~hizm9g#@^PmFK+J?IVoRnvZb`%N9#EMcTsaG#NKI}_5OTb;BI+a zv1z{9YnCA^4akbO*Thq{jyc3L>}UDB+0U?}QfJ1J@D%9T)JAo3Z>cG!zuPb z*GEIIiKS*QW_=#Ly}dhjYsoee1If089Q>a&d&&v*D6lR*(Z~LTJxUZ*o;;fGawe2> zzqXyO-JaPR&F;c(FJ*0GBF4TgJxHICqHjGWw(RE8Jhud=jQ_3JaJkd-_6%U?I%sui zbs03!dj@;=>z0*0*%4RQ)@0PREmGHKN^&*%yC1pd%YncYZAX_;>vgBlz=d}y@OFaJ z5^x%=aT=Xm0!}-NIE{`aZw$P;QS70&|1jox#w-Abi@`zL9dOtQ4oAS@QgFBw9O}7` zTLKOSmiYdo!2$dD*REZ|SK#h6%X=(cJEuM;@*7MJ?8h#*a#kiLQ*Rj=YtJ?gtq)n* z^^rLD1_x9%`eN=5-2LH(z^gB~yv0Y%KJUcIn?(M)82@hL7A?Q4xC^emc#HTuvd@p7 zQ<8^x2ER6iJcQtBSn$AS*Ydz!hdta!ZMZJ*DsM6L9w9z^DD9|_jjzdAt9AzSF}@8y z8(c)5F6Qo8azQhhuo?Bv}TXGsw<b z%?%~K`oBNXA-GA7YpLGNib(mJo@JN&=7M+2Bjhb26Pv^0H@I_gp7rKBKC?8}i3oAk zjH6EU5odBrbNw>kn|+zTzuXOdkO6P(_)E=RbDVL_{QGgnB^lR23{qk%9SQn>L63Qf zl2C*N#>{B zmR_lK#8p4=@a7KIgWV3^pBCtE+)WSNL2{i7#G2hc%CS0m-^}|sY(y+KsN-rK+`-m_ z$@{l;k2yEyJR$bbR|2nIMBG+-X@1HXl0K=rPhF5V4Ikv}>}}ojsYQn`w@Q6#XXT0K z<@?mE7J3{5U*g-#6nwz{7z{gVKX$9H#mI#8U6mf~PY^5ogjzt-!&TPwP&AsoJ;9r0 zC#8O27t{yeFmK=o+3Vn+i5s)V33pKIC$tASqt;KJJ(}KB(pxeV+MJbKeK|{aB4=^z zV;B3311`k&sL@y(>#Z?sllpv;$H?Dx%&i`{HNXu4H@Ba&c*@tGswJAu&vxh`l~MM5 z_Fj4`EzgoG30$v#RBBw{)du8pJTMx7aU9$>xwy$4__=VCeS(@Bu^2t5X8U1)M+`jB zA+4+EM0OB7_Jc<+c<6qb;PE7QK%W>ik3C5*AK_i4Ob>9r5}7XI*UOz#-pze(qhD>_ z&JJP+X2ZY1PQxeUDmG5~A|D^E-)Gk!AAR?2d+7C9aMULqsWSdQiH`5?cKhUUPSE^+ zrE#dkXJ0(H9{%rnzY`{KlNW{s% zqfZ0Any&ddt+D@B?9ydcT-RY`e6jXr=R^PgYR5zWu6sRpq7$h!cTdnR7nk~+?91#I z?A*>fv7b~ec;$Sl3)*aY8|(&F54e5`y>HK8r`V5v;SlHd`n@ABn~=k1 z_M>G96aVjEKeC{u&{g2l>oImaG!WP!<{F2c5SX3l&L_aV4Y;2G_XFTcZ%BHQCm#at zlfb@`{V?W~mPcqNzGSE2_EvTJ}ZEWUO(U7#<$SN?cr_a{eZsBm+zPLPl^2v z>zChelbmeiEJ*!B*3!Ql8AIRcRXVQuA^gc0G6rv&`MqK6AJ1HqKXYd4*diUCm?nL{ zWIx$@>by7FPn{7Me-b@zV-2rE*UO=+BQVc1db4+o>A$@jy_tpH>_%^Pqc_Z-1fTu} z^d^ShG^01yB7fI2{(slD%G5z4DXQtNkx5UXaQ=j`=)1PG4fP?l}Id zp%>b7zFugO6DE3+yDrc_r%!_xI$Ze|W9^(Ld-ue_!X{WWNYm zR@Q34wsmH7oPqTu<;<}hUtZo;PK=O4j?EK0Cup zXoj503w{ ztib!4`K5WsjxzI7TjZ;kGxiC_j%Dm8+_QVO=}BbxN&J>i3+Q=w-qeN6_O@_L@Is4atf% z+=d?@ zv3I;>(|v?B-7P!&X(X;@Ek|}{yRw5W$$r|9CcY8o4V1%KC4b|Go^Hd-;{0L*YgyAX zg2!}t;QDk8DXI0v_y|5{BB$#ewR0Z-6ujqvxA6aR_C}2Z-~NJcpXA#o`Buv>I5b$u zFZ<)B$N?RT8)^P~>Cq=W1%teS6CR1JIo`^-vI~8~-)-*3_i=sILGE1pLUGlR->>E| zkNB#oICom`Uvfw#->vJ0regosaEgAPp|rBYIi>rLztS{xAYx^&uz3^NlG63}=p(N8 zwO)K>+25Ljb@WmHT7&dazaO5j4L-B&*^BWt?lb;x1M(U>gTHYmaQEdc(d;Mjg!+k- zQXkkQS^X3|XW0og4m_pjw3b!Q47{7wI;GDklXNfmwhh3Rv&sbKyXc$77ySe|9X+f0 zpCu=8Le;3(Wsj6xzfSz7mT}be#9vs(e%VP(F_&`RSF>M+bxuufv)sQlBR8{w*#JGO zUAlg4*7*OD=Pt!z52HTo8xpI;PwbB{E|QP0qUU*YH|J#fz&G>%4furDc(fQ4eV4r+ zeV+)tI{p(M^J2?3eVf@UC8i4)mCA8+L93Sif4 zHT&tuoW0Kvj2=H*^iOIyqGJnKdw^bc?d-|xh^f^Fs**H7_-e1z$61Pwa%PsfKIYC+ zYOXxWT;wh6zL@-lCb{PZx4jAOs?mog;voyM`!};E9uDyyD9W27?B`tRKd`WI&Qjoe z{h^xz(km_)YlNN?suys!SfEVrg*uZ9ifs_w_A+;Sin~wFYe88jb5M7C?ZQr5<9aR_ zdtDgy=uS_B7y|m)9&%&0z?hCK%6bLH0v}&HME-8TPQnAhp}v#a<=FA<=-$9=7YFS4 z!P!=JUD#0vQ)%^rC3mgEVU^Zp`r!2#oci1GH@A1{c%ZH$zK(l9#-mEX25>3rLTNmA z817v;I5FOC+a7YqBU{r`cH0YeiB%)PAh+}l=|d9q=FZt|JvRO81Hlflvz|Pc<}!U| zin#LqcE?!-Zq4w1?ZLFB+b~?|3DrhVs8uC6X3b6Ry!2}^Yc+BjMMqG^}t!k+UJ7DV%EGN5NJlW8WvX2%Z#BOF!YkS&6x9aKaPz?2G23B zd@f|VnLOY(8=33kK%k5_T28Q2o}9Gjs5{VQ-r#NTcI5>7w2*tDxya8#E29p!N<3n? 
zocxCV5;xvx`b%7l&W=Y11N^*bo4K3bJC}d2AjLYx z-AMhrdD!_Ma-R0XEYb#;3!@1&H<5uCC76d>qeYlP8&4m$x0qW00&irQLJoOHcgtC1 z>M(J{o5@9eJ8ZYTosX{)IMRE2AMz|QE0O1=$n(CysOB}u*r}>2(qgAJEcMHCaSXj4 zTzQr;dy&hf%q@8kjAlA(QmPIlV7rWjET6+E|xXw-lt8%$Vd-Wc_K*_)}XG z>XegFr)u&F+>-oTY^LnP@Mk~#nJD`({-4m4xXo+rMn_M-3(UpvXK^T{HrT{91EU66 zQ_((6XKtb4VHpPvpJQFy87n<{_E~n@Oe?`X*O+GTc=PFKwCxT26`>7(Ygp`~%_!ju z?zJeuilKdWnzeO1v!yYPm2pf^fTQYg|t9Uf~t1f!VM$+n3I38~E^%>aQp7tG+OHpXjvgqkJzpUguY{kCLY)czvHSU!?5p&kB9? z*h|ynGOx~;esSRSua4Em_$0r_zVE|J;B1$9>r4-FV62^%x9Mv+t(iWLbAF86F|L{4 z8yHuVyN{@U{@c31N~yC{!i9YRTzU*H{Wqkowx6LF8WuAA2zLQ~So`Unp>5Mmz@grG z2@bWqfg?6T)-)$=XXn@nb((Y9z4&bO+|)hgrp>2sX5OL4B5TDbmfm>$n_iy>TVOWH z`rLhSHGKaOw75PMU-jFn^UL=|zLoWSRE3iTdA#@e&MnDX#NfAL%DFB;hzJziiA zxNF(Y&$Zz7v#_0gxFw;!aOk00>-HoBuSblmBw1hVUZ0#w?ZY|M#H`?HyMfVv><5jL zk6nKGWP0Z4UXM5I+bDF=y&i9crk;$;_d=6;e$pe7+}m{9k0 zSM*~$KjClxjn(IMH1YrPoyfww)MP&5>bc=1dpNw@&O9Oyy0@-x9@ZH9HS6>C*S7+J zmDjU(cTE0~-m^7*n6KW@wErUJ+v~Ic^8Kay`^UDuhb?+6ZuV69Uhourp;P~xqXKuG z2@Ka^p~ay9Z`9Z~dKj~aZ{H7FZ4Wr{Rg3UV>3KNkN7y2vF~2t~^69Y9;}GK)1*-J; zCNuu@`^eSUPtLFjPk z-zI!0{gJDytCQDPk4~=T{$LJ!%p&A*5qbn2iaZm(zgT5#a+B*9h;0$O^lR{XGwi7E zw`A0ilQ(t=pI}ZGZ!rpv>IdHox(mv)QTG71!__BaU#qlztp_JRyz{#=>U!$9VwVR+ z{$$K*_#yFJso}bp`Q<$Q5%Zs%kWlv&((0p7UiZYDQavSk?C@WHC_T8)o73of^83_j zoyz*&rw{8+_TwXg(XnIP6&wRDGWPi(KFZg{N0}e^TzGyh_lGjqJHS6QFQG0;@h&Hq zS4(&g@M54kxif5Mzq=={ju4Z)G;~4%FDbprUgw@~YnXha6KVB&RC>2gfu^IO=Xqr` zy~OovM3;6VUvD9wBFjsl>As;Gx4ab?Q^wy1xVt`v{l6@gQZHErHS;?Lr0+PlKUEWvu^eYLzDBX$LKdIH?W_Ht=mZ+)D(18 zelGFxUgYiw^IppOFSQHmBl6I<(>YYwpbf%QyXnto|!4O(W2!->p3+}*hQKHxlp-TVP@ zu>MSiT-7I4$M`FB~@3kAG?01<6Oi|J&d%zTDZy`tdhorzUR& ze|*%_N2|spIX|ow{)vz21ka0!pXmh048vg<{?iQp`{W%8{16$HaRQ@q?Pd782zV>8}5QWBjGR_-Ip`|<%|pS|4#icw~e9R!5Hct zjH&s#dMh~QICl?sAF2LYOv|y@yoKm7bgYe7S-U%~YQrhjYUR~Z>`|IGpS!R-`{D_; zuPX%|^Xe${llMs{)7ZzpxcbP>sC)2>hx2;0zKfQ5gr1)74_()e?zY-AP4PYC9&ei0`Z`)V4J@yG5xtFCe#WY(so?s9o~euo=_`pY1~~~Rhz7= zk$oloz#a1Pkb&ZUwjxAb7O1F828ZUA?Ai1cN0ug^QS{g$4W@iWr)h~xP# zyCsoOcV*IQWnobJ1_Qzi&YRoK8Z<8!uWB4RLJs?pw1dx)QIFYj<`=$kQeRZ8O9M#g6F-s#+UuqV594Lmp>+6i@4I<0<+-n|u^T(&=#XkhU7 z__>lhi7m*Ihd=U&EKE9!9F@-y=&_?qn~#asHFql zrKXtU#xS1Z?qh{TJ}q?gY98@#XOV?+dTg$fS~sRI!e(UgE&3w(?Tf5sukimC`~`m6 z+zEv7k-F+KD%?`gwcjKUZcob2ZiYsCkpC<4rP^;^UasJCHaf+EyAB!&XSk6414E?C-FUEj~e&A28Yw> zh@I_sca>6uT5=WK$ z%F8($jtxG#O=77Ng1y^5!CqX>9xS*81vhBEmpw>$b$@W|Fggoeh@Tc~vpmFiL7uksW2GCh~<(SK{sCBMi?KC8*^nPrjB zN_<&jw7O<(zK{Gfa({`PNseWwY8=XwZ<-I2t7&JHgPckp9rKYhR-<#qWUdEUkEtVr zUyWT_a58ZBPhK2FzSD%*zsxW(-Wt9;^NqktiQ#`frt$RjlmFL0j}2ChZwUn6_&@(2 zVvW;GocBq|k0U23c%F$_F2klCPq}%_$nb3LRf$zMgG0mZtXJSloL6EKzpJi_JxAPA z<_Hmo2*S4qq1i0P%*w}CK_gAO!Ia(BXT^zwPH2``uF&o$HGywQ{9SrzOJ2a+a^6CA zWL-8mby2Uji8wtxI1LWj-n^qaa{KA+DY?cbFFW65W%HbK>~LD;Q+e_hYm)+B=huBL z@T$O;*lM4^o)Y+6;)@S`aptwAFV5U8-^u)v2O!_cUQK+t?c%W5Io|zn7toIHAu%t0 zZ}_;nszGAOZ;^Lc6R@^Pj7HY~_X3-kvgG5Y>jJNBuQUD9iH~P(aP9)8#Qw*VOEW=oY0C01Hk?mR$@_nNP4w*ffBNIy*OEW>Aak$& z^_i(2KSZX5&%&4g3O=F-UFk)5{Xk?D)Eb z$%E`J=$ww=nKO5f1@1+}e12E`#I%*j>XV1?<3#5V>710O>T^}aoRt6Vno)l*{K5ad zM)H78A12ntx7!aR7vx3Kn-l$?mh8YDSlB6lex<;bG1Ja7yq9yl_4_}**8clHO1!@x z{yw~yUT@Kn>&>#Zr(a=g{Px4$s{7qyvPwt!e#RJ6l zF6AyDbF|wXB^vn1Ai!PFv%8E*N`V7?*HYwLapgcS32kC?xNF z4U8G^OV%qe+QGx4W2Y}xFF0I6tbS1Jaf4J)U#rVeTq6gdKG_l~y|}=rRJV z(z@u?>fR9ovwso$_>1^$z-neab2i{VF}|7Kb^Of3r8Iu5pDv8I+g=R0$!7w+j={`>c&8|-miww#r?S-SH6GIaSX25@55i}@uiPaw>R=+6cXgDUeIkqCUS=~&1i#Ly9N0BET+w`x& zj0u#*HYFY^cd<`1H*dVpF|K^Bci@l2{v|MSFxlHHG5c?un7!DF@#u3kKhKcoKY*XHZ!WPgH)emy z;=o-$BxZjIy{_@a?1?v?ejHo!xD!|BP0Xlc%q4pF06OljpP1o!I%e1zs2VS}<4>Kl 
ze(acwf6l=VrnxPlsqF{)cpf`g4^2f6^#kV_FLTC$!1p_fj(=OsqeGqF0iW1&ApAq(&Gl3er-`wdNZLU5 zHhcSbLQ|HgN?IFd8czjZ1!nFeoX6EPJ2Z)R9d_9Gul=hLzuD4O>;=De6#Z4%ufZ$k zNIZueiRS=q5?iM6zm0UVP74kGt{``sCkGp|LcHM|?938xxL|)t$gOC@sz!4j|%-seLtoyX^-$Z$A#Fv@U=%a z+BN*s;Nc$aEbxu|a`vzECvrdgHYTyJhgx=(+*rifB9;a_zj;Hu8JMF zqT2)4@$^;F^}|)QpAoLIJ}Yq51g_K*a3y*-#^742dVc*ixEk^L4Y-c(I4cgW)%pJ# zu3mxb_^-p&Pu+uI?Pr_^e+ykj8^Osm_=L!nPGWD$m<_@#1W%2aj}4WtzGtM-1TQ$JPC>VB zJJ2~@+21f-r-l3DXyJS%9O0Z1ofh&{&jnu_vo-n|3o%+)PuVeA@FSPUeghrMknuYI z>*H{`v9Yj*Q}vuS6?ghjfn6`B@#drwD+@7}+a_(bl;%8{{>vXHfpF1^N( zwbk$)@A|vte40M%#SN=-RL(@j{$coPrt9cZ@^w8gc zX-xloWDf6!O-#|}BSp+d^7Q#ghCUw&(I)g^v5#a#b>`6>@6I%0NL3T#%X_TEr~Sh5n9Mi)W3U%sd$iR%Ln{3DNIveG_w*2N@B{w;UH%XCMt1Q3&s@8?J|Nv5((RLU z!@QBd^KB2`J|TUdq#y1Le!>5P^8YMvWFP-O;p*f1f^O`wRxNc2CVy1FZb1ztG>SBCE-s~$ulwC+n2#}KF=>I z;qkeNp2%?Wq?*Fj=v8<8fi*aEE7qC9kqo|Po5IQ*;6E2%oP6HTr|h{&p7C>%JawGs zRKa;py%S2peUnPUk@5VW!2gTJczZA7TM=!0F4Nmv%KaadcNzbSOyS;^%YA*5D(!V6 zlkK%zRbOphvS)m5vZoq5QQs`u#@Q})pUYYxb(WH+g6lf2S+u2|HXdVa@8jPse7lkQ z_L9Gc{NT-8&L=7vs%~(gC-yc`?>Q=G>2S~E-okwxFsdS76W7CBE#xn;cnUZ(Dre`` z>1uQ32J`>xoZ;ZysJX*ZH_Um0^P!$E2|LTNNxsDWTJEoKucVyk$^R19E2eN@;T67M zUwM7a&58BS4wFyL>T=#p+HKr#C+!x}Z8Pa-V>qEXp^fi(Pp&FMwnXxwL1<%q=_Z?vUx)>ht{#-|R6uL!x z`cd$NQ_4(do(OO1B3_)#F-2CroHqp8AJwtD?5kDYk);N$Cny)=H*!p&{_Sz}j;`Qw zUZAX;0-YZ*r$Fc5H_k?*KRx^_CyurFic7!cz33ukkvQh3w7 z@Qn!fzreSpJ+b!x4gMb~6~DM-jT-kKN9Wg~Ql^yU}BgUT5gkc{T9PC=Td$+bY>3RrG!@I#L#Tv&)=Ww|%TK`y=Ka zl2>of*+zSMNP7smB6gg8G6zTRm%O4I<&ftH{X2?mjg%_B0QHH_l=w7;$ai916mA6r zee(~&jw3J_8ZDo+CnP;{e6jV2ZT};5XsIJ$8kMqyy*W0sl9DC%QOOr;7q+LplN8^p zr8jSuc6~(uXzjcbU*`RmtiV;o{WfcL0)x5e8oEq9U-2ED1bn~+ zo2pC^-w(l4NmB+ri=S5PzcX51BIBmF?Yse44ZtWvhta8RmvteDNuQ!s~_-TIhh zF;3|nC+0bPx0=VMT*kNy>}2e*mCXJH?a{{%Jk`q_Gf=0D1vWi{E}}FbYfz%M>Em>Z z!#9Q)zA|2G7_YClDXSlz=-HD`&@QQ4@UPXJUMKOis$%1Cn(~Dwd{dcHUWE3=mpPZc zBm!%96qZRbSmqA&D;|~~(+BB`4li)|7-J}FBu9az{tX^*I@oPze7{CBz$Tt%1lMA? z8KW1WAN3Q7Gccwj@JVTbz(VYWLGS_lM%_GMB6jRiz&I#$#dmba5rHZN{x+tr3%=gO z|9HBKl>sjLV&#Z^H-?)v=FEz*^DS8#`J~=(zQZTHUY@0I@-A&SQcC^_U#}C|o!xD+ z6qY3`61z66T^NNWZQBc95I6`7rElnk%;`s`umCtBLs3t`iNmBf=E%ZR()w{#Q__eV zl37|ROzQ1e!iT1#52+5z@y|9c_8mC zJRc{G*hwPr8Tl@C9;V+q9B0IE-ryPf_~?FS3b0W-dVxb3a1i_`V0}w)MA}-0Z_i=i zcVmop;$)PH?9t1P(v!h=gmidz?TvKA)33yLy!;s_JkGX4u+;M=caSK(X--G4#+XA3xwr568%a@GNB z@n3K;hZEb2-p*XZA6;~t0tfLLXlEg9mO91H$X0yzW*wfz#`yP-MdHA_`y6E?>o|qV z+0`$dTzoaUSp_;;Il9_Rbha|)`d2a6H~esIoP)fHIXSxi8P8=n5_i^{%*)GI3oKwQ zFay{gEmC~nG*54k7{d6MSuE@a8OlEWG}iG~;7@|iEBj4o zNZU!fDDPKw4_@lIBl@puDCZ!-e@RpS?68L`IoqRr2tHib;5W5Q9a_$NVM;1~S(5MZ zOFdV|^2r_#c&+4HHuIO2__Ne?61!7RxAs!cvRFFeC$9%)nhO56EUmO&A+eLKbt>P= z#D{0orJi!q1~_xu#@^SN=RT;fy-LjG`=ATmR|j0LMwW=rj>I~2Updo`9b>Zi2CscL zvZ{@ED(%c|vhk-1;luPkuy?aIGp6TuyK+ZE!nA@&jbHqh&o@E6RFhS1lcWxtd&k?PuzVT18X;ade` zS@+Wc{Lt~~S_3Z>+N1xkztq#0p@j8+tg#EuG_udXI|(}%aEYB+wEttqg8d^}6fW(| zmjW4zPiQK!NAWx)?aq(3oA&Rc?H|oD+Am{?j9D4LMl9{$BJ(@^E56Ck?4UUr8I#1k zV!+od1dj+KcaFjDkHaGr_BgD**Ss>%%vnsS4)4dA4)2@H$=Xd0do%Zg$@sSwfBIvP()mG5>OW`ZvmXq-N+!;!=rU?j+(|$e83J*F|=IMBNVN z{^&r4JrTN)bKJcX#ZG8RDO593DlG43_L{8~wFRTHWS_U4wJQs9W(z)$?Ni*VS7JNd zB>QgI+b!$#@#X(X%EvyDN%>XDhP`r&DYI|$#MP{=$$3_zJ6>CW&5-zH+Di?YnksXg zjFdv^44OF$?7UG~;-78G zTTB^!FHc+*?fuTxl%)oHPr~R-voQ7^p-SwqnjFPgSt*DdFrK^M5O& z@4D50uX6IxWeH*1oO3*Z6eVnHJjWBcDj^&xO$Y~y62b>762ks@d3O5k>n=6?cw`

73|9FH+d$>+uMZC$tTI)-fi-(@ox6?UZ;e^Hzs;}*Q*|4 zi}7ss_L}*AOM-V1WmxdvY$g_hmA0jD$vW4v`g2CMUXLGBYU|BQ;R7NYXR^oOEokq8 zT=oZCqg=mbj52>KI53>_@~xHqPoE*fH{g?xY@2;2@pguxV@`C=(}#{4%exnv zUNP?N%;!!dPkDBRvyjhtt6Bx1HP`4fV<-DaP zgpMLQn;s)te69}8 zw8uvM{%s29EH{kYfse6#YhawUMAn?CCl@-EcFLMg0N*QtHFkoqAAWQLcGMi~P2zKu zZDAiP?Uwwa&-Ji}S=I~ny^mD`<3riA<$KR?hi^6c1>Xhlb$sWHsH4;=^%d0L8wyfi zA@%iA-%Q3r_Ssb9Ut7ffE!jg+fqqs2?pC1J)iD+>#-fY4X9?q~f!FSj*$WO%_x{8Y zmUh&E*Dmn754^5H*2tb8=SRemrN1e_{%q37{f@Qj^{sQ1`4^IxGx0Jx6R&b;PTK9STmx}zBQ?d6)s z^+&GXay`VA3~np{rfr9krv|}&!DWe+CU~5|9_<)y9TMGEXp1?2JZl2$=G*9IxYrT8-13Tyy&vw(e*W<5cCq`edrfD?{mIA z5D!7cZWiNxKN1`OH_t@dx5EpBM*Yx643D~i&0zZWl1}(#Ja3RWfdyC>K|>O!TIfu8 zh7*4egVwQ;vByQ~K4#7=EUdq8fd7%FdQW8b&FF#H>-vQM9HWm%Xs__L!K z&S`psa{<9)4ITJj;coSd_Vv<3j*!cg-gjuYBfOS5c~9x( zVL$p_>9Z$S9bCxx^kL)T41@swUb4TwTKF{j;&1Qn_u=?eybp))7xXiH2ChIyy&PS& z6dkSvU9K3r&Kr7+V$o5v)%3nLWc?g;Me*Mho$K{S)s=pH5!>*GsyU6mc!0J$`}M`t zJ}F1^MTvo}DZl~#C-$aTA4bOYL6b8DhGV;o?o(O0SKaT2rtUGNwX`qzaGG0u>kj-~ z_n#A3x0!AY4UsC#+yUQeA`cbI&7jaX=G3EfAKAge`lVX5%>69hgam} zsIJ_WsoclRF7_U}@8_XuB9nRzy~Itw-N+1`PFT}DvtE+c@#%ErHF8#g-<)ZF;|4zh zQ939DAI6iGGxRElAh$N4Z{CS7)jN%8x;*{@IT%3>et{eeM&+Q!{7~p)ICLv(19JZo zIhZ|Hng6rgcQemoKW4&e&i#fTpEcnsdp2j;sKE7m$i8=>_x-{@Id5hwc*EXPeCDwi zBje_fW*0D%Z+`e{_Gn}sa!~qGWIC&_ftWaw&cXdlWMMXCn5g50^WM$ei7XucUB_d8 zuD@$@tet^lkxepg5qQ3z`O*k*LFN%b;A;ccR^jo$(+`iY!nW4~t{0F-bgo98-(ub+ zI5*y+pP$>em$v9KV(6$ANmm#ob|mw}LV0FjH< zjG-Q9{nv)imlqj`4QPJx0VBqY*cv6iPR=)s$8>OAtxQ%v6 zJ1MUSS=$ACQ{n%Q;hW%O&HRjVmUO~zzDdrWTx_)E3EGbA`da&{8CP&(?Yh{c+lBYicineh{IBSP#3JdTZ57m+%^2rM969!z7W1tQ zAA4+Q8|Of0yIxgSIy8y<$oas$zr%ZHvhm)?o|CuafAn4D{~Pi@iF5Vfa{@ajdPp;V zV}gq|bblG&Gqp{NYuj4dajvxOG;P{I9c`>J#oFZMJtz7uZThYJXO1TSrA=$)KksgA z-1>O#ci7v}JsLQ7T=D`0iP>QUGhj$mS ziho~A-RR}li%-uw$}Lcys;t46$97(Krt^66RH#TlNZ zoXs}A#yPFrlG5T(v)>PZ6VBTeyR&0OrSpzw7qC9yb3VJQ(s}1|3!D>`&8~A4dpvK@ z;BAtgb*gu@8JBq!dH4454vFlD3vrhPc==IK1 z;sJ~e>2WaNd&-ce>8?B_e1zvcE0RLh=P-U(;`2GK0hn7iR4+-{;Cf35yWZ#9oeeXb zdo|ia8hksQPg^{$H!ODSP+=E7nujT)ZAHuU>ZeJEpK0}TQpPg;gqL%z;Cf_w+Y-}; zwk75b=yZ%9<=}I8_poF;I?qUF682*UxB`y24k(-XR_VM}vAZtiS_l4J%J&jA)Xevq z)rwv4IA?z8$?Co82}?EK7MK>f9yM)tz6`E`bIgV9?hBV!x-WWu0k&hGz*EP&53dSC zbK%G&aQ*30_9R&PL?7{+J7)x$tFd++Rvv7e?mS1)?O8o$#Z!;26R7GmeD0ZF{Pkz% zl~ZZQy?Ltr-Y2A8i6Ph4gplj!Np{x~lU>K#duZF%#1op8e*9SR6;pq1T9Lk9Fq{0Y zhfMZ{1Z8-2iS@+2FPiKwlgWPXF6$zJfyR8o`J&>xcbCa7bSZu5{&aa|*>le>pzXf) z3!tST8x{m6DVrnN^kdxBXYR*bB`kWV^h4G#uN>5ur)i5V3NIb!<`;QHFA*4N7V1tV zw%oTU|Dr+Vub}*&mn3@HJl^SKJ0NvZp4rM&D5hn+qZLv2OB&J+V(f; zJnBnLRdkviOxrq6)R&(04_8hi5A0ILHBv@Kw2V2FF}%Ux5$r4K=MOvK5AVVs?uOTL zuBLK7W#m&vzG^QM+-Ozp-B&zV$y#-Nq(Zd|j#n4lc`H7zCmBPJsfSo0+f>%{jkT(u zUy6;awaZ?1F8kghopv|(d-61o^s$@g>UD`89ak0A({UGgicdTESm|7?`dkZ=gjOq^ z_kri-=K7gD*Q`^0tZR&{onrFA7wp!YhRWI{W@3kO$3NSJ@454s>T_C??XF|ydiDnR zoTD_mYoum(W^!%ixx`|3-pu#=Og^uaInio&;on_zrNv`Y*%!Oi7$)C)*Yzo^TyYs2AAe{HSY& z+;eNXX0);QLI2iFS)oPltFboA^q5zz&IK8 z7(9eM#L&0&p&fay^N^=!ow*-Ro!XDH2i0*ub-1JD2W-uiWzRs{QJ${-dY~?Yr~mu) zm%D-Ztmgg|fQg<=)F4r+=eHgYf>d%#TWbNtN zpztq=a=&pcyeUQBi{_;L@oV7O=VEt~c^^8CAHIO!&nm%pC%TL5y&K%$wO3-d#mE1f z?6!xRu+>U=`2YXvr1+#8I*`F%p~o=$r^_kOwan#nVS@hztfwa6YF z9zRa9yO1BXN01>=xlzOans?BfkRJiFX7AmSXm{SRtTOPDiS{Jg1M?C+>2nlMZ+4Qs z*6FTf{k}f3Bhfypi&)7nz71JXX&u(KptnW!W#p=!;2yqd(QoTj-{?lZ6;Y>#U!Ik8 zzgK-@kfp&^zTNR)Wo;tgcB{V3Vq!8xka5ljiDlHbAke4!l=Ozm@6!j_JK|4PSp!hR zW7|xg6T{VT5A6#66dj^kVkxC}{FCSEZSu^XS9#87-Ilc~zdSd|^XQHb<$0exkLmcE zJeQhy&g}Sr=SY#oeuDBNsa#oH8C?4`dv{J_WpI6>$F_g?q5!&M;4^f`Stg&%!vfDF 
z)JJZy2;4jp#{^y)7FixFO0a9!^8X-X+Gn;0_L+4U23r&CmU4GxFdrCJB%g~|>UDTlCEB%eRU&jVz6nI=O}D`BhOLT{F>)5`J95+&_mhh==q5aL_x~h>_h)o?sDi{@5d+%c^FxK|9%`O;zo$Y(@7%H$zu-PgNc>A8YfSdvmk=j~I)cP| znYqGUS+L>R1qDA`^LT0EU9Y6XY4V2o7H$&%i)%xImuiv*7SJd8S`I_;u&YGwqf=JUQ}A`-Dc$ z=J^P9mVtADJyrupOB3rOEmlKMD`Fp9499;T#qq_7_0`N*1;4g}6&+afl)#H* zP3||p5qqcRS^3WR*DcsSYopLYgvbW0} z9u|;$)$4ZK5blvb+MRN5{iD5I?nQsJ2lVf6+HFJS`73{x66Ic8}@+QU2oWJ z!?q2#@%v9H`c~*FEkmvahS;$$aWC38@n1Z(7huvTlC(7IT@b9 z9g(xf*E<)*Y&?27*D_ySLA-A0um)M_JdkX^PUPx!*qwp1&Y#)eEo+C_@(!O4?Be}= zM>tE~gvb8tKPq`y+f38b0zYDr_sjoTO8pb@-Fa{ck0sQ9k@E> zz0?s-kavNvyidgrDeu5JaENzwgcEo0F7TFj_B({+owhjTowgj3cY(jWuXlvMly}A< zaFF*^#4VC{84r1HK{u6m#>FY`E=TwyJw4+i??+kV(9^$Z4;LTI6?2)c_y@>3;C~k}%K*wSHKgM@|Hfipb^bcS=!47f>@As_Zojld+RF7+kYS-s) z-HDz{slzGdGyl4qaTZ=S12p8 zErD2UXR%f_g7veJ#Gy>UU$WoEG@vszogEV0=C|lIQg$orm^s7CZ z-Nf0U_*P24m`et^2VLkre_CGIeD!k+g6R1(;qL{@pIs1q342Eoa~XKP?N7^hxvzfi zK;Xi+54eYXaX@?rBkL)X`~moT4gA16v~5A~GGehxnXSMRyGirglsQZB75sF0rNqsT z5W9X8{|^h_-ms!_`-K}81nU%E^Dy%KU8xtI9RN;&6z?d0BU&@RDtIElGIE zHhY=ep*^?Up}B6kLu*h$D;TFr?4&Y| zk8*$RRGahQHQY$4%!iEFvTeyTyhUfRzlpgb{N;DKXC-^uwkbQ8(5K5U{c4pT-}tqR zbx}@SnsbdbCfa-%X&kXMW8>148EM4tYa(gdv}ie_mHoNQx%9fGoAtW3lBR$(O3FZ* z{`NSHawJWTl*2w{y*)$X>iVUTW-e(m;@fj>T$+_enmpP(j5H0g_Qd!7H6zUu%1I=R zpBUBMJe7~_5|v=w{* zkFO%D%6S(aFW&^$5=j$jiSoHM=)MmA-3Z=EnpYHiWV}j@1Mp#zYCrriSCh%^z|QP1 zwVoh`f<3a^WIz0qB>Ul1%^u0pbX_7a-sFpHY`@KSqE2 z^y8(3MbdV?y+4t;r$IZ!jq@Eon9RJzpCB-C<}Ej$)5Ijp2=u?`L}EPQ}JytSHi2~+x~Q!*ZH*O zbze){iS5$g_QK8??p@^}qyCCecV%{{bIDWFAKx2o!`yGzhWq2&zh?2il<5J?xs$*S&x`7YwR%#kHZw*G22R z@7vW`8eivqO1LP#&Zn=XpToTF2dMLbfjS?@hJ5c5Z~?shInU0AzFnQ;;_JkooEKl` zxbM-=v%K!5)VVaS&bidNB3dWU&PTspotF4IA63GM@pV3ZJ^dWvbw5F!PsG)^l{!~P z>*U$_)VHhi)0#LM#I_mWeSijM(a%(`dmVMI8>rJ=UhE?tjqh>nS@(8C>*m?{+`p)M ztb{3{m?yZLU%lQ>@4qMC*XcbGdQY_9 zV@mQkZ-1jd?e9q2Fc&&}D~>nZo@8$|CE18=Z`@+(9IP>BiC2S$S(BtTo_r4$P zC(q8m{fmAM;&YG1_w#Q`_>uU2K7BLen(1}_gMR*Fpr7uo#lGf|SJ`W1ooK7-yN9*o zC%BvsFh`E^x%*Go?+23Yt$E4JXGqWG{Kr80`KL?&57IB8j=jmAX0BYdctgI$v4K5( z8&p-@@Hn=Ddx`s-A#}uZHSvGtoSbYWqpuU5uC%E;A3>(b`&xYRy5Ya9{~`ypjvM0p zNf}OJA-T!#T zgPwVob*674YbMf`ep$mB)J5@aqg;uvc(%(LYzNM4Tb`|8~$Ub@e=}8Vk;9JM|_Iq z{uigLpNj9**V7YQi~Z?le4P`}krL6BIP+j}GUxEAp49isyy9c+z?Vqoo=;x%NabkO z#bvEc%KLGfvNAi3bqV&aEX3v~Y4Nw4&Dk1u8*zhc@duK9XA(2Gi2c`d@b9moee%ur z!egs7_F`wNqwCh<%Ov|84x&HYK-rJ|vy3>!iYGnS*aLVtS;2>UOy64cVPLRJ=DVUF ziET-2Uw7BC?t>q=CVL8oD`BH8WBM9sH|w{az3bVV+ho{)#RttcSy?H*lHHt{E&7JS z-uKsmk?e0(8YEq6$1S`EG92Lux&!-4!XKF6W6V)#bGQs2Cv+*kamEtuevEcYdmdX? 
z<_*#gtD3r_{AB54eFxPS>uz}bKsm7xs_=bS3v9#}UG$iA(i`nG;?mawvzfrG4w$j_ zI=}XXrK_ueSr0I)r5^b<6PRIZWv|5Ox?H}qzc}qB>K6ZD@ja2UyLnGlTQg&OAMFX~ z%=G8v;;R_jD;eV}80*XNYcKUO_WE4#o7ZX1-+7q*xw0o$;!a6Cc!ATgrd$teH^1v& zyJ^Y{*@s+ZR}|umr8dm?0sltlYuIq}fahq=dC=oj?&_RITSuwY-j~#l8~dNrI<~%OeBaBrYW69oiC?&+%g^&j%$x>rP4+7U=&v3pi+?&@>Sd*O z{Eh$mKRYxV_wRM7gL8J|TlI5aty+nnw>)?AT{)RNRi0nt*&%y?dG_&a*oV{(d4Hbw z!&hA%K5S7o-3)$|;UgFGn{%@#P}(_Zxbp78l6+4k|BqCVmh{_&hOrrF{4Y3pCfV5f9^3W z`W~ss1jcQALO4F3JAEMEZ1NqRaC!J1aH0KF-nps0pGO*TQN&m0k)OaLsb@~Ko=xQQ z1Cs#%vXyj0E-5KHpL|^8Qo1ge0s>#~N95A;alO4{Yqv`vWZ<3U=M1Bno)R!GFlm%PGI_;DP?E(#P3bhB9X~{;#~Id?+>GyiSSoNVfZ#k4v5RWX9T> z;<-z2Yl^3YIwh@x`}>sVWKQMAS6kY5BXe`z&pHl=%dE;2_J0?~@Z>@AD757s&ghHL zK?C1@$hbJjC;pr_$a5evxq8D_y02#}@76sV^miGfnQ`f_=2_@OY{#)OF6UcLAO54y zoLa`Z4+qMa7FWh)JjZbOt!E6Hyy3IRrhDSboFM6YBa_?OPc75*_kl9=|N9f*RZ?cA@Y&yjH>rn99t^Z5wD1+7xK|@+&r#(WP zVa|h^E&FH7k>76Qp^ZH%Pe8*G2THlEq4IA(HrB^78u0CEW3Ml~D3La`BCDT7R=|`V0JZnl1o+VTF@B^xN7Nl zIWQBx+<>lOBTY8{FNAl9AE+K5*DSWN0e$eqU4PW=W4UV;&UzHSz&dV*va&hP;dKK) zD|L(qXUf&_p-o$@9?qc+AF4ucuR>2{{08Z#o80|<9neFk&{yoddK_H;J|hmUe>lF( z!y0A;@)?&rZ1OI%*WdIy>2>Z8PWLyxF+EVk|L@`(EAemhNf$}-vOWR)OYBbk zIXjnhlh!?*IApqgBofiQ#61W(iFNBAPaVX&4g9BJzX)LK7W)Np#wdp~IHvM{O2dpu zgqREGUrW5Bq0AG9@q8Wk;k?VUF+Yz)hV#t4dE5x@mE1@2|12(fH|Dm)nbzmFk%;_D z_15IE-U~c?D~f&MXVE+125;>}>E7O#9O1q#%n6x`S|w(!E8UAv39)KBXVm?HvrzEc zMF#{X8D9T9+Blasx<-3zm`_?+_b6vhS;$;6IQ}MYP2m`C@R^&vH65AWAZund%=;oc zq<@*-$R1)WY%BKlre0;QW$stQ+^;4t*<;O3_BdEK@BM-{d`^9;z+kM`zZn1A`|jdes0xqxQY%i;_W}ieqK4Z`%{v1RGk48Tk(dO# zM5iQfQsvNBv77HR?d;*ZeA|Za-lX4swdzPdw8tfUYx^kC4ZHL+;>0H^xNW<=h;?TL z{j8TXWxutsr<@U*ah;cCuesk01esb>Fzz&OBK zo~(NW9!p##v1`j-H)4ZXh^;UE3ZoaxojzCQzl2R5S>De14x$6gchP~3{wtm_6BTqP z{-FcM|GS)g@?Oflhd9}y3mpC-JS1Q7Okr&0-=XKxlY!x#KH{Y>d~cd=$KY(OiXVwS zru)a8qK~cB_xTPxEl%kD`bo532NM_l2mR9FGb?dXIsNFZQtjgZYQ<01dNJ#xtCH+N z(&_oIp+9+=^o5ddv1;!lUkdqBMqq!B=IbR-3wdJj)a!alsSo~MsSl7x(nVIO_9b!U z&ZN8`?FepDUIf-*OD=1)lpV-V+8kgECC|@eWzv_0Jm&)orPJ#93H}4$6pt>3`^5j+ zm3=?*&7y?wA!hTY+V6OdHYxgE+daes%0<`jWbv{ae0yRG*){T%4o#LL2lDqZx@kE3w1wjN*y-w3PF# zY4|xXH}$6=2eOdWC+MGo{Mh3dIOAx13FoM+($D)5Tq`1qJ#_G2sP;C*oVF7`F0kzV|y{9Hok(q?$Vqtb3iv`&F# zP;bxUac5bic4S8X3!Y1z9^StxzS=j=?YE7B-z9f^oR#F=mzv=9PXHdQ?dy6D=No9~ z5pqshPfDt{inTr2^Jim>g7>egWN&PFf#$IHX0T4ETFZGa?7l*o%z1m2?abAQwHz*n zf8>5ThJGe^wxUM~-@APjbh^#7Q*c-C?LnbdPR^+;^b$Rn)d zgU`XzMB<^NGhrXX7r`j^8OnWLIVm(%c*QSupS&P`aRul)!BXN7;hPu4|4+|%8GIH$ zy#mtWGvPaY-7N2@`yG$YHI3@40WNK(Q7y&b@_#WN%-cCnIDN-)^XS3=cq=$6HXjq? 
zF5igV^{CjeIGc5c%aqkuGjYx84wI#&#$?$MvuX6MALjM1H+wEYo@`>zfZ%b=hH}^B zuT}-uUkm;Y*iTZ$e!_X(e%)WOKkcJ0l2(J3dd{Dz`-Izc`$=lQ{Ui_jNl(jA@+Elo z0V9d$X9ISA)=&e_;CF;A@q{_ivkLmm9Ll(IP8#u=`_@fie|8n;Jka-JttRGnCQqu8 z+43qe5G00D#AWeDW+`FO^UF#VPpWE7-8*8N65WU38mlau=mrn_!Oa|cpBdi%hRKYe!r&6(NgAk<~i65vC~`+-<^N({MJn36mf1Hv2n2pcu3RVrr#ND8cv(4Rpk<)A4~jy z&G?_pe~En;?R#OWz|53soCh*roqrgZ{9?E=^#-ntFiS-JfiQ?uy==G%) z>PoQ}5B~0g2kkKdTl#&p4!mx9XL{f=;U*3 zq&{fAFbIzl-MN=EDe%eobFXuoIU9^}0{jzOTcE<^ll}4XE;uSkrG)c@EO-hJnDZYg|n2X>YSe2a4@e1V0IaP|ykzwGl4+{HdT_k!)qL2;g#L*6H8G180X>tGZ%RY-4U~DioXK?AKr7&SMtP; z4?L{EqJWsReZZXQY zT1Hsfz6D=N@gu{B)hYKqZ`j+pAAFPcVPgsGkoBHa#w4{b7n*@K(~P)=X?=cTHyUvb zN9i_5iEHT2R=#yy!vMNTtBG+1?rDS8Tz^IzWgSd>F%Rxg!gmNPfOmj#k$x0|XCsA= zFwgvv^oM=~#7B&I>Ku8O{>b}oNBB10rC$rfkxlVsIH>n!{ zQTB)6U;Smg|63#SDY8w@0y=YCpGEK;@zb13SwGaH zD~Yir{^6pNHUXFFIp&q0j;DRZQwqX23N2d8o;+oK32Ak`3fn%0*R&%U#OH!{$EPvV zhVBLTKRS~<<jjK&1zI&}Z*`p}=vzg&B*V|mdA<(KQOEpJpjb+ld91Z3XO zMt^KqD4XGXo;|W|#&>DQC}OJ~>BC=d;Y|Ap+Voh(lopX~bHM#iCn@1QNhbaL@0%D0 zJ2)1iFUrvec8fp!JM<^Vrfkw8#kF>X)XCw8rjr;fO35*%2r!+ zp!Y{|ChFk_b`!{GCu^GvBM+AB z4$?>BzSXI;xj98^=>z_?w9QJph%dhLQ+X%OS<48;v*HqPUH_KSu|*wLSHhUNSP$$( z&e&+%W2&_x5QG6RoUN(%vnX;=YY#Nm$$c0FK52Yv&BB_&llQF z?O0JQGUIZI*`>#^lyx{MPxwgyexgw(^rWxD#qu$b6omp!`|^Wa5?f)saN7jx}`mwCn595 zAblugy}OTmRphIAp&319R0aBLiko}^p-1X{?>K8y!td4ezVwTk3#;ingg2yK{KRUJ zFVwNo#o5erDf1xykp6j`m8qn*98X0SF&7cJ6NxYvxlswrH<_C_OB7Gq{9imO>vC_> zFE{g%gZWmk_?O4>O4+KK&Y8@q9oIAFt;|afZDXGgm(=l`v};eXZad&S1sTuzC5%Im zbAKn%uih#LJ`0BLLzQCb>p}(+UnjM9l94t@{~Y*51aC3Y)H38Zhh}Mpv(WGqxv^5h;t5r)bP^;ov;q-*wF-lMu1@`#zudpZEFW`pjI{KCiv@+H0-7_FAa& ztUEON-H#16{J~wsYN#g9vNU8j#vq{%c)v`#xu;-T-YQ%3&$HGd-{`w(j}Dxc zhQB4c@ZkjhIgE2^@FI$AJ0R7#Nc)Tia%B{3Lk+!=p z=3EI~0(vp}AbA3N9@-!NO*Z^yx!c^qv6K_tZFWoyR6vKcr&X%1ef0aOv-!`A^P~7r z<$+I$=09%~3~GK`V8-qZy;QJJaBF^vvsw60c>R6APyY49`@LUIUh+5vza8wEdM7Vu z|Fgs!zr(xATnnv|JRad$r5myAdOx1We3P;!k4gEyZSdl}hxxrfM)P|;tg{ez&CbQ| z-3lKPiNiXHc@f-o*J&&3LU?)+zoPHch40z!4*mZu|M$|SKfB|r=knnnD!Fj)yN*%b zwt)%%rFi1m=falV^WA3%U`^9)%ymLn<=-R?B zd_YPM@-h0g4d>r}GkT^!O3&o~gK_3mp<(({AjV|JW9WHL6=LUvo=Wmwox?e;GTq3B zr<%6Rk!tF`G99U=?&~Aq(+$2R8fkl)(G7#AWzr!?c95+SwCWL4zrz0Ow59H;QTgeQ zH%@b)`*IBGzS^>9cv~Efz0o?aUtCPQzHH*!M(Mm#rSn>TzNue`%KOMM^7^1}F7#gA zc}s|a9T!+QrLJ)iZ6dqYm%|&~gx;%Wv2ny8Msf}DN`*J_bNTP;U*ewrWc`cam4Ix% zCH^k2=C_x*6kquT=C>Oi$n*qap{<&W4;=UQ?y0p`^pw0@dz;gG_HZAFJ%1B(c>G%KzaZ25;7>SLDDb~k)WKfI{_d|&C&Sle8yyBVRL)+VA*Z%EAGG(oCK_jUU+V6tq`eon z8lLv0#0c41RN&Q4**er0*?hVWhstKtj|$eI2@6wKHnD95-%Py|{Q|CW z!{K`JJZ!Lzg=Vh7cdY8okxhkQ)l}5sZ!UuOa8+$(p8bq9^Tt;5yV*8)K3{D~@4&b; zCR6XgI0y6&{Ez4zyu?cCX6^02!PqSSsmL_=<+Xu;G z>Le)hI%U)bGEDu?`F@Okp2)*S0v>{(=raNn@%=r|c+O9r9QHf)#d~2~(=^*n?YV20 zwl9J|nqhM;UWwn##naun58C46>0?#x+J*P|u)|!MO{}P~o zeVd?#SkUc6=3OuSBHzU`E3Sy--5-64zU6^0^iQ(xPGsF)aHcg%j?IH+|NC-mQXq>p zs92MhyqwUs5;;uuF2nBC`WDv~k#l#3<=j5vS{?v5)E9W(cprEETcL>>!?T=2x2{h- zT|Sq%Pt2V~D-+EA-$nmbt`GViWdAAVPE{Yc1P4 zAG-(J6a3NcEhp~zR%FjOa~`;dD&7qEN~f@zxi{A^v~hp&3{$7ax?K6j`J2;Epa1>z zcbO}-*E)4YuGXvgdat9bf6k3L?VJ%y7chabx3Ag-?=@SzGkO91qB`lH)}aXt=$+>B z5@4Y+vMttlop&!PklbSFEn^D0nPb7%=5*HkS(}GyVm$slm)8rNxwjg0P3ZjFtK1D- zG|Gj{0UkIuG#+3r+{qf~3zxmCG^bwoU$jRq#7<82V+&Mv0+_Er9!?c4`BG|ACUTVI z?KEUqdAz!+Jye4oOuZ2-MYf-Y z@8-KobL*4I#~JeXbcU?6AohJrrgX%3-o2T%MqMtx6;O}n`LgPJ+gH(M4dXJ9ckiV= zfB9TXR(t?Cfq6hyY|NFsOC5sC>*0F*larfrsizRTN%54b4`2E18R*Aac#q`&Z8(4b z{mD&nw7uvOcgLbrp&2(O!Sg4$>a*fsY*pT! zYf`=Tftj4oBmk>W4RNMa(ldz%?~|O?jE@6T-r$UY-{E=q`y7! 
z#mJ56ZKYpFP6SRmf7H3y)oXaq=(6NS&MKVh5A92CoTMK zv_H#h@h&K096S4&Fz(7S>RPF)`q# z?LVs?=$>pa^saSx44N+9LgyoY;(Vmf*~59N%?P;Bb?l&=notaHGx_o{p9`i&($Ne8`(4 z$5-=bzkIv=I~P+AvC@hEHj29!qq&0-i@m24drueko^kkhuzNH%&a^#~vUKmo;XR#V z1f9f#a{ap4#E(qkI(E{}8elE^fsBWY{z_ne4fC%viF3t{>xqx?!b?A$m5H5%!!{sW z+2&!AR}6fS2O8t^|Ii$3xF?ZzL!fH`j1p3VrUT?q?}; zW=$#cvWWRv$UH4zzUC7HVjjK|{*C)GC6{ww#)_9G9h=S=6*(-wTY+0pS%lcVF^21myU(nYgnC-l-7WQSG1 z8MFuNea*B7%w(TDM4kbAz}T^-J)pr{6ml-o&6<)uU=P1O@(s5K)cH+e$*BIqeKD3j zAbzECp~3za@I@8232enC(1%UnH0;4jUkuv>Rn)P(QBW9O6V4;nntpda zej07N@2}Vg{~GM3{DFN%!QMBEg6}k!biCVC(&201ai9+`84nJ~wk@e`3UjmbY3^?X zih^q|GoE`k0omKid!OB`(qb-{jsrL z|8ivc5@f2`$aL_OImn5MRaJ-$py2&y8!>$-I}6!EaFfm`b6wTV-67kV4D3ZkJH_|j zs+g$U+hD!TFtBI+*HY~6)R&%I?5P|c=LOuQbBy=+yTGOVe3Bam1I|a9Q9#??g>!ew9gpc+_BI|=?|2*bkPWT zKQm(9y4Q;fe5#jvG*;iKl8!^6l8y|{qqZ()Ju&C;v{hxh*i)M1tY19E@RmlkRd?T_ zk1fBfG^%{oXGbo-e5)w<_Wlvp#5TL<%+JB+>&1H;&q1H@$ARyj61zun zBX#a`Vlny-`uaw$G2{4hn-|&gb;T|8Ew|RdiEVbTiIAiMhD_3~NAI>*v^h3t8?mc?HSCf(H4@1b11yCdC} z9$#n1r%DV=#%GlJuC<}Qo97DfxF>Gp+`J3@BR=nZeMLdva;RGK_<~T$_yWmw-O!@< z$@469|Ezp{*v1EVKCoHqT64rX*#JMDx7eExY|4PkG3eiM=wCl`-p8DWYGS4{n=> zz9VMODD)d=(^sAUr@*tw9#{Nia@$vgTX}Bo8G=L1F}`1dCsNR-fh(JdSyDR=-g+xK z^e}Hbj<(lw?*(})cq{PNS=d|I)|uFo^jCT})rA~$R@o5g!^^%FF8erXItzl%KG!|n z2yWIpe9TekzWI&uKFl|*gWZ|v#K0$?KNeqVzz?{0`V|uv97Ru~Go(`H$fU3A*SVSO z^Ck9b|6+T!uPLS=v;%)KJ7Nm_*u$849tiL0t46Ip$T#KB-4|oo!`H+NwTGu{F8R!I zPsSAdWrX~uJv{9MqV3_swrFTXq;H-;kv=26xN`*M~36an0q#RZN=oVp6yxS&TS<_V#wUhJXf$s zA4hKNVXYist;~Zb`*FSL)rnp>6`#Pz-G1gKjj5q{vL1$)9K&vzb}mQ zq8AmcPkf9Ftj`O9-K~t9bNTT4Ola$yto51Dwv)a~-u`cXSAa*F_wEbiw?_=G`zF?#_T_d#l=Wv@_RTj8uFBQ*}=wBl~Fj4o3>wvq9HLEzKnxD<< zj3ctUSwTFvoVCW@OyVg?W@|xrml@)&A^CN#ulj1RvoxO6or#1tKW)rpJn~C`EC5gK zGUZ10n;>_6I=V~7d!{fa5g4vwoEm#%+;@b>UD+R+`CIDO9QJ@y+ix^BNA~@SPV9_0 zAC061zc`qd`2zeh?MKo!TyJa^9f`c>FEuvDM3w!`b(Dp-TKIfu=8mX**HGuPysxb} z({Kl4x|e6DFE{Lc#cK|W!}Ryev*R%Rh_YHkQ&>ZSQM7-!k$qKJwtc#TtaFb2;jJiLMRvH_Twygz(Ug!k7SmG{Aq$a#7sZZ`OezXIg(5z>awi}3j$-^K^orKJU4 z`XQMCd+hWL`j1Sc|4wvsvf=Z;*_e}s2~@Ow+*Qn!9xcJfYSl24ldkmIre-+ae8$Cu2;IsTCQf!*Bw>){??VWJhMu9f>B zu4l^IU!C{Rl4-B~?Ca(onIq+wO~bz>?Ur&5J#9B@9{KZoS%;IjBNOjg20Xrfp?Oy> z-~4_$$jyOz8#2TK9Tfd&^h9%J)b=+wd

8E%>v&~GaP07hOsV~rR^ZT4| zUyy093-{#&c_z_^H~CHH*UWdjmG1!G7L%`n*lgza2M52;Q+du;&F??8z86z(VRU*-^cXfpej=!l&%ul?hrhAh@{p%a^OtXwO$$Z&=Q@c zF|M+F^8AuAK7RMxjUzF(#~O2l%eDlM!}e(7^CgbGEj$(`@-`>> zLKk|&IP{0{(3LUXGY$uSdt;y(R=haT=%KV@Vn1_l#(~q_szPLmVr)|6vpvLF(PZ|O z6Zqg29Mg=H`W>7*Sof>!USgw|_p5xxq?z}t(%LpLz8puxJCQjxlB_wzcNH#sOaQE4{sRR`9H^ z%P!`h?}Zu+%2!gBwl2j^v!G%$IwR`Ou^Y2=Z(09WSn2t>X&L9QAdjDRiATbDD`R2p z>a0DN@@5*gcq^t+hwh4m^VNbMtcwA8sCE5KXVDGDf#2LKna9PzobA}#8v%KhryZ1YS_~++t z>Zc3t87DbXvW9g>t``|s{!n~f37&r(3^f!mPrfdvxhDV0d$5MP>C?|I2+cmf;J_dL zR?thDMIRE|mLCeqFYK(K<3i6-zPCE;dvzb+1xKoR2eb3QO738$p7!;Y1nb6QgR;Lw zF`|?F7#&&>vp{dg~N)+52zd43m3`^eZx-E1A!3Y}R`2 zjR|(U;G46vpaI#QYp!!_o=hFZoe7?9zDY0fUwn()(am(AW7rcja7Wj~*HwwtKKkt& zKPq_r@iV56mt*Y32EM!gOxd30Ii~)z7n~Y>0{=Dk#TE|P;|rj<_^Eiu(q(kb4gQAw zzM7tb)|WURn>shx$$N|A25c&BXs>+9n3+Es8{rnlMw{kXvl7g9kEM(X9-TNUm_Mxj zUk+>k9ny!lKZQCqXVt8!<7&}wPxU&G?53iTv z>$(Yb*|#I8T+?E8lPL*wTKL%?fVbq=w0 zWjB%9-W~ond=03L-4_}&t^F*~)7|@Nb1$2J0sP1m?(5NppSBOt2Ye75k>9N{Z0Tll z*Xkg1FFVCv#wU5wTGz>KFPM0`9Jm|#mjENqcoSvs=D&9?_c3FF{wML@g#O=z^}h2g zSkGkdm>Ls2iXWt7(_^?7ljV6uKK4E7=&&gRUM#F&Zb}lWk7j3ZPb`Brr+9u{#+VhMFAlEQ8t)CgOrAG+-s0I$oxPNQw>Y?RD?UsDe8xF~p;(@Hp7GGA1oEdS z|M_0uMCD2Fh7w3iaRg1jC&2H}?D>}8lkAdUU@Cd#`y`M5`TURgKAA|FY@Vq+d5*Jv zpiJj`z9U$BYuFD;A@8$kW4JGr68gG;e#jrn=lOpv|K$_qdVb{-rG#>y=ed^WdPmUz z?EH>H`<9OE7X{r*U=@9g_(oYynR5D2jenMiUzFSUR>gA%&no(JAl4JtgI^W=qZE%V z+8*(b;%}gR`ABKvS3Xjj`IV292Ff+@G*dRpN6Oj_OSZ57!7v{wUFd@FDd~HeUwlgX z_V9~O$xs*NU*_4vd^9cS@X!AIppTS&( zKWozu&Ghx=SaFWt%^}9bP}+SP>o%HpHyk^gcKT}eTeSP#lHhgZH}&^j%Zb;>o(??; zvBwOjXMcN4=Ze=BRNiaM2z4bCh*l~_(J}g_^Q)GW+rOVl3<2#aiF`i_ZRx(L_KHkm z3x-D9kI1gBn>33LPH7WQ)N(DhjqnaW#_nhA2J%#dw|a?w>HNUQcc(3--uH4$aJO`9 zQ)It*A@H>5dH1{Q?Ey!y0=jn;IwZQ;4V)gNZWG4P!K2W>9|9-o8WbN`d)%i4CupH; z=z8G=t{MqW`+F@o0h4&MKkne|%x#czK4_$A4~|Yiv{x`NwynIm`^lIy&#{L%b!YKU ziJqf1Sp~A6^wS6DHSYeLYmNTU^;rd?v1)%ZaJY3X{QNdxuy|2N9A_sv*#8>%)^IsF z13|~c+2$1kck#(%o)ay0ac`-|>K}Vi+kO9=X>MU+C8ow{h#2CE8@(33+Ie^=Xv97*x}zQJHzqbxEE!UPmHR( zagy>XO*B1RBL;hImjOBkkb@CtD;?Ilo#`QQU>*BwQ`s2utPp&+t*YzTP zY}aw#J2lq3VkG{>Pa9oa86hH`U>-+!|;4u%IR6}5KWyo0uR|81 zjqaPEh2gbn+O*R~WbJ8fb}V842e15jbGiC&B)Z@!Wj|o8(MiY|DQ$me-hHH->ytiP z>ur4XN!DbJIUYwfcVv0as}^6{!<-+;j`j4eAMH7~KGSn8Fo?9#M*c(l$ou3ZuhzLa zcJlX-KYHxDp;3O;=P?hPZOKVyd1&LI0Og>Chv2uB zr*%OX=IGUm7C_H{PY>g___a)vUlTpKMtZ@n3X@NJ892@ce!c`?nF9V#P4oJ=kCqb^ zcYF7IW0vm^j2XX*xkcI7#k&GMlbO>^*x1w7TK!L$7j$ByiLV3pfO*&$+5@{pQ=!AM z&(WM+sW~H`AD(pMsBbNqwjyV>S&lM3_K(x__Z0nAdAn_5z3SkcU`QRaspFJrGIjW= zYjD0Nwp9q$>IY?2-$dx#>8HR^#_If>G3yw14JKcR%&9#h!apDQT$q0b2BCzQA^h_x z`fZkTe4?S%)G{QGI-v+rjcM_xof9LjT$1P}jwiMNh5Rtb&$Bp&j;hmkG! zy8Hh%n?2}tqu&?BJANFm7|kPo98Va{3y#G5`QY{B$DOkh_7Hdu{H^=o^~8(G=lTb; zuk!9@4BZRndvj#R#(yX0)Q4`9O(lK{@oCp{F}C*=Mt=_abszlOA@Oco)6oMYn6bdo z32ZmQJh>6!$)9waXWZal(AUg4C;XT0m>Kw{tHsA%1$$C6XW-3B-!z!Fg*3wsPhUUy z&G$gm|MgM-Yoh+IiTZzU)c;k3|2wHWle+iQ?ta?sR$Ig&5Ah!2eI@*;f7@bj2Jh;3 z&IJ4^6?1h74a=EMNb9H1qzG3|6Y^|q4t4;AKTAxSb-R2tgWunW`8V%F+l`YU*=yM#r zN-1*Up?%Ed)QMgn^x5~W5!}r=s8e#{2Zkjhiaz(^dx5(-Gp>OaY+FDK7RyH8mIjR? 
z%?Itzr4MyHe}L}&-*xwY*WLf$)}8jVx>3fgOTd4v1=;iXpIlII=;t?kCEG3Ho71)- z-4AbL%6F`p05Xi9_2d7*5!^t!Z(Bxz|H+Jk|2lHUysLOREVq2{1<5V&QQZ0L$g}*g*%%TMY`!oEFX;-w9_wDdojo-@}P@D0{p-g+i>)`#fXGU0LLCwdR<86BY+nSVcJ?&at{pO!D= zb>p5g-#bZ<IPm-_c$Mj{Qai{yo_^wF6M($G< zpVZ1d0YAFhJZx=irr<*%hx-=LxZKY;JS(a1BJR&T7)8a1`HDr~9K_vG6d&^ROA4qst!VTP=IA_Ug0rXS0jg zw~8$N*>rRg`JDNjOMf<-JYgTOq3hYBx=JnmSvmDAryl9gZsR}aou>Y*ocznFbGZI& z4L-JN@J%cI8NOLfzpZQdt)b5Klz#wQN9oV%$@j4Gp+8$sI{sVXZG-*>rK3Nq=i9?P zn|K;XYa)NM@}oc7M!qMN5B*sa>CL1+Mf%T`j{a;L-=5@ois$E~Jxl%_%8&l+1?541 z_AF^TY+?PG({pG|iKRd5DnjQ%Ug^*F@c&KzNAzbeP^OFLWu85@v-M~D`2Hp^xizdm z+t2%3w6ScMFX3ML`Y!#D{_F$(_wir)vlIMEf7VO64|w``PS}F}z7q6jpC4I&7F&!S z2%Td@f98-rkv>H0&tmx&&x6fxD8c^m`m=oWlhU8f=2!Z&LVl$`%ctCIo$)B5-8ArOILAuML_+%Tx@yUMtdEzNUYgAr#I$;|=bWI6u zACcCL&LP7{H0jq#y(8Bs-hk-Yao!_(yWPkv3ACA*Y3x;d_OSpFIvy{gMXd94@AaBJjt1g0RVmbV{Y|@*l!`v>;IN1o{i8l z*%g@Y&rk7`BGbvwq50lprOUsn`F`gV&q~szvozn2P4To*Ugv}6``8OTangN7-Y>b( zGmUrI`Hci!`3-2}CsV)kf}(*twJy#-FElpp-Jw)t``pfN{gE}}5jp1B-l=n;tx-Rk|mvq!2>qh+k zOXf|c-hAZU=P$Hm;hvTE^y|FWcKS?19X3vO&J^{od)GRzlk8jmPN)I9xFcJTeIvF; z$FDEp+_hLV*gSJRvt}pf>4s;+)$)1!V(aTya}m1yY`^)D*n-Eo!~DhTdOpk?7Gp5u zw8NW`Y6NpLOS~t@b1(Ot(HjJ6v6FG0F4>gy&6K8{z}1OgsGYP`VRv|)=%`xl&L+v> zk1i`AZXmv~Sd%*I`T}{LUuJj=;&SWzt=MN_BYsv~LB$MS%QKh!);$S3v4vgUn)%pY z5j$w4KKSyCvLC7s>zspq;Ed?ee=ejCRt!Y-;o>2ENU{1*N*}`U0p$l@XGk^okMZI! ziS`zsbL=_rAhl+@@q_K+Gyl;8i-(MT!jN{uV_#+jYd#w5tyxJsldl?aTsc#qjjG>1 zcCh}hQ~xbj4H;MPs%Y4T$MrSpzkj^9<{pjX>T{06mt~Y)ZH{ByU_19~99It+$NnMh zgvUW#z?zTF^VY1>IKD9AIPhywHp?8xd4u(D)HuE{WE?LKsXsi9?`Rz7duzU{aa?oG zakNgvmx?)#34`qrv+vOZ*9;lQ_95+r$MJ~9k>IUq)HtpkaU89wMp=wGj)cMbw`v^M z4jIRzL+THY_v$@XTYaYGjI+S{rq{a;pWU_iP8wqb zpWn*5BL;}pLJzc~;%CGwK<~YlXQf%^XwO=7;J5KV5{pl9_;S%Rxrm*x^7op9iQbw& zYYvv0eeMj?frt+T^;z@)n~1U(ROh<%K^Xm$e*Sam5Exxao`9`HxQi3ib1V6*n&v{&_Nbn~2V)27ado-lqmWsE(L-8;E}=n$a1CvG*?XkRKMLmSbsY8FD<72 zLHKPQQhykJ$*i9PhG#8LBk2{z>{&bw{Qo#OCDPVedo25_VzL>v>*wTLP~z3SzMM>S zy_wh+^JI>xy&|jjH*+S}zBI>P`|UZl+HcLts{QSpoH^u~V+3ur-*i$TpeOxfX}|N2as9R&&LD3l|7Vqd zG5O=YIbSCKm&wnQSzA9R>9yaTlk=miDL>Ae z^9|DZR>+gd_l!Ey-75bD%Ae=WsUw|lg*=&jcUHuBa(OZ~lMj7qPU|&ZXX^xS&SvuQ zt&k^^?-?zmU#WVorJe+DP7CRLE9A-KyA%JhxeJRrTnlX-#whZ)f@3+?d0qM?Ee<{K zw5Q?co~8^>W*M+WXFKiJeET)9B#uns1is(Q_lnLda&}YyZoXG=-aUb|P9x~77)@VC zd#1hS9-xhJuL1j((VmPq=-=(NSLC)XrOkA2?%z54`8#d#nB(d!>c~BBL4oVM*aBy# zt;3la>&fNGLT{aWn7oH+i*aV&MOz1{vzD~gr1g{5Pul9*D~z?Za|-`OKe{Q)lSRDH z>2`WDCfS0<_vptY?%DD`HDR!tGy%V2GVc92JU8lZzj)AYp)QW9N|q}_@U3imOU;8owiMWH8wQn@GXvS zd3-D9TP640%CYm@!ak$C@rISR@|oI3@#D&K>oaTdadKI{3p;^a{5?AH0XdCF_fzG+ z<^udc>U$BqrtIs*533(eXj=5U9otg9Yb_&3nZ9qL({qO9$G!(21*5TbjD`1h!t=Y} z`SDM%276ZBCE~t(4YtK47b+&GbuVHI`}3AvvVWY zU&MJv>#mG;&TkXtGr7jFAGv4(IxzCczh_If_1}fR*FxSidB@h{b?oiUdznrLKITa~ zmgn}KoLa=$xY3cz`%?U)Dt`;S9`=eI{nV$kU*cp2y5R{WFg!<{^HEap>#8>fjMWsz5e;VKG^4uM{JhH7` zi7YS+SW0%3pEM55%S``$8Tb~IFO!-Vu~+HOpM?#re8+T8#P`c$&LsF=DIX@B?fj7U zUxSS-xTNzV<@q9b zR|BoT_tM82-vkHz)UWRv=VjzKZA!6M(Ybg6xFNgSOk4jfjY05xoj&fz-}iFvuFE&0 z{$~r<5@gS!yOt?!qbR4m73cw5`mxR;pV}-TCd#H*{QUB$+^$PEH6CEk&2r#xgy#g~ z?4*owA~Z|)MOEfFaS5Ie|L-~$Dtm_innT6)J0AA4orcfmKH9r4F0DSpF)A_PhVHfP#bhe8dhjx_wh%MXQRXWUDDk@Web50=$^&qUJFO#i3vP{+H} zac0e8aKDr_u!yy=kTtOYKM?a}1LyfEI6PQ>uVNjnyoT7Q!N zr)Wd|Zz>nK+x?6w2S3+-tu5l(bh4hx9abF9B5;ATVslM>iSo6?(Oft}nYG}#%G4^h z0BzvcYY%H>Z^oV6?{E@_lQz73D_m_wcoDU-tz6j?GLWy8_(~vEZ^MR9SOXM9|N5isK4F(_c30@Gl{fW0uN%f zNz4V>%;tThHu3vdfBV1LW)X0aZpY^s+vaCp$Lc@h@^iKz+_Szr_CT-UJ%pW9 zZm#?*Lsy_P(^;dw!WXfL@1He}v5EfZyL^V~JHC=;!H*`k^cvW@;$tk^@O;CL&ob^*eXAqZ{P9jTfa zZLMflU*$-Vp*)nC0A<8D=1#`GOj}yLaA7)gzB`Ny@~@q5?k(5}(zlDkX~KtG^5+R3C=D^gpoZl5!#m)MaL$YG;Y>FT0&G>ikWsXX^Q)H 
z`VI$cH-^1z6nole_BP_YuW_PNwR>L5GoCxmU6p?Bss!0@PqL1WGA~-YviZ|`E(fk- zCs}jxJM>YPPs8^Gb78IJ#J1K7nIa zTczDuFPicFgvT5AonP08fAQ!0Zz}S>4Zp$n#B$p1yg+-Xab)3>jg2w1_3OBp`a0I1 zVvg-*&ZL(*7}cisW3{=OHf^-GPJ8Y)!&5t~&4dY$HBL-u8ffz%ZHhlSP5C-U(w-Rl zztj=4r@%34k7!9V{xY?1T}_@-;CiJa1^*Lg=jmjR?qfZK@UvXO|2k-r>2H93`haJK zZDOEB`>cJ^9&l;X3hK$_*R;JOKYah|e&Di^7<-GMp%?L7InQX%;}OqXj_-mCCfsyV zIzlwFgjLIf5iS%KJXRiM10tHp#$Qt#7Awy$Dl6( zng~5O&bgP*K3L}-=zI}%igvnwSKcmI*lqYu0T!zqcIp=2HmU6(eEA9LW$){_m%6!+ z-0?Pb3$7a+Mn`l%Aq?<)y6Y&qp|gWT{E4NuorWiUmv0N82gl65PHe-O&+}f|S4@UO z$UleKPY=P<)Q{cMSoKU?6LA-x-^hNoF+Q$-G5xy=U$4sZzGDn|$F^+;e!sIB9ba{f z*|QxukF|{rJQP2+{=4M+-7^n0?!>3`7GPOVo^r<3gFm`W@vi#l{yqiYZjIB5r=tD} zj`A~jk}2l)y;0qhD65;L0dXf0i(7?$wNnVijm z%X^TQxSyPg&*zl3Lij!TtBqs+BHzkIgZP#M&xefbnFi0d5S}jqT=9W30cg+|=-zvb zzXG1G7M`yPe$V9l!aQFg<7I7^x!|os33v2=;J|B8M54t`V*~BXMgE!nOc9`hplT!@eNWvuz!@c&*@X44wB_=CI1FZr>S{JPhwHnx$!F3ufH93|fb z?%;HO#cwUUbaUfY+RmbFgXcdf%N)#b+xzpet6Xws&0NM>!kCL0dl4{s$<&FMajTFo z&y{XxelpKd$0zNdrcNZ@C<_ALe)h}5?3dD&^e~pBuJZQBpeH%-oZ|7~kP}+*>HP{% z9DIEqkM856$18gjdXn6g(Y^p!=&q=ABoFev8C`}OI{zrN+&p_^UBKJDt~&d(ZQgk5 zbW!Fc^s7G}nFyM)2>PkLSaEw)#?3zK1^1L^r+rMoVEy`_+Y{-NpZ$5xSL2(sc26>w zPIE320$W*&(nCA}->aBG_>_O98$Nk6{>z`BuADpKn>L^W(Kw(HRZjuOa^#T!u~{F7 zh9{VF2M%2Nc;mV=q33M0RZE|bFTj?8c|VN&roK~WzH|;+SCMZev{Oer>5O>~X*JY; zh&|w3xG$OQ7_?P#$Z_`bMptru$UZ8t-P}9IfOparq_KBcabo3@vJZIFL3>ZwlUZNV z7f8RMJn~y9-ZiOBa=@UxpGul^4dPu>OrMtTlIL9g`aOJ*@Hw9+1TG%{mv@~vh|9ka zJkGP=ftAgy`x$RH{Tkli@1g_frJu)Rjo_xZs z4}VD=(!Hq;8{cPAZxZ91!PCrsuB@|A2nO2EKfSb_Ex; zTR}_$rz?dv2W5)&&@wOY;_Bw~Ulab{4o!4G8)Kl6qo9?e*%y!jx@;cDWX;o1 z**!B$ehBQQ+@3|>X1!M4zJoRI1UDjdY&SH_Ka7qo&cH`4I1-mE**^u@0r>&kkgVPl zZvC2U%?YiQ>QVHH=#?F|5xEq1$goveC}b^&necI>Me962a3TN(K-CO zbtu+WE#YUo1x$G$-d#1p>@}X*FqI@p9ap);@J=Xvg^_(8ka%qA~^rTd~1(0 z&V=%1tEBd}LE9>+Up$3im4}{H{W?ycL?;S(3LW_4o6epGpBkyFmAbU&_2d_M4?}+{ z(4p=&=RN_Iv-s`ww!;s&z3)NSBl?YKcx@s6^rPf!rk{~IiqW_3ppJZG_@l?~-}g&< zB6P*_DKhB4H^HQJpF;9EL#JCQl;r>-FH zk}pJiz5)Lv+LOWBGT<3CPRW3CpgSo%izrt&bs~60jDe{AF`D+=OC8eNs1DJd&rz@7 zX3aq&bFjJ59>psx1#Y4{M+=L*nfNQ)&v={R-PHf}&>i`DG2cmd@b2SXF?&?!Wjq=5 z(au~Pz^{E|F6L=oiIZF#+doVCdG)1J{3`URl^73-aTC!$IH0jB;NL_mwvlHzt*As7 z;6nc(KK!fyhWY`4%YXiwJq^RurG(k+WN$?n=pi>6zN zCf(9rApPsPd}jA%IJ^hov)+!4@%|WJoTt%WSh@?NO?=in&>P>AC0^Ynd}s=|h9C8- z_+5HAcgCi|pTL*NN2c_y$vk&2jceL5&E26G3K_)ElI~_RG)MZ3mdlJXljpf9uE_;n zhU~7Y#n8n`9BdyAzJn`@YqX5GM(~s-zte)=;aBtUT?46}RQ>)j5I zbt0Q{KXe7JvJOIaO*$Ek z#r+?HdYLrF_kGR{PO-mLI&6VV)<}@?ZJ>{7QR6$sI*cCQM00%kZTZNr(iiTC!PhCY zYjr&PbJX}WuBRB6#<_=IjZI@zf46cbCEny##wJ-}$q3`iWS%sp8hDxI;qmEz^!T=g z$9E!juJ;%^^?ugA_LRuGVm8R$RQhkF*EtgRXlzT-O`d=r95;1DHd9AbNIvBO7JE`h z$kPpO97jJ;h(G>X{#!aC_EB&&=R8YCR0}Q0F(Nu5+klSfR?!^x)m-CTIwISEj!1sa za)|4-$dO{|i0Wyhl>Ti3zW2~S>4v1cjRS6vIg;y%jf{LVm_MF%A(|}xPXhWCjZ^v` zc>c0`O#P4cj7e=L=Q=iTV$Y2(8^_orM@RI-y{ym3+1zR9^*fXkza3U=adbUqJTF(o7;^qKumF#8QIn*FwPJ<{)_LbKA^ zh@l(oXB?0JE%ZF+Ip}&^f$i*%=Ghf%N3^RJTe`BxnMmhP&b{z=cMzNh^z z1KR+`Hq}6mM+b=BO6l5D4Q%St&<~wpe>jQG2>CR)a4K<%-5?fWJi6Qkgm7zM|~FdP#< zR=*@T)Z|m(isRP||El#p1Rp4u&mQ(T>r$}uU6|bD z`WV>lK^Kt*+%D&l9Sf9P4^@K_1U&{wO!hApDQngs2z*7mu|9xZVshDv02ezp$FI+MWPC$=)$w4Tr?zhV3Lj4f!7d3}V9_F2+ZS{?`g-$!+U+-#v~!@zdC;+iji~*BPE~@=g46^Zkam4qM0P>_)IM z3~$L8mEbGJ`o@mSZh^S||Mw46dfW zPS&Z~&A|`(J-~yz6P{wq=71x+4dRrupUmNXH+dv`s~r-l(Sa+(EGLYGlXnO z|H^v8aGMF*oRn@C@GgHOZzI$>B}$MRka45IrgIB49G zM~AP8h%Q)T(Osy}df75KkyBjAD{;sz@!&iD8rO{FZi@qZ0nVf!AQnZQ5j?iP#LF4n zUeRFRWPahb`}5FY!oR)oQSqkcWMX-fZ&9&Pw*P(Gk%vl+vQtZq=hhHoycu4;?kB_o z4%X}={@Wvy*e~|q^SL9Yj(eQq2^dG5iWN+njkNVV!kYu^-v^=n(mfuGF*ZvcvG57L zi|@KOvtyL8nagHvRfD?I_P8IxP0h~zWvIP-o7P?{fqZh 
zb5FD6ugY7-G`Z-@2D_^?*X)O@s%x6$s`M(!2J4a6yw$7PYvAJ@=J)uj-V4yHv(|QU zUb^+trp5#4ua3h5NnYt@ufGL4(lXhYp*VHh*Dvi4fD`WQ{#mSD^ew4XcFwOgzTk4= zAB4wv0DYCl$KJN&XqC~SF$k_MU^R{YZM4Ug-a!9;&3JQebbDJJ>HU)9WCOK}eObED z8_AzjK6E_TAy&zDPI2hg!$*6otFcX*RJC5R3Ey;HC!R(zc6Tur>B%LNY;i2@zewW% z&m-gLMo%spp*ADmq$d}CMBe+L5l5@s9ThSC`7gjzRhIv>$w~h?H!sbgFWcxthQn$0 zskM4ddj@@R@U59|>eEs7&Pv80c`0JQw~jR#=~JxwbUHM%2K~wKHrLS?>3EbD>Bj+h z2;tB|_Oa6!i*HL0+_b=G?|_H!0e@)s1Rrq!wf*~z+gO{Yxc8-enj6_1)WWAp?zoO8 z8ds(<7hZ5jcy8jcaEEgg$fAF<>d_!}WcV=LKK!v$s&M z_Q>vkS!XF3Hu&AQ;9aRtdcN!rLS^i!De9lnMuhKQffZw^0e6>yyOzCUQdHjLwyofq z_DHSAF7QD*N%8EguR!-oXcRcxIs$!L&UzDlvvB8bleTq12MQUZa3?v69!v&zf>E?> zYItmeeNJn0ql*y@h^7kx_QG|ny#&^p`jiBoMEYiYMLI)+I5?8p$zb39r%z8ykB4~ zJ_UY>chDL?hK~q0xM7`p%iefm+kWf>PG-YXkfz_i{yKKntN8Tt#o>!@QL1MH{vP70 z?^>PfimT7GW&BxlXX-1Nt3>9?5BY&xjA@F47Ji03-;0m=T=sR*$PIiK zjeN>x?e$e*I=L0!YMS>1n}rwf@f~fne_;JwuMK@tFZG{Ua}|8imGDQ4;gd??mlmPd z#Sij&ho`||B=&~$+TocGenS4Wqm0CZQTZL8B}Ty~l=(6JNk-nVh3S%IQ<&7Y8lM#D z)5V)$%k_Ei5BWo}{T2d`^MD6@P{BfY({W+gJU#nzuf8^zuujWPRbhL_{G0Th*po^>z`mpN_3C(|1AX(} z=E=r0EztKhw#54CSa^z(v)dKzP`eJ+-z(K>SA0cWRTupzhgT|LE%brY={!37Dh99F zpPxJC7~j?nT(s5_!R>D7r1~HndV2PK-VdP7EvG{H(&Z>N=I}I~$yU-YXBKwhqM`6s z!gaN|oc$$8dzKzK4LvevVbJC1Hcu2YXB{;Er+m*~ z&JDM0M2x-T&lK&`I{557x0gFZ!41Tp=AOfm9PCmiqF;WWadpAp#W1!HrW(N`U3PEp z12(Udu^ofQ{$H$9<*9^MuW=YhPJstkWub$|-mVIIQF*FnpX{wyovL~Em9Z1+XHr)h z{^@k?vJv>Ufg|!C9Jt!uJ_`9}VN#6A%U)=A7rbJ?&$0$D0&k`o*s_rJ8F1-$#@N#B zeBaLZX4}}RUhropy6WZZF^5*KY!9riGWWUz3GgV=9YV*&XR3bHtGYU9_n*+zoD{1) zvn}fTOUjR$;;*K&_28`5fzF^(W3@l0bDr1OxGZS#+Tt0m0WZ6WN&5kG z_DEMTcv9pQ-n>n0*PpCk3Ottf_rq6Z_J?NO2_8nz)i&xEU+JLzINHhM;XSbIl8ud9 z;d=s48Qy;EQX>4xEZTCz_ng%h{fo5qQNA14qvxQ#|E4{`|E&DO`lhv`_+Icz0S9yQ z9RK~>EM3-FWk+ah`O#3>la$d|da-#B%q(4NI{RW{<5qMR!VBR?^myzV4?2zyUYP40 z0=uX-Dh`EaT}j=la}skPIY4z5qdO6vMd}s)2~IY21BX8cAAr8)ta_uyok_jtY0U7q zTFcvdLS<@4@8>GFyxX#=7$|ozR5tD7%0-Q1d9N9BDFxYo-+1ste9)cH5$^TwZH_aJ z9EFCAMZT1MRv-Pj6Py*T;yiDkeDI}zl|2etr8T9sE&5TZJX%wnv5w&!?6hPsY<3bP zC$p|}SJ{8E=2_{Cl2})=mstt@UumwZ1an<20$vNJjBgqPjrl76C4XugQ~Dru=0UA7 zzW)c`x7)@Ip*;@vBdsm{j)A^xtX|n(Pb_!w+S%Z;@Ib%9Y3pn~4jC1khNd54&Yppv ze;R#31g~qEFP#$#E>9zOKZ7iji9OSA1y^iF*F$q&;=ke)%7!#L-!^E^{~}HMF!%cQ z*4WW0Wpi(pJtYTz2OV3-CE`^?ukB8qt$T6|8i`nR=3&`L2ES05k}FQAZ6ibnMU` zooC$6`*-N0_WkwHmRFEVUcK>llb(u~6s_q(wh&*NhJLaTxnT12#~Kefp#jj*pXFcf zmHnOI^)KLOqy6c?4BI{vW{077Kg++*`!C=V(As~`vQ^L?e3*5i@+v3Z0ey?PH|ty@ zNWXN3k})1#6!@e)+R2`#z0mn=U1Jh^;VM^BeKqjN$+uu(!XZ3X2Yokr8Rq8|_=s1l ztud8^d5LVkYfR~kshlwtGbTH{gt<-`gPk!%_z2DE;gb1Yjl&Hep}joP*XXq6^ih0+ zY-T3E6Ds?0_`jKF7#t?Q9V+_)>B7g!?33r>OJ#2$Ua*ukwir0CM1HJ-ClgMuZw=FF z#w-1ih0{sk^rPlkNCc06e}#1x(uvHG4}ZQBnPb~eh}%UUKFO9!_Uu7)%DUsKvkLjf z=^k%zhB>OOlY3aQ4V?*pnaJb2(CwW>ALD^p>(}E={(UyQ>k4o&!oT~`*bd8}2p^s7Varq8NRGLF`C!vtf3sk>u*%i!ZquqG8J>XYnP1dlFoNA$3Or_d3{ z`rg=BjDG!4EH)V2orvJj72pu|Kr;?=M?$s)y6ZZeH?ZUTPrQNJkCeNZa);@QfiAuR zJ0Ftu1vgx`o)|Fy57USh4*rFCE3tD@#+!2LVSYm>&kZ90gwJO{_6}!dKL9! z8+Wn1;dH%=C$rW%_C)Y2TS?o`zNdD3DgQ&>Kgr(uJ=T@h(QE@-5uVBm+}?E7h!gx? 
zS>-f!+n2G9T#k5i9nAo*S2#_%FxYqBn)b!HZtt9`)}}gmWv9bg>SUixCJ(xyDm(b_ zJb0(NT30&FeZ!?agY-gMVpWOkvEenhg0D^Blg>e(4d<^sD}QoTG5I*(=|Jb!k!8~D z#Hz{Q{<+%dVoj@!UTh`(_ub9@l2BF5|C!YD0`*L1%$vh&qjIISp5wHC(Z>zEYdyy^ z_K2)+2md!wZU*Ju@DfYthi_>$_br@N+o0_k)W075dX#zFX^%svzOiv~95}th<~^KZ z1OpEk-nHzb-N>;I%wdlogAHO{k@vmn#ohpATEI2occibo7~i9`r!i;J-Yj$6?2W+I zf8VP1L-$#-Y6*M3_Dl!grO&h4Po#b9qsJ&N)>vpLF%XzrwO>iwwPR@;ne#o?h?~2v z!Yz~j0kcEY9k?${>lv56OAc2blIR0GMb&a}O!8?t@=Nh0n;Ms~1|#FtonE!o!#J-} zTdZH5xoMm&=o0s+EzLdj#h}h}*?emsD(8%F8*As=l#6b29c`ZGzD>{^-x%{Qairag zX}5*7uXbhQ`JUDz`w(Yqw2e%`defK%f7KtE1L8ZCA+x5OwXeZn_p^@8SQ3m)vQ9E> zbhFl_k2(x*tGoECDeK!IpK{KC**3aSJG$fVhTB~SJS+Gw+wS#z*WRstv4rupnt3K! z<-&coGq+{o{E_+Ude_>=RW8zYNdOxI@`cvR)-MJf(>}J1> z=*#LpMqlRr7=2kbb1r<^#u?VD?8z2yaTmOWwJ)yY{OAqv{ce0*{0Ln86Z(-C;ZeIC z<4j$;?i{>|jywQwQhkYZ2nhjng617%^-H=c?ftu{dkp^*p+DF&1W(nB$~{l(6`bt5 zt;pMt{arfk_%05gom^~mWMmn48s}Rwa}N3}>7j$jA*|~_CN?^kJKPt0iwc0ydk9C~&YUz}c(J4LF zxE0>>6mUHXUUvg4>9TH}ZQ1yKaB&H?(TQz>-}~skq`>p0| zGJDCPSsr$v{sLz^Uj3vZSIN8yg}W)66x`%(J7i?YkZX}W{L`NL+7Et1v+ zZhbkNuDhCL=$W-P&a7F)dRoZ3TEP06&pMljE_W`vT+2pRcq4p~P02C(AY3nj*U|W` zdn?J@TY+zgI%kPt+~WJ+2dBHCQEtlZ*Zl?7d2jf?(n@%j9Vk9(S>ra^w$ld(eTkt@ z_>)}2U4->EkM2iC^UX(D>%DwC%^0-bPXdqs1RjfTp2(bwZ?5AGlJws2$OVgr@y!L~ z@qW$Xn-^<8WS@`hi35By_J-5ee4{-t%->Gt{qOLIN#MHLTFoOKwDoJ&K3f{zXI*z# z`>a_nYw0Y$8JaYWI`h`7w)ke=#e4pR=WgnJ>|^SEg?6-mpH**!Z-x$8_1csw`qXGVRWWIJjaGnRe=K}W<;9tz#7kQZv(?&;g9pP&&eNP;^nT^)o=m6i*OKX49 z-toZY?2GJAhvO~2R(s-bzIL*7UU@_L+G@&)ugzrN5nmh8GmXU8s(eIFR+}p++s(ZP z$;aYre?yxQ-gFyl=4IZ?ZIc2H)~(XeA(-}CUFZcndH2CPWRR{t_p%OO;G6hW=`zLF z>c8ri?-{Mxk^R0|_D?6kLmzfLoS&iBwwbzFC-uvY(`oLt&bH*WqWjhzie=YC>}+Hg zn$&g+@15}I%5T<#osFIK+b^CUJE^SIeG!?csaE{FFz?4(+;lWM_EYAtp()A_a& z8>x0|o{nMDQi%-C`5k-m;6n9ch8>x$Ck7OgI`)X&$!#zfsW2ZQWGwr!X&NDo| z^KQkC8Wwl^BYaB?kDvNm`dopX(t_KD#!sy%a3r2Hj%LYc_UbO=G4_A)XY!k?_?gZz z#?#ITCFZzgi{aZg){_Qp4DG~EGybrH_|RVDba!k3-xMM_$yobv>N>42l&6*mO3rK z9`qo2Ut=B*gF9+R?d|?sv0{|X5FXfohk5t;&F1vo2b=}_m$?ddFDWfhEbJcmR^|B| zd0Hu_e9j5vTM}2W&~e!`r_DHj_lLO!oyGB-A11YxelOI}8pqzxe$&o)(($)zd%%q! 
z!^E~Xeqd}~nvI_o&MqQ$UVm>eHt+sQk$Io?FTnqmKNlAu%gnG%uG1)EoR*B#&P*yeQhTF-qYoRi^WJcHO_6<@jS=I)nmXO7m73f{H+)1DRU zKkW%MjKU{6ao-*u73^=q_i*n;1t->x@i^E&Ub?Te)VeRQp| zjq}mRZg6(y_1N7(Kj0spAE{3Bj?4&k3fG*R1zR1L<~_KKGqv8yj88UplF{AZ{x13xfoU)6tcLNu@-gERe`<{nzg^TJTvI%HWPYzd z-W^msmB3VILW(iJ0vNvnoN}DdD*7e5rT&QKDPIlnR@-I&MLcM5qheRQ=V$OV73<>^rY%5hheBt}*OFCNF6I|noxs8uj>C;>6H}_w88Gd3veCsl& ztv`SB0o#!Z#(y_=)mp#)kIS5)P`=U@;&ZK%Z^qiC{ciFrUk|*F>eZQNg}EjZ+MXb- z2fzH9r;BK}l`|lGgAa)@F8GE@K66eV>BZDD(w^dkK4F_VL%y42OZ(_JXT5x|DqD?cQ!5vsgUsstFIK$3dssu(Oc;sSo*c+SsCk@*>-vMaJ;5^%&Qb_ZWA+I{DScQd`Q#xlg!Ht_9cM zqF)-Ha<0(hrjOR?c$ zU9i55e9?3#xg`cwBxrMoO(dS(HSh&GDT%Hx6<7wGZ}|)-uW$ef6|nA&+7lw-f6|ywd1S)SKVhC{snw1 zDEEFozJtew+hKn=XFI^`Tr$GONB3K0OW4UAruP_oU2DJ8zk)s5 z_1ABBr$Gnz(7%iW9_H}I%mU7S z;HWlp9OL%N_OBNCR{Qeff`frT~3WNbx%17l2%9^;>^F+MfI7^P3q7Vn+kox5&gaXg*zXjb zm9HH?bfq+$?|-xR?(tCgUMewwW!0T=>T#<0|>Nqp2+!%%e3n;xbo~N9NC(FZR`&I=p$;BG%rNjJ@VG z|9cG%>DOWS0K&E_d)PwUfIvcQ!`FmF7K%~Ggr#~`U{5(ysz{}7pyPQ7cnLppX$dfguwAC2fu)tp^o5BT{u+Ag-r{osmTFuRib<05nA43M0) zlK-rM4jTIT+($RM#pj0H_iFhQgyL#sw^X+m&r^caLMH{xZA3)>Z znQ`Ro_le8hJV|#!(-z`g#H9`!b<5a|wKu0@Gq8?by(QCtm(e@vx99Wk_tpP$4t)sh z^+WvM=AMtQCizAmuV;KozWO|0{ZIMkpW_OU>-(Btm}5^B*&reEJtF@dA{pC7nvQ*X z;N64q=%IqD*f*-oTQxAB>GFL z|NRB%8tLv$hK7%MVBH@jkLWfs7COKqwF$r1@BU>ze1l4SbI-Mb_DWv~jAG&jJh`6r zgskI^@~*^w-jvAT-fR*4Ewp%(cPCnScfweQuO;r_l<4ETmsUO1(EShXx$-ZNmoZax zQEh|Bre@k};NMN&uaSOm0l(CSkA7`zog4UjXrsnHs;oO-nGGF92Bxt006h%oG4K|J zpB8dv>k42*(}_$U+QB>%`i{f60Pl=fyfeO^cYN#xIk3r(M|Me{b&3AO+jnj74sSIa z`V8}s#J8c_xS`nt&`f-PCxp;#KW{`F<&B6Xyb-aVHzItTyJ^RlmC&z+HzhXmro>CU zDY20^B?jm?4m#F1l%N0P;qq1AJ3j6HeaFks51}V}0S?}l7z_SF&$pmiC_S5yS3=M4 z{=-*4@Ww={ddp!3{O{-O@dth!#M?gfxjzJWTh;@+s_|b(-j%Pzi-h~AXXPT^t03IN z|MB0_;cwmJ=2zCcB9}j;PWOmg<{Nh2ZQzbZCj6!`uWsA;u=i+rU%j@0|4A#sL;ML&=-HawXgmo;7A{YzHbz) zEA9o~ST^zxu!naC*}^7r_PUY#iXzL!PA2+-V_0ENQoy%c9%o_2Mj*c3#^Bq{dyC&k zy~tS32UUEIZ|k{xw3fR^KVk0ZVSmR9zerlU7RZz&aB0X!#Duk7_@A(QHvUmgJOaq!xOe3v$y1gGf%+{S_1QQGgv&Ad-J zv72@73ZYeSzdA$uRcNq^J}g8YbU-ud$FG|-qtDvtFByljw=H*j`Zz-wX-{Aj_(;E2 zGcMMG^OwO>^n5?Qa>r86%0w<7ZJspW3ai0}-bFtx;eCW$eAkyi!)4hnuh96uXR8}T ze;3=v`}BD`XH|qxg4;hgsQu}bDZz$+9e3Kt!Na0A%J>vN^`C5q?>j^{96$$RH+WT?EO(lMfP;ZZyD z>||yj&i{2}@Q(Cly!+9~n+)eyF6`Klw4-A~@($kt(*J|>yDAq>a}M=P@8lg$-o)^o zulG*>j>FSPKl*;<$n#yT_jY`ba{pSn&{bvI;i|Il$gHYckXf~f@F?Li!q*ls)_0W3 z`ysqn0Zmq9r(C9N+FQT;aeXIx4t(h;ovv;5jTo9I{2}q1h?fn~JT*3#H%T*?uP%MO zoc0}{eaW=1oc1kUm;8(q{nHD~Ba{&uzC$^YMtF0#59PG0XJ0$xGG6#3zKcIh4VZ(%; z`6)v^vX(Ixx_bAL=6q@@wrkz<26KsNdluVkrr34_hvhE#VGL(T@?Vu-v2l+aEpMZe zcJn}5C+9oWw}OFhD$e*f%i}9&HtUL=8Q9y=xEFR&XUCYzg&*--|GLfNyN)+~c#~K0 z$A;Pk4X+!X+V43$8FM&4J&AW>F5!2IyseV7L#s;Ok+EOv&GWcxP67 zYnJDAaH>K_U-~QM1A2C+;7XrXkI=SiQ?xnn|A*$?`l9A}lJCz+F zeaD_G+Q;_{zL)Te|7ZL?Be$K*@w73YZ~1)l`_>iTw44n)34795QI2|_H zZnN*`$l`4mzO~U-@y%h=+w$7r0qti~ReSf+-r*tby_EJ=PSWN{dmasL&l-Pw|rozgzoi9Jn`)^W%O?Fz)OA zm*D=9ANNk^zB2^(uHo9&wo!iEFAT=L>VFCD6VLDGnW_-nTfw~}*^j#xjQdyqm*76% zkNZmKUL1n^MsU}L_;Ei}5;T5m|Ciui2ty}HjRCtAo+gx`JTUR zK)&B9@=JUw`u7(_o{QewY1cikFB(YOfo$)*@N`+O#y|APVm|Y&Pw-mm_yKhQU*?dA zb!-?&<38ZlaPx)S2W8*q`6q^7mM3uj^}dhmZ7ILX>%bS8_!|GmFKO>mZFU-KoY1+Y zaEW2J@RakG4myPv~6r6PxFCXk_BDk~+fC zz|w|^IT72SU~U;=fq8j6Fq>n*Tx*3nFCLhGiwslIdY=)I*6RmD>zTO`>7we}?uc05 z&j(ZA?#T72e!!n%aNicD#nHDZ)1tQ_D>8p>1is>v82VTBRL*OPj&~b%1;(bFEqH_S z)t?EZwM|oUT=;V4SiTnrzR#TA|2~Lh~CyH zV{yTl_}Xfz?`JXUdpI&}Zyq;@`reORpW+YfIl=s~iZ;a@`H(FuNj|b*$7e>ePF)Pf;@xXj7GE6nTw?yv0%g((24mz)z8NCfpTHA0u!u%D) zg9n|jMTV)`&=9!|cMqlwH(eC54StyzxxRygsqeKI^?fsPeY{m3m;AEO(>*I<`_y>& zuC*HOqIO}V+ao!%h z_w&ns#~j%=)Yu2eaA*}ezctAJVd!w~rpn^3G5Q+p;s=e+^F2qWE%tU{hdKKFp?=+N 
z9eae#8M~6nOBrDLb*_H8MQ(@ckLRrYF(pEOSo-+3*%4`@c=*SWdFrQe(nr-dX}~5? zi5;zG>LPWvwhj7N@cCP0d{i6vS=;ylIK+_a2c755iP%O3GYfq@NKgEaQE|X@$AFn@ zg}E;tm^&lGRDCg*{Rh#X=M2zNbO_n|_{)UA_|6+hi%FNR81>Bv)OVUaiaQSH3__l) zm>aPlRQn!|T;JaZQ{TZD_05W0-;{~*wGV%n)-gJMUc@#EuXQf6^2LG(eDOrJ&EMDN z*sqBUH#3UN+!ALyx)v4i#;o!Ondj!szHH=HD3^etbUhRqTJrR_9y53+UdmM+SDAaee0pBZjIR|NKm5TxUHD}5{YZxo z_+DbmbHDXyar>P=yr*k})>^~;oUUYjje(t~YmY6j{hyB>uDRd`$7^Q%`gqNbw~lvx z79X=&HjmiQZBE_8ou@pRufND1dDotOc}7Cd+)2#y3O052!=_FJ`;VF*HuatcdlB~Y z$+kR&FLn3h`;69Vsq;(zI+q9PbZflB`dR$RT)?@Cl6`qa3A_{S+nd)8F0yB+_%L+t z{ce$(dz;t~wD9TQBKMQX=PKGB-2Y=E^uO#|cPw)CemEVRz(wK6Ud2h`mijV39i4~2 zYOU`Ww&igAW`XIB0dtEL<{Qi_F=4(N1Lk%s%!Bd3ylO%8wm)Zu`CL40|8@+RKeodB zMm#XPV!#ZsvDL%_bMeCHZT~kbo?nUpvwEM+FYg}$UO?W9tWP)PhADsFja;A7JN`W& zufpg#*PF0zj$5zV>2!Z4di!3ow(rvs+V{k7EOKjQ@-CW9tr0U z(ItX#{uB25a5!6J%6R+|-ioa~63mlUn7@hu^IBb-+bnYxbCi=gs_R4a5$-l05DB#F9Mh(^W_wBe5oZCiztRp~KnI;F$>c#!Z7>-|Ws18hkYZ z%r?$13ErpamTl2#Ty>Z+6Vz{Ci~wKZeA;}sEc{wCl{;in`nBqaway#G#vF#f!`3%%&JHS6N~HY*-v^?}-8P3s#ul ziw7n$I4bY2w!+*J56sFKFu!Ppxh?|C>U)Ox=k;FzFCcSe-uX|{24M2U$b^V(P<(&? zkcfOgKHQubiwCcdT%Y1=*~bgwmrEkF&mvcU5Cdkl6=oiGo|t3hPcdN19%c}p=fs0& zQDQ_|tF}LBg*hZ1m=8sU*}>f|MW-O0OvQuFHq}h!o&$4Ym(mN2B$a-RI%9Rv+2Qnv zZA^3(7j})tsR27pXnE$@u;r&;&|jW2YqMp}sxe^_V z!tiOeZK!|Dqzt>F3Tp?K)II5~RTXIWw19uLgdV!$l4!Yqyr zbEh*qB|4tPR+tOpfw?&b%vDyH+3~>ChDFEo9xKc-@xZ)12Fz+J%#S(K5@Q_gbiNz| z<`yf=qw&DZ9v;2z+pREv7Z1#)7%-o+!rT)NOk+euJeB^tFY+0%kCVdeZ-tSOK{9Kn z^YX~`mCAX$!`3$ZIhrg8lJ!j@>(4Fdt<5Uvl{<88);-~!&PQU@d&FArUFmV~O?zaR zigtyO&$G3g^TW*p^HoefO&uAruN3Vrb$Ehh9(Oro^Zr#aU@o=7bjAa7Yh;*;E>}jT z%gb@nrMlQ5y6R5nyVMn=2MjUSWd3<~r*p-q=rmhpZQt?O>+_w?Z%2ly+ILUn_FZV= zcP9P*TIBlT(qr#m5=oEU>6|#Te~qH_*!vx)>#`{%y(13Zek2CWhpjMI#{=`V7%;1?Ft3RR=JYYq+x}H6OjkTGx5j|^m=)%|<&i~+O33iB-^4qmz{EqdF(X@&V8@xW}03{%OGDCePh zFD`zYt0@bOu@T##>icd$ZiP7;^f^;52cHd!HP_z}qrQ!i)>g83UO%p2l?xibdL!&aD!DO#Vfw&CNUakOFfgy?iRWrcY(9++Q`0rO)k%-_WWGc`RTo{HACq{!#? 
zOj@7G*nB8*eTvpuNuFR`@>}t=;pG@Gr&?h?77xtqb0X43;hAfNxh5W%n5 zb&%cIJR`Z!r1hEb_1|LDH!E^|@19A0lLpojYCW`+_0W}J*F&3~#>D7!Ews{g`kCyN zPQ5Ce&26*up%``AtTwk1(dzWiX`7utive@HwH>D-*zwdFHHdzd%652V_;z%}s?)mv z`sdjD7@M6FCq?g%=dA76GbE<$+w5Er8K&a%A4lf9QD-v$hSF{7)#2%OePn#pSl(yt z|F1=$Tl0n+diHZaDhHptIlA9Rg`D}|{(My5mm4PI(`18osP931JTAh&S%RxIhk(7H zd|MHIW-?uQIRuG2C4L@pf=a)P_yXbtl`e4?ae_*}l=x!e1eGrFBH{#GSY2wikbQ1b2W2 z+B|6B)+}m-ttR6IA-e z5-%iP=uek;E$IZ6K8yIH#0jdr#6Rz9O(v-Hi)h0(-J=ukf7Mqn{n#;+HdFzVAbD2K zq|A3HLy&mqg~YwY2`XLU&yt^@(q|CgMVz40CEiM$pwcfO{&V64l`iqU#0e^WI`Ll; zC#ZCZ?C=e+nm9qFOZ*UVf=cHd_Dp<_=mcA!UtGDgX-PqAGC}g}%^^=gqDLoO zZ1Qi8Hal}0-v}26Xi=HXH+_glC(JPEb3U+EUYeIoQ1zcr{hudJQ1wqGUPhdt(j|T; zae_*pLcD@FL8VLlKH>zGejf2P#0e^0;@iNTpwcH3A9h(@GC`$F+(UkXO3#u$1!n@j zjnvq5Wr4F!8G_{5>!eH~ae~APB<>(iQ0bY(M-eBebmE!#Y|#mC{oc&^cNB4gN|$&Vae_)8PkaJ# zf=ZY8B;o{>K92Yl;slj0@f_j=mCjo$xlZB)l`iqU;73sDywO7Y=!*dV63=wPuLREW zDVnsULRn=84?i4#=1#BTsE zf=VAnyn=LsN|*S3#0e^$w^?%65htj0iB}OPsPqxUze1d#(k1>Vae_+6=Vh*kI6cgE&E@bIv|@8*ze4C!UF)Af53208J{Bp-C-y2$Dy~H(c(1 z;sl9TB@zEMae_*h_#xs1m5vXi+%?dIpwcCNjC6uZ=R9TZ2gC^~UE;mO2`Zhll(~Jx z2`XLURlp;t^dZE*LY$z|CGK3x{IJwtK9Tqo;slj0@f_j=RX&0E1;hy|UE&uJC#ZBg zG$}|zjwN}1g`ZX>zd2K%sWZk1_=fV2f4=2>_Ugdd1ssCJ3ncz5ae_+MfV<6( z46&m>@fODh&GWCE4b?cWV#ZJRN2}vE7Jt&>lgQyxe&5ldEq3A>xo38bt9P5+WuM4- zU3@{PbH9PJFwCQQDi7t&GzEjRt>$?e3yh{I78q{uiwFb1x?y2FZ-r444F>*2Lt(Jz zu-Vx*$%03FG#L0T4TYiT@xB$0u4piv5n!ZDwcsIlP9oA{N(2})tuV5q!Qehj*tV>& zwxuXqThb!H=&-_Q4!{up6ThdzfACss4go*AOC2Ne)ketSeR<-!a)!(Fn;h!vEcJ31 zXVOy-%;9am96jv-`mOT)=jU&$f3B^o@Igvw)`tmzL!3`wV{RolBWP)er|pKJfqJy+kvmpjjPM% zPji>eSK|-AvpI?W_dg2o0VwYi?c?o}-)8c@&4<3(uM*FM9-+R(;%VnLWRSF_4Ow$0 zZRw=#vD(tier;%5oXDu)wutY%Rru}uI&ERiIj1Jw)#@e48bjn#O*KAX2@)5%v~|1R z2dv;+g@4{~?GD#(p|lvY!sTtYjX5w|^?9o5^G@Ke=j{UUSmC$l;LjSFDYlX?aW8Ev zLDI!Pc_^*J)fo;qc(fND?S&^_a%94z1mV#f{5bLrpSs#HlTX9>W2bx`v~6JNcxM>J0|Y14K5JCjyP zS~q7)?|5`kkL0a-wocJi##FBE%9{ocsJWWA3x%f61nd=w+--q3%`x>W@Nu2+&8hun zL0&enH_qVx3$O}-?dF^GVL9Ifz8kq!g73t=yir|KsC%Z7CU5)saj9u2AAX6#W974t zH;4m zx$Pe(r}L=1vl#leqUrxo#*wL4;Kz3M9^V4*o(+?Fsp@ppUg%kb}b|Bvw3}E z@Uh2v0HYw;V-(<1JsE$+}xIz56zM;aCu$OEt|2sl3)DKdgmc8@DnNTr1u(kia&NK8}PFToPXZ) zaeec9f1c0X$$7!=+?j9NNjn&`hXtSWLSStJUlZ1@KR$%bWlYPTR()JA^_hM|#h=!E z@O1M2NpmLczg1h)F;pM${h-?7$0uCMx_WoQ*|Etv90gePA)6rZne}`v-GjzX71bu!z!gN6PCt* z6PCAmWAT!WEvLaUhG=ud=dPa*t@C|G6@K7leA>p;!%`}X%h5}g>dCpQ89N(kUy*Js-ALLJzKf0` zW81y4wm7@JS(VSB{X&l_^zRz-8EN`*@m*?Ym7|S0H8+|x5Z)9&_T1Ov4GpcO%GQ?G zB>o?NH_5ZzR$07{{@i2JD%yuMrZWzYIT9Pwo0IS#>KX4z23|=Ye*AeeRdkko+WURx zKY6=g>k#2xN6VM!M;CGiob#5gNgrLFs4u^KFaG&+&|z|JTr2PKxyZi^+N{4puc$$; zzWjl2MHdN&c~mwpGaLDykjSK?e2q)!M4}EW5h4A){;L`J5~ulv#knuXvw$2pZ10$<*Ma3EW=60}1l;L%2W4DjVGkR$kE72IS_ zdud8-@k_t{!9CM}b-6xfmb_`?f~q66ljjohn}< z`TDeD?XrHPT#>WA!FU7xqTs46+4wOQ-?w6`T>`)8#AWOX+z#LfTp5c3cN_H>%q!A< ze;K1?0cBR=4_nF@j9n?W55lLv^5ia3`2NI>Ni?)fRztF z`sW4zx>EG`;5B6?_y@=HQ?z69c7x$ShVyQtS}QF$SHYBfoDTX&^1MhJEO|=(dG;|L zknxAx-{!t&g7$1^-9o?M`ht0~GdWP-&-}0osZU_-0@hJ^$C3FnX=Y7Brk+%%>q!TK z;e@{T5O!_MoDz2Z)Ft)#*Tyb%QQm(Dzb+0sgN4i+SX+vNH8L;;m)(d zD1uHAVT8e>Gl_Na{5fZZhk}u5g<;|$w0=LZ4vbfCaDBphLm~67%*iK9t$IV!U8m~} zMd%Bn^NSAHb-w1GpM&%TbP;t2T6B}nyZt(YJBrTW4(J=Ej$oEI^@PLV6sjlKz>78M zp!I~WPYkao#Kr58_-w%$@L8ns62$L=8ZRNdo*j+X!{89g>t~8XI6gOV2wSqvWc8{SUpdBM!(j z3k)@f`tu2GIM)%re2}hZDX->Ge|eKO{W#n_a1O1Lw-Qvjf`Mi7dRl!8 z!ubhVr;ENdXCR*mZy+sv8wc!N{cj>Jl6Kl?CvAK%!Q02Zl_2}dfW6D+`nmRP z<3cUlIo!~uo-2KrV7S(UrYoPwR;;F}LOBf}#teACud!VkiK)bTFrhCq`KH-6Ab%p8; z(*9_D6^?ez*y1A5EmW_Oe5_ylX%_q*5q>WH3{Ra2*^w|kEKrbn6fi?%$G7Y zU$<-Ugnvv++Cv}t>cfpew~UL0YX2Gkty%r>q--cmY@HqxuB6=_4@~&Q#4#+)BrD9~ 
zcwo*RXm?nc!>uqE#sl+?7%;^Tet@RZ_Us5SL*@2atxtmV440p0&cx<%&>5uXln(H8 zco;$Rs2aoOJlZt5U$zL0BHGUQ3lAe`9yDQuj;)w@1kJ@37(sf5NsC||g`d za8tMk$y;}bu41-LjpLBDS5z8@>8EfsjtV18KPf!kvi5aA2IhTYbXLA%;?TRp)A?ZB zYs2ts+?~#Z^DMkv6OEU{z<|yNL+y7_VT3=Uv(uSs#lsy9kMMo9(>c=$!|baw;5+;r zTKl^wfX64)-#h)jf&Lr%I}(jT+BYZ~h4ht4qp)<2)ZZa|=Z@CjVPJ&gI}^qy)>rY$ zz(};d=@a0`O;#9#qV>74`RW9q4qWXEe(7*TmT3?6Fi2kE+l!6TgfF!2bUFMJ#HL#J-nIfG>L zp197A%>{eQgNfYx(y=YsTRsT9v#Qpd!~Ex$Pnfo#V0#klO>dUV%lVB1@(%gY_w?t= zu6cs@#~sg%Tjx41Yj5_sdCz%luEg6oYsYzq!#(7CfP53|yv<4a-mD3{JwB!-3%-yu zJm+Ia>R+>_4cL-iWgYrg+qmvZ_Q2h;ejUY|)055h?pXhOt{?kmH&2ywfCg{&E_ys? z-S}tn^|9SuydT_da~#k)(;(-C?xBumV03c!Lcy9aP*1qGzoD6beZJw7J(tzaS(Ui# z|78LD{|#3Azve#O{vUcS>(B|Gvd^n_2JN$oy;b&DYk(c^xvYN+fo1Nw3M_NK^?q;+ z-4FQd?Pqr`%as;>Kj75uKZTEV_}FSY6dsmy24grAnyrsf{P`w)w_5RM(lZ&l-o2eO zHC=owx~B7ZJ-kqE8+V`_p1GfQonNx0)s^T*H~q6h&Xfu-^^$iD<#X0ecxGDf`d2vL z(!IKBV?+0Q+H>D;={TORCwE_eQB6Y+XI3)oMz_J46l3W4I_=oHSH^<>Xn6hioC!4G zd+)=>GdU}@Lncl+>t1>$r*H8qZw_7e+?*UkjR$#mWy^0>XM%Q)fqi))!$seS=dAC zAI|r#uRgb@jQ{U(HYQ&m*S(i>O^-8v+9ciW=PqT^bbaqYYw>O2_uT1-m*tsR`g_ALDVmqDZ@rHC@e zMTUuOe)bdC#MBr)%|4&jvXpy1DlNzs-!CJB_77AZzg=&qvpI5oVk@i)v6ofE(}sgF zV20SsZi)w{!CR}*Xg$_ygS#v?%+1clF<_=!Va|#N=2J0Xo@<3UDIS>ZF<@p{VH)wk zd_M-vsaBZpA=6{<66cq)qxVs+73N>!f$5F`bA}b>{&-++jRA9(73TBtz3WY6ghZbJTPlwz$~=FER6@|%Q0YHYK6Hh0?gp^ z^yWU|S=fU&pXIrr&Cau5M{NECY!Ex+wwH!k%dm#PZ(ZzjhH=q341Csun?vof<{G6& zbow}K=h1Ks3nPrJM&WUGcIo)(9A>?z>Ytcvu}f!vvg_ir=Vxv$o9AT)vmRA#Jo`P4 zi2N9~ufxz<@k@lYb?~}4E}o9WOZN`YY7jPv&CbmO{HV9o}ZN+4(ywjGAaL!k;PJ?0nY>qd6LkFn5s^J?5Wp;otUXFv8qLRxl=7 zVRS`<;T$+mr0C_hc5r1O5mpQG!orB`Ns|LyshW8ZTR9ayaMWa(B-urp=T35=`*a@NDjth!^-=---h+i6;>P8Chi)B-YqOVsq#QQF6kF~-3M*pxq=Y9HV z%f6Akzu09@Q~O3u+?`C5eIxSiCf|g_%HlNAf6Ts-ny2Rs?A?g1w$nbg<^4|w-rWm# z7CC57X%}|fJrVbm()#z5YEIizO6xY*Q)=$tQ%Y@NFKa7vc<+Cn;hxeN?O{NJ7DcO| zJ&b?YpKE`)BjO%L5%+uh_b@2mO8L|FF#e%ESN?_5_b_T?4Ro(6bkANxySPvhpE25^gaPvb}AePIrJ8vo5YtdfDs)^Qf|+EXGM2Ii#f!OUxI zRv03q2CKuKmAuA2ZrU|m(azU~v0o`#zGan@QDtqUxzUg}&IaZrp=ZSNwPEnUrh!d9 zyq!*D&}nwGFnA~!ao9!vNBA{JkCHw*lRG%7k1h}MhMc)x3Yv?cRoFF`g5kEpI2(Tb zAEDvdj``QD?H3;HAFuJ!P@NYWkQ@J>qv5*)G#C^ghUoHxqG8aQMD)Y6qG8mtKk?~r zl_C0@_qE!?wcpr%e1GkqkJr9-)S}1D3ew|vk4NcoyUG6)`ClXdQ{>o; zd@MH1oz5S`*hj}#Snz)8ng}pM?QfCv{S#r%EVVFJHZoRTVytXbW6Q7iuM)p?S8h7o z^@|@KFM8`2$J?*8>i_p~hN*>fKXQht2p>VF{@)6`r-Ane;5`kz&~-rh`A;4$U-iA? 
z)9&AQy!`xu^}u}m-l#K8tR)P_tG3>FrZd^TReTPOU|q1tTo(XmSKw@u2`_kEFm{`iPg85WW7OZyIW0Ncq~U+3?6Di>dmo!Hf6v3Z zx2(nG^|olT%j)w}0oo3|{t(Cf|RH=VR}()fmduHMq>7Z;RG<=a%P zx2bvq>xu+#>Dab~WwY=Fcdpj!oyGUDl?&H@oakMjOF8OY$r|u+o6Gw|;G9&3Hm19Y zbB|h+e&|=9!p{!zZTN%x66d}motvsnwx^oq-}^Z7nW%5k-tW!ot3M0f_n@<|N^XVtC~-&ATf^v_3dUL5AQg-VHpt+;g0rGiSDbW;)JEgNemcvXE%W}F z==Z9PE-MU~_anlH)L#!+VVM0DOsCW5t55KrewaNK)wZ+K?*o2I2J}jy?-}Z*0Y5MU zFoeGSFk;NHp)&ofcrD&LIAPX-ik9ED_Hk4`iNvpev%(O1M&z}iKCLnL$dOQ(X-!L$A`H9M(uV36hS4GtcBFPT*PU_%oS!#@_w^9dSM3rHY=DME^v_s}x zcR+>(%a8vSIUR{kA!ngX`KDy3vZwgl&sxf2lQL!Oz`PkJo&G1~bQFIsb9h(kgArP* z5%6iD+I5g|QG!39TQ$#VGMhUpVk;Ftuab69&OncF^(wlkv+sU-sJOWve0KnzlnsU{ z_KVJoSq}!_N}7_dVPJkR!qs};5UrIr#V%?>ucvQX_t=c`=23*c6bZ)NX8zcqmtViT@L<`(N47`a2T;yR0yZqQMBSckFcTw!(17=x@`He`bPfM^?w)1-6c= z1zLi(BSX`>E3uK6?$bO~NsGKQKhsxVS^bLzRn@;-u(J9q3redWSy0M5994wx46$?8 zpb)=ww!G3To2P1Hk@q~wpWC*u=J3)U_u-_T(%j00rAH)fnRgOtrG<9jUg1rb|J(V$ z@k;MF`On*;RY_NQQ|13E{%>6F9VP#HbFwPwYVUCV*DNh)buZOhOA8bEc8xdtkgt9u zHoHjptqa006sLYz+kr*6hrFe>A)YG2UHo^i8se!Xm`yF=R{Fu8lTqV$O2sXDiyw>qn!cO7kW7dkw(gvzfL@2LJ}+rr6*_ARg< zs$Q@%Y0Qq&C!s5Ez*g~wY&CDl{yTVx@9kJ+YFY|gYj3<{P1%i!Yuqh1{ulJt+(O_D zm?Wp>nFil1)u;88-Kh88%XjxJdhhe3)vzXTXqCK?aT6yg9k!csX`}6fOC{Cdxe-bbr=R_h*y2KYQup<-j`tyky{&121P?vgICa`Jt~I zUN-LM$CurE==g!e0o(KLz_sN}FzW*4zvHb4B4lQ>* zSH0k?*R*8ME?e@kDr~SFoC_+&27Cc!#>qWe^1gbD@k}fEe{Y*mC+SjdMW9@s-Cs`f z8Q8l|+Kjpfie27D+xYHG?he0dOFp!fV3x~n_0>z7y#H}8HsSlP)x2Nk?resZ>bD7N z$%l?zEH>d(WfQ(v*@RQW-la_#Vox>8zaPaWd@gjzx}dMVnR_+Hp9^~p-uEyxt@l@n z_SC)y_uxlS^Mt0gCCa(fz0KX^F-kR$K2r9I40L2I_3wOhLho=otP_2`*XOHW$(uOB zV<-4ux?S`9j5Y=H+V6oOcX)j->W7X_*6%!ZyNx^AcJ62=;A;Rs0}l@I_HmzHe1VP| z&Yl0s==}Un124I=eILB!PI9%n6SdjmFJRm?AV z0?#xkrTUy<=K&z}|KG@4|6~FuVtzO_hmvbKzwccPo z!z`B#PiU2G3-n6no63C)jESz^Z1QGN&Iyc6;0X>AJ`WzD@16KwypVHLMvLMl-WE1= zS8p?K&e%5L|KZyO-h?!lcPj5Z7~d}R4&|5k8d@FHkvqlZee-*o_x%m@0f9FXTCbR^ zZT0^j(=zrZ?lnu>3iAAK(~Qx2-@E#f-mdR2ZDoBmJMHL$ufWf#!k?leIN-tTHSQke zZ=jlY73iBX_JFdhv_qLyB|XwNO`P+}^x@l`{O{900O0mWf43)l-rYu zhVLU_ygwQG6R!PSfZopOp01%&a}t?)&GHhps#zi%weAEsI&Bo);`Dx z^g$DK2Fn|vNjxwM!@x`mz^n^_`EBUSdj2$@wEnqL!;i7iqPdcJ%fr_4+t2Y1EHw13 z*S%wz$4@{vzh4sBr%Y{`!a0KXp>N}3=rGI&A`?D9UhJ`r?cN6eu20umJCl*Az<6ju zUwt^6!Uxosfgcm$?d{bf&-^|e(0mS4CIgld6a})N`{wG=$B#mfw6l!XoGdEr}tzVjPEhp?Dwei zDEQ}mzMywbqN_Jg#^ea2j7Et*CNg&O`0cWT13ob{-d3H<|Gmh{ z4qzOi9e-vXX+Q4DYsYuXACaZcXqv+P!E#scr$6>R?@lhrE8E~&wQ(5Kku)%Xcg7a@-sO*^GYYX z*8Js0U%hLnHb?fG+Y7Z(<3~NTrNchHWd?jH`h;OGcuv}I;#<16BUxjf8`rW87)Q_z zeo7wS>bdi~8r$-Y(qH0x*!k>J4ISi{_fWn=Jtz5IL;JesX}vXHa`kk*H>Br8qbtwn z)4e|6bv5qItI>Prot(!1d1Axj_q|JdYVOg%q>JEq}-hq|R+DR-1| zwUj$bx!ORvY+#yYze8Es>nNM-$`Ks1^_q0iPdmWvpFy;EhI}0#`{uaMcjaX3M)J`{ z{~K*3lzH>QOT3GpZD{yBZ*&~78@bX(*|Qh_(qq!~tzDCKPbc4m55Jg2JY#QOyVf&L za5TV0+G!m3%~kcDahFA%}wWfk9>*Yz%O+Sg9}5#iSjH}=$WmoZqjI_cNG z2d{uXFNaT;!LLi<+a=zQ?keIvR=b)%U!lCnfgjt$%%R9xzMFD(XO4bBBsr_ng5~UX z;6;+NHR8J`crFZ*vmf1{jGtDYz(_cPa&f*(g$=O>k^2^y! 
z+zW@=*|&Ww_l4+-_Iu7XYUaLxd(r>)my&C~!+kY$U3-fL=Ns`O&=2`dhATZSgY(1j zI>z5cnfGt~8FbEs-g$th7r17yo{3vqYTE00+dqYeOBIj6+H5<=;CyB;HcU^abTaX_ zOIhn$?-N%d&GryB zu@&aZ{g2B==lB+$&3|_1V|k5-CKWB1C{GVC>94^YjMw#^k{^4Pw zlMN$`fp==Zk2V^v8n?cU|3`_PTnDY#4@`G~qu5&xeU^1)=cCa#8N2)zS)Zmq^wB_v zS}~Gbq;Gyo<*?(~efx!zTU6!NVYf{0lW2IC%F=%W{2Lt`yf3|eHMGI^_ouhli2eS%s(bA? z8&{bB>4L{suu~ldJ~u+U^d05@>W1#AT%+h8dfZyze;s|RAfHYve5#+ZXx{RHS9xZn zsBDW9TNWQTYw`((VgqLEOZ3IqS`}i}%5cGO^ucG7p1%qY(St`Av@P?-l>7#p2zy@L~%* z3i^$OyZx@cT=MA5&a;sVut|_VsWJBozIT<+SlEY+a!-(boA9RDtgSw53B#O&P{qCa zF-x|F$&n)8O6*+MIyNfuedZPzO{@G?+ih;{#|&S^#y(&oyMG)1z05JOH1=NFIl!+5 zziF>%HG1m+a#}AmUGE!N6Bh6F^D!Pj<0(L21cr;nJK~%VFnBRG7-X|2JclYh96`4${THZ6y4ErWh7g^ra#&mzRz{K}TyuB&H%F1)T39FZ&|xvLbsu=k>J z9+TFFbQ68s5bpAA?X7lUq-QP+Y<>xT<-r?|P^N}C@d1Zop7r39wEb($ynzL|o~OO1I6uLM-!rIKW!8N^u<-%=kiJ3Rt96fmbt`*f zeAqn;pt}|P7h??CsyXo5u=5X{KbtIShRhD&8 zuGj(M3OXq7r`=N8wGzmO|9Nt+Xd!rzY5d@g3i}*IXV0kUoaR_Er)0=-XmlksT613s zPPOqbo4#O~&vlTo1{iBM-*M$+@gmpmWzomVbt%6-{jl_-FY^C&<3o{N;O;xMm_~1#FrQFyaG6vH))Uml#9!Whq7^T;r`6v3>W<}y zU-C>JuKag!_EZ^KdhlEDSjBBAmMugdY}?<>*j6W>^!GJ_x0RxK!~$qtmb16IlD*Z6 zVJV<&#UQk3lCdz-!_PT8qm z0w&t)oel%_QQwQ(-vei;+W~9@!!wO50RFi9!`ZX6L;J&Xd6x8!BR+5vG;lJsa0>X9 z4Q-r?Og9Z3EhBaWTT;r#^unn9&6_(su-k~`qFW~$h%Ll7pFEEk-1+Q%$A>eJX(R^D z!(UH)eA|(V8~O^35o?Lo4K|G>M#AbSiIYzv7Q!|VGOcF{d0x)HWK- z|De9<;W~ec=Gp4J@=H$H-?Jw28h^EuZ9X8E{@WaS5^Qi=iLKzzKhlxOe!#eB>SFk9`&2a^cEg^ZDbzO`mb_%8|&6(sktj zJ_fyb&70l1lKR=;&PvK;kGKC5{1fXqdFWB*;B{iH!r7CW{m{vSiG~;R=|fMV;U}+A zpR2n2riScW|KldaweTBxz*uSvA2whD(2u0J2J(=~p;e~ql zU4Hw`$@O05G-+nlAH}AAC9ut-Ed#om#12-HdbW~&#VEg$el6hr0^rYeBH%ZUmr!4O zHyZWcG(roqr;NTo%-*Bdyi+>yv*3%F*!dfDzE;iw_|4sE)2QD9JOf5ny=(^gl+CyMWezAhgI2ta(;n4Lk=+!bSdwWDV}=^WY5Lg9r9KFdWR~eztSpk32niHuq;xN487<_3qeX>|bSKVbj800st-#B;`=e{^=adu}H>!mv} zVXAqk0jO!TVv)|KqALBa4_;er9J>xpY`0V$rDeM|W#~7bGwn4_DvCYz$ z7-xbpieBVEFM|9uwnfOK?%43fx3s}m+_C+m#>)Mf#wH(?@ZX-(?}mqKjwhdxy%rzs zBFUr!8O9R^`{^IL!P7Ex3OGy*{9MaOAd@GwH^73gMv+rFV^R0aZ+7oH&JOhd-F7&&K}*|_+ndyUx3&@$Ohwa>(+lh6_5$d+m5onyWifBYry zwKqWYE(%^k@4EcfcUN^@*mj#>>H7om6ZA!8bRG&mi>^}k=x8rU4s_w|3L}9{@pa?n z$VlIn#>g=A*_vEB9K_#fQD?AKz8hz+*dfCk&NR=EI!7Z{-n_>S!Qs=JGFr`zY<{;R#C*ge#qXs5;#W-M--Uf#*R zj~o~q&BKT7c^0r2?Dp_3xy2;!$eCBjHp`aD9?clp0Z=-7Q!?2R zaNz(lSrIasT^~F^U+xNeXEpaClLZ4tY!)(^=!*PA7Cxw6xbS-7JCI)&n`sA*!-&{#+Gj{$vJ8k&0I<>rtLeHOs*KPoKDdu&T!cgW}SC~ zi|Nm`ZjZlU`&hfsA2KxulHJX_s(#}55@&T@gzZMp?Dc@`HI_YsxwhA0cWN#6q}E~! 
zV-XA=1s^PnG4T@(ldXw8>|G&#y3g1B6z=)i?-iproqK-vd+vRIAhxZ>i3#(w7pIW> z5cjeJ6mqYanS5d@{bpcl;W%@3!?^PLcIMJ=o;|hD=ULt0E3ePj{VewK@l*L~6~XHG3V z*SEUiT)uDLPn%k}C~I}YBHn{XelvS&;io6BZum6s?fWTH3nNojH$-?3evvN`pJ$TUgw#vHV5bbY~7h?zzO22v{!r>I-pzu?tU)mZw5I3Ft}qFV3ya`gQC2Po^6aoORO00 zZ2hk4e#(zGN49P>iRGFYi~gf2p8JA$ZsTTir1dgy*Y|gsi3k4GOvL}`^5KpzAs4a- zN-&XsqU7=D&S*(iEdTNE@6`S~twZs@wG&5nwEoyk?D(mfxc_Hn;=cc&eb1!W)*Jaf z>3qBK-in8erp6`FCnv?Chn#0^E%fJi<_Ke(8vSubS9IRASoBMtMD#|_Nc0HTU*~!w z*Edd&87~>P8m6(w@HKwA=(5@(32Xi3r?hXCj_V3^coLeI&zxai1dp%5BcW}H|3dzYVn-;| z{};k>$*P^a!=8fd1x+*IckDyl?;)2IIQdgx+s3@Me-gPDnib$1Z^Qc!^FAM2kj9kD zvxccg7r#UU^IhXFVJ{(mcTXhYqhHolz6T{I4%qc_hPqh`Gmu3&?|B&8^j_b=e~1_G z?PmH>M?XSbLpKuSOBz=GDJYA6I>tp* z2qU|fA|JwIVx`z)G~Z`42Q|B(`Sc0?W6Sjx_9iC|;VgTPu<*bgSGmT+7-}g04aShm z7^*n?#{yoeCmWS)QzwoXvFnaHXY!7|&qBAC-HQ5AaOxifgY|P#_{9D$?_3+7a?|nY zPYyl}je$=|Pp*wm!{AdN_%w8ZU_P_67Fe8sFOv`ABtBgR&wX-PS1ESPx;K_}`GGle zb|_E#%ZT|f@$1H@r!wDoBcp2@d)o%+W9^qp5{22u!CL&e?4`-B{d!5_g0h0-d zGd9J)ttF?0z8`!wx#;8auqU@}1#j(+`XL41t z!RRVIB{xX!aK};epcO$syU<8`u!(>r>$FhDLDS&O=#*-vrq}=Tf1!b0z@xV{FyJ zW39PYrx`88#?0LQ<7HjZT_uS}J;RGd8@K*TNg@+De;snaDq5~zVYw$U-vb#A9FAGXe<`-85)P$8ZF9&cViBtpz%DNyO@0LS znTw{d$KzsS(Pr?bRPTNGYCgqScY(KxcMajgLk7Kt)~9S2#DVdRruMBj3EK zGm`i+=ce7vI{JHP=PjI@R>QezwNFP9b)1`4&$(%9|FI;o?mtQrw|-EP_$ue7t>@gd ze?m?Saekrgly(qdf->{lX*zcTPc962{$6Z_p z?91SxmFAow__gBQ?A+4j?csPP<7i+UAv3yD@B5hFQsk{n{8EZ5=x4qvsJoW?R@Or` z_jV2^>>Z*z_3++$GrZvy`rBYmtgph3QOj6E)B!%n&*98Q;m?)y*H0Vr3tGg0o?gae z+?P{E{>0PY8Pe-im+wml^0TIx$KyBZn(;p}&xx7n<6iqkwVvH)VV+&2Z`03m`Z+{D zrK{--1GkI`Oyq-XpiS8>&!@g(pRQo8UtzE2JnBD!|1Eu-uRuFgW+HObp3B2sDz_he z>SwHl^tFM$M!3fB-c?9{)wbXjq937DKZ@x`9{nS}t_ymeDcU)-7@DN>G8$<+yEWI# zx9#untpWJGGG7hzY8j5qSB1;j^W;Iyy}q<4M

(;JDqg^CtAqDD zfm5mCN3=GuXLOu*n{5+4BD^AB&t=%b$k}4^ZgRKn6I?CeYPYr5$tx#W&gRYUCl|ev z;WRfc@i_8H~cw?o-O^C-JT`|G(@E*5y|m~|SNKUe3;qFr6*tDopU?sI%7 zT$9IxXReKxvD)w%OaE7Fv&p!F#C}-#(QG{wpD%O#xMVTA?+b{5$T8Nx!&;Kif?wv||GjapX8Zd{$@um4}QtbT*MmyU!paWL*@FKZf4@UDow$$lAJo-ZT=Q z;~w9l2rzv~G zz!~ff_`k6?5EsVW?1RV3-taAC1@+$xF1&5)Wi#!ySwnlozux1^rVr@atsY{*nD?hW z#wz*2wn78tHwz&L`(_(mRlq?u0pV{CHa*!UqU?jH_Bgf)&p6+MyU$}jc@B;F|6;RP zWshZg=L=Vz!Dhj_4cu%b#)?Ogt?o>FJY=Jn&%}+1)!CDmVl$iNC4K@taB=Qh>ECIz zAxw@O7v}@WOj^74d&Sax3z;38uUoe~y)OJ+@|BC%n^_Z&XBjP8v!m^?4oz&mYOfBt zwQL13=ipZ+bm>fS<>qKyJLi!Q-&tP7eg*7zL+m|&lzOeSSez z&2#r06rC%X&l%$rS)W(L;WTxfpcHyoz?72jq5`Bx$Gg=hOsOL@0Q{_b3P0>Q}lcv8CXYb;K z#>74LqnYV4Na&92sUdT!ZO>6VYWG*1JLg`zH1rzzAyPOn8(QqX|2bv%Kj*|V7YCrv zL$Yf)?Wpce?zN}kPxx4J%_Xc+w``nt7E}Jp)Ogg7hk5q3!Q?`FOm}088gz0j z=-p%F2wz57W4_T+!ha^Ttburg2=VM%7i+l}e&jJ`#jlrhU&(vvs`@7W))wuRN0weS zk@9cBZ`|)h*_-S3tA+2l*XSoL1JEF3gQ;WfS=2gKzW?`c^}rX$&iViH1cUr^8FsFY z;gl~_Huos=7lKzeTx6^-B2Tz&2QDisLSI=JMK6~uu@$?RA9`$&quYvn=~DW+Fi8Ig zrWsvR8N;Z~ii~R8U%-3mt-=M#-RXLC*$0ysTgy)BK=MZ?X-E6(bjBk40u$uJ-|$`~ z{&{9gL|eA#SsZJVv^cr}tyiEVW8!33W)FWQS$ zO#OE5x5BqWcc9xaR(A~IAFl438pEOW@Eh`|&3s>c{N~GxqSV`}xMPQ>XN#wQRQ+n| zF98-+wBeQul26yhQ3s~>SwL;%C2`*q9M|GryEJV+^Co(;6MIW3x`YLsqz7p2oC1q< zSb-N2{$w_#pm3PbagD>ZRcfylT@lDCNgTv?&jJX(`3$thEDV~ox zKeXR|hW7DOblH7pz8qin9J~Jm6Ik2enLA(NSMK=dHM3Nq){E z>7O@yTX&Mf)V{u)>z8hF_q)!vb9TO;!pZlD`>@w5v`h3!b8Fk(zzJ+nW1ji({;eg% zlQ{cbJCq~sE{C54mOJ(e#lTnjBOCR71G!y&vj33@$>RiVvpdA_G>Ms*%%MJ>(It1`?f~wrP%?sUL>oCesb2*!odmbzfjB*@jKAzgzRI# z0=^%|3;LNmSLc}p&d3KpK`C0acuTdw8tt|vEbL|aP;0Sc; zi2-a5KEpsq*Pclyr?ATdUm%u2bx#Z#j?H1RV{@3b`on&v>DV0X`8OP!1N<`)MNg2; zA&NdAd+;|Umt%7f9BXLTLKivR<{&?l*2W(U`UYM{ZM%abV?0WKNR2OPlvU`(y$dqMPP&Ffe?iRQw#-+x4zrV^J1II(TRE3u#0_evyiOwTWN)GYEo3zKN4!$QDR%_Wx*`a0EP#@of zCw(0_*g7@qJcspfvHsuLed>B!0)AN7_-3Tp_-3-+2ADVCYWsEjSZ@QV^)_Iyx17$7 zPdMuh9Qerfb_ad1<>8d9FPhf_-jPqRrJ8q(zKl%Z%4H$O&AZd%9^AMj<^1^LF!3s5 z)T;*8Rm^YIIYtX`Uue$>HXC2QvEfYZeViXZ!5O3P5a041@hxwqWbL<+wco|gqBTDY z{4;)(@>Qnj`b6a98te#}K4?1q6#bl=Mf^$1RvX$NJ!zV)C%t_h`zHiHjX!0VX=e>< z&7{jPwb%fMkoVuZ*b`Db1^GS>l|f%>@a6cjea!`nzq{(#uYPh{CjR^@W>0V4?pxE` zY-Tpso71132>x*%MDzC9nLMv(ZZR{Twj$U#&|xHd>RU6A9fULe#JZwi?MR=Ge&$2t zux*9NZPnbnYv6N~70fjsHPGKic&7XAvuZ~;MB9PKGuGEKE_8xJ?}4ks=maO=Svs$v z^*!{-^lx0{*f!OUbkToD9{cyD&~;#1i*2S3+l%^+zID&c>}`K^zqf?_B`O2W?;7-2 z@(ZtDoZb3I_2K*U>l8bWZTIuWbD>F+O;3O`1B|`&w7C#Hb?4$3@39$WN~T0xOuW2X zGz*x)tB+vwc5=Ya>AVLTkxma9$$6+a!T{rJXPma&jvmvE5ZEd!P^xhJ;=8&y*Zp97UT)?R(45#4nsGh$Pm`LrM8SEd%t9xGGI_c zPI%P`Nq0atvSkm>X3)5+kX>anwNi48t?%(|yUg09MNxEY!Pn>TRogZw{n2NBXY7JdM2OkWG%*;JME67+q)Ia`FNPV$3J-V&BiAIpBi& zBzpf<^~+t8lrintD(4;N`Hb?Yv2-SMhnAVbbGJUSeG~KMwtcPI1}|rWmx`xq;`?*3 zpEgV|9?#?2#sStC^1wmK0-CG32J8sX7+(%K>A}msr;HfqGT8n2_&5{UIWn+wjKdEw z9)G|D`~nuXeDdxr^{~HxJh6r2uybS(Tc|yUCvBLvZ3Dg%$aiw7X-tv_3gGRP@OIhz z($8CLI@?&Y$&*D~@3hYF{?3SJeJ}C>`Rs^Y_r$i5^Q&+}&7I}G%;qq5 zgBoOG`SmSwg$Pcvos}XVXf8f#eB;q!+`8g>O|mJACZgNIKNsaOryo`Sn^Rc-?8lFs zOTAPd>~>OgN$u2-$5?&(sCHhThJ2uNR+;N6?74xh&?5Hue_TEE_37!~wdu6m|1?_v z8|IpKqxjj&{V0CHGcwo%DPI{f(<$u`-%Z9ZTS=X*vGZ+oxzzz$R7i?9nGPwnn!Fpwiq zFS3yDD)d13uOEF-c4Ffi_&j)G`yHW4cJ3h24&SwLZ}xTZ&pg9k9O$tNi$qF(aOJJ` zG}`>kPfXHU=v^~5l?L+r z`xZHPlS?07K&Ny2bnRTDszOmv*o)D&;Vl02L2u!&YP(`>esT4$h9Q9_W(IC?DbAxn#d=iXQe!| zaT7etn0?j89{eCrLi_h)FYg29*5qY(=1j&`ohh3T@x7Twk>xcu2v%R5WweA}pRicC znr=^*PfWbUh3Bua0ouGgE!M;Jf%U+*_NDQf3$$Kb{Slu*HSO4I;nh?>$*bH>4t2Nf z{Pgn7eGh&nm5&ZNV|8T*FVE9uherHNig_OxOg^zPZSJ#ZuS$52EMdbP+`9=l{B+hGSh=-$!S|cIl4yL$cB*SSQHV#3Buak0jTe zdmp*rr-OgX@v(4k<5kb3*nxiV3R%g;KlS5JIq2a16E4}nJdJ*vakU*jIS>2O$MN<_ 
z8NnS#NjB$yAwN9;Ew$G}7xt9@aq7Q3gZBR?ZGEM9O4XmBduh1wuPIn)yryqid|XyZ zJY)9Kc+KJD4&%R*3-zr-wDl*xqwAl~F6LKBlMRMKyj#^+j}Kg#3@` zb6x#A=bic@+5S)D1;>9mN{8xF{neVSV@;GpBZUKs2dYfb#8Y(5F?_by(uO++l0_8v zGIS!j1OMer+0T?KPk!if@Q3)0y_wKT*~t_;Ct7OGH%`*Cl)ngHROcv-OZhbLpPhV` ze=@GW3Lla5DD7|i^lWnUJU8JIFEFY+i?z);8}e=E^Y8ZYg46mdKk6_fh9u0@j&k`JgpC9;D3U8lw>?C%6Lui23gu$~y z#!?Tg8z>{cS7iv;z}pLWUe5DM#Raq8La90t%9PNrT6l_Zl)AOt3nljdJi1v!18@4@@^`id!Xu~&;IBx1om#BtrR}c>|^mKCXcD}ite%wvH z$IyokGj6T%#}+x)4=erwf1wlqkU11N;P|)Q*ktch>_Mk&vEW77#wVeF$TS_#C7;Nq zf9k7nbPYHvSz7T>b=Zp~LzH>9?UWuL!X|m3JGt-#d$p4@jN^mnwdMxRpg-8{@BkB= zfY#Pc@YBZq$%V=fFpy~^25!Q(f)4rjbJ^3mzq_cIe8PRZIh%~<=8v}S3?A#Pw;PcgqS-i<74c{{r%i*UwKkS}sgoTf}pm&$Vc#O;fbi7@sZ2v}6B(Ue|#;ia{u6 zZYzM@nc`9MiIed|zipX^>(l%4Rr+GfX4sWGGL1zVmuN$0#@M>Rnp7YE4|^cIsq1}Q zf0S$~TX(H+mvtonfpDqg-{7mrF69r5j}7fkPPt-3?VYZFUhw#+_FAozeC`fpiHDgF z(WrDeU9|CZU$7!8M}4E}(s1xt`nyGr9Ow3rec&hS zfxA`zoLr+g1IaX}`^jv1YTf0w{ZPJJwecnF^?P4W+In-;V-#(st{V%JuUHx73S*oG z*Xpn2zysitZwy)R65gM_pI^QYTdrII4}o9FzCIn>dQ|oX+n?^_BvKCl(|L(4%Tli& zJ;?QwH*#(J)0wle%Dd(IrEf5gW0g^Tt-sUX%|N!Y`vu>UFI+hpI|;UcrWoy(DJSTd%gyKR(6B_Yws(&;S$DV0q^*hHTx8+IjlS$ zDLqg=;=WHoQ#I$ZMML8RSN0&<^0nlPJb0*&^2KMF>tAJVgEw=g#Bp-g&+FaU3B4=e zoG?GM&-#@CcOPZ+(Y z2mP-ApHMkIp%6J8eLfTap0QycZ3#xc3yg)G)Gz0Zo^tA!Q@2v}=D`<`QEa<{WFuf9 zd<}7~SVnf)M$7o-DxVQu6(k1iUC}AQ8N3zzeZ-s;Bb#V%TMe*ANBdr=JtHJX0)v;$eMU@a+2JWn5Uw`T5umNXdm~-)?+ol-Zl6EU_iy1-&Q-0bAl96I z_wC?0<8kBMm-D>uR-Q3#_gR={?-YjK0#r1utewr_`_(yDZe9QEGIrI%*%PQfb@NyI%k>M)O zMJ`2_bajqOV809-UlTa#2iMB6?NqWxiAi6RIq&J66OHd3dS!gu8x!+%p5Me{)%dJ# zrhk&Pd(b1KXH+bR#LE{EPmau!h5WjQcJ|M9{PoJYsq;kjKZ)nhBc~>Lw+nn${>?sU zzv9##Cmqvv}^x!Z%`Xl#B-qzhs^US(~08^HxE*D=61Yxq8ZJPf#i4B&TVLb6u|Dv`-eq-Pbe16B;MCW*)MPD@6m6RJI2H}-V;$7I2gl*#z zJ(Jx_{VfJJ`k{Hf_>Bz8OJ0-g6K8Lu@)i&LzBTs%yhD2+e+teWrhFAPrYyzRaKE4X zHf&K7xz`z*t=#va_hfRf*hTFRZG)fnqYG%y%FSiOzksh5yu(*Z{z7A62RJg4c!ejpN;#N0S(EXmka1Rrv=iQvvw)sBq&^D`WnN8cOzK#9o#*ERkLZ;KdhFuv&RbS`%5%1{pNj$Im9M5)nMxXh3 zrn4dTqFYDPXj=#Ol3&teU81A3&XM0FFV!-Z)5~eU`QZG_bsx3o{B(In{=Zo_hr2GF zZY-2v(2bV~B0J26zUH%EMY`k+=2x*VNzU+YVP z&l|A?^AzJbqw}w4v3Crew$eL&NNbni)Jd909L}>05-csh>w5PVTr4 zJIp5T)z>EEm_Du*FVnsYxgUBX9ddy4jQi&EoOfj%=%T6X_xE6PxP{m-<`4cuTv@Q7 zh_lq)v4@ezHFn~;?@?Yi?X?Lq&h&5Q0UyC32RMZ3Upw>uJbi2jKf-+PXY}z2p4~z0 zRmWxr2irDdPbfKwgSXS>{>>RhZMX6K$&)y^j^}OVJllK{2Lpg7!R%S`Q@6MM7HiJevd=6BF<=-&iz%nfI;qQ=m2u8+OnZE$|YI(N_8YxMI> z`*&R2maZt>eFHjRaDI40PG;k(FYsH;Zx(syX04uV`(0eyZ8yBg)hTXFs}~U6BsK+~ zJ?B*5D`1X$@wxcfFJj?yG356KZ|wc-@a2AdEW)AS3T)-zNdfp)i9MwoUK_&ZbPV5R zA^njptQxpnTN_SnV9!kG{p1<<%v7iC<>`mFR8Z#;@+l6ohqeyehU5XYRmk&t+UiXt z_g280#g~=4EdBh7M)-?sGi%~nWBw8LiC;4BDz^elf;0u8FxbQd|cm&Z?7onqu zQvQk%{)(X2*dQI-M;~N!6Hk`>KjREOQrQFUQM{8|2VJ5E-;3&u6~pg-e>41PE%6u+ z(nme3#1?&j>RN5yr8eC;$U>ggc^7=c)|2^0JA6?&2t(wN4{mYx#M=9dSc|T|q>*{O zlrrQsTHlRLLgQUH#(2NMcSpa4TvE;X4R${8Zy|ef?e_WL+i7+L`p|^FpB|fnKkB5N zLGSUs%;Ni_Z8N7$sl7eo{a)-}*QdrT+$P4_f#X=dA8#iZ995sAmQE4_PzEBdp1SEPJ1O*U&3>uNr#w(c3E7W8Q>(>jw@Ml(p^F zH}gH@PuX$w%-+wRLw%lUKmS_d3DI#79=nY*Fv{z*!$o^0Hm<7Yn)t$kO!mdQ zwn*FFO@FjrXEUxD^vRC5rN3@pP1-F(zB+)6ExQvjksoQpj3Af0Hq0>BMYQYiH9Nj+ z)h1|i4Y2x#;`t7fON@BF+RNB4jn1;svy}5da_eh>j~$PVj~)5lj>jINPJLGHDY4i! 
zzKXtj`PGgmcM36r%2itozjz8gr;>d6-HdaPb`)!Ed2{Vp>jyqd zEUh=UVwO24vxa`$Nlq!n{^1wAEi4+s_Z4fcdZ87Uof2y;9-=;IJa(+2Iopmk6h1n+ zeR7B~oZji~E4~xF7A+92{J+Kqzw#z^PkyJu-Y0`BP)6Q!!M6dpy6fP#JR8|m3XXpk zT)!NgUk>gsW3TqoIBUV)i|pC~P5F(PSLwDkjRGGM=mL_{tI5;m>@6UNh4!aiPL3kx z=soss3ZCukv3Z>`T#~OZXwIdH!pIdfA|JBAw4t6c0shNGs+m{$C?_TouCg**gfxOs&9b>ijdI$T6y$3EiBR=#K-h;!(mKqzZADM|_ zYz+n29YhBy?)=mO7PfQ+Cb&09lmy^>(bRk&G-0;fRLtjq)s*mF94vs(%{tY$|Y#gRN~f=FU`yKD@LUO{Ro`C6FU%N@FxUI0 zgkyKI?#o^Kg(z{WqC?9TGS;?*jBPEWe9P+VwO2YOZ$K^k`y(sA0bN`NeOwDIex36^ zBTO310}lqWT`@uJ#7q3w9puUxhQE3m`&O+z&IE?aMq9Cc#7j7_eZmQ6)z``v?%-$q zAoy*u-%IxVCvxXmSNRV9|APN!9yVm>M#ii;DA(PrlrJZ*pW5CGM;qNk_JSfjK?=A@%8#cE_mQIvc);#8NnY?EXLOV~_eE*IWht$kv}3$iJ`y`QN2E zl)X0H+W&HQ{d9*OJKy8Cje{F)nD8I1E#>!DnCHYjt=7U8;Jgc5)M0xFpzpCxOy8y3 zADM+Lb8uTt&O~R7k^jRnI(yfa4>^Cv-Y<&&YR)Tjl7(C?Z!L&4 z-<%kjljIjSYu+7s0ezYBg0>u7aW?ume~zNkz%$fA5Zt$!acfW_dVDFm6p`MmIGRUDSKUaV@0xCzWm)5XdE;plty%Oho zgk!-{3H_Qhfo)a!!y?$Lbat>#eKFT9@XExwPMq5g+IQw&c52>zlfZc&pLb|yrPWpIpYw{nuRly zCmzrH{_X#8JpZ0B9)~XFe;(|--T-X^ulS|^*gU?@4*+zC%(UWTCUQ|6359Agc z>`b7Or>~40y0QlcQyU&*E{l7%D!9rMIl z_3nMW1Mf||lkL~t=DT*Aukda#I*^aq-*Pk;Vma`d_+k$Hk-X68zzs37Ro*;hUZ!U;M>^zyG=^!+(wNME@FN3xp zfwn6vZZT=Qa{00?%UDaqgltg`xu1itO4jFs<)JOYZ9RE1B|9p9ZRN>aMjKjZQ&~fm ztTD>&xP?COOOPM4VSTdtVV(FrBah3InTap;kYJ}i_WvE_i43%aOS$@DbJ2nWc;`??)4$pobG@z zZ2|aaomhL;&+_?mx&O>F%J%&lxsp|B*qt1icQ@_Di7Uv&FYdgH{|j-$;3j?$-w1ZO z_;740(h*#-kKd-A%!FYkTlA#!I1D{G)njVZ@v8J1rAJpgsofq>iN00}LgD=2$fE zbvMiYfU?b_j9Ida&IkSyPrB)c^wW;k>G3%h)6HDOMJJhKQL%eYj>T$Vtzq6nj4?!C z;ASAd;$`#|WR6R)y(zz9Vi^zPEu@U{D<-~c##U_BMfVi%38D)TH)`fr#CEc{82hel zvhBQ6{MuC4Gs%)^>Khs6xirnQ^2Clv3y$ryOAJi29n zf(Jnx;;rB4v0~d3%j6T&Y5FFvMW>1skPpl_J|Ec4Rz9%K&&da-xHH8DG@++yUvVpS zE+@A1xZGlPTg%v|44GRDdRM%F^tpE6|DHw$9trWeFA108d- zk~-#2sV{lv%vk%}bKcL7o!yfkJL`Y)V`sjTADhyhADjGIek|(``LVvK!z1raNs4r@ z_B^wv!Fc9vU(MX_6FVE;5ug4r{(37{74iD`Qr?CRDzSyAf6hECK|T!FH}>|)rneCJ zbvbhEGUV9;%No|Z>BtHkpb+QP1|cNM=ClRK0A zw$ha%?5m;U27hvJFuoA|F9Vox0Z zknCmsz{@$Hd-FD0IZ->k{$MnjJrDMV4B+%qAKnd*;=H&Z8h z$2dBWr?L%1unCZ(L-H~=?|`SO2YAsXbI1jslkGERx?xworhpG$6ZPe+9!w256KB_;^qm)$NWoQ4`F)_ z2Ag*l4{JALH_E8r1)XHDk8a0yo3Y4SRl~Oxf1lc_{Bj4GZ^b2Puib@TEsrwId|PEw zT9;79PmY&%%81_2B^IX%{HOxg#Jp&8u$%N&&Rj67D)7B`M=Y~YOTWfc{(-Lbz~ zMDzQEc%k=#XjXkjmo| zLyweB=fH5c3x*v0xFw5d8+?xf$IomYb6oy2N2h$|6l1MoR}@1j8JSmQo!uIU-@>-1 zoGW?HaIZPv6|AeF^flrs+AehIYn{O7*4Mbd?dW^-wR@Pqzno&_#eN-I@GkQ9m*R(h zlRj4RDM#)Vp|5#PsIR5l`WiW7pgZYn=k2rfwL5@eaDDA)*)u6?psy_((AOsRg;@_n z>uVFuoaZO2uPLARQGG4l)Ymk3Zr!XRL4R}WW{K}kq?-i?(aoG^n&V@2vvgB8bKXlg z3!XqX3l8XJ>6LEXY}6Oh&C*TXEO64gnH7IKm~Lj$%pkhih3IDfA#}6zd*ka=uYZ?y zmkDtY<{c-?!bGyZ#|`vw5t! 
z5}&1;DPKpTZk7yO(#@2&=U&=5m;m1*8$Nfgp7Frvcfn@>pWlYhZ^QTRHhd@2&D8H; zy4exVH!H`>%$pdya+A9UFI`&Ncf1>xP-@x8W%`^4d`$`^Yy~y9I-^g3;ZRh?9@`VtOIUh1y=bt%|KQ)lcJ)QmW8NY>I0`v28w;hVl=-_vveI><~ zQ@7agnDGH;T^Ct{UYd*yB4){y#o8C$V$0>5ILoE=?&zk?!aX{!_>vP_qw?<98lCN9 zpR}`t_=%0Ku`YDs!GAe=(+>8W$Vf-m=>4CjoxMkX^>J_Y|N1!S?J{+aQu@0~ac06b zehjybCE3ngtuF*+O3aiovw?uih&3-hpBNcWrRNzKW)-^S3gXwBN}UDsL$hTYtbZln^m}zqL(hFNKbBXKJzI6e z59_!;D-kZymkBp#z_g#!*lUPm`5UoLUF0#+7$g_7GtuL*S>zMvlFi*bA#B2-k;G;B z$(3&y%d@eQ`LUDviNDSE8@W$VPwoB)J{jQb5tlgUrcVrGBg9W_kMjlgC+qm{=Q;3= z@lWLL4tssSW8SZ5;Y0jF>-=qEDU};lYdofTq5VJ^dOdTf{RTW{pzUVb*86nY)SO_0 zF>{M+-yQUf%jsp$oy)iO0wL*$S9z)wM>&JBUNsE*U&`EY2d-jT++`F;d9FQ=nyYT~ zYZDHemef8xZl~D?`y-RSu$3hCg%6UrubRcm^>>iiJhdS>>qZ+h_R+rfSK8lb4%aY$ z=*@{WfmDYjmB)!NT$M)*Zsk_dmrS&!~r9(^KNT8)N8OM(qSCLIiMa+Ag z^GyTvgzeFReK2~u*%=SCkO3`}F+Wja6pqH++M__EiHY!g~H z=~_4aS_3vMWS?S8Z-Y+l z^}9psS8@}3=cXw1DP7k+M}Fu-bS|GKGIU2h<=HpqWG5LjH;boJr=0JtOO04YnGw4( z+1PZi*J%4XZ941Q&_454_7%|iz+7Ro(_E!9S9xPCx$fZdWniw;yrdtM+YbD7$d1Xd{d0_>9q6DlvnO(8}6OE!Tk;7($d*?2VVAO(}im{AI*kuYYkqQ zz(<7#tu@!!%{vOrId$gm4JF@2vrF)1@0*p~M9gissE&u*`?sl;^C=ZVGyaBlWk=Ou} zXISN@s62fIOkQl^RxnCdKIO#w9K&m@da2Y?IW9`J^|}=Eo(xBC`RhnxIM6>#_>i%0 z4uUuFaq@|nc<@%_59pTvOL_JkJpMcDSGdvKeoih(2d^KpS8#q0mgmiZujRd?c0cFY z-ggye=h<=a8V@wXohd7^@4aN*n<7}>r0oH?&`BIT3Rhnju59`%BBrGj+9`*QK2K-R zy<>`%@e{*V0j*1CP#x(E2*Ap zT(6sOln*;Nad_~VjK|4&F1fCGaLW9OcAlWFGahu;8Hyuu-v2Z2%M*By^KLMlUk;CV zl{eR`(>8o(x;^F{8nfFk&Rm!_*D`x;UNi^4B>343Ee&Odbz&GaU$-z{_W;G+c} zTPJkC?GEgY@M_Mmu8P;>vFo33ZD?__M|{McvLJK!Hq8`&BcbQl0P0en;^vnB@{ zXQzwa#?s~+!Z~x5h_8+0dKwH5rp@xDlk+|IYM<=zxC7Y4FFHD&J??kdB*WOSb@sV~ zTvGB?r`Wz~*%{@p*1p($pL?{8o$fZ^3zAR5m-xbB_7Y~_Hxu8D+!r0Gewpt4)^BLS z!*A*O=KnH_MohaduvCDPzc-HgU(m^h@?#BW4fzx9t4+sNlI^mU9PejKX}yEo9J1w{ z$W}6xAM4k4o`tlViH(7N%=6jod&;nx2N=87Yh<^DtHd(-#IIU*b${!Zu+a*i%?HWt zZ`(_L4L@+`^LBeKG*8ZZ6R*r|KIcS)&!KF#^0DUz(A7T2$L_qBkNrEt!?90oo9!WN zwsEiJWB-Ic?&DL3y(#*4`Ph%zVPAazL_YSTWnZAIeC%rmY_^k5;A6M$`#o8kt?V90 zZMK$=y&ifwk&ivGjP#H~=0!gC#CIpM;}#EM$90|^Z^!-rkB{A?qe1Mr1ji5QW8eCA ze4Xkg`q)QYbi9v!5^KNCwf1YUlZtMogVfn}(mTmrmJkA2sL zUyL^`u;Kfnys5$V7ahlU$gAe?rd8d8^QNr-A+O8NxTEXZg4hCl>>r>00AL*<;xXCJF``&+Ts)uQjJ&O+>uL$y_jEpw>0%Fq=pe?4dS z-{cH9`g`>e{Dg0CHV_~CW%$@HPcMjt@v-OQV=u(VJ`W%JrTEw{!N-0vKK6O|*bDKo z7vN*h$H)HOdBY>`pL48_y^FDK#(ph+-1W!!^rzV~J&Mk+F&OyTcf*&RJW<`)Ab-kx z=?-T8F!uDBfvZ-yW#xC-OE`Mr->pMWJb?~&U1#Xj}Sq8oY2Eaxax{=iPtvPI{1gU+=sdOpm$;d8am9bxi$wzu)+1 z?S!$7PoRUCv2L;>;%n}(brZ?kr?DXnyaPX_(4c864{`6DY~|R=tJy<}|0iQ7*Kg%K z>qb9x?BqJq^%OWtsrgbx7yO)6?@yx z3}{9+-EPi6{j!!khQ#d(HV^jBR{qzZYc2s#%4Z<|c^STY`9zd^O?C5$^OMY%PF~|W z+gr&PMW50Euzf7j{41AgAWk0DWk>Ua3dhpd-sKEXVtI0|E|G8jJIqV`9%2!t|Jr-7 z{)9bPsZBRgTMv&i&*Ww@_qM|MP&)Wif|GXrL(ohy?e^2wz6?VNMve66{rOdIBL zd#TTlWpUP;Xw$t1yPER>4$YlQzq+Tx*@OLQwzUU4i)V>`7yr}1Pb@|!^L--wMZWZ3 z+kUa1^&$I+{7p{3N3_qd=S}x*IebDmJZQf&X_Xj|6Zv&?M%1BK?g-#6K+dU%CZUO; z>?_ad?j`?j7B72R7p1nYx!8`pse-vZ->0)4YS& zeQ$v?wwHn5`R%1G<&ylm*1a=c)=n2|N3<+I!7}L1JpTgB+AkQU&%)aG{l<+a>XkvO zjy*$rB-vnOgL8aHS?JqN-%q>nXyW8heDlkcb^J!s&7HDNo9ozrCeFL&q|t86^cArO zGJre7hX1%Wc3b+o)?3hCZ)QEpZv=j){CzGwIsMmBcd&bH1;anM%HQgSr;T>uS#JB= z&AcC2OTsC0CwSX|dn0W|rURpw zdiw30$I*Sjn*!WBdBuNV$Kwv;-?`dXpTc7qcohA%^Ui_eztA@YTNk!thxWCeRql7ZQ@Q)N6LqjMgN-@j>Nf-X zLl$)A^!23s-4SbUoOc^+nBaj&ZL+{z>(V10bHVMv`qS=X?{+$T`@dj&_h5tfVvF}- zUnOpGCBElED|XWH*;l(^flivWeppAf#3{;$VBEyna_(^UOP@i%uE9nUz}K6Hy}eX9 z_pjw#WDmAJ&c%yQ7iVo-kDqpDvhnoC$u(UcbKbu<%YFwBU!Rw>^5atCE!8gZ6UVfh zkx%>=`KSl-7^M>HL7SpU`DJ7)oWLiYPrsi#gIA7ilpnM1lzI0C$U9Phiji9%B>u&F zte+O&Q|I~+G(_DyGZq<1%M@E2~TDxA(=hPFutU{;y5$oIFcQNo? 
zJ)Cnz#C5;M9hdI0Ir$nt;Xe0WFQd0&AMHh62EetcJI(b}`B}FY(N511lL*?K&L?7=UQ}|w_a};@P=q>1(?_U^SCm1;QFkSw6)%y$YmFwQa zoY#E|-xK?vRQ4`gzvZdwB#yBjy!un?Ce(pf?m@#Ab^2F?%X;uD9L<78RaU%Qc=EG{ zvf{u~%zLF2v#Po2G261{Mls^`w4pgt8)wr-Ca@&ljrGa5^YI(KZ04+d9oU;~eO|eQ z+NrY-*i~QulgV>0T$^{k7Jh!baElqIYsLAhzUJ#c*e9F3Q9e#+Jj^_2oQ_SBZ*yGm ztqj&_7xA&m;UYR1#n_y8f!vJK^7gXESsM#o&sA3T?u?Dc_V*5}g1JQ>GVhIopE>n~ zzuWA#O&-p>EcT*K{cpQ*n{N5X6wB%0PP}m=dXIF`W#BH4v#}odg{jjGkptKq#}GQ7 z=F!Yq%>2P$mgKSTm7XYgu7z%NN3SFA9Jw$4GL+1d4P_p60*{hQ9luNj`e`K=cc_@! zB5=nV{_oaqw*=$!6R_^ zX~QX#NtyCN_OH&Er0f2p%4n}c-4^oBq8$roDaieK$dp24 zO93(l|1NSx^3qStn4fz_cd|bH#EiAX;xlw-;$VKhfqRnz$Ux{;G0$fHUHFJYOSRk$ zGr%+LT(SEpH}DMmv1G#lJ)t9Jo@Fe-AH7(3&gMInn5WMzkIcCRn^MCf_BKi6`n?re zhX+qP-@9ll^m8NpGY`CE^8H94pL{gNrfKZ2w?fa_t50Kpt>0&|zph2LS$3t7ea+}O z4U|2XvQ4~SN?E`P|>&tc_`|{p4DBs%X!7H_KISimRN)UYY`Jp<8q@mwCt-?7`-JMy1fC z?3|ZDlMA59VrcT*5l89H%bbdaB+Fgvd5m%*us#NS`$O@y8-YWyJ!V|qL-@q77bdpj zz<<#86Par=X7g}lo%8+!doLhaaQLss@FUCj?8qI)D_;LC`gPO)WaO0Q$YAV=>t!(d zpA3zPMry!WF|jWWeLl&5haN>=@X5tyT+sdzvvg)#yuf)@w{OHO$0-yW7a-t{HNur72IdY?BAx8!>Az zIva{M9Xzjf&jYj~x#r{#hliVc#7w@O^6cFQ_Lxq&Zr2{M#x-x>W8N}{SR08s(oH@s z+Y`1R^wSgCo}~XfwkP*KrqtE{H~B3aR(uA zq*$*V;9mK&L*!-crcVdw@&%i1SntSh9X4})JMX8^j`Q0K{F2X`wPxs7XR{?=uYx91 zM>l_*N^V=}1y1=CHw>&p{E17HKeirykNcN<+nHNujY>ar_U2CRceNwEKz42SyIYCP zabkyR;gebyy6gMkHA%>LVwG2tSMDWGqZvCVe6miz1YbRc_|eq%OCtU`;aJM(`H`e0 zoDuuUf=IxBMI=}gjwOG2QDoGd#gQ>@h9hI2CJ*ksaO@uTe7ij3%v{CB$Z!n(nEys! z7x3O~{!TUXS$>4v+2tACw~=|Y*2Sg3atZpz#pGJD@_kwKdi?W!R}PDOc&RUP3j2=t zW_csM=XoL@a7L>KUt1S<=5nuQ>p98VGZ^6E`1&Wh=iG^T(Y?E(!Czt@(0*zK^L8P6 z{HIf_oI1j{=xQ2tHDJf6@K*g^v_qVF9{PG{$~N*xtCua@i9A2k7+%uArajUmVJSd}^GjhnwjGKp5HS_JX7s5{GcpHRdk6I$Su4I8pfA&hpuU(J$?y`{{pm|~EazT1?U5Y%yahZAjFUB@ zb>Xk!J}TBo4LVB(YeDyn%@?dT9i_e55s8AYQNAPV4p+D5lEb~~z*FJK#$Rt(UvJ;9 zvWxpwv`2j%`ml1hjcc7d-D}>hau9pXeZ%@^IWQ~$hUek;1M8SIt}}&x`gF<^Q0AET zx>Jn3k6`NHw^J@jd9uV0wD-|{V|E$kyL8H&M_bAVE#2Ypr*Z7~sjaIou<{CvHqPPw zO!&XK*Cr++le#G`Sbk~yOZ@N)Cs$^(E6-&b^_{*Pei8Yf__|x@Q?}7e&HVvOfB1&* z$}`QMVqKj%bH_mWXm4PSpoMmHPx)x_S<~uA{Gt9>n2zCZtB!qln&!0G<^Sn(?O7an z?+4z2J}-x_0<&rVVU3%1rEB5W(8^zCB7?ALIOF{%!#ay?mI2ld~Kn@iCoOG{4#RYYp^ZFA^WGz%Hb6BW1-X;$1bfhDi}O>D+k@&{XNdjdNZLA? 
zGf#Dl-_(7}E-`(p(xJRb*n}8E)TL)~_VKv;C|?0jZvNU|G7p<%A$FaD0UoY1Mv=qu z8BuVqGe-0IE&CO1w~FtZcSL|=(TDb6`FxvxmPy4`oP6-}#w=JdHO2bwrG0W21`YDStKURhFY{~ip2wio8e-TTd#uVjG=gvV*|o2v zHoAFGmYas4y)b;+wBfvF`Ac8H-q*0K;+Xt^;wN3?;?qz+yBnPM^|tT8-KNu42l!Vxhc0g*mhr!Acm!XcYp*k~ z{-L4&bd_;%)U|7H-`1aK!=yRc<5RkleEOy}KZrhmj#kU!c_VVgWW3OWDDWXUA z$ceU~Prm)1rrmZN?`5l>MIYGfpB?|yxJI>GJj3Mk)19_%psfKuF1XQaWj}J(PbihS zsPa~|pr@=k93P!bZg3}WX0~r!PAB$n#Y@ZnY~s7kI=?cGb5DPB(cPGM9Czx`wevf4 zb;Zoc*7$k(v1bCG^t(4GEBIgHd$2u`@=~#6SBO4jv$D=m6rk(oqw89CHVw$Mbo<_3<~>5YdA2MKQ7=f`M$mZb4#}D7 zDx+#@*7gaN^l=k@a_Iw`f$J*d?a!%KJ&ikypw(*pEsDRb#z$c2`+VdUyui#MvD~&v zJZj0XHP9IhdD9wlNst>Sx8#7Qss?#1n3eY;%x})xR2h5+fFa`#%mHJ{;e9dr9+I=0 zcP4u*e^3W<*{rt+U4`Fy4*>_?qNSIn$JezVvh+7|E*WDOd96EzwC)G?&Gxw}Q})wW zCbD~mi9d@s{g5*+gW)off9s5&d`F*m#?Q$esT}OukDQF}$c#w>X9N31yWYw8jvU@F zf-|Mg??l<->_JQ$>Jyjb`Z>c`E;}&!tRf?f8R!$SWx;~BJaRIH)59w~im$*{k%4_f z=l}d3%2H-3`|av6;F$T(u6ilAGpT=8G^1v5c9k#JS)bqi6MSy{{3eKH|X9S#`I8z#X?8;q+k z*E}x`T;v#>p|e!7DG27ePo#HSeChhX6IRK>6y*wX$%R^*S8IGl$F^tMv)pqMU+vj3 zl%Lgo7l?Casp#)KtCGzgZLqT{Wx(s4x%HrbPIvN=!pm~R3kJC(KzFN2-rb|QB%Vs= z3*ShHry9w4b-wUq&NaNvdUc;^S%@8Dli+xfdp=%9R(S#q&RM--+&SG2Ui~)`rwtzw z&kEdboqc=4<@0#&E$eIooAX)j7^%VhgW%ruISk0~5bKjZ6uZ6`Ur#A~MEZ6%wtVez zy5JEGolHX(q#PMBQ*mZatX1`SR;<<2JUfy9d=k7~vH+g5Gu3GMIMvhfaUFX3LBH{X z*TfBu&cQpKnI~wZkJ1Odu@!R^0<-2(2HlP0dvodjpdWBKmgJWeuFK72<;N>2>YR<_-iQ}zPjAI#4u z-Bs}>iFZB{r+J(o&y?rj?f1Z?^Eu~F8_)-`ongWlN>5T`o2b�W z^Pz$L;DVf0rVP!3Mkn(bY>(`L{*=>HcDXQmozwr1>0j|1CmP@PU2r;kp)7c4s%tC{ zyUMI(Eb2qPEy1*!aVuw$_;`}|fh|XuQ^zIqO}Xd1OZ1;rzd3k};J{s~%wIeDkLqjQ z!tgc8)>8EWUQIoZ@oSD9yR_oT%ssUH)(L*M8ySP!{-Cxr7M&+fJZBkkjosb$VqjLi zydmBfYfQeu&!!sC@>=j_($3YB8nx$ha5I=b)WaMP>~9!nGvnV#-(R~28`d!V@iq?p z>^U>eCytwaX+8V@MA{O(V{LemQS17k569Q{c{nJij^v;Nqt`X&(cn_KN)qQ(xOd7Q z{PR(|NGxXvPea7qxbL7TGCbS;M+zVBdH&Iz6@PyI0Y7h2c_3cG(Tsw{XE8!FRulq-CL$3Erw(#Fb%I|mXe|Y#y-nI95jo7`X zaITSD$6LKAW{w!y3#ISqKlZ{U0qLUG1~y0zH?QiLv$5dCIiCDg+^tbNXY+0|2Y#c< zmI8-Ai@RI`MvT32jJ3Y$=Q-hsavNTbPWp=^Bc|VvdW_ge{;%QxW&BR&Q_gR4mc+IO zjhObIg2(SkOD+!@#IgUF4&2*FCc9@VfA4 zBWBePN7qCL&+~v6Xcs=SvzI=_FGKa=$bxy{wrb8@_&uxeL#3Gc$qLBXumHWQnzi4} zx}Hk?TabxO==Ej%H}Ckd$5ct)g-Q=)mCF~}H*KWUmHaF)>ekmWe;(?K2AnZO$xA0# z2Yc7eJNa-Nzg6Jt1u=h1LF_$zKfS(@tqs7TJx>`j<}&0SwsiBX&`^1bzM1SD|u#EfBW1K@8!paNAqJzf60#xdpkeo>mr}SAK{h1xASuzcXr6~d;L?A?n*z} z#{y`$T4zCsp^zWDnE#5;$j1&~(ah1Z@zuI>Y8W=$Bw~JsW1kp-or3$ORtAVG<^E+q z=f1}n)190~UT_qI---8wKXhNth%=X$mxtTFw|rUue^rFrc6mls&L=NnB{AN`! 
zU%0|!oTjrSl;0_xPMcLitwDZHS6YJr# zs|wa&9~pyZyjfO{mO+v{x}Ecx>HHKLit9zSBE3IcPR_F zR95?!BP-`aM@7)nrO?$S(AUNI2z@_CKa;~{zG;{5c+W5sSRMOYM{KzWnNp}uR75APJR2w*R<)m?#MP+ z2i7y;+mmdTf6FRAP;Pm7zoWZ!fg7_e`p~zB{&iN$c&p6pW3asc4u2mP4!3m@x0}ZL z$w*JD458m{8y9{ofL#J!@tESKmq8E3qtY4{kT1T%6IwIBHlx41z-Zg%^^BRpU13|j zX-Dr0J4JbL(^~ua5EoGCgU`ghseRJ*D`-b|g|&Fo%)7$A$2{%zpU0;4Z_aDBJ6im0QXEYcwiu zEUEXjf~@bLB_K@mrv8r-Rd<~9}#>) zW%Qx;FQWY}{+IsJZU-BtBhS=cDSbQt)t`H84o@Ab4~;{8pzo|@-e%U=eRzqX?{>+} zF#Xgd^kX>VO1H)}@Sk|g!N!@f$%r{+pnns`rQp{MyC0sgk$AOLe2S3e8OYl#=&1FylnyVY?I6T zXoK?%X3o$cWyqVEjI6AT&XVoK->Nxtz9l>Lf3S6L?uVfxKkK~(pN*-LE(&ar-V;T~ z-G%@Zjd9 zMkD)1_Hn7LWvrtE@PIh)v#_sfpLrMcS$mC5#8i|rHrd=?pqxePh4A45_;5aa*jlH9 z!SC>`%wgeYvcZ#oH1sgccqY_b5UInqC!RZ%@fUlh_TSB2Qx#8oBX_edTA(}e6Wu=_ zC1$+^SY+E3tOn~;-@|~VX4Ok`YFGVsPT8sl=ajBmJ*Q^XfAat3IjdIvZcg2ra9jBr zqiyM`@ACd(-aj;_8hq7&uX_r-jZduS9vVLIjY{QyF8`_SY)Gx#&F|{zaV|e7F7lCp>rBgC8uZr%raj-_XnZ?bIvv1RJvWcs-u&-=pl^z?InF zJ*#|;PxLYtJ{pT+6U#ra#!{fM^fDIiCU(XmzImB57PF3(r;31A3+_9B_v8CbxbL#z z#yC2?mgl?e=eG%NyRHd0?<5O*d7lV(so(~N(%rtsT0X>*qA$$jtS)!(nf&H>T1rFC zI(R9=#`PlfXg;(1ti`{lJ?&sLr!^C5#*W2jK5Ira7&5|~^&Q(9dDz%e#8^x^AfDxi z=oREe0dM0fb)QiAIN@zvG6u<1fHb zPnjOZqO~y@Ihq9=uZ!-0<3;A7nde#~qPw&t>$&Dn&p!h;=~QvnwmE;)apq5a!%cS| zQT85iL3#487_33n>PYsNe-19pxwYZh$bRB&<~F|CU|(jP4-CO` zbMWn!kjuRjnvP0-)8=;0(pT0`YJ7y;?6+1doBc>&R6~s?<8{dw>YF;D(LJ#-8~rNE zKA;=j{%2d`>yX=frH8r8Il1yB8`SnO?>J8*+qK?>ID1opy*~S_&Iz6DdDV6XzBA`M zpwsVQwCV6e&wWND`(E~7ej`Sk)9bjCs&0jcy*zpInJ<0I=T;Ao44|)u*tgX4z6ra< z-J&z-1|GAiB}mRgeP0)j1mN?pP;Wjw-@L09o-ez?F7zVJ@f*}@hvvRz`a8$9z6}i> zGLv1Dgi$Wt4hw$-|2?y^)MB8*NX}2Rt-(;0yMDiQWHi2kCz+ z&uVDD99WfSAt2otKPC6d=MF`8gZbAD$xMB~VI4S>&8zfPY-@Jg)!;A1EMvs}$T;e3 zJl0)p;juLVk4@llD{E7Dd>+^&J5y}G@ngVzz?Q|5osyG}@~oUT0?Wy%;qf&N=lO12*PF@O~Wytd+^%>ZH9zL1!@XkEX@V**&%6Ctq{3>+BJkJO-M@o;+&PBkP zmXdV)ES(LL>@rSF;8OP0@=KQzm(Vj7{T02_+2d(1DEwq0!~PYXCcA(c_rP<-LYe=8 zP32zazg2!%-$u&+%6>1{_54rv|7K*LVh?5W-+#r{NUtx6yJLot+s4=W!K{lTALY%9 zL@zCjymL-L%lD8osFtyhWj^mZ!&uZb!8(g{*G%j8O*!NZVIMh`F;#m#eO=7! 
zD$08(f59Zq^VsDtm~Q<(Gdq7p8Rc11JM#>0Un%qWh+Td%eyhCfVpjQ) zeO>(ixn2HT$~W1#b)VmG>OKi>b$%nvJQ7T_X(TW;*k_DHBeJ!LM)X}f1HU2td8vba zu&`jLE`-USwN3_06j;p^RqxN6X(ITB^EV$IT6$7f+w!C5Zjli+a z1$BdC58oPRH}CztZ_*q>e~N)tjF9zyWS{Pz5Z$X?(SDTvdYGGD=BAIic@LZEI~(#N zdmax*c0*gE?DaKNuAmO^bPD6z1^u*xZ=EF@s;u@uR@srJ{!mHTUw|j!R_pc`;6}fn z*10XpI_I`zSKP{8ryaVlr%vk-wElDIS5yB*o7Tg8*HJz~`8<36-0QWK@+B&dtj@zm z*bd+92KUdm#nVm!ZyHlA_!2&?|KN|kyY@8$=enSKhmULp=V6=f<%c%^GY481iU;*) z7Q_PW;JbK5|vAnMs-Zww8 z@6w{kfh#YKM89zfJn-Vkp0V>H8^OtV#+R01EQ-U=gcIqU;vM=o2I|rA|J4ZLm7mz!TNK?qA9?M)qYg zUnk28CHu^?Y|Pim^1|J$Yw^M^*53y&jDQyokL>$hQsls^!y*T#48{|Wp40v0+rMoI zY>B5?Jnh)$nzxJ*Rvdg9bE-Ia-Idsm+!8<3-Q*?k(JpwV{wtszueeVJG8TMwLzR8S5I+koz+M6ftMHKIWjzrn9eQD=EzWzJR$&b**o}X+q zw(xDR2k?K_(=Q)k$13t&4>$JaqqnI}8g)eD)$EH5@kF~`kh52cOB;_KE*>@hMx&8> z&+VqH+Vt=FY9wQIMt=)?m~8I*D^=@%sqs)7ZdMvVi26z-oauFw4anu+~e_@KJh3# zN^8SSH`Fut!Lo}4Y#fS~<$pCAmwXZhe$k5H@PgkL&$8%6{}m_m6KsT<%S1o=CSbnX zYsBusk3LkNb>6_BzWh{t`R+ddi9ThMxB5(JtsbJ!b@X|c-KQT|H!b#W7^-i_2k_W^ z@pUah<0HRkxXx8zd$#nm_)AInD97MKIt`gJ7F%WtHqBIgl;gN_CCR>X#otP7+7J9= z3QZqii*G`||EBy{3bbQ{lN-BPZ$g|bjc@R-pHFmmI2Zdu zmEs~x$+Z*pakmuytUT=G@@M%gSZDa9?_y2lm2uw-XZHNb_!U-LeuZtLzCO#3FA#dJ z+9Ub^uHC?^xSns~yYq^lRl=JTS4f-9tlx?JkHfm=`>*6rHp`rD=P{&Aw{KGG<<9${{XUg*Ip{?9;(z=Re0e$k&S>9#zs~dd z`@H8M6HB;Dkv>ooa=QvWjT4OYhIFq%DocAGh}=boSsbrUio`E4VyE&vyU=Ky@cw;| zb&<1XGyN6$@_!wBDk;)6)rhHnH~89(Z#5s<;V!G(Xj6V}FM2`p>Bd;aBJ|Nu{&NlM zbyxJ{=kVnK`()aQHg)8d8~ro8riNob=KsBn^Ip!ens{QISvD>-&au1~&Q_vhns{KG zl6^W?QVPx$%Ot(BYjww*uC$uD(KN;l{Qe{NzRLc3Z0~vOF@U9r{_7a;EXL)Yr+wH= zt-I$^`|>}GFKOX@92^l(lN;UIo?FhjMvW`6trp-tbSiS5F?O-%7@={Ub4BRTMd6tC zsY}s6M0@i8^{m9+k``zP!XII>sC9kPdEnlztXn@`wCy$_QBwv0sKYriywE!;7{{6IDG3i zdn^u*na3RON0u~w&*-nljwb$nAj8rRp+QO!t0*u<5A7G!3J-*L? zCq22!l2PJYd*HiUx<6YNL5@bS@0oW)Gnd=ogZtCW`{o!>jzU%sXkO?7VLWn-KzttFyth^fe zaTRi83G(Dh^7*`Y@wo5Q9g@?DGG`n*kZk*(GVTM&{1wQB zw|ZB;8ikg6;Cng1QM@;t+lwzV1ieDHd&?NJXf)TJ=OSPFF=M{T9y4>&c8=FbX?e-F zV^Y4+_;0>3{gPGfJLg^Byn4*xUE_?Zm(YcqdB2DCCB0VbYs(bx+C#v)#kZ(GvU<^$ z5c8(>B_7gCj9&5=SYIu)6J>pA{sp_{D-3S^zGLTWh7G&=I|Tm?QdV&67hm&nKc~6Q z#7_AS%(melMwZcd%@laas*-+xO$PgbwAK#BV$`71Lu0~M7PxzX_Ed+n zVN;)9J!VV)xN!~pknPRz@qOfB`%}R9=(=6R(SVQ4zLl?bGY{kG58KG*?0EjGdFa(K zXpjBClHK4<=g}pL_E6r5qfPS~YsUloQ14IWy~fohIMUJ^4pDzOXIk>8^C`GbtoJr> zy#q~nsMp2sKF0MaYi}R=TVgwJ;S&PJs&-=kQ&ux}+7=H6=O!MSSesp(la4+Rw&V(H zN;0Yd8P)rla;O?(`|#Q7*|O)>tS>RRw;3MUJK1RLW30D4TF^g=GPikd=-*12cFL%1 zEwZ-GGkaDi^DLWDuQ$+8oE6TUT3#_*K64ND1kUf6JYMsleF}T;O$)#+`^6dZQD?H} zj(R(IHiKfVn^+*0Yb9j*P9`+gN=G!lI24s3ItSz{@+>_?Gd6}Ak!9vJ#< zo*ABkZ#02tF5$i8gkVbS_ddoeyUKVzqN(vCY?|_o?^9di7v+r8U-6y(3}2w{Mf#7> zPDuZewKdRD;M#Cxx`!C4$;5Lk|7;y|K7HHdFtKDt)l5&_1g!zHOs-X?i!#ml(ws8c zlo1cUiI!KHMX0RL6? 
z5vS~-_@>DZ99b7*4F`zX4SD?A^ZwD;+bJCp`a!>bHfwZRqc@2hFDtBa;u9LnOvWPq z(*nMPKXOh)a=}v=9lZ%0)ZjOsKgGh+Md08o)Z0dTp)qNVnMvViu=zd{BA$q|L$PdN z^KUd7$pskG7^3Kk@$>i%tkQ#DxQwx7ylCz}dZu0)>En48>(_sJ_%ZPT!8#52GzQ5* zt;_lNfdd}am*PEG$Ntf@M;oRt4S#-lGPpoTtYJ-SPcL7k8Ph@k?H`ak!}aVA=&R_Q zaOfh&Z27>`TNP6rTlU$yPrzNbZ(@#~>)C#ui6^?teaN%rK4beIA2@dIH+*f7xrcwh z&3Nu|&AWJ~_M~5O=3crk^D2H?#Ba@Mnlq>9?Uw*AIUb(PD*tlhWcDfVEU_a9Fme%47Mef)twL`RZ=Cq2*XNeAY+&}f`{ z67&2U`f=vD6gY;OXLDVkLy^B^b3+0?p0x2{t_x_PP~)a-`-kz(HIx;fl8wZY-(y=h z@Lu{Bq|Ya}}U-9nA$=4*z3KPQMF2lWOY|n{EBC&YOCS zE|f(-;$@Qe1N`#!_zdw$!K0Xjzw-Volsh)o&S=H9@T@=UtL9$cZ?JF59M#y|``NnE z+wwU>nLH2=Qcm4cHXDyW3O+T^czpB)>1xrE?p&C=ET-JuoYyU z%lX%H!jWanS*vec|FS#t`&;kiyE>d(UCo{e{ixD=Lw_Z*sdUv}=TxF&Rl-wBSJ%ub zKl`hZmOHQRFTVpjsgG}OM*p&pGiI~@N;!ysteTjNN0G&c@R=RJXV#Cd`_^aqudYDv z5&xBbwTJb&%bV2q{sd#mJLnkX@mu%0H+{SI{+2!1(y!izC+PiF-naO!J)VBBH8^eV zW_|BL{%h^OgKqXNb^k`)BP*{0S4+UzmEdkMI9!B2|FuZZlq+rpx8xz>{Jd{$UyWoV zGD7y|On&$HJdM-QF(a$JJ7%Cy_xQYxv-p-y-E!r;>kl%P{U3TG(j^bQ;fctWS>tV= zi(NHVj*e1_j?#&I7mrb!HJRiI1+Hzx-?XoKanANL@SlC=jqEzZh#f>{>6v2bH4W6w z@J#G$$3}FJ^?jH+;t8d`_PJ`aoHcmpt`EWQE1s)gUs=+gD?P{m+=lhH0AD4~8;}RR=ZuI% zxrbykG|{qp++p#ujg0>{$elZfPiYj63*m>z)zdgXw)oVK53f5^h+MeTIMTIRdkXJy zWrkBG1-`y{_0?MrHJGt&efz%h#QOf>#*Q{V6Oc2CmokuE#)G*FdwPFTZDk zbp0LCLSziTJ@wU**FQYG?mGD0+TliH=4bKg!~vQ2d>pv4AaV$q)J540BdxESHuNq| zJY5gGj{Fp-C6?)+zj2hA$#@dqbyG+BsNkso;J#PM3DaP~5oq9iN@Npo=&Vv=y)gTh z8?wA>o2i#!U@PYTb^Jf*>7QBWyP?01b@liZ?*FMq|JWO!Sqg9aIMciKyYQyN&{YNR z@8o?s@5|qh&(!>Xiry+4pWgQ%e-1(i^{#fZ-?Q53qMaLPXD|EpmsnFTjMKV1x~|@4 zP4x1vjeXF5e)qFZWbg6ABg}tJ~-J!KXvd8P!H^#uOuW{A#0dC7%pmntA4G zCiOLj4*J%t65)2*dB|>O zGd9p&_@7>VxM}^S2Q1%`>fgz;cILj$+n!s84q5Zq4gL30$COcn@UwjSIuXB;&u$fM z#C^k%3jyRp5T2F{ZyO1Z8wIaB1-Wo4`TKmw$p!x-;r_t&?Ius~KUcF}@h9R5^Wh0O zyqn59@rB~&YS+uh>+pr>IUaKA8IfqAr!fT{xF4IW_+a_edDlnJGmbd?p{o#kF=Z0@ zMABaLPjH$_-2>85)8H3Aoee>6Pv9w^d~{g%?d&}C4bB5h#oo+iLr}gzqr_qf^BrCRzMKIx)Y+FJAfJ@VcJ>+v_&JP<#k+GK;mgX}=QN zlj3GhOXmLtJQpulTicv9Z?PNVKv z<{0_!GiCpOyJi1V8OA0{?)qBgSCW1$eL%V%zTUlMtcxPndJFcuqA8q}VU5pcjgu27 z*1W1_P9I~hF?EFrt=~rO8WR=QctgL+*Psj3p$oLwWt$$zi!`k|qu*?gm{NCpZwR+j zjJ+dIC$0*9-i54p`cPBPhZ~kd5riT)k>eNwqIhg6{)cP@`nK^6s1MkCUwQ zvnTMQZ?N}aoL%szY}&~LHq{LQUx;?FGeq*9@V{W z?vVi*-7p}df3p6OV`TJjWOP#G;FYYSYizj-u2_RV7)(Z=&3cl3H;TVXzL;+CnYiXo zr;KFuCqECjeTr?o+Bc!^-V4^Rzmqk0HJ?=0oMdt)YiyU|C$;wU4J}2l)H+MG)*O2Q z{nna$pF0yb7aWz*+S_QZlpjhq3;L)$_y)PeSx3mRB|JavlPSn<+Q=}*_hmt^T8GNR zq;h(vvd;T3YxbcG)*bz47_5K(*YZE+?VlNiUZc?KzR8yTD0`p9_tSa*L-@X69+2lI z%+}g&2WG+PhNBz%(BG(|`QQIxI1-aQgihS;J95lJ`wk5{a;yY9RS9;gL>lzG7hfWO z*ooGJx%OCFbxVyMT7UJdzX&>G^Qu+&a;)>pgYg>0LLFJT1YU6^JYzAuV-Y;$Yw(#X z;4=$vg}0b|<`DXMI(E(ruzgjK5%vJRujXz`mabb=^4fx{=_ z58%rjj=oJBPCMP`1hT38J$-M)E~QwPlkG=vs?N`-^VAve<<*~mE_M&Rh&|Z=UzQ*F ztn={U5j%H)c)}Qaqfgiy$q#T49xb2wyX>z@kaf?%oAUU+mGAY_l>gMi_b_nNOpYAs z71zPX^RcNCmzz@JOYi%XeL@gC=J|}h+5i0G^<$6ZKmM0-V;eewTX;SMubg<~;>Qm# zZ{K0AF+wwslbFZxtsf9m>}Otje6!Fy0}bfQRhu~r_vvZ%>;Lt$_~t|K9_d{0=e;#& zi1@&*SDjWzD~R9gZUu#Zerh`y&Klc$M`Pvq5on0EXsY=3#_tT z<`KKK%(rI!)bqoUX@PL;OI7*(9{|Vuz%lLqaBd!c%Df~ab{l-lCD~fuS9(6Q(uttpTWKk^EbJ-SkLqE7s(cj zPs!HDbq;1?>w)j*N8V=s-(ilNy)tL9*X`%sr^v8_;Qhya@y*+T-`ZnOYTf+};$Gpw zZ?m6}f8t$mt3CD{>Kz8}nm@%PeL}mc|9DS)^S@GGG&+sB(b?)Q=vw|B*_9*smtNts zz+;_l3**0CFpA&yxJ$qbafLBrwB!qGoX;l@{N}S3w@^=dTsd=B?mMIZ?i-CA<@u_l#X_ z>~QGz9O!nr{$mGkvHQ8x?#JQz51#H_+YVpugfBZhUuXJsw~x-hG*fRSW#khuZS;!k zaE*V>-#vbOt@ij$_*vik=qnXCwT}}%>foI}p3MFd---5drOS)^z2nV&oYr|JGS}WUi-Mtgg%4|)yp?=(LSfs)z>ci z+C^VlGfrRH#|i&W3@4r)TxicE{{s6ZyG&Z&lk}^7Q@Oow%AsxfF`Th8lWj(IJPu7H- zX+q9yr)&eb6kpiRm>SS~YtWH9&t@;gdK3&%)?+8U;T6UiMTf)2d5q348^npuyWh#& 
z?T1FC7bVhe26mnd>^#iXj(+&J@Yci_mH)8rlpC8;!)1ZFZiW#rw_zqcP2l?MZ9>4`7OV^M8F0 zXGE2^N$n^;O=seYnU}cN$aUH|a3sFzdGKuZ4^K7whv(k&Rgcv_z8c}o-Tu#<_9qXE zl%glO+y9E)e#>@pCjKs*+xk*CcUgfax8f2{?p;@tM`o!fxB6RrUQFt*esOqzbpfAy z6c;!6S-A*(#eKdV`>zPHA4_J>?QNXOS@4{i@bYDxH_!2ZeO44)zm1-R{m{huRHGUF z_Xmqhkh2Y*h6$J5ed=iqp5&aVHOBJF`$u0n)$hrjGAT?>jx+A)v13{43ZVv*A5QRxUsXEhfi_ z&(t@&E(phFX^!+>ezkMLu?zU0SWa<56~HZ+73ZjZn_`6&?^py4wAgu=zdDw<6mal6 z)^n8a6p!-SjBTCGbB#O2lhV-6yTo}4F;CgdQ#SL|IsEIhv>t@_v&zG5XLC;6$$wE~ z_)Yvr!G9e5cXHR0AN)sM_>Y2rd{?rIE@mhU@%Hv0B^c=kHK7yrzE`!$s5=l}Q&z017)PM#m) z`;hs5^zGO34udFVGjCtav%fv~@xt>TjxBuo!9xold~n~w(GT}8{HF&GE-ZL(|3c%z z0}K86#@=pxP{xBFE!_CvUY>O?%zyCRg{vO?+rq2|cbor>Eqib98{hh=a$|L|cDsj- zpV`Z}LLc4#n8xkV=XC!E);W=l$;K z(dT{ibmn=Z1>e(|xBqR@l!Y%pJ^JvhnPzTHqGT!doIsqE}Lc;RmG`m)>qndIX_nY>R@X#k*8+EQ_qE`j)bTF zVuZz0BTp5Co+%9H=FADV6??qN3q8JM#eO7rG)-ti#=VD(`-Nv@@D<`ZrZM*Sk#+CE zYu|_0I`Zw^3{T|kG)uGQaixJ?_pe{}b`{XA$>K%)0i(ccUk4 zB^SaTbmA=LdK~>p@0b3^Igc@UMrx&(carra$ zCui8QZcpRX0J=VLp_ADQEqLhC{*h(aYtb31xmQf_W&~=D5j?xmng1bZtd-45c04`% z3$T?gJ%hM|qvvv*^A+(z*>-%`%7$Sx!_IomaP$}MLt07RGoM#Jm1iA$JARDVI0OB3 zDd*e4E%_O$+EcB24HYvr!8{iG}q2k@Zx zsujvtv&}ocA?IdK?)W_(olmMV*qh2;O?*~E%0uC{)G^_huu)7F}~Y{GkvwzvlR2$TRih(J8s6FEnMbf=gwq(V&C56H^yzxNHU&l*M0|j zH0OjI+6#yiWWLNe!4PqQIm9%X@4iX?u`=$zB(C!z;8q>YSK!|Za=9aMZ1N)M^hTBZ z?>W9n$=nCf5VYI#+wJ)WX|L#m`1GmdP!Rnje-NLcHvLzH+bT=Yxjn&TKlD^y0zG|G zWhTmJleXKZSU2eII?lBCxC8XV_zc~>=sZuhpJ(rn&yXEd`g?%2m~w4oLsP*STk2PR zz2BipU_?1UAt^c$tWvNRp@4X0_3jBq2ChZUl#GGCJul>XSi9<+=GJ+lRa|@ zx>=NGJ=i>ES7gtQj==B1JDnNQ`ANkMsK3|PJBFU)%mQ~X_fi(!Xh->{!l08?kp{my#G!}(`Z#M8{oQgem+!GVreX}-}=(*-b?aoht0iLk~IQDJ? zXNq?+eXa1lDU6|DJn{&5KjQgja?s6A7*_>ndfnw`+U0fjvx~km2p0~+U(Qaa9rotf zJBUd<8bdF?&5@Nw@XSl$otMBvFNT-S!|zbYov~Jazn!!x-Qn=Ft_DIfO+`zoMC!6sfRd$atF&B)u0 zlecWx^uU%4Ipf1^%Q$zS_wrMR)9jed^ssf8E@RGA95;LA9Mn=E^V^R+^)IjJS1hUKQ+U+;YF>m-%~=og<0s!>z?Pdaa2f#S z$lmz$6xQ)wd&#ddS!dl?SIk@8Qh4!4@lATSmTz(dEH>Y#lCS+ED>hwwDD#dw&Slko zi@4Aa$-zeZ@NP2)!b{xI2LCaC>n_|d|BWQ@N33h#sYct^=6$w~ygF57?^AynV*s9> zQ)Nd8R0`$}{CygOVwn8kZL61i@4Qb;*yZ(CzP_q%!W{blCOC4B`M1K^jo2=A?veR- z({CzgX}5YuR=#M@z4(*bPlNVF15M;WYd2}&1!&*}lLpp611i@wk@<$#nYjKO8hF#B zf$;-0pt%3e;gfjp+B@ve!->#-a`k`Qj7vGTLvZBR zQG9mt*<}A?{%q;PMN^FFIu|nmKJ&?BWBO>|8pB8Pqje-aIPsZm9_8R^CdiC;X_7wbR7+yZ|{x;9Kwsco6nTzL#;I z&@%4#kT1jW7xv??E=YDYT;Y8seWnlb+;ke>~Q#J%x_fEgv&`v9|y!ifKZU|N^>o2z9 zk$lozY0fkU|N6i$H<)$ME%r0s63%m$QD!u_RZN0|%P4n27AD}bh-b;3i4B!DK38Qe zSn(8hJWNGTsBT1WW{s6h^H$aHU0p@Hl+S_Btije*EM4Mm?p&aJr~(@-=Sm$oh?BP3 z$mcyg=rZ_B6FEQ%nS+w~MwRRUk7T_v;R@~*`QOyN3wV^(wg3OVGhAjeKnS@9+FVe` z1hJ)ZOQp>u@d62=Qt)00a(bFDXluQ+h>$?E4Ft4NT9tTC0-l~`ro~o;%Cw#XNNb}g zik{Q{&aW2|P@51Ckc$%p=lA)(?@TfY0oy+P|9}7g^JJdPyzhSZyZ72_ul-$Xuf5h0 z;*_m56B8aHx0Cq(66DS>=8WFgh8$8mn)$DGXzx${_DPIeXEyT3;r4H&ulD$*1#jXk ziSV}Q8yl;ekPF$)G{1v)_l}W#0D8yh6ukOf#^@-RFUF(sb%-~`7uwI3PvIfP)eYUI zrTRjbgG~d_y2#A(=kkt-V~oFJ)zewk&P@N(+NZLJx6}>(3KbBOR%_%W+Lo78_JEVJ zXPA>-+6zWXTHtj1bKs?d+-Doh;pg3FBU8%f`$Egi(a4tK!Y<;=mhrAz`K|Rd9PO84 zK@8NdFtNek-J33RPK6Pu3W*ea_>ehYTi@*L6btQA}B>ST`v7U`18{0c?o*N>fE;lf18qVY!PgV9DPl`*hn81It*&}sHGEIGznAUSr1ISd$M z4v(?q*a2vu=Rb(bvcuzjq3<{|qVjCG-@qSC?+xbtEKZ({@Grvmt4E%VwdL8ajHo<= zpC!*q`OSWFfn?fZWZK#re1-M5oGZ^XR}!3kO@O!2}6Cv{J{PZWN)oUe)h_- zpA9I-GrnzMgjOCizAFC($VPwjhv5AfHJ6OVc<0r3|e^O$jzunxcSTq&9w+tJ` z;$!6XLe3R^#?F1SS8@m3vk$qmP`q!^IWzbKdY3Tu4CKKVd*s1z|4wY&pQXK`TTC8J zv*@2`#X#*#|I8lxZ-duj{1u~n?0NB(Xxt60<+nA^d$razIi3Ex+S;rI&XNA>YQLYg z`k(2)<6IeZ02!1=8zqCbgXesf4EmWRgSw(J=m9gUj|{qn-;zP;(Z0M4EzZ}Mhx_$K z{3zKYjHU0A!|KOT=F}ZV1QgY&IBc!w9?PGPu)|N5dQ5kbad#Lc?N#@+g&f&qS?Cl*! 
z7QYK!4&cji4BDuUBbVE_=>y_q+~7=$%9;`Wiagt<7##QW#+mT4hq+O0Ci-{f+Oo#O zJUCPo#ZFwau+y|CcFO6MHD%aN*Onk_t{Onrz)!D5u@kYNEbJ6yE@*vdquuTFMc7aN zb_d|gW6d)I{QYoaa=6qW#%royw$8B*r{}#WjbbwCBFfn^ zX?SpaifEJ8Ll4m+!t?#dql!6FS3T0Uu}0g_D~EpGSkMnoXj1!krthZ3T4D8Z6LiGI#GSlB8-G6z~q4z+-Ltn#`hdLxINTvMB4 z^}L3BSu=iQV<-E>6BxTokVEwoN1h{x)*OvYQ9jUCaF<0>u%0lMFi|r)UH<=IKbYlH zKC{WPJ?HV!nspqTKsen9$)EURYkZvS)9c*Q@L=AxpFDAxwf|k@{Q>J9XMVu#VQ}_w z{35j0#rRHb#s}i=V)Q)Ov^8e317@Eg*GE@m-b8qzW4tfaWVrm2*|uCvf+sS9HcfWSfM*=U^3L$T3~wzmlKPE%2D~&V7#sTlb3$i3;@hM8;(1ByPkh~y zk;g2&=CI_8^cnGmXg@O*I?wSH&PG;CPgr9Tr^C56>nE%=)vPP+$nBf4eHJ;B{Q0#F zU^%D13R!xPwl_Hw{A+60@ZAWlL8i*i?J?b+L)^ngNDOt4zXY8@`#rX-&#+|uxxC%C zkKd$^dZvFrHD`_?2M!<;Bqz`C-gl8lkxwG&Dt`?5eE=JxuASzZ`YBqlzm>F)w(P*> zwdCTTH?Cv-ypuE9>*o7HFPURJ>%f%cO`Wn98a&N!$@7!W5kX>+S#mcIxqI%or1p%- z6}0#4vNB}pEM)3TWb4()*sIV{uOyy?Z43J}eD^+MB>l8)tQl~vEH0cmhIhY!FTw;P z^gqbn|M3xLf&M6OisY{5spk4i)Q*OzQ`GM>J8?-6o=i?{JRR{up1b_(nYyu z$x}~Mo=&%Q(E@B(3$T@Kgob5cH0hbPtkfAn$w~X{;#T5OSUMAWW{tIP)K^BTzb~R^ zNuR|`_zL^=pC!mv?x~DR4FYCcqnf`X{L*vvY z#{E!qOzM!aRy%Z!agk1Y02!&+wW3W-W_)Dvd4_)zZ4__VG}YP6UYb4+kHlyidrrKe zbEmQu*!b(*c1aJ!ztG>hsxhnDG2DOWs&!f5MoSM2O0FPtzDQeQdZ6~da_EOJSW%B2 z_);G|@E$X{4+i@RzcpunAMMAg3-qHV+7Hbcy;s;v-{n87K4>1WMubi|eW5Q?r{-Sw zMdVG@{9-P7$GRJp+g5SEExV}OxL$VAGWJ0h;Lp-Vf6H0Fu3_vCv3I?ewdxY`JUqfW zx-Ys`ZHAAneGhm=KA!vF3zPp#;D;sH+Whc?-#x6}4?k4H57PCu52Cp)e$@Pzt{}Zm zZGRuE7I_?5G1drKGT@Cl-pF{nvlqtL{1~BJJS{X%v#RYmZmrMUm#yOby4bn%UNjc1 zUu52wjUCfCfw8M*?zhvHWkZc5>0k2i@H4hQIThIw%YP;uQgA9Vd2w`aX(w}Cds{2{ zuerX4x!ZrPH#66z7hSTHxo#%=W45VFn6rnWZ4>Gf-M4f<}%#nuPTx`g-0 zhhru7*J|WUG5`DOabtS%6R}Im(c>nZ#+GtrX<--ZUy1`61D{tj&m^Bu!}~T~$p}hb z++gW)>6UFLUe-vLlRi!C+qrrsHg3OT{^7&v*Zk`{W*L@jzA8FqdM@^i#!F)(86x|m zn|{ic6#u?r=pJ!6u_e_aLx%a6tXkV6L)JuPh~$TCYU~GvCR4w3xp+BZSh}1oM}893 z<<2KZ^!zWPeYxX2eMye?MSQFBF*%}nq<$Qy9mvMeadbK6KX@WD_+r%Fa0l(|m9w%H z^~%|sGL7{fXshy5tu~M&gn1)++RyaPfe-&0yV5QbozDc1IKU-N@W~)>%3yT9A?SR> z!$7{hm}l#G^u^j^)7l{HBss%ajmkvsDgV5#P2{8aFgmZ9gZ)|hURHIcwU3rR1zSEb zz4DnKW57`bUn?#omgnTl8Bc2;?Q6aehma>vVBNy@>|cD@nk?p)WuLywxh5-xdYc^S zUCs2j;NsFkoX)Oa!1}NU|Je-o>N|*+mA~>PJ2%%t)>7gfxoD4f=gr%+j%d%#de2IG zzb?nxQ|62|ZO;}4VSo687Y*+r@R9rja`CT_&%kc>$@Sg}#u^M8RG(I_2w6D6%iJ$Q zZZ8~V%+tEh86m&?PP9LwcOSm2*!uxg&NDD zvmYFx_2?(;GtXumUPnJFzuOmD%e8Bw&-u;a<@NaL&YsV8>X|*?2+5{YL=Kf4_8?!U zu5}B2q0QV|w-9U%-|D{Jr8VdfnD`{TdJ0`;o6FexAopW*=->Yw#Z&zMWFI&a6Yay3 z_{fyghXiyy@IP5(<|i^j^@ z_hUT8Z*}iSto>omi4+*lnWqoKpOdIhyjKUU%_#q!z>lK9;z7f|65bb0^Z5|lDg7We z68*M(tTq;hT&Ce^VlPoS`xbIvJ{{Uy6t9S%xLa6MC(z%MwP!Hv`*VLRTmJe(j_Yjlu|AYd8Xa({;^*qs6ywv+4nVY40eT zJ{_&C%Q|WrA;pKTM2=Z)#Ruyd+O4(@qn+z$=cRm?y;AK|xr5A^J=DLCcJ5#d-omz; zl;sQEO*@mWDlXhhJ4fDuJvqUCkBdFxSUdMc+j-rT#GZDJl&?#)ok=CPZnNrndBL_x zn#Z*B80}1Q*zFXZ)V6Z^T#Y?*7rdoCq4;YhtVId5eHd+C?EPtD8GY~;PuMNJS(p|a zek=!BRSr#v4>Y+Pnn<6nhNfM-s~nn~f+mVrpxmryzTsf3os2m#%WoVE53tTG!>^8U zm+xGM)97wZwAUN-*SAx7?|RPCtpBpB;TC*MZu!;lhF`GnDkh^(x=-?rFt%eUF@tVot|9xK_?fOwhks~S{?59rL8i0(;nL#5cAl3n;D3@I zweHc<@M_LmYc8A5J-wk08;_T`2o8L;^q%ii|H{&@hC7Il$5l4CPkV(nj7<&-?4HE-w55dnuJ*#jRmtY1}&7+u_VFXvJQwd1C&C zoOG0R5YzGNl&{=>d!hQj(7D0t!);vKrr7T2psnng>Ru|eiP2Hd-8{f^bGXL3ZsqF! 
zG2__=ZGOwOR`EOUXg$t(-6&lSqFakLihmn;Ap0w(>8@A(b19!nIs08H!9spxOSpR_ z>*ZtAqx`fduDuGrxDx)j0zN54CX}#`AGML}a&X3`*B&j~GyZ=)=WAPPKO46EgYY#4 z3s`bvCv%E(wTyv1&crER-yZ#*C!f}_*gs2%J%|s2Fs^)s;>)j%D?dU$u5smk%$N7+ zLmT5;$JwS>uCR8lwbb4o4)i+%wvqBNJ5bE-(?Hn^#eknMlQo?SvUbdwIuny65AGfO4iGNAy{Bcw)8>f#Wd!6__+D}cjo`#!%Tcq zzQLy(i#mTad&RXZQF`C8#U6=l$FaGmiOJOw~cg|o{Uqe>^1Kr5 zr~H-Mm!~x@tXde(%>i##fNiDAFeZ`}YiZjhjLSOc@GRx-s=6z@2i|0KIYvU(xlYjv94bGJ{oE_L&TQBPt*)R=O|&VOHeErRuB1)5^efjG>{%3T zQw962JIus#*}C(n<67FW0N!iH?`aWyu>?9Dh8MIyEPhDj9ea4k9{6D&{P02AhOE=( z2+wBtVLSZbqd(8m{^uCSeP)uy4|?XUNokFf93y(h??~ME9bx>A#EqZ$p_BDv5cPbm z%oox;-#=FKKE?7=cNtlJH|Km0BD-c$M*QNXP5ID7^Q9a*R_4Lu3C32=GN+3!ZQ;li z`DiI0oH1xvxq%Z=|0c9$ zeoi)TfCmjDq_eCO-;He1J1!m4ko67dra5UklFEyqt=8?lyz3r72cEZ?_Pwz`vgJA2 z{&VP*?MU)_dAHh`^X0Tgha+jXllsMr8apRrH^>@0Cu29rgALN+^PEd53(xL96Pcnm zTF*zXm2hn(G~J1xw>i%U$!|3G%fGPjMJ|0&d-UD_wuC(11^%7vn8*B#_C>Ufr)Bw{ zB3ov2re*Ef(#O{>&uF|8-gpFBzeztoPrHR9grOu`y8aN^($3rxALW2olj!H96(dJa z8W(C@b!BFw?unPIc{jax-nFN#%vxzMkMH1IxWi%b(pU%Q_n7CJbCNCAoSX62&pGi@ zl41J2^m8HY$mM;LpvQl6eV{mnv3wVi+M#Y0vOryXnQ9>el$cDEJZ*7BM8KJJniJ3o* zwvPR(SGexwTzunbbLTzyPIvi2vo20*G|U}eQr^IPH)q~Ce6Eu^y2y78-$fh_ zf8M3g(V1Q#U&HXY7aCox=hoigLj2;An{HX~RpV~usb6YlPS^oo7J-9m!1YBQY?kTR z?YS@edg4M~_bvEUsmYdy1-dWE`FhLEwZl(*;io}KkzDaTXJ!7*_iSQ_p{AJYn@(ztzsj6knelh@)vsdcd=h44EcW6 zPk4TD^x3i`W6F!Fn|m9HcO;swVNAQAZ`s78#v9G-ZC&uPzsic$uw^l}5Y=z(x6+2=^(rjiSow5Bgjec(I$Ml{}$d5 z4`?45Y`(P0gUo{le{ihMY8mgh&QovVd`|$LzWnBVVv*0^CZ1xxZqswJ|Cd3NAbRtx zsC`H@k*r&7_z3xJ|1Y9W*}PSD4Qq9*Y%yg&w)*Ak{^^S3#t*E0hY`UERZGIs72yeR zUP9NVqDoj+!R5Zk%HXNl@W@Z0v-T`3`ooh~Qr{ZhEk22jOPOMib6vc& z7`lpQPbvwnYMau;V7s!il3 zvgr_Gxt1}!gm$mx*(UnC5We^Gt|gSmcX9F(-s$I^!_oIQqYrCeSpKXB_Te+Ed4wK^ zF4-mj)SX;|KaxIzKXmUF_|$4Qb%|dzhi6k>Wtw3w2+b8&N zPpO&P<>4FzvPba?qyH_aD%A0L+A_j@n#N4neVo!$$*vr{=S;@-I@ z`p)+HQm^9fUg&)#jGyY)`4PRVnf0c9ysumGTK!i4MTZb|s(osw)(Wi|T07Ja)vx#H zed2i^b#`(eo7ZmnO~<$G$JC=|ML+pln!@Cd)s4KQY|(oOf)XF@gVO4<9U2W9gl1Dp#>kq6qt2l?IcMu0w`ub^})A+_>#hub`#yS?W z@k4$$BiCc+n?B&T;;Ny`_c`BG3=f@M_Em7*ZQ#3G!FjiU_in~saT9UXZX~AgP%F;G zQ#_AN^MZRU1v8Pmsl7UWeh&0BxdvX6E{;y`FJ}F6q2IG6Te(0gp}EeDVdo01=DX~= zijUz&7A^*37n$rWuxH{%-gSVY?R8&*hLOaUwEsGdj-Z<>`%c=;P<0-O*^jrOSMi_m5nWI68^5SR1d*Xw*IR zUwTPNKDsyh*aGynpQn94>-)yYs4in#{m(;}8BgC97(;t>nXAx8{$eoSkGP@1Mkb@~~^rPgHSI7WJoz;_Wn4=)_0ANwuc*J05@&zN5r z-dJLc+&vCj?1P4ruo;bm7UQACB*zfXB*)Mm-B-M@n7%EDj(IcZJJq%d@VjC!_!+}P z%z-7$0sr!0mK}K*?X~t7m=m(yTWk7?LFlE=HZHt!MC1Nw`-jq&S8@^?UtuiOj!mhn zvi2MBp{1AFV>ynooXlA6!|hCZsh)Y~^NEe?9Yc2C2QSMG@E3GiLV2 z=3#HK$L5ud?q}4j$||EDL2SwP9>idO6}qY$I?kldY{N;JTC0ru>PWis0b$fxtQjY+ zEkRL&g^d7w<{`zXJ zPu}YqEjyd)HKT{ejL0cJy_bQrmz z6r0!10fi5qYyv$o1zKuHho_yXcqgM(=le4(U$~$8IN!g>>~p?fdb{N>&GR}p?>b<~{q}%1k#SMsWLtSEA8_<kNN-Q2X(R$@NlAEV7-f=hG8d z|1Cf9jNsG+)^0H0?}_Q~JIbGAt%@YG{)34gn?{b1MYcWgJ${Ry4)C0Odi^=<*OYRN z`?76}%jb8I9k`(bK3@^!z}r;On7S;QS!R9x6`FfwIv-1WoJ>%=s$ zVkBHgTL+3yr~RF5!|bX-Mk|K3a;0q?QXD2eh?RppK8I-qyiiL$E5REJ9m=JU65R2K z?+kMN><{?eL|#)b%72VH#CMF^_0^65N`9cF&H4_IeNF2C=Ol)Svf!j_Txb4J& zYbFj{kT`G!7jvCBaNCIkx1Bg}L1Mr)6ALbf7;puJ#4=zHT5%t?L#tfc8GEN}KW^HR zM$8DwQ{pllYlAK?I@0PpzV)Z+b;z%d5C1TICh_28mu_?17Jim?>skwQtA%pL%3Hz# zt{tQAs#m#eD;Y1nOFSSQ#e|m1OQ>A7{rMMqS-AYu{H;$}>z)JoEZZ%lZ82 zKmE&x@|o6p_~d?bZ=)l1cV(pX@yz8}jYa9ku}*xRsjqbBozmqqAOymAUu>9GX@3@waY1~LG&{D%i++5BF zBl{Jb^*H;}!=OoJweK-+Qo~$#(sSQZ>#T<}y+xp(`Oi+|URyo)bgk}VN0d&) z{k_s>dhXNbtF8M($qVHRXs$29SLGw@BG{IKjgIs_V@)i9dH%R|{U_~`o^+6Y#@;Qu zV$-qcD*K=AL-U4%(RP`%Np$w|ULSf%e7k$bB3dSgGDd{9#kIYsOmw``tupkFv6any zCbWyu_lKg}cssY(POzfr%UOVK#bgkVs61`3#?GWoYL}Dxnq?Ew7%egy9**{rfT5Gy zp>e*EZsmk_!T+lr=58 zbg|d+_NrKsLm188N07B8cms-h_j;l(y1>tcBx#i 
zQ_RNHY~o88BNM8YPM1%84)|611H=O>;pa`vkyqxC?};;Bim9nH5i{v?5$)cHTvOTj zd3Gc1oGu<eA5cnwW_%Irq#r+tzHF&`jU;C4DiyZp^o(xtaFvDb1vqnp*YFJnXTS$ z6jrdtRkGIQQ-2QiwwC_f~!xyri=G?Xo8*EBoQSo1B4;<5!YbYf$(;-rF{m9Dls4BboN_ z-Y8b<69Yr?@$9nM(7hb`moW~rutm)z&W;^xHB8-+U5c~g=6VA6J(L?p`QeNcd17jv z4ad<#VtGRPj#*YXZHKeLWZubstF>hV{C(pygNcJH~^N z^Z0-Kif{{kAx2QB#p+M-=9(e&2Rm#XnBoL9KgGQ5U~XA)&KPU-wJsm=u{9^#nU62? zY5$774s~#?r8YC%aZ9n6{O#L1h}Yb~9$*JHgN}Q!b&*#~cCL;z{=*|3AC`nW;FmLh z_7z4}I07BPQr1KE48L_nxbrye`ZD-59ed#lXCMNeX=98#7~_r!9Cu#VZ!>&Y>@>pmj^hAjW^kmvZ18x10R~_-OmT z4k~2+7yfD$<^OD#ub}+>l%MG&AMU^Sx-&NpYN(=YjuXD(+3d4CvvE+No0u=g;Ihp* zW~37r-x3PNqbW_4sKB0ql*U-4|i~b>em_+ z`>pr{EwwjWIbB+uw`^;<#q@SegT9JqeGlW6WrSMbY3USdYq@xoHKS7F3$M1M@gH7o zfu~=Br(57@*U^X-1Kda1<;Yy|KDN_7{F-a=>m2y+8u)NF{8$cOmSH=ch5x6`vwD~O zqQVOtlyOoPy?Gga>EO4R-mG&Q!eQ-BGq7LlIC3R-7&c+}Wj?eJZnNZil!vah%CuTRSVa=RSRXM%P?Wy;%W{S@`F6&)4 zOE`-Xo=d*cG_9Mg(Y0p71N7_X(C7xnwD^GZ+$`2OTZXJ=-HyqSHf-CHA+lwhU>s#{ z9!MrMv%eiL6aL8igiS4(fDeTw6KXRpnb6|Q2umik)Mjk6WCC;Ek_n~!mrS_F74ASL z*mEDbKpsB6cX#Bmen>XlGc??BIlmO`hWV?qHOL1& zdm;Hyz3W{0@O|i2d7xK5q*(G{bbTuFVKnkV^-Dg)exHzhVDF>OVIXsF-d2ZvXmQ>Y zCJq(9Z`xLee5i}c2eq}7wo3k^=XH0KBHtw+?wJ{`Lq16U*C8Y7kP(t6HOP}`D$ibH zo#e^o{MUMM&(+~Ntrz1dul3@4#hWX%{!Q?83p-b`1{RIQ&IhK}8SML!G2*RN@Z)~? zQ9PzK>KMEy?4x@n@Lw(W4@B?F=5rBin6OXmektqOLzHK{Aj%jjvxs_-W#6UDJtK@H@d!B@a3kkGe(AQoqHpfjm>i6hNWdGNN$jg1+l_C9^l!=b%BXxgh1T`I$wrs1M{dM$i<{WQ z+ONv&u6`lmv+RY{j6oP zC0i1)=YTm{C0nlLe;)D6EZH(7EZNyA*^;d?&cHsf#}2SZ>lZ0I0X{~q$QNHZJx(HL z!eFE)`nCV*X#3BMdU``FHqlA=_BeB@F!_f){ZYJ$2FfZ|X$5w8<~Rmb6IoqxsBF=qKUx~wrPWXy`l z#X5ogxNg>q*tn=43!?pK>n0bA_GOVv!Xs4+z%Iy~O4`{`kAM7V@;L0Xv5SxXjG=w^ zlwy0J9i8;2YAiAzSyM^Q4`dc)!Q|L{ElhsE#^i}S({Y*f3CceMF2CohaOFM7CSv7O z(l<*s3CkjzD#5av->duOHny?Ob@2SP@cta;z%|T;*~p1<@VPA~M)ICdnqtkD!%7nz03?YaX#=8WO&^-G8!|Pr>o%w zowwcqFU(;4o5&|s#+;o3&sRupMd$23a(!#ghM2EUCvVsw8FU7jp97!f!p{qL)yxof zz7O7wjej%GKkam^|6&|J_q365FM^-%W6U-F9o=^Bl!MPIX)&||Sf9o^Tw>!p6e{qwsw|Ui{W^5#{Pz!m5TH&!B@Y_^q(l#1B zggV=Hy)wg~4K3J6V>o>wb!u*C?Fnr9B6*s@_}?0B>tM8@#kRF#Pc`=%rmuTGp<;o=1jDgdOl5e?lU*BVQ6l8@Qu*gq&*AS-8El)20A=*rem(F(b6ffH%)+_n>o+f z^2kl(TE-7Js@Jwe_1c!GUfUAYYg?jvZA(a{IVy>>fmi|q8x ztSv3p+A=7hc`Lp4%-oE|8hBLez(&@AW_a(HtOGHfPjYjQ!?FG;=8fi8ejG11Y2G-E z!e6ouNbeKxsr<{7uR#vfAqQf7-vqszkOMWyfjZ%bE_2Zg;#cCn7<>o!26 z$2HeGXqV_?=?cW&Imvp}j7}?ERalX++E#a~t*h2a-lMD5A@A#=^Dl>XG}Dg9CGXKw zH4j!x-lMD5!7q)py)G*2>zLn~1M4Z@L7O|7=c~mp*e&YBFT$sKD1 zC!y62&V|Kv*sPEHV!Hj{nnx*P#Obg}Dnp;GdZO6yyOhacucnQ0m(ACuGRTHl8DYca zl<^50K0_JGMNWeaM;S>v*C=dgVMMT@g%QDwHby-6d{^vjm-fmpb7=p3JGhWB!S`?q zw5m+NW{ds9Wf^T>GEkN)x@;Xvk--x#we7$Cn`aYrgK| z?7vpV#R9Ip1ODWi?j`d-g|)}JpGpi(_K}nWZ8Fys3)o~Yw31lBt(@g?;|~;z1zai& zgI{tHaXL5gOo8#Qa~HGEGwb44v#QvGFa{H286Gdc)Q$GvdJlsKq*t}U z5ADgN0p(Q?29eHn+?g2Mi(Y?(@0M+z^3eAB!^qsQBO^h4p%^bU#IRjWI~U_WGm|!Y zmdeL%jQ>D1hOLJ-6JMfRaf3=@F>JwHiW{RChF$JfSy%{5X^b^a8gK1k z6_I~<6MWJ;&RKoNc|KztKfWy{f(^&h6rUu}MyT&h*M}P_aoSgFTTgN!#)Ll#L)3>$W}`o8*2o%@ZH% z_6B~-A3)FlHT?oZ_w-BrujeF-^j+~r)UT7Qv7@L{F~DxNXq9ZmnCzuh-#LK~-+Eil z&q9{XMBlj@edj9loh#9IuD~whu3_#YOT4u1t_ zfatPrFHLrfkXMujhgI>P-`8073C65?eS}XA9yZ%I}5faVyp6^vpA#T(d&Q&FF{w>Yl zNtuG>TdkO530*pjGdg>nqYpmVCf3ZyAw}Pu}mbVE0c-8&HryZ>#`0} z2k}MHb=|9%1c%m%bj9-Y_V zQ9Uyc8^>2EFPe+?_;>oB!ADo)qV=$vIpt7{Nv^%Dc}AT&16>9dcnyCk(Oa~xf!5W; zjjUk(cSYky=5VhiPL6#H7W>O+qx+N38KEb?_{kFomR;%n3o^6abA|U9xbt1`<9p1f z_rX|_S0;26o&F8XZ_xvJAs+(CFCXu-`?On@mq}FWf^f6 zu7M{uGaeK1Q+`(Gd>ic8c>6lRFtquuajA_9IEyn2`dmw_oFkO|T-7(iM=6{A7&cU{ zAE7@HFplgLKhyX5obiYEu3esLl|4e)$YHLb&+LItZ}R(yIpzzI`-mw3-&h!nINS0w zSHJLqu=_P%F?>c_IRg^t!>m-sg5R@Ji9yD^+YM&XI0$P^fOnTHRLq&t9?n37CcHee 
zuv)PU4lria^BU)h65zcx`y3fd&kG1nZDDQ{oU7TN24u}PuPn=2I z37s`onaQk~TPS0V53v~H$46r#zjKYtLdIn~XX6wX@KvrA@qH&T!CqbITk0L0JgO+k zcX!+I$h?izrE=|je}i)JS+C*y67+$!oFnij8GAL~*K&Sf2mQLUYRR_k$Qkxw=53DS zqs`ECJN&Z~-+{H%xm)sOp|6|1ZLwm~aZhpTXI-4y=!Gw4I{-Az7s$BEM@T}ZJ_yNS=LIri8m z=6cPo1I{bG*8Bn!C!{ae5-p=}@c_Mm&Q`p0m{_2S=^%XaF7!c>NNIb#B+yR(`{VL6#AfH5=o3d-W8(4oVKCvl z&{Q}*-I*T6=%f8Rh`%mev$$WLiLvo<*^z0&=(Vx9?CjYHqdWN>!|00@mwk}0u--K= zK0g?ZmF$V;nLzGe5TA<>;Cw>e|`y#jmAjh@L*hii7;!L&dn}lY=zHdA1zoaIWW8@hF`&bU1Uq~JJSXr^yxA%#~ zo)vrwjN|5=ipAcGTSr?leHC+1w9zlh`A(@Uh4L)#hjmAD4q?tn=Z_Q=JcQ>cPjOEqv^uZJH$&3=y^AAomjz1$AA-$cy4Y$xyJnSh5q3 z<+8tT#GO9m2=N8-C7uu=dACk#G+tsEWvNS2_6+@T>?L?1GB20bzs)lcg!qo0<*3p zre7I#l)Z0b)`a&X)2tZwVAk1SRtrOcS&QEzX6yJVjqB{Tl&|A`^c(z|=9vvM~-fyIAp(!@L%)XuR1>;WSnlR)}oo8GH1_fVk7rum+A?i}OcChN__3Rz+58o@n zmwUjMN22(0PZVDst-5PlHZc%|FFBvq!-FHmTkxLn<-bwq9LD<1g}&}b!Iu{H7QO_x zZe~7gWy7vZiOzql8-`-f++pZjJKX2p^oBdE^N&|Imh9@U-R$9+(W({1(HRY+q zH!NgfOypvFBYqRfPOmWLd)Q9Ter&Gak6o#~(Z}^t?|axaB?Ha~M>3Ce{=~<751qLX zhP(!uJR8|uj*KotR?ouTf&XQ#?aTHg&wd()eAj8)bmiyKKC~}}bb)0DwCVQ4kOVBU zF(fu!VaSR+FeJEI7$u31)-Pd5$$DWHVaN{lPlO+5@cS_IkZt!}XKA1v-8FWW;5gXv zBQW@z;KyU&?W16A%O(b`gd3lBjOoFR71(yw-u}3;Pku7iQ{l#*{A5wvZlC;QVAAv1 zbxq{entu6w7K3r*`(Wq7#a76=s55oR$jTUwYeVkg_miT&$8d>gf{&hsC+~nKPheY? z{t&~IvNu|GUwkl5LWAFe36y6f*8l!ARo_<+ke@7O-|dedH7*(-(f`|V`0+yd$z05j zQr4RZ_x0MzE5VN*Gu_Tl20azaeHZig{}=hmWXn|?d*+!veyPEmqxs21-`H5)Y+*L? z+%W#VW2E@V@nbYHNIc(O+z*e&*P-zdUua)eHqJwgt71w|N7hg4vE7cg@{`FX=BoW+ zR)oBJr&n#rB9EGdMau@rPbPnRFT5q2?fJ0ix6SlEHrpHdEt_q3v|pRf)30vUcjYHj zey#ES`X*kU#78lA!?Kanr_-9F%zLd7y03fj*Ol)~`N__~s^{VRe-u`gy&S9>cgFQx ztm=EV$ik`%gjFXBs}5!!JLtbbtU_CENrsiM9^VSy(pju={Jxs}czxy zwk5(qVAWX{lUsuC1L4*?@@(8X?L4^E0d7^DHf~L~aBCMa>-*zY#a!|)?|c1R+?sw4 zZhgeUtz#_QD!h6Gth$f?mtfmHVvZ;{X{IJz%DJ#qKGr%9ri);s+yU-XSa&0S$o;WyJG73+y3^pnc#OIT`P?6)N# z{|MKJZMEg7=rjtf1&%rf{fbJ=yDPwF#%GD$nql0%h&3y_44US=#nGyv34@;WJ+PDg zqBi!PYdF_k^eu8jVmlNah5eiPF1nkP3G!|$ekQVSi7~AFDKoKrHh6R`=dFc_H*;q1 z4SqMFFRk6V&cex4sHYbvlhd~cC)@dd6?>^KPJSthlQS)x{L?5-j<@Y97MX0j1!jh2 z+l|M`vh5xkHzSIZGvjbFZS29x`flT7&P`o^2zwy=D|>~LW!t?FPOe~F&c(?u{arZO zz%Kgt;pD71oNV*&UyqZ=d@4@%5vP1JIC*$i3@4B2gOlgN2Nq7ohS7tQ(@pU3APXl? 
[base85-encoded binary patch payload omitted (not human-readable)]

{Y&)LY^zqtQ|8lpqWoC4|xm&^eOz_T~ z!E)+rXGEM}Yf8Bx&?@O%mxW5}kW4(ve<+&yKFvKQSy5q@9%@!%t|bCO^gDQkXze7c+T<|WMC!uUD5hjfMX9}2$2);#??9j)NqHzP*R;5}*4baX>q*VOrc zp3cj#hja)p)AoOr29KnC*V-ym$JWXRa`f^d1I#^mxw!|Yyff&@@9N&WA#cJYZy$fZ z!Du7)oZH>S+Ce79)G^DqTo-LalLw38#jIJgy#HjSsefyI%30K}JZm#eTbnw3a}0TZ zzNW8Fbqn=6#4gkN&{yV5}DYd%z85zj<@4+fhV5_SbbNpU1wY zZLPLn(faNAT1@%1=wih&@`>^?PVOm0;a?jq-^yGSqTk4whKU!M+luNK_}4&JJLBo( zPdU)f)$q_A@pRQex9I^4`j{d42$w&Ip-)py%$@_AKWDdn*_^|e`_Pq#LVni1CR62H9eKTXT|I9M z+Cz)Mh45v8SC72z56?3-ik}2{VG%N@jXnr|D&Ti{^tX#NKWQfXgN>X8aGqAYjyD0P zbiV&t{q>}n{f(E0|5JFj%ZO#ac58=cS!1Yf9cLG(bsFe~pCr=%V)}Ye>uX|LCH-Ga z|GP;mK55_q{1o$UV-5INiry}7Ef|--h&LAUwNF&`$=i-Jby8b;`__(hU{)f3W9z%A zFXLT8eZi;Hv+-5~+lSva!5nYD`{vxOGVxcu4!Ec4LkDLZ`uFoQR@wIvZ*XF12)jzC zZOvisuNZSa#Z}VXuHC!Uc&6wTEo01nB)7@BY~#*V8+WcMkVARb;un(nEs}bz<#9Tk zZFH0#obX%oeTmRR(N=8Ljo9Vjsm`Uwcz{va5Z`uSL=?v7(Nqz0V)i2uyIP$7FwE?K zJl%X=9mmiZ{=hm9G(`D>eJf|jRqP2*qD#$uKcwkW^gWR~;j!)VhEY2kvq4{u_QN3u zI5yw<_^^YTciwX0JthojFAhe)ro*T^B|0qcHyTd%!3c8hl{-_n1IUQkelZ^Jqlb9hd}>V<*N3{>(an$(IuFR|DK>OZ^KFrbm|*hOB&V@H)KZVE zp-<*`Ti3!<(fe|z%N@9={#lGozw$Kes* zullCAe5Dq<7L&opS~TB>y~vxl*mq`!uj8E=@=TBCqrda{_eDS5Rg6~+q4^eK6e94!${QLNd&zB7H z#gD~yev5yqk9hx?!1wojZIVyA_#aupS9}_b;VV83_K`1V{dZglm7R#Xn5keW$O0xH4w^!&Ss;*-zV>X;1pN40_}(@a%`4Qo{xG-Rw7d z%68tY%6rQ2!&4SDNt}p6c}G8%chUIE%)4Sm+}nQfv9ODnSLWMh@t0uwV=m)8arA9- zndav`PaW3xwqNRMe%_@%ed~MA{UUzMrz+kz)9}N5-ot;u9OKWRLj|XiE#jNTMmam>6R4L+ef-jF9LjuKi2o?|pZP-yq&0%X{5!@$-ql z2$}b|H@k6uK*wrtb`$XfDr+M#3Ez|amEzB@7C7#a#7?KZhoFxS6O?`8yIt1O_9?_5 z!smz!*dAy|{F2BU_ngSMIpQPtokyXEPX;7wIZNrCORT&)d?fe`e}zuXU*XD2_$$Q!0_~b_l0P-RIDdr``rahV_+tGPrt;oB zFl|Ynzrx^^u5hOKi@MkFzjLqnE0lFMUh`L&(Q>gS6}?1!`t`6D>tx*cT@;^MW}H#s z6F2AacKQIZB*!Up!}xnB!C&F8Xj}6E#n^@K$XoSqEK;{iTk=+2{KeT)l_TZ&#dm_s zJp8t-QZL((Hn4QtRq&Ukl$Rn!gm{S%|+B{Cm7AJXicX{6YFBaW2GP z3T2P*I}Gm`FZDFIBED6KGa+@~GV983%D>5P8$K~whUUx(faf6N?gGXg z_6B*Zh@E~`V%Kq+nLHvaXKgYLq2XV*pg_Z=C_^W_ltQZP^dSR+^c4!{#TuWq_Mv|(pw|GRyvta@tq_* zUF5AcPez)|(;i@mel zT8RTfy0WQk{?S23%us&oJZG%t2Vh0Ey8T_~ATVvEq3cc5e1AO-o&s;?J0(&kI#$Nn6_-*uUe zOl9r|;y>^g`0<#6AK!fZ8{r!!)`!R%@R7**)(fe7cUI%q?G07szO}N#^c5w3jl@4x z6}nCpWzBM;>quD_<3G#z#V={?mRoVZGq!tonzCXkGF#Hl;;U^T|BGH&CcbU9sNN2A zp$oGZ{{U#n@N?wG&rz}+e0+}XXF~@RA9csTBSGHL{d8O(hCbjEvv;EZm1+8)o?e%( z>wgo`|8}7Nm81XJ!KVlPPuh>A3)4rFNf!n9Qt3|@%2i*SE(*|ngf0@m?RLyZaBa%R&fv7~x0ghgD@`>E)$N_= za3%1q@%Q4p37*nP+@j7cf4ul z1HUl%6yu5IM*(6AcQUSHjO#f3=mh-ey*-Nezy9lDjik%^yBdA1mOZJqHmM(&mP>r$ zu|DDP_L1l?)Y0l^tKJmi4t36j_lxagxvQyVh%#IA_4)P3w*8g(!FSHA5B+Duq0j~F zWI^UJXyuNQ*f^2XtdniGuHy~-6U53OmSY0`btdB{Zz1%)%0WD0^vodZ-C6DGk@PIm z9g0PZO`N7qY_k!I82;QQb}u)3dy^*!FZlHhIxp}HQ059B5MJ>!eBlZWJj+<|leAA{ z@OgNN4O!TUyb+lq`5yAkyiFFy1)g+XBk`n@HM#Km(Haj>I+zlS+6F+?RCtaCNa3JZc7^|%L7ST`0 zOOZYZZt>MD`}Q!t;v$hjzRXBupJOUMRMm?;E%>!|z{jM%j75XZ*8<(he)}WpJcK^t zVgF$V?>05q6Hs(Mtk=OOyYz>?ew?LlzsMLotUcjl0VQRQXEgLce}taxYD! 
z*4-p6w#NjkqScdlXcEgqbeIja-+54ekJEmyW7LjVgeUE#QS-+ z1{*PvKPy(X&_^EhCALGM5oqYlarOfeA7|OOA`<@vUaI&5(4|FHMx9{pWc&P^E;%jLIZf}?;{^2Ad5^)jZHw?r_B}l|Ky3dd z{z$A}GWJDZaIYQp|J8r5UCa3EQuP?2=LYXe{swgS{FG4=zdhso=v0wCD!$KkKWfKm zZ!Yt=LYF(HzqF~q zb7=n3#%uo4(jJWQmqz>~$5h>4ngf2S`%ANH{?h7*-6{Uk#J-VQiGF@Si&x$8wI_sc zW@QHO2Pb`#{{IOdp|(!lM_Zjuc{P_dk0WD*kG-~u@_Vu}-Mx{F$N?o2pS774I~HmF z`aIX;7n1!=tiL|fFQmk3T|j)0X7rK%J_;2_tdGL@Sgqo7RO;&f`lt{7O`F#7d!l5*3ai%f>&Z)Ew+^oT5f;9#Ot4S*ubkTMaQei0Kuy= zRmZEyLEHXa$(!+Sd;(vEAEW!6#Rl*Pc)O2#vO^8riSBT}fxDbf@bOQ7``396nSm_m z_=aMy3}Sn@k1u+T^W!Z8z1rT9@0Y2URvxK27J=XJdkVi3w<5>JDNPN~U8cheuK^Bw90MiZxPt+KuaA9s$a zEy=_?C{oaW&t)U?#$`1H@bTYc`1dE)XPu8gwdk1?%8 z8HFCXxXuC{UWIN1KY0!t=~(Pw`1Nf12s*CCCU_j$)by>P66ZztQ!jcB{-L#4nE~r^ zpBbm;2Ie4e(53ls{qSDw=fL`dc9ya^zX#haHb-Iw+YZ{w^WK?U!aDrwbIJRswCqrb z!C^uEDZvXh529aT<2|z*`SamM=;#9Q=-Wf1Y?WvKnB^-Nw7*WuejK<^Q)(I0EwL@6 zobY|gFSU&AE|B@By{Yu4lQzv5?USW{Vu#p2&TGp;KX?V*;2qjvUD~o#uZ*MYuf(RJ z++UEj$9L&G^u989=q`BfhmqMogPvsW#i#Kh%9#IGkpAWZ&Mj{IHFkA;cxDrO65UwN zF+ODPBQZ_{$K}j*9{h1Ju|C&ak(ZZqL6kb9-+ZB6v7EDomMGKY8*!Y&isP|RMh!5Y%gIt>b=A-mo zvD|q6Ffq2VLyv4wM%pXInF-c_rq#+h{iy&Ta!ve;CsY7@W1weQxt4Esnd)(_+SWmOYB}QQ8#Qz6#rm zXInOM{RS<@k)kwTYll|N7`l@$D)37sY%gG+uv*FVNICJZ>>S(_8~=JcusfFv?;73h zVjX?J@478_$v*FQakgyodNjWW=vk$bXQ7Oh`R1J0 zsxsetJnM7JxtuA<`L5VZpPUbhj4yAxiJBy%tJEF-RVAi*p@)_-o!#hOQm!|f&xw}Y5XQm#2BUbn^AJX9^#j@u;24+<_)>_TaR^{${2LJlzug7Qwp(@COU&+KVs^*wyY!gd#ByXU2#zn>*S3qfjct24F%zZj z0_=fbwCxmQent1FeRH@s0$&rErv1KWxc9Hv%ufraR-o7hVCV8O592t<8hyVavSsk{Y7JlP*+>)b(RrxB@U%B zs4V{)=$U)YT5Khut%6h2?qXllYh&$7eH*br>}gqP3)F<-&EPtL`wzFQOzW`|`%;Vb zWJjlFT}a$Uj}e#f9@;M_Z5B3e5BHumo=;rDOZ7!;LYo%q`kvT@9s^HONA7e?Ax{5irU?IIdo>= zOU|PMgR-y}kJIjRbTPlV#NuSE-DTPske%Q_`cueXIb(41$6mInkiYU_{N`^IXA;;p zwe-oVT5N^(Hu2j{8TmaG`=p!S3lv-NoUeG2pC6;l4q8`tf&NEM=ffx z(6bF1nm5C?(@mP4|8r99JC#($+cML>Gl_5A=2bnmvdKLXfABu=QA>>a-J9Q>Uw0Y0 z#v~=ImT@j$wrReSqI%gtaEAnapLp8X*R*~}TLFtLQ5y^Wnyij3-o*NQp0OQ;?#I(6 zXZV^9ZsGqCv>|-xdFlx+a=6!LTRDDbkoMb1Ji5Masg-l62snO#IP?h>Px#4=klm! 
zTz7%ChZ^heAn!Tm`+Ll3?EFm$5jVCEC!_s>lY`gjIC%s(GPgPKE8EI(|4(pY!qoJA z#wj|s>@y@jR409Hv5bq0`LxIwlhixMnB1%47s!8@aXo5|D~1-|?T%O=3izkIb=6 zRlEv(>J#Qj?t5r+$(Uo;oaa5}l(mWeu%ej0g0H@{cROoO#v#01&Lj1;m++~z_cZYT zZ`PjlL)r>h)I^WQOZ2rTcQhX1cL2K@I`7Y7bzT*I(7Q1QzIFrr?Rxm!b==(`R%x~# zt2A~UWL#n$5GPDq2Ux*Yc#L&WMjw7b-(?;BjxV;-xL6#i{bF#e-GGf|K}-w|Xiz~` zn)(CvHMt`8np*5Nu`x8nu8xlK?_+3iw<#`$M(-Dkp^+&zVu_)F{v~l9`j18K`vvIN zF=R+WnQjZVXD$q<6C`ryd41uOeG(}yNMK(CO#B>cG9Co)E1-bJp7Uh(wl z3&b;zj}wwGi8n*s3l!aT2|VDG)$VVEu8*>pu7!6VwJR_0RV#NiAQP%miGhKAC>`7G zgRC2|Qy_P5wxx*Njfb1a{H{_Hi>F}s$YF1Q%xIcb;_T^PPR6P#)jNKt;Z@>djvy}P zNa$}AbT}G%90Oe@A#d4RV^=ypS;3a3$HlaX?P+8-ya0WNxR`TNlC-#($;flLKPI|{ z=iAe?*qHNONnXcI#O8sAN?c62o4OsH%VX8!V*Uwwweuci!fKtjhryS`wv#!U%bh2O z_0HEl=*OdtxQY*M*6lF?%jh_}>G`X5nwL7Wwx~xOQ}wu*WvmUc!RNCUfl|a z$T59>M*F+Sm-&&lBNk#a(r+{NrTo_A-8g?I9}>k=Wv7JObB@efe$R!gpB*7S@54I+xZvXSHO9=O4-rFZi7`yr6}@ zLsnujPU@~5Dlz8u*s&5ne6H&nCUSxPOz^+JT_X*Cn7gj%@MFPe z8~M+*Wrv@qE$(%`nnt@9zldUTlKO4E4s=j7dX?4WjUAl;&!eFgq|-Ccis zxGZ1zYV%XmUg#WNcCGy1`R)VS9p4Y%d*CMdE$ii|7AtwIpZ#tAgc-dxI|p$te5=yC z>*q?-uDg}6=m*D`$BVVZFy}6br%`DN9>7m6@?^n4b$t%^-6}qbj3_2nQ=UDs(xpy* z_v0;UxN~^(wSj3iuX5%)^MkiZjNT;woPo+$HxE}<9DQ0@FZ-ZQ#wq(2({>t}$A0da zENF9V_a@fIH|bySx3rb$z}_>&yXF4gnu?!rCd~g`dpI{lFP;1(bbd_f31Zvqxe+>- zyU5mx`8{{4;q$B1Z~*=weGy$X^5MV|IUkep2>+}&$e2zn&TN?q?XIF-^FtRpOsfCHG4-pM3%D7(UN_$h}1JW6cvqr}8MN=)pQ zgzP2SlY4q?lX~$KXP_x=R9-5lXmU=F!KcVtq&qA0+tDJe0H8e z_tL(!-GY6;#b`e;?9%PqhAXo-(7uheAahjOPc7jtO<s5HB%)w`!W7naJg$-IvHfZq< zXt5KTaaiNyls{*{i#>b)D80Al1$feVWQ*`LbN*aHbCegp`21f6&ppYQ-25NpD|hUn z&HUwjXDQ90v*7q+aQ-~={~UbHHwxZ$g|a?pl(NF#tnl;OL=G6hq zL;dF%%Xb+|EZ_PuWn%f(Zut%0`pxdsk@qKelhsdubM&kbK&edp#DFS9$ zU`yHknek<>ld@k^yt1!BzPoh;<#RoK;Nq>j|JyP_!9}sbbAU3XMauf! z=#DiPBeNgjZwUEkSwpmcv)m^r{SRe5Yifn`Pi*?7>y-5c{1#XeJ2loe-m~gA+FgK% z)xvq^0Pryoyr3^`#I}7v^_uZI9X50``tTgON&mf7UBA86HQ-M8_O1l>h~UAdPP%IH zEzB=xG!NhH3KI-_-y=VFH66WMSw98boE3ZDRL-%Kv6ZhuoATBv5>b2+?q10`eGBtH zZv7M6WRKzmcEFPL^RrV`Unh1JsblJRuaOtAD4A~boK9?cor%~DhjWfbJaQ9W;2Ndr z*cRC%=w~XWynXO6rgH3M@=j9utaD}T81@Oscqu36%8X6k(mBwfWS`;YD?0mR=3d4; z`C8(tGqx1^FYm9;&Y|Cw3w)yQM?(CT@}pzQ%l<;rh8bxl-U`k}R%rJ+7?%hA#LZnY z;=eR`(BT5Ky9*s|7dqT7bhusUa2|9xn?;#Dk+!j&<@av9fjPaNxxEfr$DU`^_X{3F zzuSd=7h6|k>URy9nqbIqAAI2ApAva`1)<#XEo+`Fz@Y)6hI zDn5}|lu5!^-eWrfpLW;eCcNA6Pj<&0<*wxdN8a(*d$^op%6j^*{= zwb-a^L+zEacSA0&2yk{HbvG)m}KT7Osp$q9lN`d}% zal$~_XHQUj4>~t*Ib@Hk^3Fo)$+(jXu@4?SkcrN`KA67D*G>90FIVoceaW@$j+Z?< zUfVLFNA?e8>>pMQwwyUZpK57ae;=*i-+y*VWhKPl-cJV)b~PV zTgv9gwt0=wri`}--MrKK?Y^;gf>-_Rh(6YxF=Gw9v?y+@LRW!l*Ade-xiXMWyxdg9 zEAf2ar2i%yc~4OR40#6|M9=R^tk8T$x8Y|&{7({dw@31lse2w6zhd6XS&OZ_Ev;C^ zopkQ}RAL*D`6_z3V#g}ZGxv{$hOk*kTF%Qa?0~+S62V70_`p|0p6#oy*AL>0sZ#TG zo0unWeH=;3YzJ@yW0Y4Nz)b*dI&h)G_4j~>^snQGowVuDkEB&X6B1{5^=R%2NILD6 zl3oVx(sg=XznVKpo}_tmgV-|alDLnqx-!!hSEkrfcaKz7us`;u4&W?xpl*v5{Y(1L zGD7cz9k@>VumHF+|1#cO(qz1n_MnWHJ21!KIa+_|gY>O{KFIi`%$=?r=f4l zo0V+lIfeb{E3*HZi0u;o=J+o0l;Lk~_?z(8Qutdb{LK!`H|_E^VxoUG^=grQd8D}w zK2{1Jb6W;_0}+ewL(X6w&E%r|Mlif!ArsnmoE#PI#|~-zuW`%KKL|i->Tn-koVHZ24`jd-O*bUp&t?Y zAaUc_Z|)0lHyS>)PwqtCi+r%Vp3b+cc4Ad4-NFy-n>I8#kuM3Gu4)oKrKl6aR>t_x zZ}-;RM}PYFL*_P(e%uSayjQcxr%rG+{Wo^;X-^cqQVUN11^w_gZ+j;=H|-6t07rO+ zynPY==c3(4(u7B>vYPiill?x@Jn+-W@Y4bC*IMS;$3EL>@ZemHx7xe2{uo)|rH}W( zv)-nT%uDQi=ETe={Hkw0MQ8f|YwnWihdFn9_qa{+t28_U?@XIf8uFC!0KAsr^@7jLw zd|Qc|G~`Dc>|a%0_hBa}$4&s;Ch3z6I0End`-^<{@Lo~iy$HQ@#=uJh-d~yfz_eus z4;Op!ME{MPsT4aIxB`g{~Mj>HrAQES(bk0Yzy#N zk;)p0Y_WarTdQBvek5CbaSTR}x1r1}PRSKcPJuV5;Bygqhv9Q*)#TQjn- z$Rxrv;cY*8a;JgIf zPlE=E(dDM%w>I((+{!bN)p7VzQn8PxaX+2sGI)Lj?~Q~;t{|>Fj@Twc=FACLGG}@y 
z)rY=#9r`i*r0A47efoRo^T?xy%-*%^^RzXOHs{jz!}KwSF+7Am`QT&pUH1j@pGMGy z&v4E!8^4Ii`AO#@dY+$U=nGXhqqEV6y`*S#(6ieE?^|le1S7UrV3cnYY#7AL*|g-sD?7d*?LHjJ3xp;Y^pZ zxF}nhUB*7@bIuM4-}0??9w@|4*!-2Qdb#G}T7Y-7rn#rR_nmjsM zzx6tMpMTO!`pJYxe-c~-zbAm{a^SPmIWrXE?6+_A%#g@*E^IvF({hPicXw`WnmQ+( z=_I~rE_2ympv$Gn$Jo!=HcNzow;9a5*VZ8Cn0K=J}bI=pxgRQHlS^%N-P7o8wZ#LKDdS+h>0fIT(5uefRun`yr^YynS@*ps25LhQ z-Bw@5`dSE#4gEPiF#bTKz6Cv2;z~zioAPj9&da-Y5EtUAd>%RNgxFL2%G+k;?hw1k z%j9LW+C2=c70vKNKm&S=h^8ttkvrox7vp0Lt(=9GF=yk4c!NDmAM+QL#Vhfl%k?iX z<|=G0!q4Vovl5=UOZ*9QMU?ys-LUicP0H9yf%5P zoJm=L>^GfjZTyxv-!lA(5@&F0oU%dug!=yscN4krHXL>+6|yd(VN-lIZX&TaVfNo0 zGH#)pMLZ9VCdzz{ne(yWEV=f+a_trYe!Kv4F^P_)Ao`hUA@g~XnA?-}N zj@Yn zZu?-}!F8{^+PRLL9dGsju=BZ}JmY^$9klO5)wOT!%F@tB>((Cp!E@U>fAGp%o!1Zi zuyd_CWZyP*aNrenU|=A6H!!H3e&h?GS7>J~?X0Dp_Y=tXt`6LH*Zk5@4egYWm*Luh z&jGK2`^w1$g8eH{qbNHJm<`uqb`BX?=s|s`peboZSG~UOM4v-n(dHNB6+5+wPeAwZ z;jzDg4kQ+(V14z>qtLvZgBW*1m*j3-%)ZH5Ne^V(?p~Jze}Fa<2-Q$+wCh@9#OMD{B+G&5TTVikCjyiKRHc z$TK{hR`UBp_>jaB%5TnOY>?P^`CV?dyAnQr5A7c3x?MJX!L&zf z17gdiIjqlH%`rY;wsnNIZZ*#mOCQ@cBW6r~Z+0mg4w!x4ny74$I5D|Cm3MW1Gt7|5 zDuV85HKIeW)DT)5$4d2{Yn2A!wT|F`L*n=hd^317Na)&sw1@on&>jIqIm ziq4nD=xar(yNhp&M~Xd2#@vtgWZ#uMjhZ8CqS(h-Bb=qrnc9Rt zKf%M=E{AqUvrnilxBt+Ip7;IW0(*>#ybY@?A3|?=<_9LsSKr`&zQ=d0HR-ogDBfa$jHc3jW4lu8wqX44Qp) zv_Z2K%-f>7+oQ&r1MOOVDDi*A(nrDn1o4eWs>r`ET?vbB`dN&5RE#mNWX#rBUt9e^ z$me|UvgmJ!MSr{_vA!1lm2=&+Jw6I4uhCYs8U+jMXyY7?7jzOohE_uFE zcX}auDg3^=mbkmsYj5*qVK4ZaT6yNM%io4bot zL`=#8IU5KMvi8*Sqe|V_Mq&WSpIWTeG&tF7|Hz&wFBbM)^A?IdJGsLR?>H|o zFWsFN{0Lubkvm+^|K;%AAMzd9qysx)PhRF%P8;?a z;eF^-x;~Vqri^_FnGT=DcHz4ob?8Ll)i=T$&tl^WW|Dt(SZI>i!Ge5G8m?@&_HtiB z@g(udzjy^}VifohKQa6A5sxyPxKH#r_VmqP(QUh;S8U+il+eZ+#v#vI@dQEmpU`=H zBr80u{ATcW_&x%_;!5wcDzo76n(^JIhC1{ zPDduZ!EbAB#n=t3xryy8*4!-HnZW#Wz#QLLlFyuHGxN0fCG&jUv_Yks`ubT@UpLN7 z^~9Q=z0R0tvmOgetQWX@fO!heLm$OWT*o-u@C$vwbGrTXL+DoG7WhBu!^jS64mq|t z6l<~P;L(%`_t4&@@Dnn}ZI9iICCltJ$8En-?RigpA) zeT_|nXT=(un|(N1&e~#*t*^EOKe5`1HP3IxnCFMtU3xC1w~t1~i#-1;=BNSxR%q~( zVrOMvZZ+xVFm!X+7F#RhdiOHnUv7@;uq}qVm2o}z`?yJwA|>xgk;Jhi21>}Crc5<- zbB!7;hQ5;8t>Y!N_v5Svx*~F6d^NU9_(93AO&PhB$$AboT^BTY(aOn&?Dg0wcq;MV zOX0l@=FE3$R(xfs>_NhlG-8}G=isM6w;*OAsnszhPsS_u$$b2Va>is| zVNUJb{{8dfu^H_H7OnT>)Vlom4bX>VEv#owGLQS{)81w)wn+4^p=+M@JZ0%CGLIm+ zeFP^G=hF&aerw{y%bGY0UpNe3=mm4u#DBtP`Uz3DmtwNpIkLrhnvJr z#p=W3irt6TrA)ANjw`LUiR%?Rj_8O9Gwx`bPX1e0Hu(b_?(kjYsaZ>&nhJEYwd9Ez zM8269$86j9OYW6&&RtDcX8#b_%mt?QacJPeqQPSLSgiSaOdmjBG^m|4VsO(|jMR0R zv}5Fb=iF&9>deC4&l!Bn-oMwj1}+8fdoQsDw2xx+FEPeZ{rk=S`fpTd|(oT`qz&0G21w;wegEA_Cwn`tNBx8wjCc9?F-M1 zMGHB|-y@lGUt{yZ-!6jTk&AJ0r#ZGDzQS}p7B4+8m1{Z9f*+k7?eN1Z@H?d9D>QPN z9O7O+_HE%8Eer5%0yjAmvH9Wi^{uSky`wnqmYu$_uXwvw?qjzY);bD5G_sU-*w_=V zqTSQvhm!M>*VEo%&Ps|cr5RrHXU=)6#6oV7np+n19U zN_>A}cM*9<_?i#C=C#Q@#Y1DuJf5M-gy*0=;b{u?#Mk-VL*Ge(Ia%af&T{8zs&bQ- zMQo(Wi@uLP;&SKK!q}qc)|Pbnb8BHucArnx*IiQJX2umPVq7;crUwfB0`I?eOpWA^ zyIgq38q;9Ll&mRXS#KhjxBR?!++{1g*UMfbahI~*HnG=iVz1f6o@T{e%6j{(^z!m+ zd)t!4e&#dfidgdKc2lm1_KA9<;&W%2?Ka!w(H~`!V+h&{5+5zJ=RuzHnew*GwK;~o zZO!$w7}Iqz#k3OfLpA@`HKD9#DkoEhE zw%^b4TVk9qe8s`LPTIIQ)>*^XpIJ^mXL!Grw?pxZJf0f8lNr4eulNb}P&=m3j=OJb{0B0|2ycmC`cQH9P@Mjj|&lEV9 z;?FF_pIM5(vJ`)1G5$({b0Yl}oBqvG{G0asUiQJc_%lbYNY?lBZ?m7vKIn;xH~IBA zgPuwy-UPe7$X?@tvCLxxAMpu$oEf@)c+T07ao)KxL*{TCJE4rfTmR%d_&)5&`)%>; z%i@$_r-3oD$BZicM(5c>C7%6Zu1lV0YIBX}_l)=XEp7hVY*S!Y!#=x4_Fm-3qs}$- zS<_IQrp(a#dlNIWiJ^kGCS3cKuZqkm^5CNK9{cr?CgaS)jnEH#YJ$gM@VYt7McrxK z8_T^mU})USyh+Yn?0VfeKQWqn_@T{vSKc(8dn?U*Be^H%S2mjX@_;Yjdgu{+c})Lv z75J(${m&i~KULtzBXL5Ukv*S)t%NxSZzc{AoK}HT4>%Q^EeB_5CeB0#6P$USX}x^C z9usF(;7o834XY!-s?CY*SaK6Aq(+Uz+F*Y@@`yX{Csz_{L;n%_Zo}{1DDj74qviY4 
z#2*S>{FIthQy7aLv!}!OikoOBPjI=w#HHjcZ9zV<)3=X3?6acpXjploV0A?d`d$tU zWIS_Zt-;eii?Pg!fw!Efv78%w@mib1ScuJ_CYrISlkhn5AtE}m=bTaw9F3$Z#T-sQ-BQ(i-RP27qoLj=CXUoS- z{rFxvQ^`wzNKs?A#Tp{7N3x#lx#n@~LvuVKZOpC)4|&DCql}!y#a)pdGN!)TIGS&q z`CLfiV^~o9rBe!Yfej0U06MmVBwreEsWbfP>(!SIr z6WOY-TzN8=r>QL@`mMyB`p})l*HFzKpTpks-zm>bFxSK;UG6Z>ifp(79<>l#2Ks@> zu&LA{Xc@?PXLwmq8LIof#m^1jH~p|lhF!zlPv=uF=1JxjM6PWm-ew7V$#`_OrR*oo z=xwWzT~C(^pHk{ll;qlEY=lyiFamED-Wc3n6mI{Sk*{DhIBPq41+=QvefJ?~q?GeL zPGTA1odXzq3h(*Qe^PY)CnXTawN`BVr?)D8+7CDiy#TkaG&VJvv+E5Bf*s6VllLx`;$id;1e8ErVSS|T>}Bw}y-cgIzF zN}n;#@=*Wo*VIT7-x@Kvb*V+^<{{*$#(v7g z;3hLJsKOoI%s5YC-w)PuE{gemlArMfb6)aJRd+6~#K1>fmwHy2$QRQdex^J2_(-<+ z8D}$>SDL!ojveBOLi6+PH_7obC>t@ zGjZ65gl130_0a4BbgR?(#Oxk3X!bNY-26g&Hrd~&pL1~fPhX>iZw5an@xwHuhgi07 z!MA0fPBZP(mWtR`>HBj1or&grg%4SL0DEc^JTy;YqECK}-&Mp%Gp6DnROz-(4ZpAC^@C>X zB&V*xMf7YrPwfQ$o)s7BPeO<5fjfF_-)BX5@1E)l^;HA#`;gm0+8NBdLC)uN$HnPq zw>C6LJe=5@en-7A;YE5}wUat7Irs_on6h3fvfj_#d4+-tU;{6$6yHJ1AHhW;xZwNK z$a*>8t%Uw4OtN7NzwP{PKi71AH@cs{4OrOu-GjhX_??AAeXOj}oxIbwde3dul1ml; z80RK+e)ns9c1z*guaS$zCwX6pS#Lw8@W5Y@E5n`vN|(N_k!LL)P&0zOFNJc3EGbY( zyPhp-osYaP=dlMh7GBA9N8ML5lnrP3FLJ;W1C#^r_OG6K^Hs}q-9gqL_Po5{_3VY5 zTMAwe{|8r+L(Vm*usc>S8@OxziH#yw&w>9{UjtPa>UXTdr#XPU)7VPeM^E8=ht{!D zcse+gyjo)4dWYwvZtM$;FX9xwI2gV-1l} z@Py#a#-5mL#@;}?Cv=-Y)cM{2xW?G);|-`X$t zX7%is^V!R~md%33XF}^Up!xgQFYjf)yyr32g1%qcd3rB+!Z$CMe{VI%Y?mV)BYJE_ z*0??7BKstn;9J)GnXAlEMb_HO`u!)aOOEg8{N94A?|xv?B{Wrle5w16B-RSu#!g2b z8y)%Z8(DIiecrA3jl@=Af6q>Tl8ZXld&S6!pX0swpA_onIumB-{x;5=u4!0Bt@=gC zzokmNe&%vM@=pR{Z!ydlJvREzcoGmV;lxw`D)GY(l@}ezZ+!GIO`vgtAPVP!OHz z)5r-DgXpIH7Urdo!R-H(vqtxwN!(j9ZEivb+rqwns#^z3svVD=PWEC;_L8|KQENy0 z$?iO_BQ$ZC&>V7rrDtuU50N`gb?aG4X3mGCKpyxNxx?Ou$R5=Wclc%AYa@@WE_XUM)T=!%$r7>>}$l6N7I4C4%*j-2b*8_esBqW>$9YnMVCo0&%mo9t0hn1fXB`} zNS&j5$w$46c%~vpk9=`*g{gOq3_Zzw4f%pIf5;c7<4pPD{j27YC#>tpXhRRbz~1-- zaB3C38a#dtUa{jA%s_+X`;RC5c_pr`b2hp*|g2TOqcQ&=&d1sTf ziL6v{6x(;}l|^B>P8L(W{+VXx1J}p{o`b!kn7wUCy0W2_XJkKXEgaBvGLC&H5k5t` zSJSS?)Pt(faqa!edR%guK9J!ie5-U@1o6`Bbv`*txwuJRI?3v#~!yjm+Mkf zLiEYKe_GM4Pwrx`=vp=n92K)aPGw)ble2+_|KiZmw7MK*(nZj10?t5rURg-GB>NN66wd#$t zkHtqVTFD5#PmOvHwl(k>4%7bA1D`wCufh6%za(sX-FT3{=1&~$&aksC(dvEmJ{ z2Pc{MqXj;%)4$MmdzDc;CX;e_m3!tC)In-!Zl}SFSN4+_|jx%mNt8NqdSY)_Do>6(G zjhZRtTki4;ACep*J|tE%Z+ViiR;mN~UECwmSM@%{EoF7O`%ANddC?*lh4u-@c(UuT5$iH3dR%fLsj zXlmxpFdNZ>n#mEoMqSfDe&D(aU?(|( z_Yqr)KP~Jzs>}K_JV%wLqqMc?x9%p%^IsT>%$9ryi+-;(sg=1QiPN$3-kkqtpX)&8 zDlv(6*|H+7M^3TiUf159b)5~Jep1czVYC}@#^HZV!T*?wY@7xUNQW0>z^_N34-V+& z*Ev74)?j)l-)-d2()FOv8^48y`-7u1oMRB2N!@7;8jw9Xg=;zdPF6E-I?Nh+!{NoR zKrH>*yFy9TbBliXqJ{r~<%>d_ct`Y#5^!Eco@vPiB6di5UdE%S@wL<*2vmS?X)}j5i|@{< z6BPaC}C*ug{M&9oSxV{o}3Uri^_*e1oxuoX$G2lWoU-ZP|8@Vm~=eJvYhu zn2POnX@WB0^Ym$_pJE&RNZe>FSz7j$oJ+kowU76vDifv>BPuby_;e2HHlv%B@TFvC z_R`Ci8OyISXwgedozP|pZ;-vlh^aH|>i?F?`o?eW zBaf}zt9h-5UUz+2=yfi%_b@a#2U>gxntTvl?E(Bjak`!+b{{L}*4>Ol>;$K`_V>4q zQ^>j1W9R-2_ry+cZXERhq?Q}D72oQ#Q1IlGkajXIB((Q1xIB*Dbofqq66fF!U&+}B za9+z??3)skl4JU(^Xs8e+kKInZt3GZbaB*q=r>0jG^yvT zU@RgxjfU35rzyTo@fDMIvpa@Be47II-?SU++3weLosJCMXFH1~{!8&|&N*fHFC~s$ ze4P@bW%qST3_)-@`w4qdEcp3|1GV5M=S_4x5?heX*&E@FoL^`n2VM8|%^BwP=}Hs6 zqa?2hr%k}gi`^oKE%*ZKT7UNtc4uI>2|vEqJWIZ@57>F4V7JtS-6n~@fM?0sNx|pe zFF{Y+YUatf8GG*c3i5uLf}Td75&COmZ3%t0Xv%h3UtYe2e>#z;?fHXNQ!~I7>OT_0mkFUOcu*Fvx9}^u3zo4a z#HJy!8XjU*FF#hJzyG`q$5pmi4Ot%&tKlLJ=#%mVTo`4n4~f^X3Q{hy2n4||jNsI;!)L!z^Z zY^YxwZC*py7he1X{Z7M2LM<~rrwFmM( zkQm$`_KqO-4%t^C*gGQFJ7iz6!^*JzkUQ1JL;g}*AxW>gbr8|#PGKXEI-#=HuAB`Y zIlSA@@5Ys&-)-p04aJ^v0NRuLy8msKJNzzfMf06WQS@GYg~4;khp6-5J>-(H^WY-+ zAA<+~9vB2odWm3*IXA?BVK^229b82?mlZT~?wvC2G8ggBD1XE&=G-H?T_#nRFA^B9 
z;3Nz_Bres$hr~--_^|h9$vgJ3+3yQKzufV7V~podNj>_Y&`!s=LjU{7b05A@&Y*K}K>PhuuLYk}!td|l{>@W%x)x5^*?v}O(h{BA9~21w7x$}_ zFf}pre0%QS={kAG&h|LYDkb*Yc@&*#YrW&Z>SLnMn{gA5xWluUlkc_rC+BR6j2HPj z_?^ghJ?{J_Blfo#82ilFUoZB~W@x?{nisv!XX3jVeEY;FUls2^J=e^4#QD%>@E$7$ z;ginDU1!3%8N3V4pB@d(^AVbVa~3pRWzziji=p`!dT5??kp#_q7`wzRP){WkS&tvz z>By73p{Ev7?;D+XZydF*vahc5eJyQrmfCWAvomdS+v;!J=5vCVeE1^mNZp3kZv(5y zF7h~pCP|-O`6kauD|V8$xyWXJD$IKX{70B`g!xJ>*|rV%+8;#k;alXg)yV($_2ZQP zG1kw^7l?(Olzvufs?ui^KG0~Ii z@`+8#2ps0xH)t!w_a$@d7R4VB|CQ)FT9IXA$O!3i&!(O&`JTmgf7;P=X5E3Dc_!1D z*S5zvv;HDyUnIXupL(_}zGD;gu*u9x>_rceS}&2R1V3>K;fegV^HBS5EpSZ~nyp^_FZMyIp{_8&TtqJ}%XuzI-It3aQUHA-e zlC#)H_)g<_*=t_@qmjoamOlGJv_5-nk3Rb)^bt8X@6Cem6*(%;tk%~$_QZbr+GzV8 zXT2px(K=(DS=19pLjT9FpH8mQng+>J8sr^0^Zk!wSWiWFSPQ;LuK`2z8$=V`ghb|k+bG$pLhEs1I)VB^kvyz zrO(Dpb;COf`^+`>ndi)nb3#88cs}bU1|zjL_^dh4L!2{b>~ij0KWCn(R{ES7PX+f# z95d~XCO_I%wQj4^ao|0tT6f#l3-#9gk0jjZ4{~4nTqyHgSj1jPJRy9K_zBiIwge@m z!;Q^hF@3Y=haU(93FHYVQkr%# z_qJ=4u)seRpUYVjepwqT=CwVmbjjEycaz#5U(3~Qe*jz`7Z`kRAbCrv)6IO2G0)e> z5hrvjv9CkPPgkVZ=a~@VUiw0%E(6`msX)BAiBz{FxOGB=8?mm5pKHw*@OIPu^ z16Yx3V=^}8NfGv@$Y^Jt^wql^@_v!Ki@4LpAJ7S3UrIVVpa zcIHh)|CJm!JItJu`S?=v6O5dbVqZ$pb55oNb^sso@15Q{*x&ZG-f=|*;7I&?I-h2r zYfmU>3Jxj2cC1xME+WBoJ2*Yg^=V@=HY)ITsR23GMQu=O@?>^VzX$uXvU1mL@yT3I z=K9K&w`H!NmeC+3OkTNgPCB^ZeepL16Oi=|vsc2yWY6^Rt=IGc{tsI4?`Oc@kDcO5 zc$nnUeTF&c&r3`SwDz>%9NdYGewz87=i|tZ;XIEY0eKD^e|82wo;2OZBQ;Ztp-Ey! z@bUDkYa(xr#33Y6XH@?0;J>`1;m>)K>!5_Vg4cq zD{4OZeUbv#FqR`SKWHRpxo>0UF7?DQ`jWM<^5T+^20s zm#9mbuh;EOlGw5$nQJL^PT3pDQCGK#oGmTbx-6N;8&7<(L+T_E^98;)eXRI55z|#o z?N~2)rHEq*w-67u=^2NA(_q$o25WPK5zN&WRVlU#@T#)GqYcyPM_~_YJ5c^ zljL48wOSij%?l0YS$Rgqel2ZoB0f|hwpifz2>0%f+N%tCf2jXdt!Wp!+3^?0BYt#c zl)Mj33tfxs^Yt0-iI!JO>RiLZ1+3b3NF0EkXLGvS01?|@7CeSD~vP3 z7zO7F^ea5K)oIB4Mc~dZuk7hVUJ=?Q2j#)n-;V59ivDQ%1ol$5u(wYjxkJHjWA_P& zy=FXP-Ol(d`6bv)t##sKgPE*1zAfKM?#Ii!eWvQ05qN0)CG-DEO`7k>ylE%c#ri`| zYv8}tp8tGLd*Y``VCOX12a}_d;zT z%qYLi{C(8l8&1qn`W;P+Y~ee z+z&W|D`#@u^u3Am2`%c~=hkxm;m`2a3U!EHkKKCrBj-?RH}XEbsQY|qHZ|>w9CZtv zoW;afbw4~#&O1x~1+n?JB9}=11t0Ws7`@~$dPy=kvv|)@_H%ERAuF{nbg(zT+imj1 zF<|?i>=}p2eFYr8Ne;Rp;N|oDZZ+|K7`(Tdct2yq`(f~Y7+JQ})Ma@|ijaRRk zY|H;~g@O0O;Qf@jwk6jxJgB?g`fh#?V;oZRv9DT>BD=@EXT)zjiSIUYQ%?>wmFr@E z6aSFJmAAp`?B~RT*pDsP729AZYnm9wgF*5U%6s!UZzO)U*{&WxTOqhSZTjIQ=J(gc ztlh_Z7sJcpKLa11#2Ba#Q5g-d%guZ7eH*+apF}IUi>A@03p*xb-+Cd^VA|_0BFvm3ox+9BIq)Wc{Ze)Ip{Kt8Cz?Ql^zJ5MWzw_ox&d`vS%GSy2OUQwrXUZ zHFg(moI1wY80I3+NPO`X4LR`hw@->-%Y ze?RlTmwt4AKi9e$uY0EI;J7prMt*! 
z3l3%O4u02+L|&(jCy{YH@#Ov>UySzobn2j|1delUtcrY3ZTRExi?PUdS=8{3P#Zo% zZTRD(kcn)y;i+$~*M^S}cb*N+TW#hbM@XIPbh8e6-e~ni8EeU?P1jQ!-T_>Mmp?(j z&BV=$uQD5$NjqU+=s~`fw(;K~L!<^C;Q1$zfhwtsE@PK>8fBklUfLX&UiW(kvYc-f zKKkm+kmwyU-^;7{ozzjl9Hd=MO-t~^OC9DZ;NT8$Q3Os3@$VM+!IAFUJ$3KDfg2Uv zP}ggo5i63LBe5bYb-T(eWP8^pj~7rD&qjxo_}~RMZNRinjv08r$Rt_+T-`8} zxm9By=~`AmpS$%OX!2~3Gi#fut*T^idf0yNQnVs8lBuX2ZqEFQE#fSnGXn}jQsbBN zKeLwtUzw-uyHXoD#l#bO>4`RAt>^zfVe;OUUkJSf3YN1V`q`DY zVK>h^{0()oUi+AN=O80XJbaeZ9iB@&rI@(x2sq>WsF4`b2q6&-A!YcUOB5|7`eXtsR>f!=P2~O2KsJy=8nx( zTnXgJot{6apn|xt+3d^uyh_AZ!+7$gjy<$l!}D@(wuUuT&RAXWX>#kcwlvmO56{Sc zDsz_TTUperS;KSQ;lwucJ&s&SkB*sEl3?aaDxPs|?GXCPSBG>|qXSAVm>bv|KScJ7 z&}Sn!Z$|cPQ|}=cjJIJJx$wN|(1d~Hg0bHHGdW=N_s>Oi*;Ag^)O8L2F6-vOpTNi9 zkINk71XT36tYl!31kcOqr;wMRIH0-s4C9ks9B_5rcZmGhVZ-zEh zX;MB|en_QG=Xc&bPkkVANyA&Vqjz!sD_qX}{OmXQwhokh;O>-h1G~EEA5=ZB4TT1X zjr}09^7#*)3B=iDhSv3WgnsE#PHaX;1sA&9>>BE-Jk8QZ>05k>-NS|4bY3GIxU)P}aQ-*z@DhWSO2Y0{M#y%qNmQh<=bdAj)EHx(1_&kPp?+$URM-slIg_y8u>VvvtIKn zCQeSlhL%~S`ZF?>rl+7KFKbKWJ1=WX<|F*~hW7LIy~YJ?OZ_H^xsoxlZ+F*o$_9qU z9@jgU>A6RYT&ACvKkV*%0M7NpghF6n=ah{?GxXD1kH}}e{!*=Lcnco!7`E!S`eTcQ zjzxB@fIn2J13Mbw50A0tUxh#DHU-wuS6FAS!W(`$=I)aJP+jpK&bXnrg=gU3dJOU~ z)|k+AW{alIU2e!^NiHs}qbG*p@W8$#^Cv{K}_?#D+Ff~f&xhtJ~XFM-5 zq{zL+qVvp2MdwKl?Bkl~JT2%v`)oSTVRW9u>@WL9aSqs~^So{9Jcmu4$7<6jvNSqR zswp#|8=VkY+Q?(tqx1Zfu~<6KSyRW6w#s-`;@hvtP&X`vCp`%-YGJK-9j^T;=z5dT z^`1o6dk0-FRdl_mIVRGs@Kj5-*XMYD8gonv1k5=$GRJ^zj?K)mnK=fSW20@3JIpzX zoG*Hv)uuJaG;@w==yftjpMDm;XO3&lIm&x)Ka;xAnx|cFIL0$e%z0|e^LzkO0^okT@X%~7$E_%hj0VxS`4g`Cz zt|tVYIb*T+C1|Q^dgh=9D%z17a^RIxpP4;tSPlC_rm0t$vd;{WeMJ7sEyJF174i|j zY4^m@^RhIhOXNPgd?#o7TA$9GCGuGip7jFni7a-xGQG%b3TLWCUi0rdUtbL$z`wcu z?c>W1Z6VK|C4=3D4kI>R@qvqsxrt|Mko&6XPhuc8!Mo(Y$o`@il(gIA|90g6B;zeN!A|kK&~_Pc!5Pcdt}L&Vm)8c-RzTU(xBRFfn%oZtEVHg#b0wB z`(y_)fXLpWdz2y{=PPGWfN2k#pG1mWrEN(XY8a|`{BL+5#vIZsO-SbO(M zrS3yymXDkR1Fx!AhF)dPADKFj$OQA@+2Y@8cg6>blQmuU8H-+oKUkMTt_SNcb$$TxKkKmP;ZA_15^3e2WM4_{UX#*a-NTzfaN zMW#BaV?K0P%wAiJAGD3WwY5s9*rjxdEbqbgwG!LiC~)qBPDPIKL8sOD;=K!`cD>f2 zAdB3erf&E>u(Et5%_iJ80mnx50sJIubU(>H{3Q3Ft6KewpG3wWZHVq7&zGoc8ze`u zJ?~cg=r{G+^`X)qWA}^3g<)5#1~vvx)WBx!YWd{M7aRn!b1D4`LNZp_&&M*>mB590 z6I+%PXnD@CjlIBsv5n;dQ@Q@@=Xz{kM`_2heTf}R>_YRgxru%uxptkbgAu8QEu_^U z_A+qBJIS;~yW`8~Z%gNjLwC@(lgmvpL8t$GKO*?=sFAW%F^U z>i!#fR_q~%9hUvBCpVc@?_TUNR(qS#C&gaYm)<8e0W7`mAKyD)FE(f|ecJWDMr?i9 z26i;wNnRM{xuwO_<TR!VPq*9X67O`+y3^wWaPaGzx} zB(~!)HbZb6-nRgM5WbGLS>w6z50ObXBl}tMtrz(=)y#u&c9mf#j?U3P7`nWaZ2Kkd zTe9uXfys-=w)Sx^VBB^YR>tkq$DI^tWZVJUxSJVwGvf}JY`dISnB*t#Zs~b{MYU>$)Y#!l+UyH9s3CKR3YTKue4@lMb z=`Gk^<-DOEyMyf2!S22K!3r1m_A+)m4-_6-EpoPDr43+e)bt{<8EMX?d4PTeLY3D%gDa&=XoD{KqLLa|C2-~k^j=B z3!m1w`RV~V`&KnyJ+T#@xM)e;!F^NS=#2mTuR0eU_(SKS75g@pblq`?J*V!`KU6&* z&wFds_>IQCKQQnS?_$defABda{LwB)o!I|Yq6f=f*aBQ#in0Gs|9XE^Q^Wozym2gh zSqU~%fyH!c_er}?r~y3(c0ReU+sI*L?L(H&USt5-TMVE5i#DIV7H_>Pdy)P=xwEak z=xxW^hE>p1`{;jptS=v6ELp&VF6(EkGPd{?@f%BkRVi>;q_DuCC1;FSQp zDu7qXiv1VE3mv)#Uamgj)keDs^rekS*w~JYF-?bAkKB-jeS59aMN+J9rd&uSSoxXkCNGz2rIfvCqA@ zKypZ>1oEJBx#lbGeYd2C8FEhH@5b3<~mi#4<>nBC5~yJw12khN6n%yvCgXCi;eh`D$tE2mZ=08 z-TuBMqqmv5kvyMGZq^3YL|<}cES}g)KVt4J^sVbh_=KhnLq94YUm#;M^rKAlBS+u* z(J-wxo@d8m|F!E!q60ig%;?$3N1Z!?M>@}(W3D1Y${xEvi5N@vT+x@ZOnoU+P3?G; zeMfXRKkH@SinI}oYsRDKNz?GhU8n0wgF7T2Ru21>7u_cZo37}&c3z}k?@FDB-~RbC z7l>b@KTGe(LcbXZ|B{@%-Sr*RbRGW9`o8oq{=6C;{x!h=TKKHhw*mjb9nyyIBzZm; zJCxLOjkd#DzKx6heE)B(ztumv{PkDKv;SMyA9Q~3|JC((Hm=}D*4oQpp4QrXKe1=+ zxw~x@skQmYeMO(q`qRwy_jc6!6Z^uotbMUj{CBLszVu4*kE9UGE&7#%?*w3C*R4Df z6LNow?jPt&x01N20I}v)yni#X=FP;K2Z%KvOT7P%qv%{m44vx;`hU01g)Am<teEuAb{F34>(?5DY!Vw~?;J8E9mK&2}`OYA70 
zy}aNW(4%)f@MD8#{TjWibwEljXG40)4O61zhUq*na$7Yp&H@jz&&YolGIB2Oj@zXe zde<)H#6tF@<0sW0ExP{&Q{S3m=v&wv;~7IteG3}-XrzSZB^t#66!c#*zEyYtbv zt^@X>FDa)!N#8Q$3L{2Zi?`mj^eqK_&62gVk+o}~uh=^L#WvR*OV46Vx}F7WN`Z@| zXG!jr4&YM(d<^|63O=#)s~GUn^(*@5)~|FJ_2^fP@Us-|pSAR=-G+?OhJJLpde!_f zuWz*EHL)FLWkIw5;0UcAW#piJ?q}%ZHXE#WmD?{iO_ASE4_7xF18%YPod?l(47<0b z?^u0V`i`_=$H5WWie-mI?X7MAjBe@1v z&rH4FwNdQYXNd{PcVe@Jrq8T;I23fu)oa|QoA#}|)s}s$cU|fSn7i1CU-Q1{79VSS!}*-rY-m-{);U*(rssrU1OqsbOHHa zjXZbq{%LH+Pl2OMC-IWlj4Ye+7Uunu5Qn4|nb?m}c7iMhozRX7-a|JP$jCKBC<^{gqO*Gm~pKHWe zMz{TKTiepEw13!&QSanG z@tu2!IrIW6??SQjkJSAlUh+tK$k(WS92q|!_$_3PQhWAA`VqT)5cqmNjOcWoO#edH zr>Ps4=}hTp{2(%WJ2dJu=~~7lF|%nVj=fR2>l=ZE(6zj?6T0?6*PFWOIw=t1zR`TW*f0FPi(6Ii556d^t<{$H?qoG4|HW zobTV8?aMpwGT+vy`Tp2!U+^yQw(z|fd);Q>a5=HpS*%InN%q+5YOYy4N$k=oCSI_) zpAf#U$6edj-Yw=hWAT&q-19Ov;U}L}+_eLKXz`Iz>?d!Sao1<@%jz-Lj8lPsMBDv` zn&T9nvx9i(cT$pT2Z8?xsjb z{@hN^V43}0#4FJ4Z{wc*%!I^Y2X((hR17x$6g>tToE&G4$jD9jN_4+O#Pmy8G1=(# z1p~>K%jaY2SPi~LHR?u-$R}zN}EtN~0 zwfcq7_h?Ix!v=?0RvfmC);5`aiBB&9*!C5NeVXUwnP`3}^DBdX#LpzM+;M1zxaQh@ zj`&(PvDd_~hQ|?mE$4|#iM=kxzA-)B-82pumEgA#-;KnxitonaaTbKuu1{F?d+^&djN9`Gdc#}Z&7yfJx(#j8^| zx0j^LAg|eYbu>@#pflO`;|bQmz2w5}=Z+mW%$k+J2RsJ*XQ^dn-l48Wf7N%GhZ<|#ZO1)i1TG-NS3k1O)$VCHt5d$IP5IPP8EesL#roWfl1 zV9rI%y^x%itEZ`i-XT2hD!1z0YgOn=)tC)tT-&3tvs{sAiw|6@9l}=9~|k zeg7)PoF|)oN8`2txHf`^$R1VKRWHRSgjYEjYcBotvcnBPw}B>7O`7nW%bq7OL1Gu% z0X>-XtLI&4MK5WEPquQtPH@TD=kc=c<-EdNXhPy|EcsIQ9HECaU~?8;BX|h`BhJ@d zgqLV~XpN$Wmuz^^qSoLE?pZu1mYuGh`xd|LT6QsS(e>Ir#=a8E7NcF|_D@!|{mGFj z>-__iy0Kl$4(%h)j2x&@Mt&%Oi}T@a5oDMpqulCNo}<-K_RbQKK=I&15FeJuu;X z{U_~Pt@o@w7<}{9&f@pq>}WnQ`H&Lz26C(aJRq71z9YuwudYI>$Y+yR+h& zw$6NT@gR6e#V#pwVljHXHi$Db#D&B$W|6H0ACea%o$EUuPT%N*)_u^r@X6LA+4FqR zyhqlu-Bcf6=dCtOk1}4_qTag zmxhX0XNKIXzjIqvRldJ!^_8K#t?B^}G%xvMs=?F0@EbeD0m%!dtxXRt_a?Uo{8){o$R(+zv2aR4zq|^yVSTP{`U>q``7X3>-+LQM9Y(!>}mAXm#yBC zDK8b*_S=DfL0p@uhl!pkI>uq>PjrkYfP+p?ml4q8Qafz+1MCXX_5;Nn=dZ9^zRvUSq$JhKn5}rR&A?A=2EGiXN#J4G z3{>Pf56|g#!{jv9Z*s>j>=UK3*9|n<`b|n&t;k=NO!%DHR*<%&9nsIlRw!+WO`*5` z&)G)N|9Qu*|9hF&U#tInpaHvGPHfS^D7)Oh(66jnnd52B`D*4Iy#*c|NFcvdxp$CRxSl6Ft^(^MV`nMd6jYad1n!y1n}lo6KW^$(U8+q)P(&aYmBFD zANbJH416fnI!^Rj6-!KcIF|kiUFrIx^lSCE+4Ki|T3=nEz84$ullHR?dq0J*E=N(w zjYoZu^vvNoSxR2c4g4N7a(K=S{1!jnKx6-D*_J&oYv|wR9E3e?@=H5<{C5&J`8NBb zJ?Vzx==R_7?*CNWq}YBg^5Zcc%NHkpye#5-E*&>{X+NIm zGj<gfs-DQXDpBnR%xk}8MTVw_JvuDASaQT*!Cb?F~HO`FGl`rTYF283=IG+3nV!u(a z&uQ4_@N1KEuKIRu#eL*ZNWRH?wNe>N&RlwQNZk{KL+hTT&bG)n!G|OsP5dZHfp2`9 znn_=x&qY)8afp4xlTjEhztR|2A^g8QgPd5y3&Ry37vOK%bvwBXn&N?N`MH8{)j2in z`kd-7k1q^Y#XG_-V5-A

+#) z52^c-L*|(Zo{|H9VD0Ujt8A$HST%5*(XYt^e9OP9!?=-%4erojA{_SaCZ&?vHYcKs~ zR+7{F#+nS5vTEjp?A8Y1e-dLpV!LsM#B=qmTxeU*sZm}+?W;LCA^m-UZ-dg*ioMoz zCpz<$l4Etn^Sa)rx@xr+U|y(%S3cu@e(?9m`}H_@NK(ROj|(1*8u3;xhO$D34yXC6 z9CdR3=2n?EV|tC;hwxBYwAb5=i?oD*Lsh-q?P3USS)gEkT#OY*&Hof zTQM-=N58ZW-N?njKB%{OgfVO&zQ==qQEFNSmqS0~i}U{eP4b8KU_?ON_V%011c4AbaS8wFmvzZR@OA_sdTIm=8MHFZa2Ix-0j+_3TXiQ~PB9%t^v7K`rrYWXhMw!ED&n z(gGJIbk)BTkNiBKV5jsawPIe-lyB|WGN|r#c$3&}?=K<`t#fkVUGnRRKRS*4ckg~& zaOhu#r`677e)-N39iwjE*l?72xUpeNE|Leq6T0JgbpDCrf#mg-v>G0He-BKk%Fdyt|ggjyhEET-LCLn4j&&llDT++k9H(Uk9~2o7rxFq5&dfSO)odBB4_GmoQHdB0O+8L?gP z?1=(+i;RKin)Y71Zsy+4z4ACXEomR_&Rc^m2^^R3e=KWjagaKRr^!<-HI0ADSo4XM z5Iw@qn|v?y(6g-3O{=IOW#;YwEZ{#S2K@ieg$wmE2eBo4uo1Q=VE^V^rnZtib_ML4 z@RQ;{MYe0x+?Cp&u9^6f$)7d&NzSv#a28L&Ze;Nk@dsy}SkYPihsf-T??&p~f8c%b zCD2wY_QR{`t1q78`xHE9m^mNeHx}RcD)Sjk`_b}1Vj6xBg>xe8D+}0D%GpzDfN>Qt zt}FUckLyOI@-NLB`zOtyd&wIcy}ygSBUa8u;q8*I z$dY4H+0%pOp8kuM$tCR=F}$38Ikz%nxa6*qecKCfB`5acBgnmFj~9enSoeqFEl0>v zDf~zqMy^b9O3Rw@tQwS84lc=^o%bZ|NZZneB}>27M<3SxqwrCwLm7u}strD^_l;ao zjSQvBeWR30ksG3AihrctC2}3PA_-Y!34K~JMI&p^8zqC(@w~__@=o;gB7;Qh`j(y_ zDRKf=7rw-Je2NM9O$YkBoKA9C8b0_;Y`ek9Pj}bV-2zWMf{j4tw}d{0hI@_Qy!ROQ znA1hDCr4>zCiM@?7?a3H^VuIxE_642lXfC|$Q7DOzS1IRC~}hD5o*`r4RqEbaO>WrLW^P#a@*~f>^S*liLVbiW zk+;9HjrX(QX~?O6Q9Z7wsL2|N!@e4WjyqoZbaZ@}=PyOaCy}LtrMrx}Kcn5@-_oyD zgV;a%>4$RRopJ_9g&u@PyO!MrT~CL;OYoIXBR7##w{QK4_ab|wRQ^DZSGrX1O!mq}z<;*>UiKiP?(MUVZNVb7 z&bbmDmhIX!`rbwwUis)8{I&&0)qN*F{#%L*|it)xco__zhk! zvTJgO_}3>P;}|vDB8?J78g-hY6#dg2!RMkhZi z!e=w0qa_iaIX)G8J5r0HAD^dbue2zu170!vYlY;v;(6ggb+muskmA4aaq*!gT;Ig? zGUkn4Wb8{{+T}Cal6o2G0jsS?Xe-G5QtX~(ql(YDBE^SxJpXdT*Piz_oPGYKjBl^6 zX^?hQ+OI$S)N2GJn$)x@TsVM)?@OK?)>bG<0Rvd zeJQxG?|s&OrlSoUc%$Nz1~E3#wPc_5z(+jxywmW9XkMc4>%vcfi#5L!4^rP*{xi27 ztKcttO#Z_Bj{Mx6_s}``i||m!DYX<@1m5Ie_VO)pyG~>}dk**2?1!!xIozd2k%v0> zQa|YM0ChuKCh@=Q*YLLRfAZhTeZSY)v-WpE^IgmCVJ+Xyn!by*J)JdPLW~USe~+>L zr!)Tjysx)UE`9PBrXJ=0aXyjN{oo4&d1nys4yKJEv^5mIFpONn{p$X}yn5}AkxFBK zbY_Io8Y}VyKU(9sJZ*WgZbQmPCV?PRl5i4`P|Ps`vt=AlBL+$ z9$4;v-gosh-Bz;TacZhKl*PG9+rivnn;P<+%HsH}mV+hyE>Tkg)6j_mquk+X*iw8Y z*kZ7y_)3V^6xskM<+P_|qxU2z;k6bL-MZh+ppBe zmbvscu{&g=Z=Ao?9lpT(9w)iGzpl>6r{=5=I?a4RT?}q#Xa1AA_z1B+!L#m4kCHZN z(=6;2!?2Z-%ls5Pa6UTb0_1@pvG!7X#EDLM&kV+0T9}tc-In&L@H*gi3;CT(XDKIE zqkqUeGMR_Ww|bDW{pa^Y>PPb42tJ$B4d8-%wHv6^}ilc4>a;Q ziZA>X=$F{fnHTu|?tsaYO7B%pNbK>c5y-XZYLXAR*Zlq*+5QjA?-|;~o^VjvHLmU` za=@7zl?~cj_s%xz{IO=ow=?G~=InaO-4y>-a#WEIS5;jJqv-3a_eSbPZV`NWKa86s zFcx~?G=xD*$lA!*$b0JIHj(?Fts{JYjT*+4$k!ftVLI})$l_TaqXUTyKFE;4Mc=mL z>7_8Wz?lE<`l}o4bMw>w8^ocCJ)?}cqDN_aQ-X3r>|D@UW$s&RWuL7m0|r8OLTkIQ z9c^+dCq(CbI&NdbDr(S78>k$}AI7z0W%dq7%Km{*EYj=7tWlJqwdLiLOI!)W%4Sk; zhFZlRKc(Nh3z)YgE8~mlr{;6(W{Pe*Ey10)51GKyZ9R#~1cAFNo4HLR1{K{gC0jY5 z?JAvIO#Fb*mIiOc)<~{Hche$;m;#43JxApn0r<#7$7;<$_L!`UUk1E_6|5i8Eo<37 z&@DaKt9&=3r(v(E!d~@{)UcBI>pI5G-iA#>loPw*xlbUYwOYD^tRL3wsuvBrXRFQb z85~`p+ZsjJ=+;fl=bzwt(Mx*SJI&{1pBG+shVfavtdM%rdmUvzip8%UhHlEpg)gu# z9m?5rVnIp=DkqNc4INb0ys}n@r%US%&mgtg-hwuOc~kokxA;yf3b6g?Fb2N(E5^qI z>k{fFJb;ca=ipt_$dzo6LJK7c z?#l1~V(rWlHKBz6WBt^~-h^yZ0A1e3TKYTQkvRz+?fu0z_%yV1vwP=0_;%nIMjK)) z2|^#NiW;D|6moxCy!fbd1iI(Qz@5aM+1Gs8e?+!Fdv~OMCG}xG3!2nM zD&wCvX|h=A5kr&3&}1<*S)3M{R0>U&LX*20tE`(N?-}dn8OEEbD2p$kcSn!oEcD}I zZA@crw6iwA=l0{Qjp?k73g+&2xTbTjN$fEBtc`N^1CjmxO7Wz8bw%g2=c;FxtP*)t zNy=9r@0|86YK`%Knp%?~>*qM_FJi3ajI*3^=3^FV!0bPzX`GCyd!?@9t^QsuPeO_M90H0$|JqL|kz~1+SIsWK6 zFVV3eD{SLGtG}BOILH1XbGe*395~Ay65(awu{f1)>>{!F(!3Y?=1USg8oeIxz7CLw$cL-*J_j%r`N2bu6uFnqJ&X5cy%C`D!Rd*|XgH-EyMUp0AVrpVOj@JEgrKEVIF8UK0_|7(>d 
zp(Ag9W&1hwaiOQOQH7kTLB59P94#v5n1Z`=bll4sZ(sG0R6+i0l_M`$}YtViWiwv6biPL)tH+eQ8T*)RlvMYz(}P93onIdFUuKe2nKs zcN4p2mB>8sT(46rDgO7kiB-_&B>MazeTr<>g5CZIGPL+ko5?StD(f1a=3b*i)qSj+ ziQyC<>rn^#7rKW_U~;^Oe5J}_6@6a(LQA$#`wP9#^Ig72Bdcr~oEN-?@9*-Bj_7&S z*(7?B2R%r5RAeykGDms04ESYwl)R8IO^E@uxCn#fLk@&>URSvbV!5}MHUXpxW_xASNYmT1Xv%N2w~h9q@wUXon}=Gu0&l@XajKHH zg=e!_i@oz#nZD8d556P0?o-^ai;r(U{eSl!d{2rp(H>W0 zy<=UQ^RklEwX7Livr@;oFWMyrh(7E-75$tSG)+uEXCt^D)krNfo)aI5LZ4mBcIkB( zVx5(exJTVCcda{oGkUd*x0-P^4p1h&$T)Y1O#%CvT)$yjmtNz5xVf=%?HhUL>yLml znNK92I5YC8i>-PO^7CeFG+LWO_tAK<4`oBUz;C#!s?&3p518fpDs!0Q329??HMHV? z)3TiUvq9+Cg+DSY%fAtPV|Sq)=1rZL=cOL%5!SDFB6HmwS0{VPh*u)@%AZ#}Z;v;z z&UPKmSL*I7qHbP3{%m5My~sZCw@=NRhVGkiM3+$(FQ8s2c}lm(9~n?rO}pBE8P`>= zA7jo}(MNwiKf57PAFB_1T?b=bzhvFz)o};TJL3X>oTP+LI{OFCVcR%1RUt0YfsaNB z_(YFkZInM@tUW*Wl1P=y-(KY4ytQ%yAACcwh`MSzzET5gI3Ff;B3i(M@UEfYukBvq zkQjH+30(2J1#eZt*uwO7+n&#w*Y=3gbrPNR<|Jju2tI;O_Z`ZHL~=YzJCW6nJP&nJ z1fQe8r(9DXDrl;5s1+;WU5EayAUsQW&NXukJIvMOv|9*HXXMmo-mUn9`wd%5aKD@> zN(o%E4n4VLT4j9tfLX2JfwhpQC1g#4S0zbb7VWHpU&*+wXM(P*N#ZZ2M7mB-LWi=B za)=ZAPu8K@uZPZp^HeJt1LHv#BJ(4IzY7DU$>>39cb7h;0H`odj!(xPYh$M(uuz83bwR($C`M}N_-)J(q$Tl@aYV+fh4*~MBUPcQz4`1!R@teM`*n#E2P?4&-< zDfWe8ZF%S%^LofT{W)+9F1Gms6BM8L`tD_~-pBg*1lj86(?X5xjhjBrDDe%Zf6{;T z=o4#FY~#DaG z;ra=-h)%vqm>fhb{+0n?C^m{sR8lscY?CgjR38uWj&ek?$57?}Aa@-EF+XFp@l)`5DN+PW;uJ1NfV< zzm^GK-Y3Il=zR2^hOcHX+)a7U8SUQT^L|W!JJH=wWxkWh3nqJ$(D9^8@#DDy{WSA0 ze3tNxvJYEhVdQ&FfBMY)JQdklzEkD9Ol8q>zFQA3C*SVnTahWTg=yEITa8U>IX}0|{85e#Eeefe^qbUm%axJ{Jm>~CuRy=&CBZeQc9Y2M{@ zwH`um*S#LzPQI1C2+aSE4MFS;9@cGTX;EHQjBU0PyMtLq;M;kzvbHr>*(5xkck$e5 z0nYpJ)1K!7bb-KTIr!y?114#%rTCIBY=_6s1{ce*LG0sQHolK?eIdS&72JJlL#@L20)x+qXytQ-RY{T~F!S-jKRV(pid+B*!}iseq~91(I?%1_*7#ZFk=&#BJe_Iy0m;K z_7}#nn7%S+En=s1Ka8wq*eTm$!~Q^4U!Op%l5;D1XVJBak8*aJ|M?Q5zV*~6&$7sW ztkqjt`KHucWU1GEak$>e)Z=EIWfCJKcx0}F`M^=^6tZ{8zquw}$W+QQhj5ms_%LqQ zX-V;q@LxnPh5V`cFM89!i8N)i2N^}?LFT}1nFH~2jW!1!{9Gr{p4A>}9k-eh{cLPk zI_*XHxxNy@e$O0y9rO9TQJ2i86-FC?kqH;^?M(wVDzMstt#_y0S^pCAJ`2B*Y;1-* zIqP}HFlT+Hrd!c*qVv$HMz_Jsde6IJsxii*8{O-QTHq}f%0BNY!B@|ge5I#ESz2Mgph55=FcEtT_#=KZ zzsrUPOfS&&`s4HBOT(I*%%3OWn0_u8B*Qu4`#i(iCB>r8qKjaiN!Q0Q^? zeXC2WUNLm|S+1oe&|{MpyGJTH^QJ!Br#pB7{mMURzj=g`D{v5)j69;BiT%)gwZWGC z@Xf+gi+*DpWey05vK%glg+k9 z`=zZ8z89I-3ilk~9%)B6?dL@a*s8F_oAP?n(EOg;4u&>qV;_xNs;E;@H;N^mB^X2Gx9i^N_1-sZ-bU%~tH^^8G>N zcGJfgyIzhtAC9rtl3zmiVcrYuu6Q230dl|KPGGxkr{2c8zikwHH103Isuvl!zrTPv zP8D0Rb(}WFY5P0=srvc|nr%rtNwkytx7ewZSlz|Q>PW;r(xhg zshzbsP+w)=Yu`uyD`0p&eUN+6amsy*=;+P>%b#0dsl=m4W^Fic#-G#XjKiNw>gYHp^}l^Y2gS$hJX_tpU))tm>NM!$gn{gIK%S|2n% zll?h`_3{F?f1y{mLvuC%*uRI$HUL`}a2MTA5`4sm`$L<*U`*Qk_d{jR%lr5B_cq?! 
z4u>{B!+RO~0saNA;J=eKpFQ?pFSK99FSpM}5WSA+BPjYvuf>ON3p8@eI(v&Zu24IV z`CR?=CL8m&MVmijVps=j4x+aPR)LRhn(BV?x|YiH3mU3^S19(G2X3|1t#hxruEj|_ zYch7Kk3I}d`4E^Nn~bgjn)LZJc>KZGF%>oLrIM`rD)#0*_PB*<@HVXZQEu#E7267X zg)Z1?D(_OBT0g8}o1^y^j9cJo@;s6QCXkFx$)IyRc3B5X!*`iSZH_baxv}G$y^6VM z=l&jJj@>72%KZ`b&9b?c?(eQI<$Q*P?d&%t=sn6Gw=J@@gvwH{LGHw7Onj$`8ME+8 z&PjFaToTh`bH455-XZ*eqSlvmlrQO~9^nI}jGVozqPLbj-0NS*SM9gxH8uQ6_CEkT zmDfW#AE1k#A-GOnewkOX15c!#@)%`O>buzf+1Ijc@toPK#`*88e6yP~mIO``KP+(D zOnHg9IdL!YZ22H`zA@;2$rrRNmh+s*MT73D$MDOs`W8bUHT}g;*w?Z&uK2}5 zZ^+SN^H)AlsqFF)D8P@w6}y~36s zknogE@RZ^k-vkaffx{kfIMd)MJqC_9;m2)#FEr{ogGQarmiZ`s5q_pg!*M-U;p2at z!VU&(g!c#6`pJ0?=h8_nT1U7RF*r>op zctm+tW;}EAEU`?Ta>#_WCm)Driq)r~KblQ`94WSkkuHpx%plk&8Z zS6NV`o&Q!@ptg)z?#io?b7O|rOc&lfkf8*vu#_=~oH`2FAV+Jy*4g;E--c}WF#h5@ zV#Z9h%8<1dzHl2pk3tKD=Mh?1R2!y+f;T2Dw9-BkZ_Y#;UpMe(tl`U?w@;6Qip*ED z|CEk5R@yiVyb(L_!^#y?+a4V^O8o3a2B;KS8J~wX^fxjd!K-%oJ7j`Iso;wE@^9rI z8raWYU;K$gwf zvSQ}5hOKv%dwX?!KeK>rn@D7v@P~JKR znajd+&^9>w96_P57X7-W$=^Ych?#?{50?RHJq6bQJ1zs1W`AX>k^XI*zq!IT(#tPosP; zB`CU2@N8x29>%^0I^SmSDj%F9OjQG> zdw}P9@Fsl_x(~*D{Caqf4Tf)T0&>(ZU3v3>Wr#LIwAYSKBvtGs=(Yo_HPPEHU~jkj z_U;q96UDdJ0X|exf9FZj_qc+xhl##H_A#|CypMgA7_W2Px89*mbKjD-MNeevhc?h( zlUEY`PzSU{g@398-&RD(&ntj;U7!4{?|)+lz%}p6fpJaw8@fDm&oglCr<6YvuKh*$ zI`me51}{vy^SV=6`+s?!f*-&>Xq^>@E`rb7!8_|SlLq{PciW8bK8g5F_Q;>|{x=cd z{uG?t%rmp?nK8=RSGn&n=xL-LbVF~b(HHUlhkWx3o{6vXUfMP5>cGbRbE9s~p;`O9 zasA$4W$nKi*QSoPTvgUS%QNDmT5OM3Ap0SUEmzjA=9#rGUsA4)XF`9S#mJM)dFJ^e zCOg%d(mI`%b*1jsb+CKml(kPpe4h?%mm1%Lr@=ORQEm#pW^R1VijgJKi6IUcb0>Yv zIuw!^n-=jMJ5!rAf6&{!gf{0AIXfbfPDlEQ9X?^Kv7NS-F|udfjjYEr(b?knm#2K9d{0pv8x>^` zzZial`Ni^6`8hW#HPv8UML@p6MR~-ItD!!njo#IO;aY3i&S~&H+-?FzuPj9!g*K@Y(vO)MK z*cw8$L*BRg`@3tF=>Gn-<9j-u6?r((Kc9Gc)2`mlJXT~Vxg8n0tn9B>)`osaY*`_? zrPtV7a^PFwUxbg5*jV8K0<&~Jtj!X)7Eq(}0IE~TnGHMxZ=k;#oSED1us29sGUw8V zzx@b5w8HE2yonKhU>o%QJVSqEFKn~=`-;y${izs_4#uQIM)^l6?~U~LHOkAL_a1!_ z+AU+yXtyLaoOOnCPNB7iH>m}^@t5g_|zXT=C4Cu8)mLa zi@f$-#@vrL0-t1!oLUiOzcT#1E3m)z?N>(me^CCk{mSAOej8&5Tz7q*M{;x5Lc5Ih zVFzFS{tsvv&#otjfwZ0QWvFr5*EHt$gW##M*`n)C{J;oqqD!CL(1)tg%=5yYJy&>>|v+qoULCjP8T-H=NXAv0Z z9E2Fh>*zj@r{w4LjL*+IR_JQ^7JEPTq#n`v_~tI@^J$Yf8|W6c@v!}~r#v5@wzo8& zDHDe|_hxO4P0@7Zq92{Xy~2IK5uTrI(4>cDet7l?`3?fK-9g_v-!5oL$M4BQpRfyT zmiX3p=#SL>8herGKIB>C_|nkZ?4aB;jAx*@?&C&$rAc?yn+-eT&1#Fpt#&L>8fvnD zQ7qR9itcks43p@<+1p#3*lccR99{6G(Ed%w@JEwa--)XVdS-DiHoCZcW$94nN0Z07 zPU*G#3}4F4qj+8qJcib}@-Ct5kmX9=0dn7oj4uA>rcbB9Z0~j#=b5+;$-eA}@#ykp zoi1OxN3jQeT9}8QzoxU6@_(P%+bGXfXJOtB`x1OJutO5VoX?)1DoNQYxUm@eu$HoJ zBOXl745_o<@$0H|J?2Yd5s~>yBkNG0$C2Oe#lBg|cT*(qju9*7CRS{qnDF(q`y@YE zhr%C1gV3ea?D_`0$3Wi<0WNW)iG|8T2bo?FbR7?E7GK5+!)H|Teu~fNtv&Gi^g+(_ zj(m5c@$PrLlYG56%CZ;t&*v@ia%SwU@DWzLn2cYY;Duf0f&Lzbh6D^9Q7X2D0P>5* zx8cJm<%AYog6_n8&O6PAv12@OgWSlRd;^;NlK)Bi3j(u>}8B zj8mQq9tj?bUvR)qo`e`>YQUaEUJfN&bOPcRv6jAg41C-M{jUTcR~Yzs5n~j*k^YIU z--mCEwvG0NF(c5P)*k!ikJQ~Wuk7F~j!MQ>!vY{0#kZon}H|d3}eGTqM zLYrR(Ppz~g4V#0M)!uXc3*HOtnvwT=z`a`TyTGj>&=k>E$3s)3{5#N3ldj0OH3vhR zpQCI%YibN@DvmWJ&s+Zy+WbSF*BEPRxv{2djY1Jc?ML`F&t<0dqWok=iEIMn(rvmdw z`f@6TV{atZMr4^1WSMSaZ>nr7xKDl{lm9w!r4uR2}_uO#J~^h|C{Ajt{~AbVlfn zI%zk6-e{~(WQ1gmPq?@{f;VDK30>UmQPwVjhZeqg4EZ^hpd&oa+{*fwSQ_D_TUj%z z!_XfY-~E#B)asbo@~!!uNuz|9K1Cg(uhiB&Htx7IJ%;U0J%(+O#IP+#KVikYTlai_ z_h|N}CGg-L;Hu@EAqGn|a?Z5jYn)O$NXuho&07^fpDKP0;*TNUxtWh5i6=DtJXB}c zAEO%BR5K@TnG@s>ay|{1537j1?kI8oM~GP(ZN#>WH|A!Up$|sJ=$9AEZOo12#X1QM zIZ7OV3UMyZh?pp+^vVwy`~qBL(4ox;*8MLxY^IMDP`4jrMa$@8Wo!_?Vt?&Ep;^RE5n8uP5(ob)Gtn zhyB@69bb0=`^el{W?p{v4r33!1E0x74{3H@!O@=`2sQTcBG6Ei_7^NKe-oXYM*9`` zB;f^`;T7irTjBj2#12{WWmPCCAFc)W+`JckOyH5QUL`PqO;`nI$1|TM-_r#jhCkC% 
zjqf-W+MLEakAaub?8WDr+wKeCW9r?7SJYt}&gEGGOf_B$-bi>Y;fu8K_DH__a5>3u zAIYJL6JfIN&18jGt=705^gC6n(%DyPNErDYjYuMCJ{cqUTH$;Jzoi*G~SCgUh zBMr|)(bY`Wb$ZRv;yzuCQT}PlpQfuZ$_pQ2#dp(hdLaMijrjU@81;USdbK=d*c<2J zZy`2N@wJHNXQAaoE;80ak=oKxt27j_4n%f)YCp2n0=@sKIoSA5`FwzPcvrXW5qX3+LF4`scRc{7O`Jtg7XqjZuM#V zG0)l}e4!*>unoM>e2Ii-!WYWRp5SFq@C=Hsv-J*Aa;{(v1(5F~o(&pYcK;gW9cXc% zPZR45eb6qAmyzeJZFMO&H~PAlafohF_V>-alfKKCL{E70b>gSj-!H-RN$?tLb_;vS z7WR@Y@NizkHh8XnOf~}#w`lZGx2IMFbU!Ayqv))DOt(YJJ_KIJuEJ)qPifeEd2~Of zdCW8NMn6BMt#jOceoV(Q;8Cl92|hONDkbR7j>uEYb8DVr8?^FV@)RHZ*~!Mr^x0YX zxoKx2u+IdQIa&R36szQRk{reAc}l}*BS-OQ{F%JugK`%LY?a{aaY~E#O6*GHr_Vyy zA-)6NEA{x_W@NfGtc4>IL&RK~^OtJiq^Em$ZaMGW*4=S8Pi;zfEy}>(&^m^qhVU5N1NdQh@WYtF zdbWP&KF6_(JUh_36xrci*X4L)>{dUBD(1(`A7T1IBq7h9sZHUHOuwD8!TXYS8Ug@y)^3WrJP@?j<*n6I&H}A1NzzSMce|70`JHHUZ9b>z4y!fo`k% z3-z65tJTWjzp2}Jgx{5Wu~`e9HFTb( zV#~SgO5K*Tc9^p1Y<8RZ+PK1Yn|o z6EwXEn(l$7t3#BWg|v4jKJ8rnBc3wmxk;n>I-j=hvk2aUxfD9~05CtzhsL4j4{EpC zOpNmPQ2sRD!zeFvB)Yyp?DctV_7`4B$c z3!l-(KIFB^>Bu~$oGx|~Yq`uv!}7f3i8aefp4c{v9IpFXDZXRzhOgD+2wyAWr~Vkt z*9smaGT+I@LplxpO7%0osP)4~M%MqFQU4xo?TY?i*U3L6IRh3LzFhFR9rwEOzW5Yh zF2)pMJniV>k=-{5kLuxH^W~!d{d~E^?=p%nS3COc6lF}ei~jtP_RY2Bd?dWKwjzi0 zF1r~Vxd~jk5&e&mV}qQEBF}w>uHhcOHS5|+yt2p-kMS(J4~vdpnjMJTtk`dYZ81x7 z1Gd!M9~<(o;DXA&+-B%(ME*7B*s5DGd3Q#xlmJ^Sxs8O7IVkRo3o%*5&&Yxyw5CX!ga?GG<3~8Pe4Ijb^-=jMvL}0~Q~e z{|z3r#Tc)Z2OYzBO&?SLlkh}^`05aIDt3Vb{C4Ek1jUMG6W z^nxHbwMlZ1n7J;GZXnkMzxG1pK=OaAxf=RN{*R)O?2)0?JnyKU+<|03jgR2mYmd-H zi*L?3@gK%_Z5IB+L;PnG`pd|;0_>MLMPC#Dakl|eEB~?m6L1JP3JjZj;N`^r(&wwB z%TzZ3>(h9V#riko@(k-i1eU|0k|xBoKf&+X5RW57L zHXOm%I&3(8#&61Db@1pOY&Z#;T~X(yM==g2?YdO!0jttNJC3s{<=TU@x z-h<8q|GnUCs?t(qoXJ;g=sb!zo6nplAM+G#oE}5Zu^c@I{9Vx0bG-Yd=o~l`hB19a zd*^~c4&3#jz9%j@kc*a7?wO zTxrFxkzWzNaxC~IdbCijgP1KRyl5OeX*|3s=U*)241hvM!(sd(M1~QcGLccdsbTp= zCAUcm@kd+XhmYcu)PtNOapQ@|G?J%U%BMafGEME(9}jl561NMbj-46H^RG0JYUfOw9E}HOX3^tjIuM(G%!C#GWR9W!ft$eC6+jC5N9CRY~9FKWkV`ErZxQxKWl6wtJr%lUHaW6 zLmgwL4t2O%UY-;;e&tr>mL}WKg@vr2A=E8-zf|6dT_Ou#_8D81MdF9hj1QUMhuE0i z;7F0>9Dz2Aefc)p7u~xF6M@CESA`n!GmICzcAM~o=%h^f$ZL@g#5Yp(o6?S#a&H(m z4)>6-jpI!AYS}~2RW1sAq2pT{d#K0_o#f)%$v!%ZxsqHJaYn9+Huh1cwvXy^#W%sp z{(3_l58thhqBopv@PY79eSG5rz5G_npQbl7$_pMYW**#zJ|LRE$HUQNe{eN2CgK%i zoTk5rjI%0YoK^4(;`5=8Q@1}4Vw~_neRw;sD^xa{K6%ez-#J&EZk^F5tNaq^hz{3B z&Hz`=uR0B`3(o>q$y4wY?QFa=-mjlb4kF|n(_Zld&R$BzUm-`}hi-SEGhw`ib0x3OB637(`IT(Yb}z9ndYcLU zX_hu?$p03tt@~)J^;B%GRu^%p%i(#&hYq@fZ>p{@z1UKZ3msmxdhVp28hjhIdWZ{O zPCojghn4|Bdb_Iy`ZYz~y+!#HC3)d`Xumm@!%u|gp`9~>qm5;Pz@J=Gzh$m9 zS(NrsL%eGE^YVGI?Y zoj!(#&oYLK8G{zzVZp_FJ^>eNm4*+pbiMuadpUoDa+16C2ss6Nml1CTPmo6LPtg;l zgRd>bf*eK{`5HdBvJYPmogmlR!8Ebm*`uEuSvktp@7zda-GlA;G@J2WW)4;HTZ(*k zroQ~x=t~0r0MeH%>B}_Eur&K}@5)`B8#f;EUHZq!uGqfa=xpFy#2f4p4xaQ z``vdB`QQ2c5IO&M^3MNN`gOV=k=@T)_ZK6xXG`9VgT%u##(DImia5TB=)@NuET2{# zgqHCvaV&Y0gh#OChR7tg%v?8C8&VPHDm=qFDzD@0+Q|BNwiUi)7Wq5e&;YaEHj95z z<^b_tMZ|l3pL)%2`t`-KhWqse8>{danb^aN$tNG9=jnz*mzEo`;^`DDKe3dgL=YfC!+viEnHu_x+KN*QH1LZb6YVaF- z;Ws*1V_T}P@@=U-Z`u~ec|D;Ga*i0kef%7DEq;~Q%Z;J!#C9TsY$&w7{|Wo(r~8Db zB#!?A=ofV~#}iL=9cRA456iyi!Iov(fCCnpPV(i59O#KxHhISxdt{8Iti|ptv6EJu z^GGfL&Ida}ULWB%0t=L-*0uCi<~znR-+|ivJTvay3onQ*1YEPv{lpjIf))A?q1o6_Q5)J5nb9Vtd+v zykxaKWypOL+f$%=YhD}qyx#;Sx6;lZyWvTA=1JjZCDf9hC$?WyFyGS}&u z^vL1|ZC#81>bjd?ftBeS`wVle;ddwc_zAAy1(Xq-_Hf_qnA;x0W#jD0cl=ZxX36z0YmCC1*n_^xT_TN7qL3w+Q;|K-b| z2X)W`9iKaZx6HkJ5931L8|cEuJ3y`iGnfBieBqLnf?(31g5a+%AqN+6Qy$5slU5>U zpG;BF!AxZ?1a`mVeLM74>;hgxkEY_MY0l@_F|U&*6#Ha+2^QTqPz-@<(tKln4A8L&O){UB!s^f(41-^U>54?^dv%lsV+ zquIIs4jIA3tK5=5UkC2+9|-%tGO@eOn^r*cp3xcJVS z_{O1T?{zrY&+)e{SAyr0!{o$-{2&E4$+;d!CTnY=rE{TvzJf{}foGK#cWGgZo0v^a 
zZhcm#CtiF_O#E|O_I)e8@Sw|;Tp8lKEV#y*fdk@Ln?;`+d3LbZKBixM)d*vr#71TI zwG){^c)Ar4`X|}j#a^ZBpLBcG-OQjRX3(eVf42pz zOF75%&O+h^`@_4<0`D{R@m%eIXY1n|Mj!uG7{?2Y8OQ%G82=y&j6eG;Fuv2#a&+JD zh9%_1PyE~!Jb9H8405)?smqn%2bw%AJ{{+2=MW5j<76XyM6fQ7{Ns!7L~7%-l=PN70F^Jaaxyo)$U3#OVqO|5(Xh(Ft9PU*#et3;8qHHv+8V zPS)@Va&WUgN<-CymWIaLf}EBS3~oqomf?4)pvwg+Yp#)p-B>O`9v*g$N0lCu{p9)P06jCMxotWG@z4^(St0iv_{mVQ4 zJ)S$Sn0h25Klq^dNO5+soJl-?o#bmwf^LoRmzOIC<(!Wbq4>O>4e3KkyjwQKS8r0)IL0#B0z0HF>>)?UX&iHF0S(5$?!NAiQ6%kqQWu?4{%tq;lm5>hFhNF}@GPGhf4P!3TMMqMSYcrDESW&SMThD^uv7 z#F}?jEw8gyZ3zC-!C5eBL+4?;uk&{DOT1+Bb%s=OF{|Xxa5j8?Qf=s(E6+LG z!yz{21blVJYv*1_euB^NkN^H4`VuQJBhLo(Uw8n?At>@sAT}S|#a>cNYz29N zWOAKNy(QGUa9m;Vdd`Afbx}#l5O~#%wjpu%&-rf89$Q>@D}HU4Q;*cOhjURjJE z)=n|Dr6>7!F6Z6fey6`nyVJM8csGdu$MC@#NxQ0|;H#k|>erlYPaC<^T>YcjX>OW~++5B3Ec>7y0DN(SL4*->&** zVT$G364kD>1GgarR9YYs30Z zL3eWWCGt$5%MO6=Hk+&E{>i=CIa@NvJrj|ES)&!uD|4QIo-Q^oe6B{?XU}k`3-h*< ztINQ{9%6ctnWo^U5>%MKXA{wL&m?~u_<8gta{Q#Id0)QfYPo5el2byTq|fFUyZF}H zxBQc#vU$Y4&S#&@0EVBDPqIdEkA6Z&`s{TcPfcH(bsM>ks`00icu>*3$=Cvg&IXBd z3H-(B3rw_WG_CpK^Y3cE+}8UM`hlb1`Z3n#DQLq|@_`R1JCbL%>sojg=!*1V41Ea9 zEokX1FKB`OE-V1fofGr5^Nq&MV_oqMzJ6rqTzkuA@c-7{&}NwjJ36>_`qXige%8%` z&!GpD75k*n4=bF8U(R{9%tIS;s5u9A)`Uh&$+LYX?8 zdgMFxQqFzoDr}hn9<0AaDU*3N=dqI9h>GM7GUh$N`7g4D?btBq1M7C~uez|e@e|Gu z=wVMf0bMM{<{My)&ReqTv+_lcsCN4pcO3i9M1Ju$g?))U3+zi1`NcbxmTZ0kejuc}UtR-@&}ddCda51c9YN%kKcz0*485{(;5b=obAB^~)Y|n7tuWVC5LRv>IN` zjLR1qcdl{gD``_?4$+MXO%Ym>GL9UTcX1}SA-6d*MJBIVr^(~=`9aA)O{_*6w6MJz zKW%*U|Ndum5cDTRUt}*hoa+jbvwl;6eaX$g=ppuDtG4PP7RY@RJkHbP2bbT zYq0MC23_>C$TqG%09;dI!8gehW6*(y1ahNz@6qh}&P?SNv4wZR%lqMfvscxW`mu-m z@yl2dkvrVayg`feB<4=`jxirs_WXwVJ5fE#cl3EfKeyPvbPIE5#^VJT_iqiI(RoI$ zgrDxy^H{hY=k?>|&o}0-$l=oVrY+2&7aqPCoN!_*4?KIiOg_emEW`ZlWxVa2X=szV z+&42%)`vEq&!dz3d}dyi%qM*e(Z6=;>6wU6IB~FnM)pAdWj-CEM{+$Qx#8<)*p%*C zaxFxgPuV9IVM{#tnX=a0Tif9c0`MJDkF7{4b8!~Vm{{fg|0Is?kLcfGWq#?SYRt(` z;g5KKumann&>(b-_?8Bb1S<}@iU&sE;bJg#uKf^&^ESKv_TqrLkD;dzX8 zCu?phefl%!|CsB}iWhT%ubkm!EuT$!X=fkLuVzgD$-UrviZZs_Mcrfgrs5mW2XIIB zqrK3Eeb5H>9C!#b*^&(CXndC*1K54KfpI%3X@Qac^Z>$zCE9Ipe{z03Bx8aPWgPn~{_KtXE z>M`^HouQ&U--Sw!(018tM86aFEth_#BKP3=qDMJ9ab1S8=}YE9U@rTIJTDU2WFh+j zWA9#zrY+uOz?3(FE*=IBy<$o`0dTb&% zDL4*XWpBBSys66h%9K;gsdqkW1H2GkqHm8{UUPNNbxP14uQZeovL6(j*&ypg_Ic)B z*2#UJDr;{Qdj|rga32ME-p^dUqPNf_^ zaa#UUbNvfFnGPO_-x7PHezwr}!n{zC#doPxXu;+A8ZLX$#f^a$XgnKo7kfDUo$pr=64y%R1-z=gL~= znaa0vZeHMWkvWt6b>N#ugQ!=dLEyXaPF>qM+ePS*d?Pf7@_&R*n0@FXC+V%&W93`< zj(uADPNQ9X1HaYK$u~2kE&Mpzs{+$xEW5ys_TJcB$?IgDlUVvLb$=Q0+-5wN^CPXi zL(kl6d|x;J@EC2G{DV#>IpgCxS8ySA@($Ko`k;ayd44bVkvzluf9lUOv?uBOf|q~U z7Zv~?>;AHj`Hi-}_`r{k$wxSczWh7aa%S;Qgf~HUJb)Zn!JaxFoo^iPgx0k)SDZtr z?L9)XU*fx?KMUocKTUj+avjjHQ@jt<#%Mfc-e1XUz&Cq=bvw_dFcyujb01*6$@zoG zyt|IvB7A2Iaf*W)4poSs;aj?F+~f*gNgML+$7%RINc*fS8KbmcFYmNH^=rBeGg+HpH$EeT@}7q z&P{Dz0=_^?MYg$XB(lvV$lmfzu$pfg*UjM@_qc*!CFdE&EqYqw*56J+?p#ls0f&rJ z>fxM8^l3-_B0fcf8s;+|b4|R zsz#U1_}WDidK)XylkQDszZ{<*`~}}*cg_AZr6^C@?Y>y&9TaS+N3}gI(f1lhO0G-FXnGFHZ23f|uNg9zv^m>o(}uk$Q0vgzlQUCXwAar4SPP7VFShbNGXL>1 z|Llz^3g?n?FYCIUJ=2V%b8z46xVa}q8Lzz$(Z08Kj;8Z2{?CW{Vph|Tt95_-B!6go z&Kb|c6rHm5e2Y!Y-N(?2W6&G%w>SCeIC$@Dc<&N>(NyQZs9z8mEpRZVefkLx@I6-RMVg2B46i3o9}3boKbR~pZw)PSu3AW*XPvPOh1~@Z9F?3It7go zoq`*AB!F*^8+pWyJW>?XlPk6fQ>T#4y7VB22GG}f@DB=Z807P$@a-h7X~R4EN3XNl z2aEpe0!`NW0(x^J`Wz>?eBcMy__`fK{aub>{x2uMk3nPGue;8-4}884z37mo_bqAS z42bo}1Pj^6N@A3Q`S>;?cdf~%UHO+fBES6x@1&2`ed+}8mVL@wo#T7Wk<=q}#ao-S zckk2ps7-|~Ew@^Jyvx_T-TwZzDZP!aId1LQohfJ0-P$92OXWrVZORGcJ+H=J zvieS7O5XFfbY;pzdy%W)ZLI!-O1g5I@6BeT-5taw??HF4V^ZC^|3FsyGuJ}Lor9J4 
zQTdm$qSKvd8}hdJPEEvjs=yXse>KsvV)YF$H`tnoyay5MkW+1B=xEpZPfVZ_8-W~*JJ7<{qicW)dv=5y6gf(E| z?BRCkVIl2_e+SRK*l9-FYt|eMeR(KwIWi`A6$G!mS16mx9izK{%N&ujvrP2q_DSf| z_c-!|o+c{e>eo$DHm`<%oDBZU9vLur$Tkc9AA|1~Tc?Tt;)CB0&p5k9!?VyN-)ps_ zbUbq;{L6U888>~rQ1b={_D5Z}lL?;5{rW=t`?r3Z^H3eLr-`iHTwB_+K4JE>AAWeM zaSQElq3!0{TY6GCJK5_f?Fpdg)%YM}+ zWwV^w`2+N)v5Z4Sr+}O^WgK|d&Nw=d$wij<5$`JCmlKt-ZwpSb_pd2;BzDi9ymp;- zrgJ|0k2dZE|I5RbwWE>gB+g#urw#g)xgs26Aak<^ediwZoiaCVhMsE=`jR$T|IEwl zmU-EOo@5U>F4~N|MIRY@l1O`8Aj0Okd#^q(d(e-NucblkHAT5=*AX{Tk6z$ynVT() zJ3MCz{@-%H9vUh9t2}>$`^b4qibE58v>LyS4E`t3&M)aNeyO`c$c>S4 z&?oMd+}*#^a(63t*Ob0MS7FPx)L@z8mv`FJRwFVK8EUUQ7@Nn&jtV{N+e^>8D* z=z1k3&kN0TLVLvjD-QjZ*z_Rq!8aVY_q16FGLp*DVwp;tOF{lQZ`L^|jwCxoZ-!L+R%# zALkvpmhxubx>(z*q)y?%CFgLU1N>Fta~0&{tFY-UQ-Ud{$RVd7Z$m$%j{i#;c`xMx z^*k@EnQoQ|l+WzbeRjg*c3pn6uM9iMV&wZRg{;%S?|$j%2f>@UeA93~!UHtJh_{T*)haI1)CBECiPbap% z2l=<+=0wdkQQvKBXO$N@J; zgPXDzlQo-gqKTJ_=S=f~gB6a}>8aqRAN<08f*)F=1g)Ew@+z!mEr|8??_`P6UfIs|Te zW3TZ|0#D`I#IH+@ds&MI?~=36`>(aNAM{^q-LqXmbl=)~Q~CcYdJow%pC@O}c={#v z1QwvTtyFyRixr>H0FhT^Ehg~3y%0Vfzq%=050pRG%&9N=LnNO@4}AGg6Hd1U;o$)Jn z6<@aT(2mRAIaK$`{zGD4Yvuin4CP=g|6cppsO7RVu-!T0y0a&*SSR1)C9GIC!;#Rv zm2aNa+7_Lvopl7`0cmZ}1?*GX7cqfoMi-(u@ z_@6BA!S;Tz276^WGQoQO`8IDx3ug^+eVBEk#~&*VKH6@zE4iEaCIAoKWLLYl!ebtR zW^A^X_jIsUMNchsOK>R_eOjL`h-cZqI@##LU#)`k23?Mt>C@=D)ZM}uWh@e3O+QC| z$iIvWTo~26tOR%!1GggJcO7$atq*u=`G4g6%*b;y&78SHhrLrCGJF8f?>S040$b6m zH-8(L(*Kmv%E1ogsAgc7f480k;00-yezLbssV-3tR`OorD5v7zA>WV2ZhLe*G+*@Y zH=U=&YtM%NKAZw?fQ(_{)5q=bTeI=KRFuuaZ>u%RNH_b*V))9ngMqgPJGH$vulWZ( zx#}R>Eh*n2#|H3bk14GH9o7V>~OM+#r5~`F%MgqW!V`+xK6(vUJ6;jSmn9Rf1h?>CTu{=o{loGC8x< zZE0WNVD_PMj-E3l{|>z`(ua&Y^k>)Hq4&jZ^x<0m-!uB)HDPcmItlRFfI+Zo_3B+K zE_wOTiZ6)Cy7Yh^lQqfJlf82Ft}bG@vcLPzp`9@Yh~bLcNx%H`YqJ$D0s7{=O>eiO zEIc>c%k=)eRi^h%=Ea1I&*)Pd>qVOve0b&=^P*x$s-#bKD>oj>x`FsH+7|jY4_r0d zHQ|v-oxa)OxyUcouNRc+Z9ZM9_a}*RFId{IqkVyATdCg1t+(jq19KJMhFkP!FB<>b z7@vtx%Zz?!ny>_)gx;F?RCUS9(yIGk(dSd}>6MkGC97X~AljTt`(Eny-K4j-{zknI zGM^i6)cgIk@l59PzbX4Wes>yuP|ZF>;8fK+;M9RIPBD*X#3`A-y6g2mET0vgzXh}O zXWudYW&TV! 
zAP(HOx^%^jVH_yAY*p#b11}GV18vkq*wL(4*o!z1@yn3k;^| z&tj(O&+Lr-zol);-*cAstA?y9E%_o0gPjM?zDHzD)!WO@(P6Lc5yo{)j{eTXr_bnz zoWUmhaNtsIg7XOtKK`5b>P6ExJzBWVq%+j9;n~7^Rkx@m0b4jq3rATAF7hL zW^6|F8wFqEce20x2iiM#Ow{4DK3ngT%vEi+-q)v%f1&?o|Na~o!Ba%6*%dFZEX{r; zOnd#rCPbsXCX7_-F3!~3TR$O;v$e+ci$A&N`@-v(`0_j8)ME6Z$lQZ2g4Yc1gF*k0 zAMUDp`Q<}#tKU164V<(%?=)B5PJJJU%0~$O%DPC0LE433ymeft_bJABX6}VDSB>;T z)}2SjaG^ei{EYAz3Nws(Hl966IazmtZ=SQjF`ISY_3n#0?K#69X2LN*oposjuBC?i zzreV@CROj>e4`J8`M=lbL%`&N;3t-c;n=cM*4e5s&a1YB(|ALTkD+bf1qL3SA8xz$ zeEr$e=j&|?>@MUd<1aeP_`9I}aqpe`_=~95H(qah`M7W&78uvx8K?I_@MtDwW&9J2 zJ`A*v`z~F*%lFF5hkUO@=L5~T?;NAo-I1*S%Np9Atk3;h#xq&t!he3oztd>fYvONC zf1Y^f#+AEPAd~Oh_zryWr#k-L;_89+@9KK`l|xlS-W!l7Ht|9B!@AKr9Ofm4;Zc~V z_wkxUy?+JtNpMo|d7m?4ZAZu}CGeA77CKz@yUPcZS2TParMES2q~4amrO>#(W~Ba3 z=0o6e4?ps8X|!*kbuF?&iNNK+JBO;?GvLCyHu$^K*0soXhmC!4$8ZBThK1MXf?;|e zo;IGP(+9!NYxy-Bed(K{y#8}kbs03~ix&@-fE!g`7&K;{tEcLVRlEE*GDj~DEVoFX zgl1F@(c4dp55p%TUVnB?yxz|zjXufzEQ^FsR)4x@_Nhwf-pVg@ycyXi*9{0CH+B1* zdVBe*1y@wPU*A#ncP~&*){e~89g%&BpuuKe{On0x@1A|G1SW~p+csDq|MD2U&A@DU z@ff`yFUIJ7vs3OD{A8Y{(hqHp2kY;oyv-=LZ;<|7jB!7O@-KiBGRLwGCm7!r8TIE6 z3b&bIe0!1cFKx;mDBq1W#xA;F)#b_?G}iU#VSIS>JZ7Uc|By%iD!TM<>HZ-vJ=o_T zG7tY;(cAPcD@ONHgzn`!@|Wp47j3@>j_60qL|2gZVkj)z?O$D5Wy&bLBv-lkBWY<}WYDEZ)D6mm(~&?DIy zYxoRr(Z?kkIfAtu4Cfddn8^4%hJCUL`(zWgNHgEQ7kyZ?d>rk_?UDI7{)2vr4!H?i zq#5Ho{15%}@pNQ}UNFJ`^ZQQEDTM!)p7)l$vM>LuQGN~OBXbIwDv1*!J|JZdXZMZ7-gLfl%Z#zgq7C>sr4%cRJiu1XXyY8ou~jyneT1#{BmBWS zu{n|-prs2Ri6Z7D{@~-g<@$PXuF00%-OP6-X-dOF(YxTEB0BVmL)H3nd?C_krz;tM zGF#lj`S88DOEY_`mzJ~SkDP98WZlvHrf?< zx{0%~w%@iS+`hzCHPLU$RV}eq)^Z+8xy+&Aa@Kwp4OMb3qo3CAyw-e2mim`d|3Gu& z&4@lX?dhk_&5PiP(29UTcS6ve5OgPm4@AK5fiTZJ9c#pj;Dd5d=uYH0fNPApLK@xC z&on*w?x{w>-N^M>v+xY-6Fd>y0C|I#sC+wBdWaY_qkBkdMn z(fQDUUUD4Q3>LpSe1mzWf^&&HZ>PNYJlf730 z;D1b2w{l+EU?tZ|7uq;az@!TSY`P|0NIt_HdGRTVx>s`cQY2hI{rKy{|6N91R{Wnd z0RFdDof-c{-hcfC_L&I67e91L5UY;WeH!#%$#^j=XcKQRFmp?YW>8-IVjz-l_46JLZJ>#WdEr z@F=~@Zigqh4c_2Zc!V-|h1vKZmHOZvG<#j-{`U7j>HC}HdHf-DpV61s5%<2yV+d~+ zbl4k&&+y!8@)=`$QpJxaqh@-hitk~(5=>KEEjPaxY80O25rclY*>^iO=y^47Hs-IA zcP4)!@0V4T`2vS^9wUu8{1E%8oUyzDzN6USFSapeC;lLkd!zHXYiXxV%}u$LSQqTt zUXA9ZT3r&vetPXArn(*MslC_k&a%y06X)8nE6(1EJt?j$o-@$0Z1wAgD4W)(wjnDt{hR~8 zUrTu@CuPj{MZA~%mVG>@0qdic;%hU0R3xlt(vOrhcp&hrh_m6WW66Y{=o<_ENG9Hk z{~NaIMX8+6PzOKv2KY13I$&K*iI!W-h5Qnk+i5rWsO_T}oH=Ind+!=#{y*T4%CkwM zdmClG2Wm^!T(q`ipN(wmO`|RAnoDNQnRwUCH|@2*yISzM6JPHv@VRq=qhShs^c4KE z1^3(U8h=;nWTn@)#+6qY3;q$`(0YSXmc-c`$EE+q_W{0_^^lcuy>@2dd~h_t`z(BE zvlozC zIy`MA?PR4TJwF4!TH@$9)4jWh@$FEQ+;+w!HggAk{_{t~STdJAcJ*!R^AC6~br3sP zznk}+$f8q$!_)*F4i#4bhh(L<^51D^q7ocKUAHSMOC?{%Db@www#ZqH^K-F}Cb)v< z&B=N*bD}Z6iAu|3g0JSe>&Ezs1z*oFKE-wV{5Xf&ZkfRx2z<45O8iXLV9y&Va<(bY zqP27R0PS=_e`F4%zSeKhZ+I5+o!n9`bOGK%a)i7pYsYC@WW%RJ;xHr*(QEg-5v`rk z1GJMp5!yjpA~%(gmtx{{Wt7k!=B*5$@LdyWBXn3&y$N^8#h+T{aaTc_mT$a-Y$M z`Lt1v9X^%4fxfpC9|%q9qVMJG3$5Mw*UTYaoE*Pd&@S+_C10)uuP=J@dDZsqC4wjG zDObz75&SHgspLeP%NN{dz)jlPT1flGT2CkDz5iN2^L%ORWx_QYzarmev0kp8lX=$l z^7xthB=}EWU_0Xycuh3+xB1AAmGDObGudYa7S4;tJfF3^e8~*tV)nx_pT;ANztYEE znsu+p+|GsDfHOH|EcENlV-oyM01s1ud;6E6vd*hqLEo35O(}WGqBh#dW*;s|Ya3T? 
zORkcANcP~DhzYCzA~eO%{<07mWk2y0tB^al81uW*q8q3(>Q^ueba>HUe3$;D)^7A))=9#OGOQ z{M--m$)v7x`PLcn?e)gDcJ>;5J?U##+6owJSNb6BX2$hzcRJsGH?q%B*1ddN?Ck&T zWqkWwN*lFW2+K@hptYw9x zlkZm=eX`c;;JMHw7cdsuW&P&VAI<&sy!sVU-XF2NZ_wU5`+dLP@_syRN7ny|<$Xz% z_kXp#|1e7ZA6VYs#`mI6H2JgnPt=soeK*vYnE}5((x&Bz3Z#(hNy?-s4LH=TmFsPz zdK*)rWhqKh_g3Wm-?28bp(mZpH9YLX^ozj_=#TJ2@$e5tY4Oje!V_h|6FK3Dln9>-w*#AMP6ch7eGz)|9CH(>rH;Ym zR-pYh>YYkoD$tR5FHqpa)cRyiPm-+ZNnRr_yZGGddXi-SfARgBaqw+?W1dk`!!x(= z>B=T-+fKjR;M?U|cy!6rpw1&71UabspNFBxr8@7`b;@l|#LP}aU-=+%QReO_I%W7b z%~o&^dlF@Z_m375qHM;6|bS!+Jauo3QK|08|N6?BRD3! zy%&AS7Ia%#;ETT?v%a1+XPy`GGC2(Ed`i$~&#s?HUKG`-jPx>h-eKAGJMc%*^Bd~t znP0NwRHN{cvZhq0vtHd={-)r9h6nf)n0O$%*=Tr>i9K6#Uws{8dof~cKE}0zahd0d znddtDBF1pC?Nr~I5Ij6gJtCW$c+_zqRQ5e!X2tO$&PwYW8?hti*hDU}mTTk81+$#Q zmYC(l{?o7A@^CqcLp95ZK71h9xXA|=ZQh&VStDb*zHP#0031^B^Nm)QOJt;o_^?@L zsmuEn@Nt?^m*AqT0rC57w9LnP^4I)KXx)Y|t$P7jrM`nK%CptP<(o7l|4^uG4P|97 z$%Y09{Zg~ypNCh{_CMM4Wv@_e{r8F|K2X*Q{o4l3y`26C-;oSoV%-}aqKwc*r!Beu zP0B?-S5@)nD6YIj>~Lj+mCbH=CW-s$L~jDGJ8<4*;1xCRGJTDdm-o{T%^qgSTYP|L z>l6Qil3=WxxE2=C@oz+=S+1v?w1R4ebB;Ggo2v-pRM(w=6#DQm5~| zjfW~P!Yrb1uFRk9S^M+Rr`B!4IzvO`KyY*^*&#YHg?yA4+@$S=}nHREoz-SFtbku~ciKVu2`8B1*8{z;BT>7#;v zU!UvM8rFIp<%Lev4O3bwwROfCbxPhT6`gWEI^}Iz9BbN=l-V+msfw}|y=c(GoJskK zR4%uk1M+6lKSDaHN!T%y9PK9Wv;FO9$$oN$tC4_wVAovBKT>$@x;cF zo^>M!D_yegnIPSqG}us3YR*hj6aufza|bUyyrZl`W-bRgAsoJz{y* zLkF0P?p9#y9O10*qD)+jtw;8Qt-Q-2cco|4==x6J(FHswuulFb+lxF)BZV9^8*M*Y z_mWKs&T_h1USh5nf*0h27`YI8*lxi!J9A_0|Ki6^CJz)Bca-&U+*luTnol+gO@qz^ z6K+VYm;Hl0tgogrhi>F`J1}=c?~s=_$@6B$Cba58o(o>1d%eXoN@=N?uQaT`M`;lI z=w|Xg3*SGsLTT7cY`w(1>amrh{lD3*tevqiXIe@~@r`yU!6mYXv93gi>tS8VSVUg< z-g-UXa$ldo>p^3SezP82k^ZC@_tKv<+SK>AnXJJ==yw5jop3zdKyZ-tAimN?F-p#H z^0Xf#PkT^1^U~pe@HUrki{0M6mG=G!o||;}2lOAFYswb;*zOMA9qo$^9HY_gYI)CE z*h1`}_8u6TV-mXlh`g6Q0(!QExU_k|`H#S8eZ0h^>9qfT$}|%*(C_(D}Q$X>G(xVsi)Q{#`OWthNmi%^nZ{f4m(67qnb?cDVyH(|% zYp$B)S@&CVOu9pl?e=osN5z+q?RM`%?xhZu`p1n>>&K56U!Ovb5Er(zOx|~ro8pdy zvGvV^w_cqZv;1l`U#FMSw&a-kvpwPMqR`A|$@$y;UG}UuMyd5<9J)V{oN1@8H}Y%K zP8;%p2RdC&PC4?Qlx0(Yr9G}b4j#JPmeft+OMfAFC^6CjzCBJHtXXEKJzQoR_%3B# zF~UD)!stn&~dNk5x9h_g^8C4*$$7^=yiLPc-m^zc zc_ckQYa21H<>VyT0qo`x-zoidAphoryN|HfEMdNz2kY%N$La0X$UW`W$UW^IrCzgc zJ9SskZsP3R`pwkWOblzy;N@4#`v)z3=;1n*KDcQ^wI#*7S$kGL%ARuKS8I*DoV~7(Prr6ZZ{v=^$}Lqqi&va+-ja#+!{|pjJa8&|Ej)8SKDwRt(!?GlJoHL< zGvT4#@X+zL@VCN4Yu^X;y;`M?-$9GyJJq&wo#@v?4hOJx0$bv&m&N0wXor8*eG>z- z7x%>l&v*(u5pA9;-YOo{VgUGWh-M|_6Cd&LD$xs?nhANXf z@t5N~`WCs)8pSo=$aS$?C+XLfa*fSFp4Z8>umknnZWgB z`gN9EU#VXg%k@BycpH}>*V?g{n{tjPwLk@IkVZVMt$8<-=7uVX6~P~ zkFMA0a-zS%Qm4}ut}`FJ(D&fc{wL{AWc`0ydNNt$-L>{7er@7U<`_7<>7RjX zZRWj!Z(j4>z&Ve3Z{VHVyf<*K$h1$AqS4K~po? 
zAG~(z)^y_FGx7ilg+gO6$ zM!tzx;^MMqO{~9^{UF6QbYVMkn(POcu@<(lPo;7%?Kj)*U@xd!_XqZZO53n_&MWQv z?hA4}%eRUlw?)f?1?=>E5$p*k?u`Y8DlqK7H;67u_J`yDDr&v$Ay>=vi+dYG!xi$ZkZTB8wtcel z|9E@%_^9hE|Nrxu2{4mzPeLG|2_Q8I-UUJw%On9!0&0oTdQSk`PKd4bE+Qr&+D-_z z4PMfMTR?l6#Ja5&DAV07H`@l;+7-HWx7{rP)bE6}RRJLri1T~CJ{K~?fVz+U?H}`) zna}0@KJW8B=e*DDecq8Q!@QSysnGd_vOA)Cje+;5(6 zqNV?Ho1HqTvg1aK>9y?3vE-$O$`C;o(KM|ocP4qEpmliFX4 zx1>>LHJHcv$g3x>CQo^9__hk7)IN+}V-_6So1Dv@0BY!)_j-u8uK+%^QO|rwy}C|4 zS=y5iDU6NLzbzj=j&Q^HEnB_gse?6s=7wVPP0J3W)|>SIaKmj|)V6XobS_;0{qpO- zw`wj~wPvFUutc%xqQ!%{(|W7+x-cywPJRb4;WKGY5{|*a0dOF>E*!kWIPXbv<_frF z8&)~v4>A7!@M`Rzr#*uhf)$@&UKGm-C&cv57R%ATk2A$9!t}k_iBt8@ziy{?jsM<# zGaKXa8HRah_hsNfe(uD$K=TfCT*h_DjTYZg2VV8@#Ver|!wMRS!R3sTO)q~(dfQu-tmoP<;_xf6 z+y%2sJK+w^K{LiF~501;B>iygF09 zY-C0TF$1rE9{%RlOQyWO2Dy4~r*|dt*tZhfB365!QP9fxa>$R$BIl`?=Y7bKJaTL* z`G0T>dXREY_zokNUj;TdCn_no@X`9|)Iv&tHJkuzC~sg`!+FH8wHE{Y=^T?tLzzI!I% z;j#LfVy%mcFMME~TQxwRn!^yX^8)&fB;X?NQ}mldza4{&e2ccpE8Bh^^Gh2`CmFfI zjf>X|+R6amE)QTI<3l~Yzu}*vzoGQUT9U7}mOA*qm39*Gb@I;Bd2OIM?D+_|hZ{3Q z17*CwBsmZ(!4HhR6f-6o%{jgJ(Rz#b`l`zDNSHxOgK50AqBjAOx@W~d3Pda+Xosmy2b@-&3y05}fFSMb!fX6VK zUSxiM2(RNyb?Dn?@x{hUYRF$arvO^J_c$?};8n)-=g>={Q|A=04oqu@=B1ysQ=8LZ zpMJ*?WGebdXsS_hAMFPn+|+`bM41#FI`H{G+2o9cux}FAttTAZxV-rwyeWH@Gu1|D!#5~~5EaY4;i~K+GRStQ5r(~VR6Ghhf%-ogH1|xP9*@cc{#h`?X zC#V_!f8ciQF({5<`z`a?s@*e zqZ_grM>aJF$IrFTdwWs2`nJ4Ufvlg)daHFM+J$d~d;4GXHm}W5{N2<1kABYjQO+$f zhwv5jL0{EZjGYmkS(1P3MYCBsb#7aelIzNModwU*4?gWVOnDBTYpuM2REx$Mx1DSe ztvn=JNiABMbt!p{ne5TK5WR;S&a5fO2l7@E>A>kEj~w7&dw%{FoeO5sjXmcF zo(1lm%&*oC)~=rE@`)!kU(Q^&Kw~d3-j31eK0J32U-{#^Pqv_&+`CYDCu2A>XTRCJ z9$xhpEb zj`YRH=wEa92zttc?Dbnpe^V@)OKJ}DzS{ZSxDzcg+BrcxkM?V4z0=MT+F3>$OKIa# z+IWG4H{I}0A3muy`b=GU*FJQ7-VY;lHKwpPeT(|r03IH{#-8JU@%6R{ zK9}b3{iH=F_vtK)*z-L{^uV>B^Q`Fc1kaqtM^6PJ%CkM~TBG!uF>U`eiE|$uS?iad zp7|HqThBtZ1Z50Zy!A9KS?}^H8GbpaUyG?;^KFL zlVS8rUShfWNNlImYn;+=RMS%WjdYFZ!`P(A2mgiWz}Q4yY`Lsl&cvWk$%u*2apE`( zWW)~`12Kj29$4&1X1Uri_PYmbHGpkn)tX330d^WXA)xzK`HeYgjCGSGXu z;KIjyS1|Sg?*q>XhX;hSTj7^d=w+PKm)g(e-7D@0CJ*#}4*7<&!DSTNMQcl#Z{f5W z8VW)~vehKZHAlT;Pqg&xEiO>)*q_peem|Ci9_1yLoMJ>0>3W+(kH6V%JhPOx9-}R- zXQIjedPwB<)A>d?bNQnKSi~P2X{V!!xqcBpv2(`DXy$c&Lj`AejP2r?isAeZd+j}U zx!=F<=3MZs^9W7+r;68MOPw`$asx8Uf-KPU#qu zi{siBkZ3KX9!_Gv z_OZ#xIX%^mDYyb317B;rq}p#c!UE&)2p6=1X#+SDUd5QV5=Vn}2A^CZ0 z7?Z05WSRFJ?i`LBXKba!L3FO558m|fn{$Xy!Fh?s$p7!VV~l-eMqeAt4+m+(@8Czd zfA07u={^2`hxZck@g?Eoczn9_kDNi~4aZq$qi>iq{jm}GoDDTy^h@5!0_oXPmk++7 z?AobKqQmyfq*ozduz|wIhs?P7IQ2%0(Z!Gl`;T#U<>bD;4*-wHg4<~RI{p8YaZd&p zoI~>F(d)?#TIQjaEqVezANl!+#l_;68wHBVsdk(EYEgLV6#c_nqW`0>vj$;**?C7r z_%z9{+G$w6y3Wawub#Ze#HZlFU+2YNm&ASioy!K(m)RUI9`fc9C$B)~zbhWkne7qz zmb8YpKXIQE;~Cq16TSn{)d*-&=Sy#kBVTx~)$4te73!2z$ZzVn9xpZ` z&`3GgBRS~pQTgn2P0@jUwb5Nd4GPw266x9hv{?^U~P7XE_{cQx%X3J7a3~%aMJLVrQs_}o4EnFI?fNoGQgd3NWu*j z3BIxl%UAY8aQ6`Fpz`+4^!|{uy#K7;$HpL@S}vH66$c`=EoaA)8*e*Zeubc;t=>`H z;fB#$EPb7EZ1s-r*4o#Rh3@05Yl|~Zv`DW1a_8uo@q%r@R7sb0eUvCWdN^09u z&RsphnIGBzVC3se$sDgy@JsNc^Ra$3gfp!)hs;w4_JVL0q&;)c^O?0?vxdD<6;_+4 z+8c*1ncDV19N&Ox%uwHZsg+yh=xZD3{~+yN3odl;U%9uQzE(N?Xk758WrO(SpZzv- z&2!PGlM4LzvrazC+2UNkP~%+8Ihf8ka~Ws<=R(Y79`BY;+_i@9J;@Wphx3xQ9iGh| zS#-XNc!RgVe~+`cNZX?GP4+pyj4jZ@dOrKP zHk&>-Lks)l%eWJ|fsa}_M`9*x`}feDqLblYc)Mt^Q85AkN+Z|8JIiO5vVK~&6t-dK z`RE+vqD9|E?`|mHBH7SJ8;P-v zI^CEi9@!79uc1px&xyXp_`reuX06z^hrVt57NUF)U^Bs(N%sA>( zikCUh-i5IfyZBLH)Obp;1vLlgG?q+@&g`3qFtYeJnj_KL)YG3UDLDN(+4cj%$h`dM zEHF+3#$#%mK5|OQT}zmg3jCJh^)22C>L=NJok)LEc()tcJBI9X<&b=;);uvTcW$NM z`X1|>BjpWT;fz0+@Z1relPw$laUT!)*t1EVdYa`s{1D$e`=(qS&%E{Y^#NWWUV?3H z#eF`$mfdw0JREN4{9N>aW6-?jL+fC;A>EpjUCc?xkiPY0clwEz9pF1?`(ChhjT!3O 
zcRTx-kWp{RPyb}yDL;L*u+L9F0$M_^IStS6LW|FV|1ID;at2(#4tz0i-3_k)i@x1{ zeAM-E;{iVW`ukScXB1`g%qQ`{zcLP2mWQXE#sjY>^1uP^wVsK-;DO<1!RzzDeT+5U zqK^a~*u@$e!HyqruAo^0nrGBIzUj=B&Vx+ktMDJ#|9OTug=D1U>^CGw9GMp-);y5R z!`FF=R~yOW{WfswtR%_PPGHL+=g&k=I_=bi&_iSkcCxmbtT8SR{TaLr#5Z3A{;=j5 zn50)}?GNv)n0Y6#xjb{OcqT#imXc@mRl$KR^$0ZP(>jQ5?_(YGX&s!=ou9=zn8iA{ zk-2i)8A>~e1K~D=iAuw znYE)@YYb~rcAl{@o3Tapj3XC>bLj)#xje&r+c>{&ig*Bi30>RAFD2}ubm_Q#K_4A| zD*-2it+AXg?;GEbSDY_c8%~FHx;8_ zhWDx_8=HE-C2`=>@Nl=YLB??SWY2je4Jd%o%D z>ytf8E4_mXD%W}) zbNW8EF`V_tdR4IRR${@N^%y$enD(awJln6YZR{y7=@&z~!s$!>P&diO5xD#{cu`FY z7q?yfpTvLJXZ_d2&~M||Ug_Poos~W_J{da;Tpu{7S}J{ZRu<>VRITy{W+IMe@2ofVbNxoqj2%U01X`mIYfHnP5Tm+&qB)lT%j#Iqrb z^?!h~*}lcNeGZ>x!DnOOv(3noEeWzD3qH$&&wLJ_jro*(w#*s#W?;;6`0S4*I#ad6 zI#ab1S)9un?ytWr;=4At>0^90fmrQ8e1^R2AFHi$`cgj^!e{?~$&Ra?eiCKJ<}dWg zj$N#?XB$)Ws)1!k_EjI39aEjYBs&ztmmVR1#JTuWEIk5#?Hs~2a^>W!C>xBfQ(Unn zTs&%v`~a;xZ=5Nc-1P;>PM0qP|A6Il`6V%ZRZ`SmCB=HCMY^JY}n`|;+a$mzVf4Vff* z5AiL&ls_wxFTMId0AIR#a~S-(xGv#$Cwf97u?W@D^n>4t%*O`C;6C3lI|G8^-q8wgDJJwvHLoU80Ua_9J%)TYQ@f%$KZ4dIx zp#vNmpG{V>=YGKXz(>}9KECG6$7hwr1GYcFeBb1>e>Pcs-x=on?g8feM_W#x?*RMQ zG~YhHt@&ooPMdH2A7H+<4zFd6c$u1W7XqXHxA5P%tFHWF{>v{6&wr+U@4lZHy}rPZ zW?zR9^S^EM9%F53XP>+BLP}}hKgh7+!Tx^x++|;pHy4X$5KonDj;_%$WZP9necdd} zR}DWGVGk+qGKtS!=;)I1o1yVvT4S$U9p!vjou?bZ7STD)MU1=0IpfvGxtr+v);Vbg z^2|NwRdG6s9aCbx<wmUt|9~*0JN^5+v z&K^B|teX?Z`eI6J$882?`RQW~oMo(r?q!j0caD)dTYlN8^IE&;f8r~g>HP|4GUDT$ zkTCxB-e+bJBk%4zHx#|XdAD`y1FbJH=cnF9KUof4N_Po+Q@gchY;H*1(!uo&#ojl6 zG~1Ssx|eu;y{_N??=#zV=4{r^0v~ljqMXZFqVL+B1tEPG&nalfXVLLZe7o1oDcJLi zlPz&{{5bl3M=ko=#heL_ULL*3;U{FqY5b)B1MpLgrPEe)-zvP)cZtrMy$wBI|AE1$ z|Mv%C<@ygS?aq0#*Xchng*b1v1OD5U*4t9bHO@&rwGM`uJMpEi|8MZ+W_Zly%SLc3 zzLd|RKVPnBUJNpqZfIEivT@qYVfSi|@4wAb*&JiV3Rv45Ukqwv?- zm#dc#4kTx<-7XJL0BWLx# za&ad!ua(5>`pee0c}KE!`}<$FWUGh&pINp#V{~Qfo~$#E5xb^ujE-#m*cctzI?x#J zOdO*lU)?bt;@&`Ge2MFyBvXI8qi@`wRi@^^H{#`iWa@&?4=hvX>H24ssmnK?E>i=X z-z}NybI$Q@_>?kLYnJF>kcF2%nQH2Pf=ulNCy6pOrvC{t^#RTXKFT%O1Htj+A-MYK z&e1bp?Z$6L9DN;e^p5;>nc;?;?fTPC-g_z+W;;4F`2gg>TuqL^Tyh2GkTXzB?m!Xs zrwdy7>+^N;UnQk1n3SCJ3ZF9bP&-%aK`5$n;>zwaw=X;y;oZzwa^^Ihe9$BR9j&EaLdNsN0C;~xwjhL9`4ervDxy(UqA!Asp14?2>4UNiNYBI$+^ zX~~&m)%Dl@A3L@)rfua7%n!BPR5M#=RXohScI7XyhhjbUgz8-5yQ=uJa*H^#Bk|p3 zpY$$!IFti19@-fX{MMch=&g!99bDh&(_X4kmR*Y<%HG!zWM2pSA*_8JiFLePc<=m# zKHcY(cZY1*^Rr*XTZ}9tUt>bYvCj+9dd8j!)s5A@h(hvZ$pPA$=i^n|p+@SG+@^q4c zXAOCl7#*xTw-Ws`%#}?1 zq4TWt!`GX_Rj*p|e7W`uvOg19dXV^p zpZMK>?7!1R>FF94eQp`tY33k$L6qZ7gKA^u9GoPy)D;H&CL$98sy7pyuOpg zPj?w`ewW<1|FCKTss6F{N$=ZcJVX9ns}CLwdwgqy$wutia@8eD z>(W>iL-{(-Y+8ixfm%ztz7QEwW$mldxoCE-;;}F8wCa*|@@#(?$>p|SQaulMZ}v?| zftYAX?W#?DLlxmWw-tznTJr+r^*?(muYVskl6JDzu0YmSp%cyq-+_9`d$g9^|H(%47$dD|8uo(n3**$3 z(thS3vO29ksoD46#JoM-Ge&WbHK}=wWjl;%YXlGNp2D9Woou#JMqm(Bkc>{O$>i4E`xeg*RFf&==0GcYSu41t=Fyn0 z4)!SZ;ojFqJ;~k4)DUv%qYU~eqpn)va?jf2g~ppva;Yop%?}#LODI{sYc2JI_7X#E z_74gimhDJR?;1ZfctkTfclg#CZyWcRd?rrtU3chLM)M?o&!e6BA2}^eNYl>H*>}T+q=E@*@Cv7_Es}WcYJxzoL^qoD?U@xIQ z#i#ZYjufp~wAJ@cV&CFb@rm0%>s3W4*@h*k-?XcKbq%=@30+1`-DU7hzc%$>cE&qH zske59XA=8$`zxUjzjR>YU|PRRs0pTgf8_%v)zGQ3l|GNj*1Kq= za!OKb`af^Wh`uooEFb!6J5B`uvrV%GTviBv;r$RW)bP8O-(CFH9;=leV}_o~<+&jD zqO3Y>MMs@+Bwu))LbW%+n#%EUeoL(^WN&q8rS5akLFBiUxRKlLuah2?Y{mrH~zNE z!Bv=ftQOqp#9`V&M~iJ`UOS!qb-}VJ9&Zs%xVrI+tTmF0eeF?`(rK&l3~k}RCN`hi z_Cmk5iosh4_JDz0z63t8>8P*Xo-6mxbUg<;SP9%IPHz8fVl|EA_MaL*`VHr8jy}`- zXTtS+!%t+ycdBmd$8+yp9tqAeMq6hdX-?6z;FXOQe<%kpVIR3_p%0`hhMZdHi=6U^!%6T#$PG#?i=BmMHK3c#Y(#6C_EP6aBb~-xRkoKuK?GX?kD0Raopg3+Jnxx z$2jBuqciUQw!-(Eal3kTBXhR|-TEc)>DEGD;^@}4f7I5i2ZHI)D7xb1v`8=FXkCdrfZc?loQbbt}!Zni}(&OzVF?d%O#Kyc&DF6J9^cxgNyP 
z(mH_cD{9}dl4#fOZ{;;Ogxu!N$<4?rN% z`A(j-ZBF~%Bis|rM;S*a^z#6+rrLwdK^N9KGLX*W>n@Cy;E(ssEh^x9nlG&==G%{M zGPcOlCzFRE|A?RInpH0l*pvN%7-ubPEaM%%r%V3JC*?ay-T#l`8`Xy1cXQ2kPr9YP znS(YzeYT=|=JNh7YGxX}g)!aJcYy2q?H>mm={5}6)Goo!-juuFc+*<1z*}DZ9o>mA z=ozaG@Tu?kP7n2X2BY&2LHGAz15g`q8THy4Jk8zC-kzh*-X7Iewbtd^OO)F+%6Gdj z;;-U2V@MW0sJX~G|Ma2FPwiRq=go)PzWV3GuW$a_Vc+uPwHjk7@B|n6B9CI<#}5+c zq~@>edd;)`#}}K_FRv?C?H39bMM@ItzLX@Kegv z9&*(u0IKkM|N&oIx+~?lH zPS8dQ6VN#hoU70rjhn3>YHt;I$IrHOD>7#*^-#jj znWdXQr5yj#zu0noD{@@ph6nRxtN)ZeyX%q5l0~w2W+R`ACdn=^EPH1KbAPl_x*IYF zTO|$rC{`eTN8jAz1Fkr0jd*_q*JbOZTeePm+jaCAmaX%m(aQTVy|49Mw$7ip2R|1? zD+2|G&JVPvpch*@6a5qte^uV}B*kDC8_iRP_qH6t=DC5o`J4$kVBP#w@(pWgul82f zN%9S$*=Y|P;92CH>|Dj%HaW7se_e)i9o*C=)XmT0nsD%0^9Qo<9T~sQ_*mWi^)o81 zy7@jBJLRPM&E_VTE44+AqwAR1IISr z^PvOy$8pV?JEJ)pA5-*2Yy^0vqhb53?1rXUgMq`H+o29F+fT-)*>=h}CoeXw4Ilk8 zS}z}V@NPQK29L$3F*oHKxqdhJx&L5K%Rs&e)rUTgkHxKPm8Lb%jOj~7Bbyg48ri&P#mMHxJ4Q5r*)y`a>V=Wbny+W5k&p$Rs%Yy$+Ny&F zgexEVf&6d}eq_cT_IP3s4fe!pM|;>GKPRF%&oYx3($Z9Lkd5?UDYFF>M zFe+vvpW)lkpq@YV%qaVr!RS7T&%8ssAA`rp`r|U(~>&B#*hc#NXq8N_huPguEKiP6r*RvioTGM<+EGv!AB}OD`CLhCO z4|ImJ?=`*cyTo6fWu97?|L2NEHh-6S3PA^r4jsrp*$DnuLI+KpH(QrL3yt7qooL}g zv$YXgSl5phLeN4SJu-{+>JQ*}FTan%ds@#{tQgtc0A19*FcMxCE;HCOJErYj=wm4K z(f_@T%&+FB9eDNq6yprz&$Vi(K^O2cepA&o?yD8tUHO{j3q4a!c-4S+YrZA&hHotN zMlFgBj6Jw$Zmjx+GUSQBYZ7v=YEfQu^>uTtScyrj`syzULu|AL>gYC$7&z@6l%=p%Ii`gnBFNOUpP@&9=H?d-vR z5L$VlXk_z4o{^`?4AF`!Gin&efN}#`Ig{K#KZ)&o+-Q9oTKOKok2tjA$`Hwq{pg5q z1DAN}UB+9_9_s$@y=u`)YTH#7t)#Yj7;lO9bXqC#idOo38txp)1~^-tK#U=k!rYW8FRgu;TJwM9-rd}rf8#rc|N8j+wNLdHJss(t z-!#eVYpONPt~%DKN+Y=oUAbZd`kd%TXU|^wOXHbVa3Oz?Vskg0pS*S~H0SY-**%ZA zg4^dZ`mFVi>+;|yPBzD-v|+y~udxGNQoiBRDdDxjxR0~rd{$m!1^7@tViobuYW(NY zd5kCPXWfr4bs2Tz3-Q0!D(*mi`V|Q^*stcjB4|{ww)TT{B(UlogFfNQSgQkb_Qd41 zwho%nHiP#y79TK%fl<$CZM|u-f9(adtKYeC|JqEizsc{-+)aI%r>|set-}M6Mc}H^ z@LRRh6LEAnVz;TfulPMGgfHRB2c9~~KgzB3uT7@C0O#hFvA!fbW6fP?yrOa1@Qk&>QCmJhp*QhnV*jCHl6jW{za?SE;K{k5^zz) z9M<#u9e%40g3eA;T%e;N>1CbU{2@7;b(TGn-u4%Ch-WR^jdSG`Q%2^kVDDZgva|u4 z^ME%quWBS~<{H*oa)`BuESo*-PtTA}Gl)JXwEg98=%c^o|5PIu&H7D2@B;EBxBj{y z%y%e{cqIB|{C*?S1w0+IjacVYcxFnVwJp1@{Lm54(&&%VTRRrBpN{qp;%}17`G3DY z(GtC!HTisE`u7JSyFO=3zyF0H&7C}R@!O2n4Yz&G>aU1rBy;ER{SI>S%X}Yrm-CP~U&J4<{nCoRR*|n! 
z2M(;U4AA$)guXrW9WM5~sXVju_{`f%jXv+qGfO5I`Hjrc2JaWUbFSiSzeezZza{Un zIeX_#DJ;uFC#a$J@UR6_n|=a~tV5oN?>PgiT<6nN@tgB#M%9?ZQuN&2ob5BT`tI(( zd55>@+5N;m^se-sVWT)Rkk3|p`}NMg>PsGow`7ks@=yHKh^=KTuL4I$%C@Vb-!z+z z557H%TJfrPr@XkrrB_C~yMkKR^*PXLYww#17dZ6tMJRd%2zS%)d zlKs~Hugtc|JhT5-jJx5MnI#_$$yW}P=tXPac5BExc;~7V0TL-+=NgFw4rZtWpa??~l^-+9AK6uy7k2nEdKG~0*+Ir+ge9ljM{>MJ- z&jFA2lF5f11wWzz#p3^^3t3V7T7hi5&X>N>+QmKPZ~NdqD=r5Tl~x8Ml5{TBbfVDospeBm!*o1Z_opynso=D;5Z{*JYdFMM5}FMKCD zN@8wyvHlO>3-18OS;qLT05nxD*@cYSk9^wxnk}1hu=gaJs{cDaO@7uO_3Wa|rTgAO z=1gZck*k}xo*eSy$>%+iZ`Hot7@UOPlvtwA?CcJ6ws>?a_3{QIM=N&} zg!y)BZS5-h`ln!?`Jl;z#Ib+(&|26cDzKq{sjO7z-y${-P ze1awT1jCbTUt0U`ZC{#vf~oifZ}@$jysdQjbxd1Jl4`J}raVMjLDpQ^Zqhqt59#|s ztKS_3(HQ++kNnbqjYt2B1F?VLzdJ7ftiEwov9J9V?rA+xU)RLs@0eQ<+#R334}8XV zddVyF;~PoFN0NfCWGFt9Vfbi=qDAkm2q#rxo*wz^NrYBV~ve>Rv6FJ@$DlQ!~F2^!^qo;D%RK+ z4u8~=k%XKZ9w-Q=_^2sif)m9FGun2R!1KsPtu3=TE3brclq4G)OE_G&^J3;RArDTv zLy+tK(!kPObIfJ_Qe){gz?JCxOoThkSU;_wb1HMyzc%>Fm+ac$6XE%vkyg= z>qZ+9+0w~3UPE3>vK6zZ?s41H-pZM^oJD!pxUxqVA|t6yWa*J@f3KXW^ZTZn&#tND z{v7U4AJxPfO`MbXM*6iz?0(u?c%ABCn)6n$Z%p`k0Dn*&wqVJnIW->W|DaJ;vC%NeQoo4$C)%XC9 z1wQv&!{kd&&C8w4NmT0<^MQ}6LUYsZ$A>y(4mluZSCt?82zYawiwe-Wr{&^<%cB-& z=TvX23I9w=D%?_Odb&#LSFFkCE!@j_2jx1;N9&^Opt?F6u;^wrq#hS`-_d{cP_&=)bGh=^Y@aFRs5cK^rZEUd6n9- z#uKEv{Q_q2G1irnd0C@!lS*E&1Wh=5NrJ+I)iVIB}*8IsZXc3Zw93%~md!DKX_JOWJR#gP#E4`$6~xK?i{rD8z^uD1BDAda6u@xExi+wtl5 z-;>{e|L)g;PqZFqU9-+aV_tsWcCw}RM`oD7md zbH=l_K}6vGtc=>x=%O*9v!>IseDwoH4*SpJkk?pCA~Cr!@;s z#lffI>)PF8#n_q!w|HtQv^Co3 zHi~Y7d|P@LzBFRE1v!pgazHidz;|^|QH1wuvN=oVxtT`3-YJE-pC^gIbSXZrHK0y~fgY@R^%W z%=~@oyx{L|w)tosd?dfUf7skp_E>+LNHXf1_-l_MS06}0ma?`o(A>vUbtl!Tb;925PiMa=LtS|j@4<2>ksXw=G9;yAk>0ah(9Wkm|-~#-k zFPXb@9=m#PVw@$I-51L!c|X3%or{T_!Q;-a=5_Pu(SiES?d#r;&#ipo@~>NK5^Ij@ z6R*W5t~qTB7|+}VJlYFZ4&0WH;4|*I%|b7#>KgO0n3-|MomR~3D6&38%#7GTQz>+( zy?&}u<1^4-uqFNU=Nsy4vi63(ScvRJ|7NY*|Kq}frdITCXe17O=pB4A8#QMkzLkse zY4z{mqIm^TWVeeC9HGIzEk5v}>%otRMg65HQo{NnoV^FmIuxUW zmeLE2X7Xub!ojw|8Mk*_Z1ie9x*I)3G^QFo8qb&ymhY|qMxa~oKT9oxKTa|}sK;NV zXV!7XVHLb4KlD8MlwJ{Tu}L9b0OpS5Lu_b+&)U^ zBbPq%OLLmyj4wdF)tgsQ7X|*2EYR~h=d_MmP+{71<;Qm&*`f8U_qwhco&a+sO}^L*m9p@E2h zsd2e~D7rK7MBxLiV_Mf{8<{mRavnFq2N%=7{C4B`JPX~Qz%S5)Z%XtlT)jbj!qUx< zFa9rG(>mMhxAgBQ&m^vS(!aG{7<}ix=dO9*JGJJGW6hKP9qYG#jbr_KgtpD=(PO=? zEk!sKJI#37gvsb%z|;er7t3{HwxB2h*zNr;oQZ4q8{Mk%vQ!@?b-Pj&uUDM z5HHzc7@OETX4U1_|C?z~u@=rTw$6mI;*{K%{Hj*mWjMa~WMY-XUd9qf>t;Ny;8yzv zE@lsozK?FycWT?8*oRa_o07@OrP@WELCGA+Cggy1t@S)~?@2|m680<3NHX$e^P$_= zGHa9_*W^4z`Z4lUD}6uhAC8}-=Il83!Ewem@AlwtMaEh(U-u_5ZwI;mHh4M}XLM-N zdJf&oeI|xKOV7Q-dwTXi57Cvn!kJruX3BK?8B5WRszGv~NmBV1-f7Do$ z6X;3@nZQKh9I+j z$ZkI}JQ-Qe*`3RVvX2oz$-m8skhe8WadoZ5{=14|SKu=!*?H$oY`z*3J^lvoo!vJz z+_@#>*pA8Aj(O*N{naIdzmxCVB$>7J@HTWzjp4m_Pqz4O@5@{F=LA}_U#YX|BUtb6 zAKaXL)ink4ocx`X4Y#!}#CPm_W#t;}ov267zX09+2e4NWXAI%XK#nX3?MaH%L1P`z zt3^BKC@0B2SJ&PDBDv~bd+i$I8Sy!Jx)!hXE}PG|=P~{=@K6dqN?5}dCQYpM}=e>;+Y~ccdDMa^Km;`B71POk;0Z)=kaSQ+9)8i$d>LeTBNm6O7SX_g9W7=IkEq z4`^>5a?oEpXK(J5o6GJ72Tz&)WdDK-n=a!1K67kW6*fgKJhl%%x$KaOkqsE)DHobU zlUwnV53jE7?&K^#Ju5%C-c>G4Cv}8#Ij2MUSX0px{t7=oMVnjICbs!jzwydS*5p+a z;h~Af2NPM3+OY3#HVxt==oi33yu`}I+zXxgN2TC*Pa!wg*A?JAaI~`J?n`XE}_pF#+gEX`%uRD)Twa}OTHdh|L%-4TVr#^DO*P8 z*S>6yNO_rY{v+eOuKH`;I=|K(r`{dPI1f!$zI}4dj-&Ak;tZn>QG0ys(p;;4-w%P? 
zN1Ljxdp)q1@rh&qL0=VJ@Z%EVQ!g1KXRL(Y%78<(BzzTslU(wuW}A;6R=)mu_+eC^ zncUaBE^;4A*&Et~46IdK?^$OrmVfoqKmjp~(ff!`-Q^GTRIxT~HP5;4Z@jn4G|GR% zI9C0ANPZ3dTmVdZSN_*~fNdZ8ll2bcUX09|jGSwvkI<^ZZq|kmJkS93VfGB-^X_c< zKU^Ld+jbwds$8D6{5}8j^;ae5^nT+|#-R6sC48tQKZh}eJ2Pe|rz+SP--zC(T-S_7 zaA2Lg?k8>oeTu%_zKc0aGH8s9dtunMc`5L2&QymtUCfmz+~x#>ewh2Fg`ej(XMtqabU$(aom-eay1Ei4_!VKwnJU zFEZR5_~EA)`Y zdS8(OucCE6CY2$gB24u zUe#jb(XHDGj`40-`Z@1J`6dQGLw+&k zfd=)Aaz{Bgvjcf>7yr4RAN*WlE;@D13V660+L(4rPIpjw5XjVEvJrWQ+&G~ zn^S9WJ9#|Xk6F*!(Yx###&|Vjoy*!ghx07$oKO#Y0T(it!i{9WPhUaTquu`p?f2jN z>EgvdYi`UJn2Y>{SLsjjaOG~fI)BK~nG322As`AudCA&8Kt=qTk z^4!r~ZRHv!MgFkj&hC9H&h2*Jo$9>%KJRW@aR>L#>8^A2DAp9|+YgfKmN*{ecnmE6 z9QxF_*drjkWA9Xq0e~6f}N*ZY)$}&z(AzyJMj7Qz1w(#w75p?-KG8Qw^NN6^Wa5eq~oJxQLHVu7N2 zAJrk?3MBXaU`&Kkxy)JdG96}`FbkLdPZQ-1B5MVyh) zN<6TQ>$&O4O~#N z%h)i)TDx@*owxkrhm^CIVd>ub|D{__|KG`dTVB=A>S$OoOE8T^k2+GD)EXb+$g-)N zH<^OI4easw;8x2ASLTSK46RilbfF!;ClG)Mc|5gLjMWETj9TZVui@i^}xHC z{cYi6Mda@o&EaF7m{p5gvL&C|CG@X7GSV-n@h!VOyA*mP_7ugJXQJPFkd1k_a<*A& zz_Qmn$ldS#8P7~+?}VNm!n5Q1p3UOf?iVyp&gDuo@>_f6M*g{{i2Wf~N7km|6Ga!n zXEV)5+l|1OYd-Mv+1L<@B|l$`9aL!N(CNILTJMCe4&o4+vvJIsd}+GA&O5#2ubwwajgB^K@ zJf91e&)K_bN@-aQ_RHPq+}AC*py~ItG20x`Wng0r1CC2*KQ4QUSm##L_~3(h!|D!m zLC?#hzO`l)`@wC=<}$}Saz+|3 zA4Q)C4F^ZyE0h13;3+Eq$RPB2^v?iu@UG^TYftbUf^N_D?4O-ym-4M~f0lm!)p>Wx zr|4(9^K2#Gq7lviHtd@a`1V0RS>P-Hujw2R;cdJ5)S6%M+m%soAyZsAb!-Udv!6v) z?SNlDjjTFKPHcy}jybZ*&L_6>+P7d6um(CgA9n8JuZpz~^jc)?d}QuCWN#TVxD;PN zNh>nh(goK~N^PD5uO>swrRmyp>g(!kS(jPKcM0>;NX_02i&C4t_^Vg^6udLv)P&9W zr1y5AOS$ho;Jo)6aHRLs%;eqfy{~dFjI8#VX-!YlZvXpVO1Qt7_oaV#SDK8+!??Vx zAxZdlO!(i<^;LZ;YYkyM8PJ>$+B+`!gBNOR@JMC-G2bT}IVHbYjtfj;ZFKx{98!O85 z*=ujbnvr4MH=k(PHR*<2@L407++K$6aZ7Hu^)7l?`x=8i3K>o9R~s?k1fyB7hLP)j zzP-TJ3A`C*a#M5#a22u65qpdigN&E6*Kjz#3D4s{4|`;%*y9SuEPM8_c#1VUf=wge zWMm?_C5wlrQF|}7jkv-yY9rAmP%gD=7ql07ELzyUf^+eZRq}mGS50CLBzJc)ZpDY? zN3&$AvrfA{vO`9nk4$H>Nw3mA7=33u-`W#?wsYA^RzI;Pr}XECL+9-}T(Y|3uzYH_ znf~2N;>AyQu{X1Zy_qkuH*+_8GjCs>vUcSV`8lCgWU1_!cY%8-xf+eUqnzYio|zh} z-77!ClKPcvrk1YW({x_z;d!fHJ3O_tgR`|#x=Jq%tr6Xqq^Q2jAZt&D_H)G0NxJcK zq!U-UVtM*n$*C%E(d7w5EsI{NtU&$qvsP_h-ntHbb^fNST34ah zu0yY#kA7T-{u?mY&BCYq=Bfjnndco!K4?l;A%2qwZVI$6`5bZSWbFY?>)MWNW-Wx? 
zwU1@sdUo0?p7T@1iBj8?2Ttsx;#K%krnB`aI=G1TF8gCo%bEDN((qb&2JXIH&8hkec}M2w8mYTyFsE8O zcfyC_8DlVKs{SK&$&?cWUuWUZteS!kAj#IQuEn$r(Coi^exN2bPdzk3?(tn*Ch+QGc}z_k*_)uTXm3InF!mqA>-5oO!JpE$%QJ5GQE;jFvG!K{ zjkel`r8EtlY`nQ{zxlx={5W0c-pV&9xjv`K7?!sC2y(#o$&YJ$m${hTV0_?3w+edF zcW2cbAG}CAD~~55j{>T|hPDIkk z8UD0la;@QGeDAyZ<>JpnxBcJF#TR{s2eH<}_vr95SdUT5$gKb-)|dj8KfL7w=48d=(B-v`Pi6l=4f_Ym(B%blHhTbu)8;;SQgVn|k$V&~ z_#68BK6KOdob9^~y>y#-ZO^Jvb!!wus4+)adkWll?HB^@1Fv$QEnOblLb9Z5uJMe2 zRiHbEF;p{#66u}55ke+O7U!T-$tH<%J+_^p|3CSFX#D-xFKYzIQmln5E;$ zmnS$9bsgngsGVqU#;NvqvTskdhi5tM=$_W(t>{vsrGM?U&iOly0g zEZ#C(=WF4U>|n2S2K{}=ScCW>sxAM%zvcz_yZWNy*mU^`x$XnkL45aaTf2IHpP#(# zXXUwOa+@Dqsohlx?TS94doYmJbF#!b?7t&`|a*2C1u3L7RUy>2>b1Kz_t}V zMLwvDnD4F3brrvVPoJyI;g+rT6n5@~)nD!YJFzf5t5}%cmCb$8sN|+xKKsyR3&ElA z=4Y(4%`YBK4*YC&3ZJgu?>oGNcvu0(xO%AZ<2^&EXE@s&ddP45th}09 zW~HXz8jm~9YQxx^YgYbTV@&h`#nH`0`=UqFSheOW&{`ESMGx|5A#oAumv{dJpA)$I zN3B`KN$7_u*eOG?SB9bI4~I`iz$YWo^ONuy4@M^&#C}`5CawHN+M^=6(%SF)?bmJ= zt!n>O6x(eAa_zto@)x<+F^e3c5%@c}z8+oppC*lKE-rp-r0yx__JCLPKfW!r0RIEF zt6~s}o3R!bsD4kDF`6v#Scs3n*1Ns{%+~|^b&TO!#xft9 zYaV)bl7$QT;k3_2G->AyXg@8!5a>dE9zc#ZeBOr3-gA@IHW`?dFY*XBv`=dpzURD& zs%2x>(kk$#cAlzH;ej8 z^}Oq!pG$v!t3TO>s<%TN@Gvw|kw+U&ZJ50|$Q;GKzX9R2S-dX{_lJNN!P zzU%a@X1Qw6V^T zhqqcfmE>U!@-VS4$wT$Go?HRxRC-=(-XVTR7Zb;2EjmV@T5pV3(yg4EsC1Qt@pm)+ zxHJBLbH<t0}-onLRo0?2QXJa_?~?j){R08K{81ra|-CmWIDj9KrXdsW03 zv&OrG+y>!nDtRb(Pr1d?F;=2u%tv>)fc>WH%yC`vk4sm`1@E1`@#*g}hEue`__R;8 z%pAsg(D$CbPjy^Z0G&fMXH=(1^xwhQwGT~e{k?6_H}EOv)0b*YJI)-+-=%Az(Z)25 zA@|yxrirQx&Rl8?jl_P?=VQJFg^}FW!dBLx-C6#XT^{BnXK8RtJNa@s_ns01AO zrrcVVN3MPfbd@8zVm>Op!@7tmS-STr8o@5JY2=I0NXS|;R#?-@Jo@SX5btQcKa^sy zSB3SO`C@GHXDsL(VMOq?$1Fb}^T##I$58@~(Nk*b$y-dKl2*|&Xg+pxFy)PxSmrQ$g_)(v8+S|_Dhdg(7KZF0Y1ixt) zxC+p&YBO|TQzo|mNilNyqTRi+IZ4bBIPN>cA+2r9Y&(9Y>wK&;9O{|BO6CpOKwL<^ zm$WtjcqRv0XvY@M#pZPPo~92g?AF+Wj7fQx{zlG>Dhn)aH?#B6bt0_|nOlNMzI@Ky z*c1G+T|+VWWnV&6-cy}@T38lOF?v+!|!fG(@p(<%K;G$r_QPQ*7!_YCIES?V*-vCa;8pvQ>4 zxLEZG>@mv6iqGR2>zSn!M(cc$TAtZ9&DeMXT`9Q4fq%)z;a?~kV2?*N`z@BRw~vIl1oP&@>`dJBoX??r(pWeLGKV7+$r*+bi5rU*)oJnX&YF+S}4_>lWc{8!;&h_N1}- z%17|lejI;qW2Xv#!E$0T$dT&L6~&T?Q)Ezs<%)&W30!QvskVy6&kpWYf562%HU67b z*LFK=b!~Z4WFm8>`Xj=zk9_QUa9pP|Hd5=ZLSMCNhGcvB-|*xt(N8CE`GG6?#h>~mhAxy zCl4B}C(w_efr8+L5dqRvVUO*+dtFFBSjwx`rP zx1fyLHEs>uJn2WAb6YyX-;~oTzfWcxI%fGr&yyd)m^3zxE86&fJFd1*Ij-#!Kijyv z*Pd))-YniV@N@5-%bC!~5+8b*&Q90>j^(pv&Df*=9momf)G0jpR{r1OA^0irP)J=E z!Tf3GceC&Se*~$Sas~4BHuTO3jBN{ZFF%Lc%7s?)pp}~k4Jr_wM^P#(fC9((jDy%~kBJwbmo{DeXrO&>E(D zCDdzh$D-@U^81Yk+R4EOFL*+SN6ZM*erskR;>S+YzK%o0E#@;W*{kvmwf2f>MdQw{iRpPw<|pZyF8<6(x_AUJ6daVb5qt@HVOVs!Cb;P zcA@_*Tr{fL%l+mbVSht2W1U=^G34U7_f~Q*$hz$_$25)P{@H5l#dnT;)Ah@pDE2f< zx5-Iy{ZHe%bF$B}W^&%Tbd3kkH)5gZ9GS+o&>H)<_T5LkNpFU}X7hdMYv^hG&qS|z zbeU=SPXb>X^L%zg-K_Xpa$*j83Tnq2#OmwHJ9iUPrY>}C3Nh+??y-FPxrIh^wbzIR zC$kO%t9(5kKIG(;f8oT5mRj~VJ_hZFmZ2*``=R@6II@+W_mKU2Jurp7;lQfjEA8JM z>~Rb|>O9Zhr_kf}Z}KWDLM!awRp;>gTlVj|&++@1{kwxb$J%SF^{|SwenV~cZv(wC z^pDQ){ku-Te`Eh%uiu-U>vQ@2C+E6;@3DVx<9Fj9^_zN>y8cW1ddJmV|CN2c zj^Cjj_I2YLe!uSgF6H+d&hIjQf8YMyF`wUmv43y-0>2N~zw2(~_q)#XxA6Or{X1K~ z|8D;d@cV(OM(l^RgRFVEjM^^L53|l=ERB8jR2-%LW;>3ewE;RiW8I{OFEY{|M3-7| z5%H391L#=k*4lZTT0Ewao+7*Tm(;v7Pgd>K?`VU!c@lHJ^*w5qASXNDlkbc@U+L@C zh%OHHoM?%1KRC%)dM~z!-dRZd(VeFC-YdK(8aRHXJs!o-^0D`$-z4WC#kQ08>%d~1?NvgM!uq4A7(=K|Kr2M2RDQUZ^e z@38OM!S}Ae_2JvR_S4}z=>qC-fRAT0Sy!p)(vD8<)>Z0Xit5O%2U?LXc}4;ON; zBWv4LRT=C{DMsg`mV)RihuTiaxAOkaPaNR+P2}0?o$yETO>^i!j69Hzwv{>_TmOgk z>|cQf6zke*?Z3BqV&wOr-5zSc6Gy8%Fr>Nu@DSpiX>C7Z>|y+}qWK_omjrV?&n;q} z58`w90rymY>O<)E--k=R{c7_Pn@xE&qS4 zgWw}XyXHu8Siv_=zGn=s`KVVJgyuCDKH7X0xE05pN4r`d#FNSw$z&}58+fT1057k9 
z7rcCA<3%_TZiE;74yW)^BK)|x;aL|qbGV+U_xUjpaVdItD{oig8UWr?CPyUtZxnKQ) zWJUFh_8!{<(l7C~SEm}WQv4KtXy_RIY7eCPFXY-SCr`AfzSpzRvgR=joZrfQ_+yXw z(B_Hswj{>>>|cso_cJegu5m9m>kDNCKJ0|()yT}NsDA-{dj^pg!85XJkHcU3ed6yQ zwYV_tK+o+DlVDkxGPiXxu=v;8uX! zf{$FE;uo0f;AG3s+IknXEIg|Q{X66h{Pbe!Ijr|XjLWs&t3NOElZ_slWn^{*z-55< zU0GTETA!@kFj=+y&b%fm*P(ti^GhwH&}ffhEu@Wb7DSTqrgGjrmvC-_W%sbhDzgs< zJDmAT9G4ed2rpgeDDD_{!wXtN=N5UaHFOHk7~~=U;s-YG;Kz*Z@4C~vr*VyVt*0eO z8{%)#OJGad?dv^8Z|KrWi+;lBK3em_Nc!EiXI$4*E)x z)wW{f)>{6C{a>)oec%4Ae&2-_w0^t!j`*F*uYs=CI_-SvJ^R_APCIiyu>bq`FMUWk zH>|q_T6-UuJgD`_BWG^=Jlej5wx6JFy&Hg@PVp7=du6imjN8wzyX|(~;#pglx6ctoXtt_kX{aS`duuo$eDY$I-hr_IC!`dd!)|n*vPZ7>~7& z*Qy2Z+k#5+HQYJ?s$;8KvU-0VbKLjb$7=?7s2NbV$k&`o9il4c3>#{xZ=x|;^C}qU zG9Kwd?znTOJ7b_0e--wE&e__IZ!__kY@VsoI7iyg?Cn8c;s5#}#*8fLW)=Pw z5jE4k&6&zpAB(-s7tmJNQ&-LzHcM5<^FIFT-og%@4~lILoEmfB0QE5V@1DWDbAO)? zVllsEGiUQ1L=TE~#5aba6X7e#fpLo8hVeW6ciVVTd{|&Cn4RvN<4g=?A$WCREqb)B z{4QWszk<(#85m0)7-u>#!tV=+zghI-{qK0iW58@suk#(`fyFn;cHgR7WzkZq{hwSF z>;IFP_Wx@B|Hq5OCzy}*N8%OLj3xI--&j(huZ8Fy(W%7|k7>>mjjW`Oi)h3huVAbJ zKH*aMeqowyz(VUh?*EUyH;<3HycYjIld#NWW8bS`5zRWNh!U+b0hjD*L~X5>%_SiV zg0?nd8<46AnHd_FQl;MlT5U3!8QNaB*VKM5SeJ%mCf1?W@9n+c62N6bGC@8nCIaU7 zKA$C-1W?;=@Av!9?~lAbna}b(=bYy}=Q-y*=Q&5?4fxyvp3A}094nFO&wq60IL$S2 zPd0GBinAlM-6G>PgYlZL)Q>V~@##Twc^G3X>-b96u}OTdjcwHTpQ8^5jGeqMd`cpjyiu`XFliqc(?O*u1iMNFPx>+KgT&sD|@HHzc$)EfsaYP4aZJP3!@94s&h1B z->wZ`B;PrjIWHae6!?^Yr{E%Z$~dQhYbm(a@GK5oZy|5PQ#|hwoS5?~;PsL(#%O1$ zlW$Vi2Ej3TDSU>U;Ow7%r5@r6?uW0)u1c{kykc1_{Q|GmGAS*`rd)9f{xf-f6L#?* zBnK?*&mPWgjtWR_~OjD7K8mFw)myAaR)g>#JBhG4CONZzgc2G z%=cn>&-9(WFcR3r7smByKKtPKvE+9+cJp&md;QS-n&-qf#$FYhM9-@E_>!8K zBmXk!8!M4~Kh&R*{LI6BV>Q@x;tR@&AzvtXTk(m7udq}}tckU1>UjBI=AFQ9!W9F} zG`|-68;8kxd<5G=d|Q_@Zk=t}&EdDQwY~S1yccGCI*$N5_oe=Twm*We(P{%0n}XjF zdxB^1z_@UmrI|g6DS{&>M)?+<2A`Vvro`7YlY703ke$$Bz}G~M$0IzSjh+;yAIY(w zN^buy`jzu868mV)$&brovXt)@{U#MyAs@m{-RHaZ7S=u=o6!{2?W0yY>`fQ zWs%6AZxR=f&pso2INSK^T;*ua<$8*Fl5Ck)CVMnOGr^?`yKpu8T&bn2U+y%XzY0HS z>!*cr#($VgOzk4e_tl1f`L zNrliy>@o3~cL1BnR_`;+6KIee4}P(R@9MRfDuJ)_7E5#g?Uv?Fct&D-4kMd|m%k3r zOoM0o;F-^`E2{_Q>XmWpsex`0Zk1UJM#bO$O`EWk0Z5 zri|~r0-DXuN$W&*Nxpb_H%INi>}hbKwr<)Pdmp>Uf;>73ev6QS9dV9_B`3`?c*McnN~PX;&Pe5QvA@p;#}mv&k=4EM z_D^G#iCf_TO%7RD)A3=XY(%aad1-UV4Q=M7mG|Y_{;2%4CFF<}86`44WlYKyLW>jl z7k)N-Nu&69lPwRZ4O_6ieysDGjHL};v%+gu_GcO<4c2J)(T)ZS?`Uvr7=)uHVGxe! 
z{u>gM1^4q^*zgfZe%!Oc*?^t)v5_bDt9bJzXf3?iAC)I}B)r*zF7Pz8su3EoM``9A z{j%JF6M?P9fKBp{M&}L`n4bEYFl{to+73+GSVgTFntsOqjmY{D?BDFvW8p%zG1p~n zjv2cDx|y-7O)~di^*n^_PoLV#8T63Is?IsG$D_+Dk-Hu6ezG#P>($lD(%ayb1@Mq- z7`=y#$KOHcg0s-M30VGL()pq?vB=QsUagKx7Pttn^lI{8x<}iS$XE2-yYnqo{aeuOkeNr3 z=d$)ojKuTb*VnVJ(gE;wjO!G6zEoKt@^0r+WM5P}i@#<&rY`XeI;n;~I>dbDz4`we z{IAI~Ltct*8-7C9FGWYTat{1+&UDLog;|?r%yN)f=9uLWlVOfoG@q2PhEHY=iJV$M z-~SALs)`y*$vq(J>Hm64pG#-sCy`lagDnNvL}vXnU|Jdl(^v7=#@qis{@Mv{vi{3i zmQzi-ec~vZcxT5h?2}C+B!@+P7~gaFDP-7I?!AUTRqkOcgx7PgS~+d%4&fs)^)q;O za+%)Fh7{UarEpf38b9R8%NrS2cMJQe3wY;W|8$q;>yh|F@t<`5(V^)h&!dlYE_3uP zQWuwfiBh{rn_0?rB?`JHESg*}KB>$2;Uqm00BPpCR^!-KX!w!kP!A?3P&&^n4@H<3o z)FxoQZ994d@JX!k*`5~}&n1=-pZnU6(IYzOQ)-Er>#FzyuV!oykULuD>gUPip25eO zNc*j`IG@K=X!j{=M-sZ}Ew?6Z>jH+dSxS9Zywdl3Xd*QI7Bm%Kn8dt~gO&%0Bb1za z)fVgOvI#$ZE(3hp#@9XP9EUD*m$Ei>Qtk9*E0ndh#G8GeJmx=NrPK?3r>;_tE5xL1 zTcy0cdDF5VEaSVK*Z}D8#6Yu7Z%Y0_3h>mtcK*u}4;#U*Y=~Db&zXr`O1p>I584QA z?@%1g`-a*3FLi&R`G><7ElU->b1`|y|N5il|I)eMh?zcVjP*%9c3<#4Ol(0vd#?Tc z@r{~Y|CFUFhqlA7AoIS1yj!N*J%@?Q5Z)6z>?rqh=O%4ahTA;c*LdkW=)Sx+o%Str z{o6X={UeO&-T3h~#NPFCw2z_+n8WdmnN92jg_@@5QNYkqJtZ}||JlTONdFP+WfKO0 z|Eu5#gWq(~71OhuJLl>8xU@B$n8r@lB`ubmXByn;7Wf05kMRuyzw|$AR?-0cmjS=@ zDKLb2)(LHd&M}OK@R0D5kGXV|Z@YNDIyGtAa$@evz%SpX^f}N~gmzloH#9m-4Bo?x zvDid0*m{E3B5EsPS1lF&UFs}hhq3=aoh9^qvF+-A!8vVU6d1Je<9khi=i5|Z72X}t z-T8k&cORu(5#3HT?Fc*?|n)%Rjx|U-#yktpM3o@(YKK4{^Sdta4Cl@_% z^#haOBDU8|d^G49TD^1GJ6Xj%iLM8A1UpdjQA!M5+{R0rB~C}NOzYF=0W4woLB89_ zd$N9+bQZhy!=6)H?*oV7?~1&CmHQpcZK(+(-^;z<%RTtUJOlbNzdvT&??_ZGr}leu z|1N{3*zjSo=?oiQ^r%0u9*WI8KXyv~BM8FMH0AD2fD z8aQh5C>{AqOnfJDXZ|JBkYFxfzCz2fq}6GfK9YK^{E2NTXJt4ecYHZ>^!J>Tsh+K5 zRdZ%B%-UKb@$Klz{jsrGy$55mK3{@tO5b<&K4L-1SJ&4PC-N#bs{`3~h`z=k-@@Bp z(`zJ^Ir?q{rkeic2h29L+DuXHo&1XSPEOU^3x7_IQrf$U_GHb`<}@;+USZzGF4pDf zBIM}k#aTaPJo=$0=V+@suAoNYs>SR-kS~xmAD{g0@Kwk)&H-uiROF}Qabg7|@0Zeb zggJUPc`AFm+lZr@LI3laM#y@bz}gQR?g8 z#hx?Hi!>SMMGnTqmCa)AA#=6!A{&hJBH6c+yO{GL8#pggg5Ocjipn7UHK@#ON`&E z_+7&9Ws0@z){ASVPs0D&85@&EOm5n3+^d!R?J+TBvfp3B+|~GQR{XY|v*Wjw@Y})v zv&kD13x7yEEiu-3f$JIVtoC4DB@-6etB`fFWt;MJG#rOnA66{Z<2Gj3Yn zv>lCW3-R=mp;0F_U5Ihea26Z|=R5;vp}|c4UkuJ~!q?JHOH53B4fuZV95{bjjPEAm zn)-43z)Z>nU8rM<>dS&c1@+=X@fvi9e zS-SD?9QNYKeGI(!L%$j|r@8j~*J}GY3bu-V9uZ$UaLY65yvEn4)@G>(TZ?S|sgY0j zAn_d?%)3%>jk8Q{%ga{YUY4yK7u_)LG&wGiLHUvDCj;Z->w=7#*b2vD<2}E=uDWr@ z$Uih(a$(Z8Sr;U28%GX3OM=pO%%#x&(uY~8`xmRfl(vPZuYJl! zzPhy2j~v)cKJ}wrkNlSXm9}_tZdF@sU8NgqpG)SHlocXYyQfC~G<7pjX>s|r`kJ+HdOzM80{*oHnFZ1s2|M2&EfBR&(EnZJt6mOU7gTot%dVuF9FYk@J}gYkpj*o;5IzBMStul za?Ok6!Q|@R`;SE0Rnc&P-^n#o@rKKVv;G&l1SHGkKRhW0y;u zKtE&o0QTe$WbfsnLA$Rc%8smNZ=;&Ji6zucL=Rt(!Z$04v0SCaLF+!zOT%Amm3Y`H z+M0>KwuCsF6l32rWpwWqXFDsDGg?`2J#9tnY0-VPpbLLp8*vO{Ai8h~do!Y!b)uuJ zUYs)EPmjZHTMZs1oMpB(V(TLx#Fu-I`;?aJCd~Ua#Id{rJQ@v&JKRSs0CKKb=p%M^ zPSqfN!ea;R;wSj#T<4txhbxV}iy1$(Z4>?GT(zwJ1`i4jDeMKv{;lAUwqz&{>EwSa z1^(37w7NPOcg}TLZOXV5_6kzQq}4UzlhX0i_kJ@C{7jryjKv0wjj5OO4WBPj^z#iL zPW*YtrkZc|AGBm0=Gl>h`Z*urt%Y+Xt_+)h^r*)0!R##j4-1}+;wMi!GSuL?<-&99 zud{|P-N-wKH|gy>;p^Xe1pROoakwJa4`b{0zeeqXs9gIxe@acGiNsA#@qB=N_aQ#a z89(daDsj5spS`&6FRT&IaMn#~;5HDOL`~c$5B~WJ&XYT;MvaTP#oX|?*i7b5AAU;3d z)!yO#4~+NU;eDagMZj<_er|vl9z4gsWX-UB{O=gNAiQky>hD~7ACf~r`Vv2boC7;N z=K97KlQYiU04zj_z0@HmDwR`U35nXWsYUKWHCkBl{$7;Pm?2 zkx#cFr*1`FEk$nKLSCt2Y7C6h@=6u){n_+6U5@EFvcl+}OLj5#PdOUjA0x1tHB=^f zeox+vv*~DO%5*(fk*53p($N@WiNSt|>3Uw)`IUOivc!8-J&WI*wWQ`kN26ZLU1a+N z&vxLffmcK?m-#I)oFSVxNgl8Sa7+Z(Byb)B?&JVlKaQO7BaD1`7X0aY?wg$Z2IaG? zgDQP=Ag4Rvr?vbq{MySrnt~twGh>eJ*fini9kClY)66`(3Lo|=OP1tf&jR+oGVy7X zS9=a|bioR3A7nb~?dO~`>b;6~UnBn}{lxGW%lp^#oZK4z&noS`#? 
z+V8yIoHs6+*xT+!<==ABM)y2RqqVBEH_Sdl1G;WFePpV%cW4Rcn`nnVoBOLRjbf{j zC!+BP_k~7gd}{Rt*fr$YVC`v^@BSP)No=_A(UdOnvy1~@!LbA1@o?Ot@j4eS@q)|C zWfu#~@|SuQya?Q#)Qj4&*l|j+?nvsOUQ`3}B%H3pAh73u4cKooV8>q2V1IEK?1$kQ za}G3qYj6&Tj&pkb&G5iY@WPGoL=n7E2>%z5%UaK?=H}b}P0HJ+*IVEba$tO%yfDS6 zNh`c5d?7g(ct@Krlxw+S)+6Nq?}*XP=+%(Ba1!%31sja|p=Z@FI!l0vVVc(g6b#kv2%{C;?$Zr@QMe& z1n^7*UuvbVC;#1R#MYDh?gY9-}+$6wN*95|COYSsOsQZY8rEl z*uO?%|I9eY24eq8SQkneH_<~5Eo6^v3ieBU5PpgC9s)nM>qPXje*}Jsy^DrFoAadTEV6H}`+>pf zlQ{PK!1+6$5CaV@I>G6xY0$&a^GeY3N)8fpgkQ9J6MGNDQW(5l{EB+!H{we*zX{t0 zzo|xtsooaTTtdu;HBMRZ9q=r{pN5|!tD2Y)&DS40vOe4&+j#OK{QWN|Rngzb`udQe zYfUwDt*J+p<6>t~H*-jw+@_xydkW}TDapjY663Rp_foFV|9_tIl_&9KeJ76E!gH`o z@coyvXPpW^nYD#yP+OR|x2$UR$4c-?=WsswA$&kK&b~(XzX3iQ-v272f1&HP&NFEg z{^1~vS|$(D=wG2pHNN@uVYK;2>5v^ohhElb;m0c4R_@+V?BLl+S@WpttJUvNVhJT1 z!QV*!M)5bAKP!JWpK{ZNtF0SG)>|9!FL>GGcZ{yLb|`(e_mtBolQ|ztJaI{^Bda9V zTIU4k1nOahp>Mp!k+oX(R2XBk|ME5|Skmgu_iVhU?Y;1x?7i$4J(qXFv6Ip? zUnlGDiA~6P=C9~=7f-;SNxjt;+4o^z_0SF3S*z>tSHd^_@J$DOYd%WGHvA9xDA#fB zUfrrZ{ZnwEMo8Zb=0kW2{v??b=!zzPOFW1emxue8-Wl%SExiAV zZIJi(iJ3BF9@m>~fxGao ztR*Jy9{x`ohJPn}y8DoMGKVA{NBI14+W2Sjxyj>=^bOt5;Bm=iV~&x`gX@6hU$HOu zV_%BR+`>L?i|pefOAkIeBI_XgxCgOY4`P3}V1HYY^A`x;7;<>N$YHVFi7hqvY^~Va z<{oggJ)MHReJ$U%82hvb*{6+ey9K)c`D@xI^9{PUU~eD9M%j<89Zm06?Cfu0r?$qV zYIbVAVW+l$OG{E}6dz93_;Ax^{X7ussCJg+HEc*ZQ!9G!3ti}l(5i&ZSVqe>n+B*gKq?S=!&o_ao7F_wxJfwZs1VYbXBOPjwj+T|4n*?ulLY_pT43 z-_7hDe@318eWiE&(`qMv37x7YlB4RstHxx-S?E;xe?X^>o-^sRVfmSKsx{;da-nbd zTKQ!)CeO%SmqK2)=WX?7EW)$EzFhoYvn>PnSoH9bNX^Q@oOR+0=sioVoPVUo#%$Wx z&X*{Le=2UmKptIexgmLU|JgM*Vu$s;eALjsCD(q=|D_rm+0S4juwL10|MzQb%%<

fW zPM0NLUSs3S_72T@T;g}LA=^_FTbq@AuA}VNtY*JvI(vk&cWF(@PK}@2wCy2k&2;mu zoM#2MzOQ!ws9>Ax^x~(QPZ!k9#y+W*y=V3Tmyt8R6Ps|SwjVo5+fTd`o@!)XG_c<( zdv~{86t`_Edv`IhliTu74feU7KC8*|E9Va5EYspW?1?UmWj_($l9lgr=QeCB>9=m1 z$$qWmRQVos2y zqra1Pt)7v5w;XzxK!1ysSb&m~=(L0D^x6FDqQl`&Y@STc6dSprQ^^h8I%;yfoEoP0}+CNm~`p5Z`W3}-&j=xzs9^2`g4#=4hbm?)6ES9x4;?7f^shLjx z?q-D?;ud0X7hZi*a^Ust!H)k+&m#w(C%>lX&2kQ$L!$77)WDeRiQ!#4@79iYG;g3k zu_d;%Cm_G^Exa9u4tWuWx&xlgGxqkaW2wVLUbRU7#wUB&$97qgdtD=AdfVB{-wChE zKK?#b7_u8Vi#M_PzoiR*fmXM%g&52Y4H&Oj{;%S~x@nb1vgx6eG$e?~0~>ARXd zQF<+mjYDc-nBSVY-)DZJ=b@JOXHoCr=jXH!^nAV|AIS&YM=l((BQKLZZs_`UDmcqL zO$X<4;u&(sCDaw-gYN;KW5g=>!PyVa62BlgS6dRgTIpeH#M|n35+$F9= zaF;piAZA8#u1mgXnVT;?r96$Ws+VWv4AA0eY(|Y#2j7UC@Gut9^=PD?Xc{$TiY!92 zB(0WAj)nLi{INMBIrqo-Y4QU85cwtd!>6WPPJQnu|CMvfB2pdI*T-v7aH!f+)h2x%~}nk&6Y9o1>_Sq`<5C;(*H~&-@d$`M=efiE4mg#@3Pk$ zBsOa39FNiRK*>C_G0!A7(eNDXGS4&tzUQ_imP$ zr&1Y9=pttmM21Dr`B_@t^ogFC%ms-Rs)5GxemcBaPTkgc+K{?GIna3weM#Nb4d5v> zlDe%ujA4HiojNQ<(5XhN+gd~2)`X~U&AP1{u!ZGJh|tLjoy7J%Q}!sHNzg*=zO*@pM|I6wE1k)czS_NxvXaV zq`FS%aTNI?d%gdi{SDuzOlBX~XZgu*jfNzv10#%eXEFXM4sn<}(@dMWe{` z4-ZJ6Gh=c;A3NUloQ3_Zh((FtWl0+NPUO0LyPUZt89t|bpEULyqW1LPc#K>v z&^($}B0qK@$Ci`FU(S<#Rc*yj4LXQ?NpreGn~VHvT46x-xO>#e)Jz0DvDgUCpZJWcMrJbDI$Y_Zh^!l73ugkAybO+S#_=s9&NoXy<)Y0p*J42D~CNmZXAhsg)$5Ls>>TtgP9Qxrix1)RqYXHvCop!N*+^Boh6Z)<#1o+j>s_fyQ2ii#5uaL^5l~$fgSfX%QFgxr-c463 zZ&yF29B-MYEDglnsqMeJ6lJWts_X|oTTT=1l2ZQw_1L=qfy4d*U9E9$BJ9N)Jg@Ql_0W>_G~MY8)xkea%e8f7! zTraLD_pLMMc$eK6A7GSN(`hp|6i=IFoi8w(Nf z)nV8HPo&d+yd{2fH9AEX@k}xX(x1emzPm1_el&D!`$JA|+39-^2&`2WW&Wp8u;!;K z3myd4p=qEvE@iEKTR&ekHEn_@#?Ix!7_vAO&dz*5+n{&9L({3kTAp5KO z2|0S#K)<)>f0sX~?sDq=NSk|rn>vi1z=O)};q9&vel+OZ1pH+Iui6FtUBIu&P};4a z-5~AyXxB%(Ce5W?p?lt9WzQCY8G1y+Y?(cjHa&%2wWtExqz}^OE#}dUJx%Jac3`gT z8KBLtokM9Ob6RjIzg^k$AbknU)VON(0dr=6`H~S-U70#;6^9+Y?o4N>D8r|^nFDV7 zu>oFx2bg7EmX+(gBIgnoGR{NW^QoE6pqhE(F%4IdSN;OzRhv)kDQs8$PD5T@n%(H?^U<>+tnM}BO3fN_eE}f518fu_)=weQzi2M668N?X!n18=dAKSdK}WhDIJ{Z zz{y<83Nu_GSEf&O+22wlb-HY*00z6u9kL&5Qr+$CYEOj@o6HGW*8~P_e%!6>x$rCK zHl+Qo9<}nC?8YGTOWIG0G^u`HK&@nqdMrt;Ykr&E`%#lC^y_lJdcQrOe&p>|g_lj- z<@!DHJ5t}(6Tbq!bK{pHC-s-%SO9)b@NSCNb+pHlYMFA!PoD%d!XM$bd5-pS54f~uKJMCTEDX$ z&1>wg;$uOdwnjzw_YC+_i_)|C+d6L{d%k|Z(A&f}6M82?@1Q@R7D8{W-|>#VdGOj> z+?Tns1$t|8WZmN4U&EXCS9;Zt;LTq{2N?$wt^o9x|3BqinIR+h{1SR}efH~6*ZO^G zw2ai&O!&o=ap~x@lJ88D<xRq0@*9=i`NR@w`UNpp(c{-u=vDJ8kWo#B zjG}#?Q)tHA=x$Sk8HOC7eSfB_*qym+-FvdmJU&!Dy-L37Xt*npeLeYu^V?tK*5!Fy zQ0;N`sB0EyHwy0`%SWC&{pvh$d<$J?Ow5?p`yX{=)f3AxHfG!qdA z@q!O94DZ+N^{Pol^joge%MLDnC$Mz2Yr2uy?-=HRwvJS`tMj@C`~A&X`jtL2l9WBE zq92jdV(%UFn>vh+LG&7Qhq{PCN93jp9?PWf%mVaSa5H7S#yb;;L!`aE-ycfH6?49v zj+KnhE@&xh(=q6~%V~^%adz)berM<>cCUJIFsT08sml~YV#|U-MfV~V-0QW z@&(klcwczboJVrqW6bZkE0sM51fF|`!n1SOTH2JbORei^Q``K5GQ0wqB87hBce|Q7 z#*pDF9a(P0& zm31=6xN11Vvs>tQIG)0@njDK&_B;!oO)vLfCS$(hWpu(X>09hAAN`B25CJA1FbV%u zFwRAcv)k@f?ToYEsq?vst6Y!HQ+EG#HfPPS507FWF6xh6n*y$)uN;D|o`5Ul@%vN{ z{F+9)CLCRb?od}cI4L{9j)?ge4Jf8=HO)XkYeb+i4LO6|i|o_TVxziMf# zb>)(*Z=+-0ijE;Uiwd?ldh^m9S%niERfYfa{(mv$>T$-^gkN;7uUzX{8#Fqu%! 
zVy$N!D-)1O6+_qhcZ{|E=mpd|2c9qMW5`-tH@M~%+R^p=Ue+Dv2K*y@Vvf@!jV~Ch zS<0RXM*HW|!M;kDsAOD<7%xqiSnQ~}-|5zL;b{HgR{GQQhvY$Bt?M0laWlTD}j)-CaSoJ<>HGyT{nkH{nB><6D@Qn(U_kn=9RC$Zk!B&QNx@iLOtdnr$7Wclz4V z69dfS4BfBbf6ahC=e{v_eLOo4Bs4AK2{Jyf$X%5}P$GF7h5iRfd)boJr5xiY}5 zJD>(5I&Pu^SNffyo^+qu4t_m_M&H;mH&!?`8DipbD|{&b=Z{l%oAV&wmItUucb|Me#xA+itGT#S4Ac>$Lf-NYQ8_9if>40DYCK~yD}r-RlQEuhhtp>z6lTW zT6{!)XlL5U)RgsLi+YX{<9n+)HrVqrHsZG_OV|>MKh~r+CA_7UdAn6$)BHA-US}xD zjXo5C4=coe)@8g$QTA*;U2(v3N#cCbufC1_soBbc2k^JN%XwPP|8L|fGO3jIZM2`u zys3q^$j{TY9Dms)bh1D2P0h%nCu=PoA-OL(%BLZR~E(H*1ZyyU=;-&xSWl>Kos-;nz45i27_v6dbgU3j9!hc+Q!lkH^ zFU{`t!ea$~uUhVJ$3Lh0 zNu=Lg)*QK}#4CFQ$J^?PAN70YX@ktN3Lg_AS zL3}n%1^OD#cb+u-HIrCpgjY(Sle9UDYb~-S7dTR#0kxBMR@)to0(%7zklo{Ubx75;8sUFj_LxV`Hbr@YnU9a&w8 zn%$bm@3b27@um>dt-+ZXJAV&6B=sYbM}A+GoX;As)Aokh>>`*q`>;lGoVAOcM*wLF&;0~2{H>utJ zfSOUR`!0nincsU-PygtE^=f6j?dtP2xt4`o+e06%w3lO9`(*yrH)`~&kud?cY_9A9 z4AQTSHWNj+N@y(yx5Qoe)#=L@!1v_W*CL*Ia_OrI_ys?cZ(F2I!*4rCtDw3cxhpci zmT`94@!NuvCiBPi4ESwL{ae~jMb_KcA6x)W$-ckPMr#`#apQ1!%Yte~FrveIsrbt3 zLKT@leCq)<64dRdXn4N|ys@+?uuJ>N_$MWAQ32~<4tin@>!8M8f(tw=b6WP}B8){L za7UVgYQ$yOxA3gr?JD+{``5`jrLFDIC1Kk22k0{{i$0#Azg*&o+waJ3_A;lObK9po zE!2QU&yE;+c8>5_5k6S11N~q>M8K=^8i(csYG=&t-RSQw^!M&A!w2N({V2l~`iT$U z13Yq5Mnv;H0E@?m9#aS`&jQ2uz;l;DM=Lg)*b*Xp2l|R{R||Uv`%3cTyK}Q<9&1zm z3CHxlkiBEwuF%EM;9_LXN638f8QhO;u@;(Nf=+zg7gR5)=vFVeUa1#5T;S-z4wh?V zoU(`gC4DRf-$!+Q?p!dqdt9N)Zv2OlHr4LeeO~jNuF$-oSDohzs5ka>srO%>-6;Cw zuQQ#YKSjLiPcnn*pCaAr#W7=t^qq(u{;1J+j*KgOZ{fOzas7yKy$PH?>cV%EVc-Pc zUsCQ09WV2$^_c;6ZDo%t_<*lx9{UNx^TH#XiPziWT0vVq5&RWCe5{r2YEq`*W2LQm ziSE#`c%OQVvD<=t5PMG>BihPDmgWK5@c9`&jQ+71S-^E*F4YCp#(=Ir$z00tx>|Fxl?${Z1&sXqV4cp5LY z>=ylhA%1$nl|6rLZXXi90nZ?K7J_FYc$z%g10NLmy{g-fZPAVnJeRl>=EWNJW$tHR zM)<|#%?j2%(LJK^mi4(C|JFg)hT&WO!T;wrh_m%Xn!#`AxE*?eiAax?8 z4>?P+_d;FQ1)jLQh7O>$2|pCU4?Xz-wI^bX;cV=5=B_zzdyyHLw9{;PcKQN?7sA3< z$iAIi2jq4cI!ck)c8k z&WenXakf%#QTlmxy0X+HXS3>(^;NfHN@Gf|Gi~ZP8 z9E|ly@VL>SnI>0P>T-2}X3U|Ciow2Ic2_78gl4pvi4CIB3t9K2^fLOIgq?Mn(N_a| zV;LjXBHmrm1m}5f>n&w9& z*Uvy7T>;e_5MRB{13uP?O!&8=2S2RcpdEc^`#kX8yg*Q0;~m_alCdc;@R5GyuOVROz!WS=rNX&@n7}8gp|e*GgpS8f3~mCvZpb3Bn`7w`-V> zYc+W?u9e@O9%|N`^Dsr`A>WIxdB*nk{Aa{qW_*5!O}sa`+y>0N*eV zo-p7^VUJ1d{zKGA9gu^-Bfde??ss+*zk>Y8a4|o+24fQ1u^rv^U1}1%;)X|&&8?OA z0p4Pb5|~W?hFqyzv`2W=R4fu2G`zQeyg*k`Ow z+MdD|yEC+f`LPCC&nxfI{ZhnWt@S&LZ+83F$@(w6(yr+|c|eqSw&Z ztMFhYeB0yos_njj=4V@Tz21-T^BTWb-NOAf&K`9h`pa7SnHg~v@2l{yyCg<2`Wfd* zI)VKo^Mkcg_Vl$rgm+j2U&XdD`ErdL+2L&0_);4?bdIylJAuE*SX(}YC-JN85!*y~ zbdvBtzhzC9ero;;x?ziCa~-1pb_K8n44+i7qqoQF)O8-2_s~toOx6{XH}*)oQRCEv z529{1{+jEuv)G?oD|`FWUy9IP6t77R}Xz5P!sYURG-6`_`zF6t# zodxe6V!b<55m0A2Wo}zE*-{2R+ZmrK#z)$=kz=|Nykx)NICj6*pXf9E7JbI_9i^8C zG~ZDEEs#KW6qXa+BXHxQa|FPuLPlz`v6L3>gt4 z&gxRen>F5@=~H_G#0@dtG8Xd;9P8jMS$joqGjX-j-Z9Q#e%&^Eg8KLh>~+v=Z<_9_ z5xTX=+Q9FPTs6J`#w1|0!}~ox;Ys*nyrXKC<`30*fp`v0?kI<(5vorx2rqB&*Z6Y=7!)U^F!vvVS{cZGH(2y#dS!#Ui9o9119FlPPaRBDAJ^E zw7;cVkh5mH1p#HZ&ax@B0x&8P}$N1GE^oUA#k7`$vT35jP zOYE-DYZ0${1eyKyZQpU#@&;% z8#e*RwZQyizhAw!J*d7{*Q5Rt815ln;3aJIm$1>_96PrDMP-x*SEVvSgUe|d>FG{T zb|(m(E0yE#8L)LCGu(`U(96m>9g>i3fMqoAUgY$vzhd3F zsGvu^A$D}@!|aEgWX-!Hc1G(eWu)f~Wwb}GtCZ=U7olOb!rG%eJNJ=kRQvt&g6>#xw-_WS^P!QU_awhD1Cfa-)9wCXmX0* zBB!FqECBxInBfx%xfwGzIJJA>DaLG->gfGtyDM}zxLyz$$|FKkp{LUlJHRIy{AR7v z=0G$|DZ(?1lkiNXG5-ZWx361WHQv#v@kfR;w8`#MUyB6PBiJp%8+XII?=vs%VP0J4 zZ&&Ye_NYH*T<*bl{5W&|rAn{*cu!EhzOqLZ9%%=bP0(DftR1@rMmhf?JS_FfB%WOU zA5ODWVyne>F8e40^y0VB>kN1&>+o)ZZ#SV2y#~L$R?)2r58V@T^u7eW?uA|#La#R~ z2H}#tQtj}qzfsvOb<^zpNnefRhnIXH@_&uUC2XH;t|p(pfINtGM+W$GZiO>6*X38= zM;CfO7}5CjUg-2k;JKmFuUu2M#Mjy$Uny06!3lYVT>jXUX6;zZsz 
z=e4Y@{5}-b_YubEHO9!CBiGrTp~o_P>UFyU>SN4dnIqEoMU2r!z;sb&Kz$-JqV;_r zaU^%_au(lF>0kGuGJVLL*tA23XRbkmTKZfl@Z>4SkCIiU@HeW~fd;lLlZuhBg!iOKghi_)gtk>EF&(Fd0VR-Qlf4llHyeNG<3@>hj z7hi!Fx510wffq#v{FuH!VD8KHf;se!@9;f*hd)-5;y!NIRpkNo74Azvg4>()Gis*Z#|QNBrZOh( zAbp5Ebsu`c!}K$qex74|eplFS!0qTgS?3B}<@KrGaRz8pZx5V2zdBEE>qFXlUl|#< zm$rl_@1dQMwDVXbsNR#&t-;Il)+f+$p1=kejjs10GF+2~(CZbiD>TFHQ?GKitFOQ> z0`GeG;+qBT;#0`>^|9Y*eG&L?ApewH-=C)U`#$}?NXi=NcUDxtZ_w`>@V3xN>lc1} z!tPV=>uytj#TX9sTj&hEQs7rlvi|%hbk+Jr*ZVp4=kPwSn>^U(PQ?pjTg``CjXjeP;kt=#j>VsH2Z*7qUb_}bOST(Z`RymIt@KrH%8(BkpL zcJ=WhT`tJC+2hfdiU-$&2H|^t3*VdlzaMN@Uo7a+cvR-x^>$b2ajzHtghzuBo$s+n zo-1+}f9~>W^9^~Z(eyFrAb!s)F>%WbzE~NB-*xtY`bMN%T`aH`J9;k+u1L0_%+xZR=a7@s$oOYheivPyOI zK9T7PJ>mDNH}L)wjHk#dX@4;=$B@HPWR&K+d|a1D1MT|*>L0wiJd*aW^E*RtRCv`F z8UMNd9!(xy=yZiH40_cIeF61xc+On2|A?;hN8WwH9#lVsW*Vd0CTPY8cV|FUN5 za7_15=V3Q7f0qsP-QKRwc1F-!9F33C_b=)Dy?np=%RoT=C1a!MEzHq3z0T0};Pz%k zKz*s)q}>Ru-P!nsWY5vmSIBdz?LqG3YWB0z-L5X~8SLlv2>oE|zwT>Se+jLmpT&x_ zr|Y(|u8k7eY4DJyuNi%aZ&P&MKd~16WHx&N*Jo#O-e~Oyqqk3&by0F#%6NXnc#en} z5qHzv+ymR;j}xPnzE_z8CjUK_6lDD!obMMokx$rgAA;jW&_rk{^Y@;c2YKyxiW0Ys zd`H3`%i#|N{@75|uFlNR*D}r`wLS(PybXomrvti1hKk%Ncr+b^5W)p!yg%$o#pN zxpD)s_)lQF3-6h2EBS+MkA%PXiYyq`wu3fgt=LD~_rl-T+kNW2PV_@(w>}Ty?`^>U zN;`5dg8X63K2&DV$0*<*?diB-un$Fv(csUf4|6Sesgkzfr~4}PwctZ!w5+YaDrnbe zeIK-5zu1xWh#|{#`9H&R;dOeuJFdL#^A8zwf!l2JO?yDS-X2lcuX8ls8#}7?D#q_B z=E6zz+E=_0bsPNu2!6kR_qsyoyM5|4;PdbB?$2Y-XMLH$`tl9Wg^QKl*JsAx@;l~4 zMZ%U)BIB9a&3+xQO1l-YiLI}6)2`ExtZ&os*4kw}f5BY&g7xf2?tuF5nGvmBpJfJn zOyBSX7ZJl2)n;KvPz`2w543p|{_&IW+Ep%KmA)>TT0hV({PPR=d+pIME|Mu85&ue8QP_P2b|4tZnPTX$Cm0?`F(gJ?bM5I~t{*-*f-x ze!u!nXHY$bKJZ_xSwCWJ>kPU=`{_HkvRzG$MAZGUw$>lv_jXZr_PU^%+pl%YZkFz^C~=P z!kp_5sI{4cFi+X#3~dGGhoJwvjHT9g2$-SsdN(|U9z3H+*HNPTNhNMvWSHL1IISNU z&msNzd(^exbu^|zo15F&BV?cHk}>V-Z&)9W<0E{C@qEtf3_a)dqt~^oS2CWT#h}}* zbY%S}^q2J^)iTa=JZ8E!p7MX`t;+5{MvcLKcrceaX7+PQfiqO^^Qy<0W54l6^nRFQ zzw^36)7?JxO8WU-d5_kQv}fYfSUiZ&^h{^y;hrY-uaThoTv9~6+49hnb1boO)y5iL z$~v2BncjMPk1O;U?_E*eu08||Zz@w-A3_g48E_SU79+5a8ff1}`*)+O4F6W>K85`m z`Br#VzS&=-?ABx){4X+2bb&YITcIWW-a)^g)9=H4`_~E`mxqymcOYX-e0a}OTcGSV zae1x?`UZPc>OBp#a}{`Qi!`BwwyW!bUtnGj{HK6_IPa(C8Soo0nBU60odNthzWTh| zPyDzBFKf2Idjv2^wYq1TooiCxjoM)l5x=QLXxUKCsTm}Mkwj`5U6wKvxi zudQpnoMGP>-CjO>fV8)(yGw5mpWFvQiM15C#(4H;55gpSEaHQ_5ZDgFi(z6VW*Kc= zRPG9Wi2wBoWY2w-tbH?QQVU~rSI0HV($}GBDzJC*Tw>5lp~2nIrWV><>v!vZhQfgQ zV_&!W9(qoyWrSxxa@+w;qrcma>{*WAz{YnD*5H|iUUh>npmtz;Y+zl!8=3SW^Y+KA z?Wq>UBkj~)rR*LhV+#Fd0z*fWSKX1=u5N&57Ev#!BMV*Z{3zcp_{i@SI$q5eAlK2u z*3nkp-IqFE{uR9WE5C0YedKX3&6cHHX133}q4(Onnr(T3GeY|#x@KObEPc%AUwh7) zc7Y3>u0Y0bMC;wqd#=+3%ra&pB*tv_k^#K^THMusm+mhq1@AcpZq5I&2|CX?7Sa7B z%$XP6ZlewI2Gfqs@E>ZiRMT1O41DH*&yjdP_{8R|B0lszm$MlEAGpM(k$dDa(ZR!) 
zLIeD-JGlzo(!lE|d%QkrM|~{dpCUQ zlrcaS++%lz-VgZHxdr$*eBG)(9unU)!jo#)r}|oxsLe~!v&=R#Y12iU5$rOF17c4_ zJGXl`JbJC)uiou!S1%}(Ju>=2S9r|lG{#EeIA|aLxy`Ugqu0%7xQFzSC~(sU>ttoO zE0lx{ZU=7htI3{@9v716DV;D#3sat$w9A0T-A#T~&b@X6^CDvKI@muoX~+E0@HFRv zmpw11SNJ9&kMrv<`|#5eyRr^A^0=3lJ6^dg0eMho@Oqiu8EW$q4_C&S8GAQ8LyRwD zRZ;F@k0TKnoKqp#sY*Cbac)rqW#v=f)fe#Yg-xpnAVha4?)bGEg-Th-z;nDdp) zc^&U~Xvz6>>~hVoMq6^GIa}I{aa7d-GyO_jbXGZfH2W679-oFEYC#Ql(lqwDQm~C= zUqpO$VpB;wxt0A(-6xIM8fo(y>{EjWo7#a1JoDY`|ICxQ0iNrK`wO~4-RL!PJ}<$q z@5usrQqkQm*7o4f_Q?-Y30XOQ_NIHXluZ$Wk*pT?eD&ZOZ z`!N5jkD49=3uBZ+P7U$pTDW$S+b=bHNpI>i_Z(R5C2vE9S3TMmRF7t|Uk_ac$2J$S z!a<+fgw0Y0e%c-)^C?p13i&da%RNE0&@KC0gJ-6ffpZo(TfzBFVy$Z5`QCxiz$x&U z`*pSp`I6EI_|zxrtZ5I?|dC%S{-OA){C z`{`cwo=R-x0>f5@=bwOoqx~nQUqI**Lq8I$Cc1;H_p(mENUYX{$mB7=yZ?(v4%qJc z#(d&U#|w;40pGLW@QmzVkL};{L~s8T+vyuM`7Lo}&wr|a8{OWMMticKID$5%ZON4^ zSC{eZ1i#I7K0AAKtT`Ksb1c@)#O)vNLk|(y?~dw2U|CBa!}H-x8pw$4d2I+V<&g6?UwCD)YK-9MPS-EX6U|nAS0+I?9ydeIWQ&e zIx|jM+r!4kBrv6MUaPavh&g9Xk3Ji)*^a!l`_$FQjkI$8Yyj(8b2+gm1^9-e<)jV2 zQYr9vdN~u|)9pm_jDd{VYCm$9`>UDnn%t$G4A$8|#n5)do-y&PyPUJ}H!H_$s3lka zfzA_|z5PRSqeSoD7ud-q5;1Tu)^lb=fG5(1u2QJ$5xc?pR&XBZBaJ?CmE#S_(u~93 z*3RD-o;r6Q8H`!C&xnO$jn8CHU*zUe+7-ICP1nyxnz0-Cj7J-MXvP>7*tdkb;gJki zyXq}4;>>?5c?tBr$s%xYSLpF7(w@X#raP3~4d*7-U6=r`m&a|0kKBQxuvck zv7*2^2)`@jgr?o_is({a)=J3<26HB( zg7)wsc)dn@*ouBHx}Mvo#ZdUm2kG%=+FcJF%=YsA0kzE=MPI)W#}ChD_;vb@xi4!uB&`kK0)<+bg_UY@p=j&|-og9gGPUJiL}9#vs9SnQ`W@%Svn#_%xzJrJ_R# zAIUuWMifuo8UrxbWZ{+Aenr|B=1 zxhMQQJ9>W5XB+e1oNrD${9^a1> z)1t;i@?e!^MDb&U*ez_T1V3YfZ$aZ9X&;&m%s=Lg@S~Q8sep5%*h0~LJgtorvWjy9 zf}ij~{MD?va-PI8Vr@76(M6|;lY+L}?`6#r8V6jVhMp!hrMz8jh@hjRtD^6;9_@C8 zHuw0{qg||xt{!zW>$jXAm$Ug1ANjhH7(=&V6M}Cg_zG-tj&C!(E!SjZnVe}VrIxJ< z?$UP+e6#&h-9C!T&h3?RQ%#xhOA%-265mq0z@71G#a?R87~?MXf}=i;@dNGUxmF#> zrA>*&P5B|ZxZp&r?e=xx#MMK4n`tkN_Kwn?seh-BAqQDG=kd$hHQH-@2~IBFFN6lJ z4E6);hAqwBai!f=+!NGgshppx124HAoucgd5q%3U3GWDiJ&VQP|?cs<`s;udlh6!_I3 zc)8#iEhe7)CtHwBYn;Sl7Z7Xe)XxyCxpZ;wPr&ykJLhlgLG`1ceg?^eTjC&f*b_X& z?(BY$JSYx<{mG%Q?}VNu_%>3Uezn0HR2u?=@}`b_TIt-cD?)}A>g%kRci|o4htJF{ z5efKIUr(D_VLztBP7K&vE_d;~gk9QvGGQ}mTEd>@)6kSnz=^5ME3pBjKA}0cw&O#R zy&mM5mJ4dd-yqCwlv~JpwSkwqt=E~aCadp$5 zX6-k5NqE~S`~)qs!QWgf(w+D>iUwsEFb=SADvi>a@XO?{2mv^=R_ zWdm*M>fbsOUxW?YC}oB1mgKd{+QZj zYil@f;=#VjvpAY>MfZ+-`rG%5ZPFBFn{;A#ihZJ;#q6pJrB?XW&N}wY3Ph%Gh7>(K ztpL9wdedgc|7eByL-iaHHTbP2(Vxgo>9<+NI&1Ry)U}ImUSt`ooppEAK3cq7*|Bc8 zeYQPKIsQC0*7nKD@joEnQu)?Wq1?QqIJ<94#i^|mCOZ1&6RRqDFyHu*^7J8cJ6_H^ zCY-rmKYL$6{H{hW3-sC6NFB08hphp>D(^OTV$XpS{A9}`FKX7&c#%JG@RBW!{l&}J zr9Qp{ILvo#nQ~s2ck#Kd?$O%Q-c7;hD)wW|rOM68xzM(+f2plj*SXXB`p2f1mMqM( z#go%J`7ehS-FvanSzs4C^(x@+7^5sWo1JPQ_hke6n3X)>F&ksGy3O~3|8RO8nqQ*X zl{RQ_=nDORv;8@=PyXV|vgem*<7?hOkNa{)_GbC!@{%DiP3GB5YJAJwq=u9yk9Wvf z8A@UN!;F6l<8Piz?}Yzrk@e;tS}uIC8r>ux`71f>>fnnm_+s4GXrnQ}*;U3&YeU8E zEF{-S5dV%3UD!4BYFcF9i{@duktaDDi*r;Xe zvU@Xtp&J;yzz_fiQwPdroq!hx)=-%%*cxjG{bobw^2Uqr)yBxA|Ns0UwL?DY*Z5}m zcxERLefPJ#D`O?G>t}g)$hiJW+CAOBpd6n{3VC0d``UN=Vu$8k9-d?N{-ga{75piO znM-%Omoy4L3;&rpW`8i_6uF!S^UX%*a30>)U9`1>@nL_cuO`8f)u7DeIEDCO2EUhn z@DKGZH~LvV8LK=k^}m+KE0=Zdb!4TSr%X6HLGp$xYjb~7``n^L{9LRNoz%ILdtts! 
z`AyApHeg@0hy5B^KSqMbU6!??JA_|T>L-y?S?W~^eEV*?{mzbL<#Y+YwF%=KeT)r# zQ*VxT-s5jEeG|v}t?EhU&+!YUIX)*=WvRm2A49Ha+o4hQaaaE_ zYZ+r2zw zjZ@JlR$oJIcWNHuTP@4xcUM0CUl)5f@U!em?qn=vEtvq030&p=m+Jg6^qa5Z ziFc^Ov11rdH2mtDjUqGfTh#A3qP$%ROgSIlr|BhO9b3{NRlyI)Y=Zs4!u z1;+Y8a@t}@*CPMO|DYb zmdtLMZoO;Tcx&c}@gtsfJiNjfcbmZ%XB#v2GW0R--|F*D&v6h@I9n@W% zCw#)a6z=Un2Be>3?7xa`PYB)6U8CpHaJtQeZq|gAV@1w<8Qng9U?|;2or7+b(CvXd zNAoAQQ0D~vQs8yLr9@$@hY@`mm03*uc4m-=7E{Z~Ah1Q3Q_r$n{eK z58JMz76fCojPXG?S-Obvkvw*}v-VF=KK9sE8fd>d;){!$-leJnM=S+|g@B5ReIi~HDT+`aLO0*>+Tyb~%l8?zY{IdNv#i+0y$?`#=Az}KL*Qg6Bg>EZzNZ5 z&3ALMa>gl_eFEGyqbAl-Uxzv$lRdkul&8fG5xe6({-++#T6jgn-?Bof|HDewFzf)~ zu?Ffgb%Lkhn+9*pr2dz(@fY8W+nC%dwu11KoQIM#0}^jB_1~%CEOg`>g?GhAkOKW? zLcewR-JeDu{weyfwF2FrIvzEDdp)ZKxw0L7csq4Gwo_|%J9TqwsGEzuvS0&i)lc~b zer;~yyB2)kX1(2pQD?8WOJ0Qq*?d=H)STT;&D`jAOA?jKo-u0XPBv3=J5pD=I@gKHREQ{sBB=m*z+aP0@z zesB$gYYw&3@8X*l1LlLn>ijmK@4ail99^fpL&j}bo%AdNW~uQj<1Kc}&&p3xn@q2- zl@`S(J7+0N&u3iz4f^(1Te3FV9et0YyZ17Oskc_&vB^?@q}ozH{q--lhO4u)8i)Z3 zZxa1X+M+%e25V}*skF)QN!UraKJ8^7&d6s z4~?Q}DD9s(2aQ^W(deTe=;PY2wXfAIF20fWHCchqJ!0Bz4=dkLmj0*ox9rud&N;J8mwSvQt=Xf8bh|ocQq6Pd!}W|osF*%Gmt|*V zh>c5o+r^&cSq?U4JoapLu4U~;^u4LdhVe0DlySD)l?$hiR>m#b$$X|BklBudcEYqn zy_^Y0FA{rASu1UH#xC?&EH>(>S?Xi3X(eZP7tf`i66`8u*W1Y>@wo!)Ea0ETbJ1Hm zd0)<#Tlww6*O-lrbLFNjETgvAQRH+r_Mq5q-OwRo!0n~IEtdGY2ypNH|EPQS_^9eT z|NopBE@viy1PFnECV`@vU|TPc5Npi@yd=RJ7c1IU67Vu1+KTl8R!t(coe;H+qUl0+ z3EIodSn9R~HEp*9ZM#9XwqRF#+a-YAPKa6&(F_9feZJ0_Lna}?Y=8Ux?eF);JZ9#c z&-MLzf8O`c`?JzqS&hv71^|-!dJCOg&#UNbTO()vc-HM56YF9x2?hr~d2#ej z;x)s}L2z~9P(!^?RxUjbM zCSxTwfgIt!?l63V^7m{%-XdGC>H_!O*b{b-`&qI@Fy8(^)sOXj;&I&#=i$P`|Ek%N!eUue0PvBKET)`Yd-7P56F0%U-zxs&^~qjzBtR3FLHWX*NpFxd=c?Zn6qWNch}>{7e8Jz z!5)8S?rEQE^=2L2bDvr;i#6JxJi#=4G_e9?fs`Ax|e%Z=s< z=&&JesbG&gZ;$1vPaz~r@`ku&HD{Q&LGXvVUW^qr{d(8`4Z^<2b$Q^++ zxg)|m$sOvqbjRk+)2Cvx$uh|B#*#@tP zz-tQNkKD01VGDIy`tf4vPXqDdFn17SQzr+Up29e<=l5XzRdjf$bpBCrOZ+vR&cC(d zXp8b{A&NX6rImb@$V1d-=e`Cp6ovS`|ecx+iW_YW%H4Lr~OXZ6Csbn z;a^jxl{Ij2dI$d_Yp%C(qA{x%r^_ZAi@~ktQ{7kU-tnqE`t!ThjmY}#>^AqJ2VFnXSfFPGN0WVUayj<^impT%d#O3OJA~iJqQ6{z`}kdA(+}q^Tg~$; zsd-K|CpZ0qwx>}y&Nu_iIoF)no!9)CBXP#j$-dYN4OE?bsH1uP8|DAtv391vyIt?7 z&;7P`dlBT=F#gf4?1RXNtfp*Rj?IpFSf}@GIW{|X9>2Gx>KT$ljT^^@~l2`?9D$-wm55deNTTluVl4HH^uL3(evGHvBUBZk^`nZ&pfo z+Y9ZV%RA{~5%jkX^f&$A2Y-pA^tcFmTx+@>=M}uOY@M$)rNgb6>gaHhlXSSO*dJ@f zqnvwJH;pvb6*JFtJm|}lCU@Iwsxw3>p5$S!zlJ|-;k!SMe0Z0$7i=7%OmdCxp7+2T zyzq!&=!L`aDVQhp*9Q6zyL1)3DvW%3m-GP9t0&TE9Q0~H8kf2Q@PC4a+{AoDqta=} zFKUjq*zyE4{w`aePWR_bh%I3J5zoo~oc#-Kv2DuxY*|CTA6wp7cquZu{L(?j9k0Va zk8ZIQJ)%FY0d6+`v|+>k7N3>=t#)S7=WN8%_bYtW(fl zWQrlv-s}|IBA!c11iR?mf~K{MuW@yD<5`k$aLiK43)eI#QN+?xJ8~)0M%*iym|SgClfLY*y@( zFCEw2$!)TAgr^y+pSuzpG-Z9C##V8K;5a_!;~vhux0AE#9-LM(7!Fx~p3D8u$PdtF z*`4Hz<(}b$72r%ia(66|%^d!z#$3~6n=;T&tyur~X>H8&CFy~g7{6Ag5K z+t2Gqm-r@O3z%$_7`K-t>eU{!ZP@8Se&@cXc1~5!%*XbSDwmum+F?RFJkSp8USEcO zpbs4Kdhx&Z-kTi2&s^}a;Of%J*wLkzndcOvBP>|Tf88-!INSOCG42?Z9}yj~sAU9p z27IHrW_EWFncL#C1KF>X|NE%3eVKFwNA{Q9qvK9^OR9V;?{|1)BjJ8{c-kZ2$m`r~ z+rhIqexRpV=cm!F_u6}Z0=mQa*bmy#tED@D@?2ZHZHe-?Kz zPV||=rymGDrhAE{m>=7Eb}Eu68aGQd5>g{I#>>WadqNAPcMGskzE_|}B;={`e?_8Tv_^yGVjwMB8{1`&YldGU!WZN_0>2Htzdv2kshA z9s8@5yP9h=AGYtbt~5`}s5E^i#;!Z9HU1znntXODA2yrT4>WGcak@vk{>4enZ~W?L zOCvA|1D_SZWj=86Grsx63>I85vuO9m^WpVBa{Fn;{zu`1_LFfo`{WZo)@qb$amEz%z-Z&>5owY%!*3?ku?P(pP=AL%*&I^;k2uueZELk(DRk4rPxe zxA>6%^75h^plNO8@K*VFEbcSM76dIBxW7}4WnZb${Na|PE%8!p>_?Kb52V4W;qQc% z3+wId)q&`|3)3UF{1n=AGJQ?HG-{XYU&uwTj(q=?RP4u;9t$2(fG@W{j|k-qYYWM~ zi7i*Y>kx8iM~YXt??T8t@=2DnrtUM!%(i0)DuN|eK6IRX{007Cu`BB&hM^;%PfSM7 zPNekg&NCc6+m;bNjcmjouSb8+^VWYl3=J>HFdY8?`jO&4)RuwF^ggouVRR?S@R94S 
z4Za5MJx-8Yg8EqvH@vWQTu;&6$Wi}}3>7B6!L>hIrwwd>E+B61E0VoDqnc|xSCmws zYboAoFZAL`==eH#y=Mu_I~go;!T?yY<9{-UEyTo5)0C)}*)Yd$p+H#@mI57UPSLDC;rdjm|yf zM)R>djCGUd83(J_EAgd4$5sk|Dusr~RvJE@++xQ8!OQ~7q*uWEAltsUeZlukQ z{{dV4XIK--iF<7OgJX-|dh^kiP*(lIn&WfZlH-lG_;h1+m)B^kT4ptu{mYHE|M4|o z{UV>--N#!xksZjN)V5zR^g4KpoV@!DWI@>};mf<>%e!9=CtGCqvMhKq{@dyJpL5Bj zz&sb{PQZrDf9RPlGp5UnhiUgc)+XIYG?Q_EVB^)q*k}2@#l|byALa9z7;EEMFD_;C ztk(}De*NV0$ri=F#?QLgwsXBW!LePOuxTD6b_TvEn}uv!z@SaG4d;DqLvBOv3%SQG zipmbqMH$(}0_UP9pjWq!88*gfIOFIkG-iLTDIXDasg^`h;LgNX&!cWx|Zf)e;`1`~k%tF5r zz6sY{JI*&)lO7w_)NXcc+KDxgt)wqr8LWZgYT}2`3%$X_1JwN%@ROaQdhRG_dREMf zKDlS5u}-jwVQc9B?)Ry8Td60zQ_CoL31h6{|3ku$YySlqyf>!FibJQ7ANIlzWM}Yc^}#Ui9sMp)VU3**_1yd;hB)#E-oy@w-=+ z-1LKaZCZo$c-vLANn&98#`PxW4a4_78a-kRb2$wiX)HR@FmxnGUbgx77hL{5wMTlA z&!_VV_^YAm@t4fzENp0p@lmU6@OP(us}nxjx#nvZ?&F=qWAH_QL*F`@Y}pT76w?*L zRv$*sss~5+qHFx^x5rz=pAgFa&?cqg{+PVa*kI`%+s z?5Vf$tu&QIRp-yCI?n!^Xz#xpfM4gQkg?VqDyc+mi1)Q$LKXC>6)-YS&@(zFpf}LVo@w+XMPAMJIhYqRV!_XniptIV; z)>ke`_$JM7s~}c?{{0ufdh3kJ%9+U84gxe9Nj*A#8i*T^fg(0LTz#G<&r4c8BIt$Hss0Mb*Cty1R!wjXvYs3j>)O z*ZUL89>X^pv&e^GnbnW^gWucj_q4x@pMk3t|9PQhu3cZ4sCrE2`vPY5k5tz((ZxL2 zGmLnVVt~TP@eV(j5SyKZE~jJ@jp0#jTV2rSIDRb75*;jOuMnTJMLI$ka#+D=tI29u z+G8QVONUb2dkyh1%ITAL@cwtMBxl7J#JQKTpa}fF3DgWWG$lZXf zE?Ga!K2W@g)zmBRhtccy;7{9wKWz`Vwg-P&hvZT8mr0E23D#V&?s)gC=bvZo|0m?f zU)_GP{5a!A;uO@+l+Gf-pm9n;@$=|6l7T1QAAHP$KIKA_h$&fI(S7YXqTg#dKb3yo zr|c$2c9tJl@?0r6-8sgXwf)Av&r5ior{_1_Xt(2*KcDAS#L?v6?8vL`^N;g<&Nu28 z5=%2{(=AS0Wx%FvS>JZ2t3EORd#n4_Kb_~T+|RwKx^MYxo_BJ`Xj#AVnLN+qdH%A# z?PRDOp3_dBaS!LYJwD=Cs#9(0yzbu5_bvA!aIZ%Pl?{Cl`9#X#{S8ZRyjrky=lZV7 z5YM@NY2UHF$#eBlkI$-XY2SAL!aKz@Px5ZeNyoF_L!&$J*+#rQMLIXI_xiqVzD_-8 z`z&HZ77kP|ZuS%j2e&Wj+usiARUJ>xUiI~X>&f5i;&cY}HSPi1$Dfdn&mErO&E%Sm zzVGVP=Nqg0?TI1BD~)E1u8y<`Ja%=sqwUx~mIq5Zq zkJxhA9pliIT+R(Fhi>?SUtQ=U?gL$~d%t*e!yRqn5B`Sp+AQQQ!7=RA?jUR~)t}QUFnCPE1{P#tQq3jq|P@E2{j<5C73M&n+ z>2QJm&7MHt>99L(g2JZFto=6A?3Tm{FYAG>Xjb?Z9v8TEmIY@5geyXD(^3cHfxPoj?odkU}v))-@p%4Nrd*Twnv zolE|#;ly2|S8S(mJr5<5TeDRke0Kb>6Jo202auk}__lP=R&G9LPoYKVp}V04eae1; zvO)6Wi8l(zti=6~u?JRWn;X}sw|Qf#&H7`>t^M2hJnsx_Bi7ZNsp1Tm5yf=B8)<4Vd z{(6|JyI2o9_A%>WZ`%`Ft{p3yY@ca>zsnc>+avIO?2Tb?G&j>&*+J}FAekH+TM?QQ z@cEna%-q-N@t3LWg|8iNX+J))$Pc{gu}$jP@5aMZc&0Ypa_`3Rxg8r>B-vE9m3sD0 zNV(n6ovj({vx!c8jH?&rE$0kWs_b@TcgcgIQQ_yz#9i!(D&SQ2%cCvm)U#@)1+kG3 zFKCACd}ruT0dPDc>;9@AKk*YYQ73(-pH0j3zOy*@<0Dpz z2ioOF8|OCgyMQ@__l+uz|W3hf|x3fU)T&3q1&1(zk-e8rz$lK=jH;VfU!P9Xn zw~;n3rtDpkpP*S0=&JO+O&@c3ne1Sn|Ml^fZ62e$N`BwoHpiv6c`NTW(%!iPjHhwm zerIvBh$aHJD2rE`xVY{>?|Lj=1?nDzdq*J|46oz!iy#vs$H zXuq)N3&>53iP+RygM4A%oHwUQeJNJ8{)6OVXq`R2!Nzvv)`7>?o*LUT(0_M6n#@<-4EU%+l$ ze;&SY<}dhdA_nzt?WvS6Tz+urN&C^4-r)TV#^n0LU*8NJ#~&^{=;IGZK8--Tg>x>h z1%clWnPY@lG~s&V?we28j2n5g^C=s~N1YKk zhcQTYOpjk#%Q#PpUm4LiHs`BVM_b&p5FzsRxwa+Ay93#X3XpeY;|r(8-2W_w#yr$m zQEzI@VQfF5sg3>Oi5k(#!-^+DzIEbl9NWxwjCoet8kp}T7q>EZKkX%7GIt%h#REf)0<-Uhvgo zYgk+Z&9!Dczacl};|O8Tx8SYVKib@P#3%03TL0^@WQ(4ujBiHvh5)`t$)}}7p6WCiJ#?ZxlRxpn8{Ok>RoZSid(2IP|*{9#o{sng2r((^VdLvTxd}dCQ zVi5+)$qIaJd+I>>5)P&D4EbKm@9Wr5G0f9cOnhe&n$_dk_(#RlhUv%g*-VPP!0#W! 
zOQi2;9P$@TWE~y4E1%7HoA$YJBkq|rHy6S?{|ubGeb3Cf>%15HvxUxJz>D647tOsd z_}J6GH*ERxq15*q^Ni;I0!|-1b+jeC`{Fk6dsgGka}xzw*t0kd@hs?~!1SpBRFrrf~=I;CksVmD-o)+MEOGi*=fbO}TbPMP(&; zQi~oln)r<$!0SuRG2P;m2IXD;?Z!4pFWJwyWfzlg!jFEkPIz~w+p1)SGVG{5 z#6Csld3VX)i0lxJc*ooH*ZAe=&v7Q;Xb$-ICq7f*{im^~Uqw$2d8VA#Q8;Y1gKOEb zKjY`%*|;6Nb8~d)eOz)1{cMMRM`zwbymq!7ukAs8j(QjM#MUf&p<+hPN%7hdo{KI- z){K84UWpDdtf1%*@U}SeLcyoZwojE}hu`yy64BPuHN#3u_wex!FDc#NX)DMejvv}O zp0O4@F1~A&l(ra&^!H)#<@`1Hs_vhD` zRNpekH$BJuS5tEj!Q0E>%^_@Jq15@nI_$1>*j?+O@paf;<=cy+<7ph5XyYkh+4D5` z&1XNfq1|M>`ztwPW6ZMoZ)sV=dfL}48&N*`(n{)uQaq`S^9AYcLLX<{mExo6=L0*L zTmN%vH>6+}f+xvmTLVwB_iHM~bp`xrY1Bxp@C2H4#!9{l%LgxD9-%RTrU|^$o@9S6 zeD($>KgTtUE&kU^`;4jspJ3a+0~^Kg3NDe<`Hl$ZCbt2X{^vV7fQ$S&?|qW}jLkj% zDduiY*eToao*RsVhmcG5p?}3UBFD}%nzN0GO?Ugr8%mv1$rjJ0U@|`CANkUWGZ*9g z_(vwEV3ICdq{AeZf=R3&Od`M}asnn!EbxjHOghunPcl>$eRMLvIO}cYoA$cu`Sm<+ z1s~$zpVnH>uHl(0Z~4B~D{m?0Q+y-o9ev6(9h{4d5~o2vXYwKfqYlpaWUsuB*hpkS z;kT#kNAEvvMmv$I1xxuEBrA%JCCRxHx`B9Za>Z0ZZ+BC^bEI=-IX+%|-$>Z9N!tAK ziM89oe%MFZf%xXc&y^*ztI864JjAF%?+?RAT^@!#XJPwSgFTk~LGW*9ERIiTk};`J zbxiCb-!zS)Fm$ZkmIWNU$+tgDwm9~{lk*2TJmuu)|4;l|WsKc|m(+~7II$w9ysZ*D zU54#1Ew7vpZ+iqk=qO~&=Vp9=!z;i}{>>`%9>q;6_OKlt;~nVdX5tMuQ`Rl##2Z@O z2JaQa7sjsO_Tk50YSHFrW>_0K{>h08dKtbaonLW5A#5~vvDd=TVC}c?lfC<`JZ-;? z0}lo(*O4*szKmxWvsd)nW8l1PM?Vbi6fmzicI|EK1NW>}1X?synf_DYi|DxUbDQL2 z_|jnU&iUxP$d0>X&@C>=JK;w?0FKY+Oy{k@^>TR1{LvXrHOPP!=GiH_##u|s6jFvY z%A84=z-WI{A!9G3+zw#sp2K|k6J;mPRw~xY`svB`{5wA1*Pd^XV?%qlgFJfLzw_Xs z>Gu4Wu<;CJ&zGNOFxvIsf#HD~2Zn-Q82An5hpu+)SG%J&{QCN#pTP$mg04qAg-!E; zO9nK*0GjX6x5B1eo8~+8?L6i$-aP*&M_YE%cL(yfaz@!aaBVLSlngKs4=hOOSEu4- zcMjs$4)^nGKa-mKV7#nGJPX?GjNjo|9>zbIEx(a{hfUkwf8y`|zor!zIp?iko;EnG zhOhL6~Ocat@y#bKD6Rk+sU-zme2ivi&pFj4UJYb z{14HJvzWhVMG)Au(|5;t|9iBeH8uB-q7|}NIJOvvUcCR=Q`3uEQuIQxd4t8Fzn8Kb zC>BpT-e5F(sPe`0$QN%9LB4Ry^_MTOgPtT`T#I}m-*5yuqC>XImipefacKD-1gXopaBYcXA&Yd`@b8_Q1y?r{H7Tu{TQw+W{YYxnDfh&J-UD zliP47yzFw;@>Y0R0A98OUa(_KVJgbOpqj1u1^Ir!a`}>a- zn|sA)k7jF4b+=A`A0+1{+Tym|Etr@ zI~Y&Fv245Sp`*z@W%J0(Hqcyuac@CkhsVmmH#I+Z^k-^ocwT@`(TIG4eKNBVTc+k!kA8*UNcPPI;W3;E zV~z#)8Bua}^zk*xhtzRz?jqSsb?*BK!Gb#0@bZ}D>Ck`0f&?ahciLX7z% z!#zE(`0z`!hD*&1+t=?O>uc(!OvZ2%o1edXJMT1iKl)4MjB-21K9ruB+@9!NiQ);N{_Vi-0}*4aY0 ziZMRmx+}n(r8^Om+I%Z>uE-qKbOm;?x0%o1yrW`2V9q~a&VNhIxsLnW@Ch&0eNUsQVcF`pE&=@2QD>_S8bgB%9VdHx`b(OXVxgN7kz?*B0#Bqix?R_UQMoHSs<4 z=5b43!*cK^Zsa{z-zUx!T0v~RaLzJ2iWLu{d8N1S+MD~v4valtd#cgNO?IH3`GhCV zOAyy%+p@JLJD^EL!0$=mw}bVOUn37+#HU#QQ}xrhf}9TxMswo|_Sh%NqsVs6m9)b? z*m*h6Yx&;D{vkKw5%K2mde-s&k&AX)b~2raic@D8GZ8N$?c*?+;F6|4v&J z0MBZlQv5+BdBv^^^w@R5f$(|*U;4zR@TK7SRN~0#&z+BGO_AwuLeE|J@`Fb6+POwT zdq87)89(42Xv&_2M#-KrM&f1913w~p4>;$mZ9lABOO8!McAW41M0pVgf$dyRv+SS^ z4|9v>KyZPVjrz$u!d$MF;N_}bd8 z6g`fn=y5cS9!FC2I08M^o+~g6#dRkNpapfz(V@*J+j8P3=yP@qKlp!(K1b5%a~W%f zKQ!?i`11ku`9*SXxHS29b{-`8ZphyOoQf$M1;<+HPxP4_9tr*bzd@e`i&N3(R@VKb z7_k2aeQrI}mp*@!bwAnHWW-!Qov)xQpx6iT-DRw24davlErb0~F(cz7e}R02qI=j3 zQ+~u}HsGt;1Tr>=Mt9-=TgrDibiB(jimRc^k}YiiAGoSMca+6}(ZJM>lgWq&Ge$x^7P5b5rKg3z{_wctIw$I~p9?b|QvL{pj z*xa@_vT(=a*txJ_D<7Wn(k79Gli2nZ7ZOj|E=v71EhOk)3! 
zJ8{>udWI+u-^meRUk= ztQ36le0!bCO2S8zv*W$~q+dk<%;o%2IJgFSk` zxhMmgRf?}tx$y(&evxk`($PKkmF~%n1M9Ak>SZMf(oy?fObaYv*kLS`O z8a+mvPrl2PEQ!s?IX6!{hW3LG-Y=S(eokL!+C`rO$a-W@L==D?Z*dmVWEZvEA+XB*(x%&!vuhnz@{J?_}Ta zfo~h0tB$+^m&wDY88I_6MyJ(12;H{pW3LyVw`gN0->PSskFF=~FkA7gC+Defe3g+G zZ(3q8hB$ub(ox3BHS`IM9+FI)k4(&-Y_qW6--!%#7jq{5X=OxlYo>8t2)HV~t%i5+ zO*fhgk-L@x<4uYM^vizh{NKxe_3fo^Yjy58)gz9pu$8lF;=iwq{;;L$H6yy0HbgJv zJN~5L4nE~7e&uI3orqtNKP6qCa$|8hlUmrGYVRj%?~g`wEB_zF{-`$PqsU2Xv#YJQ z&1&$(mF>kZ`nUhR(H~}a!FSh9Bi`DcK!#;ZD2tXGrpll9n`l8e7jxy`KGM#l1e@Od+9yP2HK zUjT=H0}lTH9R7`Yn*Wh2KH2m`8;3Ky4dk=2;Gg_ArQmN6olt$}c`A>L9rwt3lN<$>ca#(B_Hkn0GJ|c~4&}~a zU)ebzo`V13%i66rsbj~c^w0aCvx>P+@;*eEzrh&N z`|95}apJogzRh`_LrdqBf0&&8Hh7w65BOCL4#{8nrS(TzSj$Y}P3@d5oi`BANEz0)uoYh1#ebc_ zHfH!Y$QBg<4*8}vKXBiOMF;L6$I5jzJpuAxRNz1Cx;b@T`JTchJCfagd=8B0%TP1tjUlW+)K3DF~4rECEH~0S5*Iw|dc+7|7gu9`~H(d5UBT)rhx2-t? zU$c=AePo`xON2X%?Q_$+zxCat_SskI0ZGc#gAdRc-2E}>_as?3sW~ln}L-%;-eZu!m?!V;mmN&qK>Ie9`heRv$ZLoW^On%;KX(Jz|Fc>~8iCcWqH(jJK1@5m<+ z@8q8N8c*|hY($3^p(FDy_^nZ&>%7)Drai6v!6e*nK0Mud*LgSZ&f?v7!1ezc`N=pr z+mp*ZCCMhEEtwn{Ro-)Od0~9KxjH8=*!IV2a+Cx6!@n`3oPSxU7zTLR!c0EOHL7zM z`aOwU$hqE}+V&fW*9Nx^5r<41!z_zDme$IjZm>9~lsnqV#T>e0$_8r&XVHOu7~HZr z?->S1CV%Ot8{(W77wxX0Od)mVb6&EJ^O8@q|K@X^TfZ+>PO#IFuc2MipA3G-$^RS2 z_FFSzeA8_Bf1@|6N%farL#`R*lp-(pvT)ymYzT8NL(f94FS@Sa1=W+@1{~V3ts;A6 zHC3wKb;e58DrHxj8qqZ($2njNu+^o%PrxHs>4759gCh8rxqx>l1$l*l^~vdl7o)$ER$lC62X@wb;iw z2kpbVDs;(7VIDo5;BKcyekLJ2#cN=q?HJUf64}jClH+X**_S>?}Jr z@72;_#<{J;^uJFm;bClSny1b|2Y~z9Ph1uVtTx6~@;Te|&nK@H_LGd-ymKq>ZOrG+ z#`A)0bIeiLvsnwu_$ZUhdxQ4{yr0awdT3HUaBTp-iYroV>3-r*-ykNgljre!@HKId zW7j>#>PB#?kuzNtdD1g{g>jyBO~1yrQ$1J;u28=VSR4Q@1LkVwxs4Og$VLNxu0DO7 z@Uxit_Yl+n4m9N;Hm7;4d3^cYlHB3BcW^G2b3Rixc$vdq_9OWgkjW(kw$c-xHHeDJ!E^Sr|0`Dr-ZfL>F9%-zLW>0E-Je%(DX z>aWS!8rK2fU730MIN@XkZ8!0$L1y>)Jm(jf{=)cjPswlTFAiT6ysDKCxXRd7&bNpB zV(uF5uTEP-2X+hM^sDs@A3Uqb$DB&doZ38c1Fsb=3H)?}O-tBMbIfn%tkL)=vy?K% zJITcrz&0S7!22b<|1$6AGyaTslUvrppDNhr(SylZm-Fo7?DMy=|Nn-4o-{MMMVnq> zpR?EPeXh25nxl4Br}kp!-;#^9XAWkgGY&I~u?3VM^v$Z&IsD;gj4R|fcDw~kO|y18 zuwBc(swCfICGA&w^NPAhn&)8;SgSLT!IH8gx}y-k&SI^70K1l-oWu6qhMOz5L;w8H z#RI^ieF}7%dAW21{9f$$84L8ToH{<}e!$E=?^W>rXQPcG#mBuvo+X_h4IDY%qP#{q zeCI)zW-~_NsN}Dg7^C{O`!#P^wUmB^6N1xx;8e?8-b9v@T|;`Q=DBqE8Fv``eji$) z`T6KSK+Z65-d?wBv3qr4^RmcK>e{>_(|T=QS>z%R&BF#<7(eTpfCt{7d$7=#Xp_8< z?DM_jomkq!D#v%B@kTGrYmyGC@v59bzp|-b4gT1B&YYMbywQ6vu!VwaaK6|8G}f@DhCvQN~Xjm8@X^`<`TO@`Mx)w1(dT zP9AK{x}RY$_MkOd*QK7o2k@dg=6Mi&d}rz0fMEGA1C({vI5T#qTb6ZG*~h?vts{-1 z0^KzNEV`i2qUjyjSJKyW7Vtj`t{kF2wWYBLCw0yRygiY#yj9Doc__3<-`Qab9)eNrQ$Uc$WQ)e2hWmA^D zU2s&s^KSS)a))hiKfrHf4BO@&usCy*Hilxx@PY7*`MIXRY2RE(eZ9Xx?WEbwwO{(%$qx+@1K2;eO^k85c5>MZWFPk}I@)qR z-yg6(!u6AI{7=cn{|*f8oZo44Jbr_NUz5q>{%zp2$Ry8u+WYrY?-l>Ql{p;#OrA~OeoA>F@jJuPvZVvz7^FQLf&OYN& zlih1iEMp&`-w^M8DgM?=@LgPt&3hiY&|J>gl|{Rz|Ld)?d#$1VK>F_c_%?Kw` zhu^jQ4j@Ye%#1?CNQ96VrR&yBBnLTq?`-g<(bkg<^k(P1^Iv+j^M4>cbD(*K%0*Al zxT6aDWM~rf2!6(W<+XdEukM_qT^z$ce(hcIDheb>EIE=)z5Q!KD)TSt-!)}0xz7(7;4U% zy5YbYqj@nj>^;sdsC?eD*L;q=oyW+Z(l*JQQk&pDLXG`}eMa+Tz}I4I=zrV?1QY8$_Cc4fc4Me*&EYE@8!RB?k$jh^eps9XBqH=p#x0a5G03NMFz4EcZBJ#u&xZ` za^7(kxHb=2`huILZs_`?L(`|x_JDPL&bdR?gFa@)XClkc4)SE?0mi?Swzd;rAzPVy z-cjuY=s(xY;VjbiN3exK_j~W0_~Yk|b-MfFdavkE@AKdBTy=9e$M_$-*VqIT@tkSs zpzipDzsf^tFh8H>H^3Mp3xjPt3NH&t$CN&rLHrcCMly&ILf5RgH-xUa6J6Acej2~m z7&nUd?z=GWaGPaxGM4SozR0q&5|7WbC=Sen&79Ja^Ec`1tMla%=A`ikSPPvmkM~>WD(3e%>o|@+C-qzF z>r!j2yN=dU=9K$xjm5qj7TZ6?;6Ak8Gd9{w9dF!d`|cv-H;@g?hh7uLf5f~>YRq$r zz>SsDfvr8x>evY#tbwuCaQ0uiZVl(oqx-$P&~=)-us287jJ5eyRr%;|wR)Q`1h(Wv z1@E8sGzZGe<}U7=)j369bWDP7n(KQQtT%2iTTzkQB_g<7Sz)f{jUV344 
z&DA~1C9ny6d6joty_r1`o^kJSB3b9LZL=QiuU_CH`Q?o|$uQ$%??#TdAV0+RE(#`E zDc_q@AgeH+^@`WcE$KMhn60+n2fr+HVw2j04ku1-Z*uHEX-{@g!`0!U~py1=XtK5#w^xb8(cM_Q5_%@XoB6zP9T8_);*H8~yj|BYvZ(e7chx-7b$k+L)eW=SFw^5t`RDaHG*Xv1vxxr#Lfb6?TH> z`+?OP$e3r+|JT1m?qihQ~Y(O)-7l@k7DY!+d4;_JIHSiVLGX`Pv@D z9>15EiHoV9Zj=AQKia(9-lw8HUEXtwRsjE{QAcMi1orwZeLoAGy%kv`@?h_Ij?f@y zy2704x&;{Jq7#_d#bjSgvOeM&@oR})xQB1}ZlBTA+0Jm9?4x)N$JXbp-CfKj?ky{cznh#b_!3Z*oh}18XUc{}56?{5*?)ZU znKD-{>K{km;prPk{$23jm5Vkt-LAOa<|oN_5nrB)U75-LzNM_hxSH4%`b*N6W!bpKIB_-pN{U99=q~1a=<{bD*@)3K9~My;f6ZyRcYgP<+<0d0tet%BzJIi38STzyjR#uaAp7PgyzlW~Bl#e>T6aW=)+0Ne zAO8THfS%j3479j&&95%3;@;Xg_o{~c2C+`idSZUW6NB_88efRrE;CA5Nzi)jfr;z^@+-Gp1O9~1bmWkj$C^*>qYQF~*0!Ekes4K) zMk6vv7SHBzrqM0u_d4Zry}nxG{PKG%C>N&O925UCeQVyL*>U)S*0hYXc^B|pW$gWn zT==KQj->1$S%r?Am=$}CXSqgpt>)!E`!qhXEb=S3|9_n-W1?60cqgUmPKy2ONp&a1 zK0)0{N2^|S>Q0LNJgqK%Zv1gh?rzDZ=``TBZ#nl0RRMp|UggM?-9)rnIPhccs@MKa zpQG0F64vy@`Qh*M$}Z9+gSzbe{wH+LytFywVM#Tjx{04BK88s(N2n8!~asR zQ7ikL`d7STnEuHfQt1N+)wdscxrzRS+@7PCIiA&bjsbYtQvBt% z4n7<^8F=rEUpZ2*s%0Nog`Utyeo zoCpi|<#6@!8Dh6>$95IY!%S ztkJ{tZ4w^@JiGI-*{iPu^feQh)Y3;CZ42I&=BUcr6bx3-RweBeK4iX7YEJE5PEN2D z@R^s8AJbtn1(>wcmm5Ri=8!ycOBwc*Uqxg~8OXlio>!O8{mIMcz7TF-`obP^s8@p@ zVQ6$1nIWA{*Nwx@fvv>0|8QS@2-#Wo%2NDdUBJA+%sta?K8`wS?n|Hikaav9o@a-OzTA4t?067wE7Gi1w|ofxn*YA?XH=2vF$_3HNCY} z?1e_g92jnFc@UqB*45|pZ76_#OD@Q_>4Xzc{swt8@-mI$Z1Cej?6NPCL$C~dNVDyh z#EiUR+4b!AwCy)CO`k%OMLR$wQr^x5aM+v<=VH?cMW*=);*nZS1n9*_o+k9XVa_`5M+DN^VX+{!#f8 z;@DJ{!dDKTA0}5e_~PcEA45($bIKj^JrKuV2!0p4xOp==fyNhN-HF{Vi5qi@2eT8o zb^7D;f5ns8zC0(FPc`EVf7B(pU%|zJnaB~Nz}Zm#_cz#O ze8fMP75BRLzZH~|C&94S=X{@Gmgqk=2xzGbYqLb{TJ@0H$f9xMZP)-c&o z%XA2znwXBo{)?>AR|%q%-nMcqQPz2s%$ zq~}vU!-qZo)5P_9uP$PrVPiM#JQkU(v(Gc9SnU-Qt399HUJYkheDtkyRp6=J7J5u2 zZTac5lD5>2KY4Aj#%|EJz7-Rx@AA`)ak-2I3|_DQr{dqqk-=Tbmqzar|JFF-%X@7D z?a(pzysGSWd-?x6!`Mnrn%#%8j4kEFbcMkG?=z1NUvg|~&t1=XKabJ${&a6N&NCl+ zefB1DPAwf4^=}&1d;mGyc*FQ&Uc=Wf?7T>KGCXu)=bEqb`Pzk5{I246mHj)+cO##* ze46+?%;yn4kMjBYg_F$T%{}|GqR58LRbKz*DsKj#QG7=88N=r^K4bZ0ZZ46H;&~&H z_>`GINKN>w{mlpFnTfoHuUwedu!hfrd>Ssy+3s&f&Pg1pH50zFjOMN{nu)44S(`sa z{SViei4VU<-TYC_2bxsZsS^NKyS_AHHMYmc4m@opl8d};$?utoshdVM*K_B@(Pw>$ z&cBZQVkdVfcVP!qT>U-R90gm!6x}v)nBU)&EebdawgtfT@e#&W`G_XfW82aPxJuW> z@4MwNcY*huX4rT0;d4pso$kP1ad{n&1y@-3m;ar-v#|Tdfm5zAwYv-%S~2tFUf8;k z9Njkj$ODyYOz93XpJvs&MgMc`|BvWDIf8Nx=T4hOyIslnlkE2o>A&r3A>R!38~E?a z!;P2s%EO9*l5D_!Xl6g0QYO|KF}HDo-J(Qt>&x(`Gii4ipTToL8?gy_!DyB_Vdpl# zad0^_rTv-Zw(aXPz)60hxqNqNyzsbfV~m6Ipd+oZ<+p2J-kmD5jPFj$w4+OH8|UOp zZH+o*o=lZ_knh(iQ$-H>Z4-@yijQxPIDI{rD)SWIuTUmP{y*8aYVfCq&J0Gi{)Vw} zeR`~V`dyA;&a1MP3Dp_KJqhic1y=mq-Jc0^zr)1tR^|oHl_>XVHNWq9vVz?H;Eh?j ztMr$pKeQN&VLZ6-a{A6YO>1AjJDW4N@;_HS=}Y*wgI~m_YR(ns-M9pP(-bI!j*K>Y zwAXz3Mv1RExGFGu!Z?2fJ1RaUU$e1l!gXdeD-&N5eci?Qu)pEovWVZ6-Z?R*vDJHq zHm_IMpZ0mU50YE%WL^=^gq`F_JGdYGY5k|=S8Hrqucxq=-OoH^w=pfD@K#?6>y zf6S9D0Xr)^#=g5We1Y?Bo5$*I1vY;8#(@;AS3k8QZd|fLG#@@wNc>R-{a_=F32qaZ zo4%cASuxqQ^zL%nco-e}Vfe+ZzEMpN!!JgeSId^l*(xKklyU4u?$Wt{Za#uhH)mtr zG0E>+&$)wb9=~lbEoZ!1&%NYM6Ku^E2bcEQbZ2~QvsdSE`rZ@eo}VaP!X0l{{hYj% zPJFv7J6qp?PGyZ)RAYEDN^8Qk0qIntRnRj>Mt5T9!)f%aK6MV)13h!gxUr!@Bloq? 
zG{;5}nud-X3!b2HQQ+F2zE#`uF*#EiA2M9Rz}6EIo&qDmql&TAfQLTi&2vZ>}-Soywmk{Mx}7!BhN@ zz4yJSuHv30I|!$xD|XO7^AS#;R91O+gv)sYyi2Yb&b>`zu@z)uD;S4=dOUvW39Nk< zXB&n$i+{z}n9Wlf;8$N95q*!l#B)ru`NP_gQON|I-eH@kRx5Yu)Y#E4`JrOXUwi1g| zx4>K}pO@?bYlvm_4{tB29BwXH+K3N;|4qMe&J;b&eJ!`GUa{(7-{_`#Gxs&iN3KP1 zM)s!5e8A-_@aH0OYEPF`J+P4xK{$!We8TK8FId&y*XN_b(sK3Bid=>Wdt1d9M z%p5(YiFlaABj}=kD+@-)dQRK9ZS*&~mz5Zce+vzGo0y!pR^Qh1$Y{~+vE6webLyVP zo;-W?7kb=!nbcGHZKIoQJHsAuV=lPW#(V^){YBEj+Dl4j5Zg^I&(aFMv1OHBhs~XL z>98SQW%t$0EjXLA$Ly8o;GG=<@Xq^L!_JEx-Z>H8DLJw=#XEOEvtEN|=E6HV;>(v< z@Xn4gW>1)yjE-qWi7g+(JALp@>6#sn8LO<7j{Ly=+s(Iua|OUA@&#~hSukpOv%5Rb z!S(}B53~_S@k?W+>_Zj#JJ$2P+dsDT-AbeLUHI@?osDn!BtB7mO5&HY8H#7t-|X_t zY@28PGvjYR%b2CSggQqW$L0~|3~2hV9=eDe?3B>0Uj z+QzA5(i8n5V|M@B@{8!bcot=KZ=c#c1di1g8pX0RPGtOg_BPKt1H?A*TRG2#msNpY zynK#3e&QL%x=hy6U8f{vTs$PkdmU}w;5_Ez0fw+*4&y@c`Ln zTd{-x!DH;q|FG^=?Dck@lQ%zOtUIE;0k224+Il_k>4eX1=6PqKu|@BcQ_`W4@=u9I zPLBP6_dlUdL2};Xf#lrBKXaPZRDPPTi97)b*#_B1%@N|Beh*C(ehQ}bDV(#|Bf0Dm z$*Kk9g5Z0%k3CYs9$`Lfhp|W6k*zeBz34{6MGwE`iT>rPVB3GqD=XQ%MzQ;5^Zuue z<~Q~`b1tUL?ZD(2_N3^`JZSA?bPg9M)z)g@V&m_4<(JNi<=b#J*2%8X4!v>rQ5C=4 zwKVzdu9fP9)JC?E2xhVe=zm{oZC+)cTuuM!{Ry{bq;RVpIo@5zgOt6bU)hDJu@M7z zq8ta6)+r}PdYk2`a>3EP<^D*ybNiLMC{@lt7k2x-mvU$ID>o}uuAMxxZn;Y+cY42a znhOpH2Y%?e<|VFt>+mP^^L3J~)5p4pcCfqHwByhGZXN0T-bPMztwSC;Al*Kf029&Z z*|ZTy7Z5K*9*D};9)P!XrSMz1n@iJhwjCSGkZ=$>t=N)!_OiXUS>{UlOSGo7@DuiL zpZ${0KJ33=%CNI`vN!(g8s;@_@V)U@-fM6C5&RSF>zRIO)VPW~%`f?}l})i@c&w{T z!NZYpvtut%K7M@Oj9}YeN4pJ}4wi#d`yz=Q(fXL9o8(QB+}=(eTyw$3 zzmQjfd&+C!)0=A++qR2Gkr?D+5`LIo;gjELmE*WJXQx9Q)nJ&6P8G*M%=o?|i^a@9HL-D}#5UH*l^9JAG!* zoPQ*Ahl8(Qywq6t545lN|NPa`B^=vC61$`9lAV;RFzZVKU+Q^YvTWf4%a>GE1ROia zonwn-t0<$4X!7^H>_6&DFBSaB*J|H89{jp!uyA#JY)gf)ZX4xn-Q`jHE}h-dUkEN( zQ1=JUlV5KvzWhc9K92yOM_1Rb+Uy(C)CzoT`N(Ikd=FVt{iz@E{Jb#zGe#@i^W4S5 z`i|j=)EM%bddKi{#_&&!p>8(u!z&%$>yEiGRlfap@$d1mhx8v<1o^Ky>|5^4W8GEQ zQ&{g#*1O(kZ2gX>Z@>4X+Nt_lZ@<%6!wI5MX)wCY9p6`b%S~W>clU$k@>IFbuk@Dl z(BAo!t##oEE?D49?k?6Fy*0VqC~?QD{rLlAI*qseLHQH7_uI8IrevmEbfZnQ5noOo zgh3u3B^g{ztAbW{+d+TDO-tJF9-IC(H%8WhZ2;oPZHt7{ITEeC0>Jy0`xG z4vsxk-V3X$`No!2(-uYXA9rtuw%+q3bk>`_Gen-AO-Jfpl}*54JUTaU1ADEC96t5; zJ2v|Fu=9>$4C0Sx@UHzn=Xox=Q7hwiVmTZ?`!wF)O{YU<}>KyIo!_rk!_&C_y zU!w1-l^@-AdHvA(4qtMOP5)jSLf_w}@A}(5y6@0^L+iU^qcd;cWTW}{A@u(h^Xq3_l!KDzJl z=Z4sKV%b<5_gMQEhS2{*>c3(L{Vy3>|K%I19Nfz{n!h=O{%=zMmkgo*#Y5}A;IGRZ z+`GhRUNwaNFH!$xL+JnNq4giRJGJ+rJxhkr{|xp2=^^xg`Ox|g{by?ZON{0@^zX{L zl5>O8u(@HYtn4xtEDadvzLbwFjLb9|IkOVK-+X-Xw;^wS|KP?sE6h<%mD`P7+pwz{ zdB{uN(Xk(71?Rl7-B_?EKn^zan-7ODzqb}JzoH?`?~Nht_}S zk14u8!)R_BLjPY?|Ja~Evi~_l>%Zf*6z)|R&Hpfj{%=NUpO$t@vtfbdhvS+1^-_f*oc>7y%b-;`YZS>btE zpvl$;K8-%`9qzz+gjhn_ckQl$s`huWK_94pShg)}BxS@h)1PQ@B!lr|N9e$(u6J9< zprZ`K-|G4s&|_RXDK-T3j+B0hP9{5$>Y&>!{1x{Ob$+ck$KXHCq8-Y(?F7?o=Zb$k z(GK-T5l^W&18f~z(sjJX`Mt2HTA$o{HgHqSiFEwrKP0yn@IOx6GBM3NMMEA`yS(@D ziGR)zK7PKyL5+H&F4nb`~P27bG6 za_8{oJZBDF*q7Y4&5IqGa)xjrrI+V#clO&kY0nzAIsC`9;cEYvZCiGZ@Id1mh*o`f zUT>RaKX>-fU}Y~$D_i|f4lNw4>}N;!w!8N#r>t_=x#QpT(^HSD=EB~x>f_6TD{$mv z!CAl^y*t2lxNTdJw;E=M5Dc0|%EaDX1uD zvaq{#7+2eS3V)})#zA~9+00RGYR@FN<2-%O{Pz65xKIy1pV)h8d&b2DwvnXQLkxlkJEs?W=P>M^!?AyIC+e3+MvuK@HoND){n*NE{>$FB<6CO* z%VLAhEx{IneV{zG-{r%?f8hARv6JoPcXOFR{YJAczZda1)6a(AVa$u8gGm(wDnmudB`+dTpNw$wkzBl|)(0AvhmR~YLcRrjNP%A8LbZMaAJL9d-boe-Fs{;HwIwokFv#1BeNkuOkm>^;MYBXoU(o5)q6=fqcRSxYWm8-~o+PajJ^?bs?^7;b@f zR??1Q3YS*&efK-+7xGSLDemOA+Fy%ZgmW^x?*=~Cbu6;w7U6HKXW5d^VyDf)9&7(U zwroiy{|~UY)PFteYWI(Cr3(L>#zsE>5^^T)h7NbvSY@279Q_hpUY!mrC4xeFc72JR3GuGX}Sf4n9w#k3Gm$LzSJ{w$U zKOO(=G}azIR|Sl!NSltj8(qd9hzQ%3{jJ3MrDh;Cd+}@ 
z*WpPnKYjuDi8t+Mj#eJ9x{3SBS+~REjjiG{%IWabW29lpkX(vI|b)IXukvvu*Se2T57o|LevBP3HY7 z(hc4ur#m>Mdn;r!-rkK2l%h*| zcD-nP>i=PUp*2q}ba=mTEZ^9rHmez*_R{bElHB?U%9l1nqeMqiG)!>F2ZuN)+2Zoz zD(IO@e@%Y7eYi0Do?<1~!=me=UGDQuspnPPgI5LJm`Sc%d;c#-R+#6=BKt3JU@hOT z;P&#B;0NV}&!yZA@4zh^xJ|O-m3H#}9^lpi+;#xB1oqT;%AQ(~g4=B1mcCzV@JnNd zw&SbQ_sC!}f0RB9%4?4dB=d(;HrB@1m)X7?I~KLyeo2o-%_0_6dq(~f(Hh5}a~XW- zQuxs&@TH64PxJ7@&5gpZY(L!LD~MIN<)^$eDdVB67oJ6~S7HI|yre1LPAh9C-%ecs zA1^pscdoe-db_x;8vID(m0A=0`t^_B)LlY1CVKI! zwy-RUpK9k~{MC06i!p|J>wufR7ZgiL41kN}#mu0UJu@dm46S7`Cd&9;r#?cPX z)mXRVXK3dPlKkYWiR%zQ%SRrqBCezO#2(D*u1@)>FQo5w=BEBjz0SFQXTLf3YJZyd zs~DH=XqNBbX<%j3E%gkRErCov0pH;|8I2k-BuPJbWEs;6|u4ju?yfCj+Z z4Dwf>d{;;g_51sO<@?RT_Zvy`!N~WkHIeTZd*C|un@uCK;G^Of^@_QJk45RL71&pK z+CMGXp<{C|{QJW_K+L$$UJqlHF|7X6{r$X$E+3o@oPh6*WzmVJnN7336L&@~GBR^+ zyYI1V#vW(v74Y-Y?WG5mzi8>dmL6Qr9D>l^+Zlg7xD!{5HRA`qiEQ%dmBOo}mnsfI z^SqgI!PGqa@N4qRbTLoG<7Xomsa|_(9UUH*6nN6yv(OHwO%{FyKG4PN9w8SEA7V`kK_ z>ObwDg#T`i(Kh}a<#z8I^X~4Q2btF?@hf!}A|1ah#@;#*er@l?FX2|f`ejRMw1$jV z`&)6H>HAxJ$l^@IiYboFt$kj;nzLfaO}??(4}D|5ul+m5`XAI8%2+$r4Q;FkCOZ1L zJy!fvgN^m{3K6H3$J`}T=>0%r8w{S?fT!9OJX`Jdow4bzt8U~v zot;p-|6SbO3=R#6etltRxEp)7+O{RyxLY$g?jEfiXrDQG7*E8VrDQ(w2FV!WYYmLo z<;kwx*P4<&cBJG!$vxr=;t~EYIkpQw{2>N^@WaFW@U`@KGk=Pw_2&n#4mid%*=PXX zb=?r*S$^Zt;ORe5eYNIc!*kQ%@QnN&WBtSs#~Qz3Xk$I_mulNyYLE4j!N+?4-!WG2 z5XTz6ZfIjI+?$4{*9<<^g@4Cbe@~sE?5TpqLmR9AKhoe@J@{D9qTf@UZ$H5+;0Ny6 z!z^QXv%OEQT()Eha#RWV17!D`0$qkq@1BC}sC;a;e7_xC0@?A)$d0Q{GkZepvDMR! zlA6bzGpMqY)ja-x*n9W*xT`w<|1(KjW|FoPnxq#lNeYxC1y-;vM8!_aEh(r_3zTb8 zTy@eSQsh=pn{siJURaBzKy|k%sGAAYYl~FfB?WYoRu_>~QCCUP)ua`)Tw5;Y`+S|x zIWwP`d}cBY`u%_s@|@xT|^i?mtalI+aJmUsp0mR$o@@tx#cGko8OoSie5b%Sf|Q`>l^`WN!6{>IN8 z-zNWt>Tu}Zv)n^=oBh%k**hkrA6q_+k|QP=axHkOB3HSN?^>~IZD3Em!`QXTjUK<5 z=d!`HVr$V}tcv%v7rV;NtDbF2?XzW{(LTGP>mxkBH(;N63;WC->@#NEbrX}paW5u+ z)B|SOMYJYl`(1){SH4Mnq29H7(TxAOWWM%l6;;$lAs^3U@0s{&JXxZe%*A8+r! 
zW9EYRfE0}D!eJa_{F)0dYc7(_%yt7VYMO=UmM-cDG#cZ2h`VllNTY$8J1KJZ~5eJa!w!8fU{+jw~uu&Ofq9Ymebm&LMR?kV_uDUwC1j z(YZ7D)R#w+yJcOYdB^Yp?THVvalROMqNC;#i{tCanun_za?6nND7E+KZ>0R)j%;YV zjdd%+xs$Q@Et0>DhqNx(iJiPM;w`(7I+2Cuv9^pKA=QY}5?q@1bN_NaD*82V; z_Anp7R?l8fYj-K}72<)Q&x%?ES|@+H4gcN-&)Zf*Tb)H-eINd-FneZyiLWDU$YD2L zCVX1EnB#kAyVU+cJYQRJ)gt8$*UUv0*ckF2e5E~-?E$-S@(aFh-lJG@{BgbHtlX>o zGsag7zqiTX zE&R^lUI#vI-EXE&K%ywNE>?xUM_Xz;LA`(kIr0f|^%Ns>3x1EERdJTa?}(jeO*~?^ zx9&#zNl<5?{|FzC3D1`yZ#;+}hg?e0?RxT66IS<0A6ezWeHn#D$Df%@SX{QyN8d4{lFIRtpJi*Y zx9ws2+>h>O-lxwk^pk??BWD0B@EsFBFok?XA12jXmoINh*ROe3HComFe&Cvko_Qbb zxjxn3tG_5t3LolYCjQu)@C7M1!}w#7G1MpB6)`!!Q_bAIOq;U*;u%bU-ny0h z-A8y^^=z(cydX`~3XRR54&t8YWOzlfUS_J^Ap* zBBPcu&V_5LIhVfL+gDA^r0BcS-|9Qrr1tWiY8|tu%`^zWu{#5h+Yt-)${T;coY5x}=j3-y%^VuO9 zqJGCzd_Ff9Pc?JZ#Qi=)pHsX3$<9}6u|Hoo*!gPUew_SZ&DV_)zhavt*LT;$Gt z z_+^7W#ZOvZ&*x9vOAa0~XNdkKpO)4I=TB^Vc5%v|*guRvQT3eX(QdXb00jkZYrFi)*;C1P&v0q#o!0QXx)l>2E z?6rhf;iwN=cLO+;-CMCSH_%oPN72^@XGzX2Va>$1X6nn#;X9rGF@Jf>{yoUe`6tGc zOL_L#_|{O@>vBXgAPmrq=WKE)Uk*O2dyeEvS|)$?w)jEGvD&tsi6_OZi=k3(k` z-v#|79`_92Jxv@7coKh#wu3PpbJ45g$WSMjo1v4Kcon@X!nO1uCogYcJ@3?dZhgvb zJ%3Q+wR*Gj`D?!{e{vGMYVRoBx##O?`50x-6ZFTrIyq=M^T*3V ze6s%fl(~m~;mq)u3)vz>!>p%1PS{&SjIEP=d+1pHCdniDrB1;AvYK4(O?O6??S*z% z3`CaoAh$$=i93m*ZzK;I+GW48U|_`pLj$W7!`PBY#W2q1*+m(2Jv6>TvSu^wAX|Jn z^VJ2uzm9n;i^wjd{E(sFEjh}Y*hHO}Qs#2WX!5eDDHErrOdIn>?B-8bv-Ti+%U&4W zdbeO!eY^R{&iRpXt0e~|d&NI-YBgxAn`mnWaI9s_kI~i{@YNRDee9!*6T12FnAW>> zX1Ts;1|Kz621K|<-ma_R&&WyOSFHVtaqWAj(Oz_1$?CGvxyK>ca*gp3@TVHzH}YRK z@i*{aa^fQXJA9@1d}U@X@E|l$g)TCL&*%DlE?OMeym-+)b|2R%Jrh~ni7YlYy1+iJ z6IuMocxrN@pU9Sb<%y~lgRO$Gi#J@ZwAQt-Tdc*D zR?b2{JQMx!4D`c!=!fh>ZjTtzU@`iphSQWSq4bLv)$5 zF*MI|HoppEWB3hYWsRR%NuP1}OYlU;!ONrR3qI0#zHnf0Yd?B@1NQ#}`uzMXzT^hne;-P&-9FyeTln$*_VLU63|)#T1!})fr~{+Kx-|~+RLnE zDcn(q-r#N=`+D>!U^RY@O7@L$;;W6U)}Ej19&6vIg-14l3x}hk88$|~LcR~q5~W_w zy*c(~?_x~NS#ii_OstUuj7f1#;-6&e;#7=hqiUfCVxm*_PwI_UTby3 zy{)Y4dgiwKg+UXW*jZGus~p{Y1GWS4y`EpqbM2=!HywW(+`5rHf%Zs?nUhxbxEH75 z9sL-q_W-;0MXh#^tGXHDqc_QcP|UKPH8KDGep`E7-v;m&&+q3O7nk`Sw`XGS(xCkk zd)!5=$8FX&QH9)eYoyL6)B4y3Jv)3~`r6>V;&q*EdXHN(vviT4(`Oa?ILRotW;#4; z&D>`3T=Q9#^jMvS4NVv_6=@R-lXtViB1sV%w%}X*xep{e$dc(&lu?XXWp_J zev3|OslBylOy5uEL`wFw?#3>KecjCydrlKK-&y34;Jm!#pKdFJuh6}NW~y_$mTbEjOS`#H0uL&)DejBJdgYN)`0VQ{!U^%Hv@mRemPu> z=kfH39+i`Jw}MZtm!ILigstz>a2Jy3S=Pr{2-){X)Nc*z%V_G$$2fmjx{&%s7Ygs!uh;)K>G$*To1-U|d^n?De3T>Y zSNL?e)|>|MB|Imjd=^UGi1($EBwV_FpsD$e5_$@$$#<3x#X1H zu3WfL=q{ttVaA}#90EN>v9ZUf$6K_n{PK8me^L3a%2IFNJm%8hcQo=&1Mi;0H)r$h zS=hzSMECLY6f~xeXPVE~S}8q6eva>p*SODKY$m$z0@Z}y#+of(rEC8fYKqO|*#O_v z;qUzJi`Pt08@t#qXb=OI9`@9Ux92`{-o)tYwdnGXt*l$RSNc18 zR8wT!(%oFI9o=1j63?FFH|KFLdP<*g1c z-0joeG|s$Gn`dzDRbPo;!~RUws=^P`H!!$$_Y?l}UzB?5&t;6A%yojfUV<-2br)v_ z@>rfHFI@VVY`uM-B}U%nXywdfxl;cBNtyK)8MokB8Dm}h-$C@z!tz*F(RcjT+<7dk zdG7L9svh-oa~*zu%d?L0-ptnTWb)&)-0v&*W>$SC^ZlQ4-+OIvmczwxbMn;^uWatN z=68ut9lrm_c`w-IW+u=XwQt6g=I5kBJDWMPOKo;3zn6LFK<9Du@FnJ90KG!{c(wP? 
zp^FzSX{__>xs91{>SINd_Im#_$a${hxEB%c$+_IM@OJIVkqhSBk6y>`x@hZ9>!RH+ z%|EVpW7n?^ncc;l%I>+RsAN|Y^iljPVsS6N_FH>i-(=poI1<0RZvS(4mr#3k5#Jo^0f1i4evHJ`fFOOQK1I=sZno!XS#nG=vZUmKq(cZS2|ONR{2 z`S~1rVc#LeNr$5^Ksz1asL=WOddw@^-09D;o1YcTPy8g+Q1k2OmRUL<2M^XT|BlW( zK3)Kwhxja;&+_N^!j}f`?f9?4>HH^gXzw3$)A`dpcXS>c|A&-Q2rb2TrD;iP&eP!P zLU1L#fAJ9Lop-wDdfU$D-W+_o5@%19Pji0LS*m$%Zky(CU%{sw%L<5 z_Y7{m{piR<&BOC%KQOUt{&;Ww>9n=*(C*axC)oG+s9<56}T-Vgk5r`WbypB&uUKp(}Q$?ShrF}^DM?Rfib)f4`=CHC8n z$Ng_dxo}2*|syf@|#=CGVOqW^J)PQ0G&V4>UnL@`2vO{%|?+ zPB?94&2u>I1HbC$kJP(u{*OW9tGbna>>}nawN7U3J5S>~&(@0yu9Lm2g(d9q_ptYL zYh~Np%_)CYXO_KIev05a*;C{lULGaqrKx=qLvCJsjpvTnuBXn*ifLnwKGMI(AJg^x?*4k_ zxFqjoNA3frDXjT(R^Wev24o|6m3iv?90wuVb^a>hM)H4)Pp?;6TvgfMm+Cis)A4W? z{X%n&cj}?vD=e-b7hJSg^^o7*eA`|}SKxbS-|(KL|4aD(1AN~A%|uI8n|Wf_`}po+ zVEhB)cXQJ`Cp|Yu1oBoL{oe@vyRnP@V;Va?gw|>P+#FAv&0qEBCY%1@9h*CzrvJO> zyZgQV+^k5&(<)Eg&s+WHz})O!o~i3Z|2TNBH_q2}J|mn^qoMTy@do&xz9Wf_w@Wk_ z25;YSKD?JWS=r&_X?fS$ed*lqmGJIsmiPXm`ZVCZc~h2*0@uRlmI!-SYJlka65x?N zweMdBw@SZF@oAP%<%1gkyNBwtN%1Lul^~x+|2NI2(P8*h`mFP(JOxau3*UObZ(~GH zSz4`YY(z=UsMGJw_YE$jp1aWrd(aECuf+bluo9cjj(Z0e{x|+0?bmMu-Zu97(S1#) zRVJ~UZt!afCBaR{248o7w%R*&GJ4J7*l`zMovAN<@eiDtc&InC`e@(xBV6Uvsb6JH zuQ|krOYh2-_i5%wdV+orO5cTdy6+y`y7J?heYMipt;DCaj`C(X8fadZw&_KQR#bB^JdqwHGrV0u0IJ-GOH=KE%FTLDf|{BvBIe^NCK6dM5_YW>)d zgMYfQHEKK;gU9AIKL320^2mj*Fx#a9-{QTEg*;=Z$Zu8f}<=|@$_}YPQ zkKE^VT61TEvmGX$Wb$5*{qIBI<8`;U!I$tD?Ks-#z&~K*+0TGM{?h;qPf)wT$d?s^ zTjhg}de|)izd$uC>$lul69Olh@g)z4wk7gl{Twg)i2|t** zfAY`tA^GxYhnKRhC5+pB<780~F2y=OYH_psBk6f~JL;`}nmy!moGH88ocWQT&#s2g zWJ9k5<~X$rgrl(f6Ek?OSQ{7rld3-fjn`Njp9PIO8e8Y)ZIrifIi756*Yzk4?;;<<=|BigyWzCV~lw*sMle)e7Pck+)XUYZF zIL{54vt!WXiJM*j5#D!mUiG4tSNNtq;8mTOl`r@~zd-#RCLv zF&#Fiyz}8S-rC5SaCVrM3h&*{kF*NEhhmmu;4g}f>9wwWUzlI~#rNfg+nvvug4^oF zLAaLzx3gQuk-;ZR@xj+`+2 z`TT!C1pT&0!8ug&j9$-xJ*J|-zx2ZW56BY*Z zF2+*|&JFAq=drTzIuV~|47qEvjDcoG;|m)F-4tUl^v9F0KEa`khczLC@2W!T6AT{h znRB5Z3* z_r>}c^Y3%lEVwY|d=htywS6Hc-~mTI11s7dv}5SvC8uILu;E;wJsjAypXDS5uhVi^P4fqvf zk&nO6Ef9XmAtzVsKA)ezXFxZOzYYDfCWXn=`+;BapV_sR$`;0n&n@5U5p#sxHnANm zFY{y2RuZES@u)jfL_Iv>Vrn^aq;@p+$=%c*<+sOry6WN!&X*irP2ALZ%Uf=CwcoTx zO1x7O+lRiU`U0k&n9bo=jF@b$+S^xQ^PhpubuT&mSG)QGS2Zs(XNZ!^zlL0X z*>4qZshVqwv2yhh+&QDFj}VN-+Rc7a^~@BlfN z%=<3_Ups9GADu<9UB=IIE_6`RU7t8NefD?HCC!V>Iot>0^4lu5Uai z^M3Ey+@IE6-+p%H{h01g@2>AX%fBBb2QfzT@vr_pNWAd>%gg9kq(BZ7k0Dl4d^zK6i7D zr0Xxc5BX;uO;-Qar@d)o$#KMf;O=jJ&jl$MIS;PunZUE%Z~AaeJI!17qrm;SZ#126 z=>7z6-46rzcOS6#XL{@K^ZBu4Gey_T{WsCCxz>1;C&jtbrej@rs3sajWkOz-9bh{(iZd_c{Xb?>K&e zKduShy3YshM_#eGG*8#OH_g)}C-^+Q zb4Dib&Y$7)bV=6zRdqg3&#xQG)5%(&r(0?>d32!0=jmik=KWgTho=XQ&%EDqywB6M zS@)CExnI{^-!eV({ek1Se^PgS^0>_VwYq-_?H`+YzvWo&&t^T#x}Q9T`|qLsV=~_# zn8yA0)Bd!~`?b11kM^rG@3&NQ|7_aNx}PMUu6;i3S7p9GP|5ud(tc&;{aW2$K>J5$ z-fuaY`xnuE*8SvB+`p9ekIH<1;7IOYN&81;-mlgDt7!j-%=;}zaQ|bppLIVumHSI* ze`@CY15>zv4ed|KykD#P*U|pu%=;~qxql<=XWdUu;{GkPKPmJ5feP+_mi8+$@7L=7 zZM0vWdB3Hc`=6tId*7`Ion@>EtI}&i75-eeChWkU+jdv}HQ`tF$i+#+uLDEk&u zvy3(2R(w+#e0dh<-4ipQwKh5-&6kOz(|kGqP@gYL4$b7tRpWiWoIgJE{?2heU#=QA zlrIOyf)Cc(wfYO?!%WYhh)CbzTNO;a!ls^THS{)2dEL10sm<3!EJVkp}qUvxdo}vleNqYMpY>t`>VrcjFp_l8v z^z=sgz=-+T%^3r3|1gJV&HSFhGo2?WU98dO!|7RT1?LI#tmzW}S$dB^o8|V|iJ8wd zrf%d>9q@LsCz;QfjNZ*y=1-V#s%T65iM4^buB5LjamxOGJ0f)|`th2anKhU(Ojx$>315e%P38z+w*7=($c&v_^OP3Zc5xX-i zettg@qcQwK2QcWZ9UNTbg9A&|&uq zHDdzf%kuFB$5(NF_?Qlso>dE5```W<9`Q@;5#Uj?-(*~ib?x?>#lEayZCf9POJ7C} z7%R2D&%=rpBJ98AZ`BnP%;uq1Mu`L=OEWe*qJ!Nb|$}vgRU$HjUsF6#zE!YdJ zjs{)6t(l`8ex66Soui(Yn^J3cXP*6>>Iql2%Ep++i^ie-P7u~hRd4yPzVB49U!-#a z_$^rV`x}bmd)xO3ru@iRuzw%eyQv#48Q^T%4tCbYRIEqmt7+JKhJn2zhpj6JyUsN5 
z$ft35);XQ6`+eAR;dvdfvVIy^gLroM?d@+$+1#Vxo?Ms%!{NC`vLFYppPxNU?7PLK zZ0#C{Z0!kPj9VBTzqVRiyW`hpYlm+5V;ZN<7vy-g5&LJXN(ElcuRIVG|?Dd%q%a$EKFx0?b@jIN^R?1$w%3* zUPqSbcOSMtXR}hioMcER?>>tEEgG!FX8CR$xc00c!1*;gz_t7t+P}xL=pl;DuqMm* z)`-1du}Z@8L}X*d<>7WtkMV`^<0YXql2wZ`P-9thBRcsl-}PnDvS3$EEcj2=45;}< znnv2N-^P&bqKhy;nEYSui~T)&-Eris*7&g;@?B)-OU$7E!7~)sUQBHIC}Pw{6RSQ3 zeT?&6ZY8#zdisKO+iqkC`Cg8WPS;rk{C>#LP-)i+%^Pxe2|9^6x7gB9i=AiDP&fLF zXh?JNzV9|IayrPWQh&|g2@S~>Djmep&^MtWedE?x^R8WE!)QqBm%eRA&yg*(@2N=g zAzN#IKCwJ|i1qjmzJ0+^^2?@07qN$W6W%Xl{>$h~>uC#k{}i(NMsiSYCI7_b-%&%X zPPunCQ(NS_#0n@Tz8U&#z>eBUY{rMNKPJJ4=^y>Z=x3~hij_YNJUnW6X|lmnteCwF zyrexvdnso}q4P8rPcU&WQyBlZq4h+dCQ1YG9io>7%$HkZCQ?_?uor zjfr21biJFlx=UPsoI9&mv4%RUcXHQEo|6NRswtxUxH@XU82ofal0p1bzdpFooPokz zRl&!KdEX8FxH(mBgYdYSd#cTd9%z1d;74$>VXm&NY%sNYs84byzHIrIl~b3(;Yz=@ zxm%|rz}af$MG%L3XUt8;=0?o?i$2=_y=hT1bU>W1*=P6T-^ryfst!#QKI=z5l~xrY zKZ=uOUZh^RZobTDfTzxZx6X#g&Vtv@BoE{a>ZAC1AdRzzS!1j|7+hnX;+v|lH3k}X zYfQrWsapOcy(adO_Z^fSCD1-G{B_Q6o-HG$jR3aJgMh95ku+?LS#oVS*xuyav%Gm} z0h#O8=9R$K9fmh#?hw2^l!h%^<_2L)&h~A{b+!5B?Nh_S8#2!q0_4O>_XYPM5Wau%48ypx_dW1DgW)w7E16 zkMmiROU#-UjNi!AbykGJ8K|cmg1^zg*qR4_E)VfC-b>g##M(|D&XMLJUN{0cf6X^l zdEnF@FU8MaPUEp1yd4ZbpF}@{PefSrBgmm50te#gjeC z7s3v68?sn-7!P?p4!PDb+u2r#t3p1H7cI!P61Xp$OH}u5?A~phr6>8^N}agt>%5t{ z_Vf1vqu$YeB+TBW`kkVu+VvI>oVnFVzI-fKemS*Qwf~-t|DqG0B=d~V4m-qn_TG`! z`W|opchp|X4Nd;Jd#uNa@%~wsJEr(PobF?Ny;*)7txfSNeT&bBy!zdhDWA`RqyB`h zgf0+2#-2lI_rCx9f{p1#|9lzzp)(jbv`6ct55-|hM!EJo&@Ej1bGg=g5%4*h_M53E zARbKZku2T2HLgMU%c?ohEWHw$A-%H4;;&_O8h^EU@K;8DzU&>s(WEY(77v^i^+j`sEtf9XP>z?QhqS#DF*%&|J`9~6HV-T3_P@=S88mj zcso0`e$FbA4s;k}>#(&4g7J3j(I<&s3|)LYJ&*nSCe3y@?x41T+MbDBTDB#<8@;V* zkkN=z2yFUR``omg>qu8*ha0~=e;KK`3N z?nQ^a4;{Ac?sOk*=&)_fU$AZV;HkOTOkL>)=JXZjGm3sCUJ?Gj!1$S`ymZ$EOx?Vb zYuw7|?5FQW+IRSGTjt~6(UZgX`vUm(*t4z&zu7o^lwM2)m&%e<~(5S2EKee`4M15F64w!xH0tD;_vl(n7bb6uZQ{TvGeEb0zJ&( ze_s?~3+IC5iA3FV0i(&YaHw&4J=y!)knI9az)ox-qF~i;3Yv{ zYFq6l$W=G+fd4twr!D;*@#JB!wN}IrXwHh5E6vFde>r&XBM9uk)v(8XOm^||!hVdaan;dWcz9AM0S$d?YQ zhu6%_vL{(xJQz>Clf9VJ$2}YS8iucpz#p~;lP)uo^8*%G{Ix2s5B#}#3!Wb^g?IGK zogdJ8cqZ*oHyNK}dhhV=Uk*;X*5dhk@SF=?FPd&g@$|<*gZIF5syptn{um%PD zyH?L#@%_EjaVU{L*vjs9^v-B|_&V}g3w!f)>h2={94$7U^G}jPH|w5!GCj~hiOvKL zwz)RE%@*5c_c57m>giW=HU{NuVK4Uv zy5+M)-l@d9tV(r4vVY}2gs+XU=6HS0D8r)#%gLD* zrYg}xK<{vJ(yZ0+Sc0{xWJ;Etyg!^bDs7vs>`jAhN=~+a4fz=JmbX8=W$AqZ^=C>U(|X zcu{M%%;UW;538y2w%2sV zlcHPc0QqQrriCxgI_Bm!gVyaFzoT=ze3~1gFXw}6yoC!}btqi57Os)3VXuQz@%R6$ z^!${AZ-n%`?Pq^KdhWEajg+2$M2%?C^G{9;U&FHZceiOQ*jJ>Z%dXNcxt&8lUuj_@ z&dtz_qhqZ*MrNa*$9|9gkA5y%kJ6@iFnf>p(SzvoA8nt|Q|>;`4CxbjCtYY&w>bJIaFlD$UU4jTyHW82v+OyA(pd{Rr|>ijZTz7|h5(2nLfImX``DV}&vq}0g72KJ6EA$#>dGk(bj@;6PpL)L#Qdt|!{+6I2{ z-_Ys^WZg5izt$YKGbihUwjJ@%ffIaQdi#XGJ)L~b!qms!GKkwCo!pQ`CoPPh1Dyo*9Y-q%q4Nw`f7r|B)B&$?;}6h)bQ(7o_dGDq*vD2fw(NOv z_M7Z?yNN9-)Ry)^*MA;cT6qfp;kLA8mOgHPKEh%p-FZ63zsDI6ii0#d7&>ddxX3DO zg4uDA^KZX06&JZW5ErQ&--C^ZRF1FoMA;Nvj_+U*_3+URjSrgtE*`3oSjor1gO?{( zvSq`S8L_h_Rx)3l?|BCwD>)t*!en{s+{u-G%~o{iR6Ol^zKL7gc1AocK3e3NiGN=~ zt(ty(I`W|n+?O6lGGBbe35u^y+leJh{y@F(yzp6nbP&F)fe%_RzP$|iR;J-gu-_4E z?W@wT_2z>uIy7GMu@S&_7O=JEfz8Fge3ADOF8<~7K8z!cf4K)ZhdYx{v5SkT9j`O` zbcWsa7ulR2Voa`L?o|U!=Zw95YyPvvnk%oIAiTO<9?b=H0cLjC8e#7K$GG23ZQJ_y>mv za>cbg4_^9C^64ZjuH`=K1FpH_Na2$QF4jo71m4R_kD^~Kw_~IFl&d+{;HcE)OEjcilk=L?n|HZR4C&rh^ z7&ZPK$nkpveJSkR| z+f_&VXO7QF)iKmrkG`*{nddqevhA<)zVOTOlUMg}Zk{X z(lw*dXoT~IG*7p2J%fIHnPdFv-$XW=_@&NK=#!i)qIq=n9aq9@&FJhmrRvj_?bUf1 z!b=71=zYPwqPWcD`bZ{-cb_NjcoNT5pYFfFV-4pcsxGTyRaEEiw%_>iR;rsqOvl!n zh|P{hCYjpT$|)QV+&YIWTJ1e8`4fEpJH>#!<>xuN^Jgy-EW{S5{eA3<1XC5h`J2G8 z>SyWxUVFcpJ%t_y8=8NMJ6a_U4DXAR|8LZZGjqi 
zH@n;o8;={MUBz|TtOR!>JBaQ(z?UILFx|Q2!(C<7~ff% zBdb5SShfxuSI~>xQ{Gh@x=pil5TXf$US}Xj0=0W+q>oq3rYlpG*ZJ3TcWj}qewuNffzK#2F`|;qOv7d3O zZ5P8}_ly^`Wce+V@JcTG{2{g<#oVc_>^-B~cWq$L*rRwR@^a>~*LxjV)!)^fvWpJn z+5f!=oW@3o52K%XRv(H*YGcnYKYBOofobon-AQM6iPE0-ev^4m|7rg~(%K5zr_D39 zk7ti=-)O$)U4JiFZsMsL(J6ApQ|+>_#Mz4qmLNXY^Gx@I&wl#5XPSxI*tKcAw{O(C z3*Yb#B7WnWwjVFcmgV>7ugN#_jz52H&p9+t^O+joF7b<+p>~Pr3FQ2I`a?z+-E-@Q zj1D`4eB<-*jq8l}w#e#@ob$eMBxl~QwD9-k*)ul`fj^TEzieUe1a{T-dgVjp=ifSt zoUn+=pNgK2OpaiSxc%t;QEYHJ(+``+x;dQZQA`fH;#5jmJC?Zo?vg7PttRJb207kK ziLc(v-griN>w@`_($cY`b(UiM?mv0^XUCwK(y5vE*YMp5YjHMzbo^Vw%h;p3>)XILWhrNu*mkm?>HFGP)a0hM z&}XgfGX`#vzopT`N=y6yfit)16FT@-jGE4>hgQ0;^0dZRD^HW0sKn=@9Pwi~Q+sel zBsn;dIJDCEzkA)&j_xb=-hO=T6^8En&Y%_?=fmxV=2#17mVG+@g+lE^Kja(9r|vLb zrQPSz71#1kkXJizzjD8^6HI5Vf`(nLne#ol`fsE7?)l?=9{$7eIs-_201wb z=^tJwtr|0sji+!7c*0Z_j zyQhv|E)OqYE`P~zzVJ^esifg{iepmPhFYVfUTo2fyeon zMbmqZ^J)9Vrx)&BiwK0yppOGzc!J3 z{~K!O&9wbm@L&(Sx9Uy5u1YR@<{ZAOBHlI17@W@{XwMX%CU;%2=-ce?l>=|iBt|~z zELUe^R$t0VJb@bg9k1u_?-=@Xd3(9~Yw5}CFG_z2@}Cke|Ea_1o~91N_1M5R!^1&X zs(~d2Eb*7qHD$&;O%0TnvTDj$AC9v_y(JpRldtE_XYPCwIcITElMgm>vu??PZ7#4q zPksL87pbX_O?>%PbCcI&f4mWU`8}L#{C(`@Zz7Lwc@R7K(nvD#U}W7JAvW?&*u>{* zFN=+QGd6PPr&JDDF*sJeciG500yf1bH9l;LdbSDwVTZ9PPBM1V=ds!L@cbse>BPSL z%J?H!_gnjNw0Md+Z#jWHYoF%ZncLv`8?kvR2k4J{r}`4IVRn-niefY;OCD;7vCFJ&gv#!vlVny!3{xuUn zkJe#7uBOb`XtXZmve8^+`@%<^(O1xRQp-ER^{dc$nsaU2&~`@K1K>M5o+jI`do6gB zyubd~@E8DhhJCcBdUeH=Gqz*fJS;Y)c>BclnYCCPUHn@pZb-TC_ZY+?Rhy&)c7D zH@Zjz_mcePj4{p%VIB@AZxvkt`ES$GJ^hVv8h(mmv~t8Q_s6W{_U4fggI~dQd<919<{Tlyms+CYTfvG z$X`FkTfde+@#4B;hU%b3rX3fMXVPf4ldTJvMdKMU?b1a@8qcn=UKBD`$#8G1=!QRgmbGFqqk;3ttEGeMIb(6LUfrHI zI|#qdB)^?KiRN3fG^M-eh!;G-c*RTEx^gU3jt0&?=ew}j7>#43arkDuj8C}$8edRu z$H@f{?hXNW!{q_YBIbKe74hEMzZ5_}k6Ij5W#xN_erm`2y1JvEcEwRz`l+Dp8=)V? z=||c7WaoS7naQi-`5-z>AF}QZ*8+6Bq6U%oE3%zM=E+lfBE-GE;!C=Y`9Jnhr|{``02w4t)bgU5rsXZWXp z`084VkG?EDM6}Z@KbP_!e{koJ`060fp2wJ7|Dt_I>)g*7>uhNIr~rTeOSo)|SooT$ z>8UwzvRv>bKR~|G3FI4{>^(Yg$Jzz84{x~+8M)oe6>Up4ZkT=s=gn0F^giD=CmPA6 z_x+ynNT%Qq%;+=dqkD*VOh2qA>Zee>@5i?7YYLEqxB)5+~N61<<6%Z{M3~Z{CVXLaw9uu@RHsv4DR-`SE`{FD>ElW^-G!;i|QA?H>4^{gR12QC;AmEKO>^l|z=E z@7lLKqCJ*$F2zGgr!wu>eN6-HNS~a-c3Ki;?qQW-I4CPz|zG9;Y&=GI~1?ZC#~4^8Sj>ja%6@QFjy8Zw#s@93dyNdZ;;(4=HF~@t5k&5lgsClw3 zJm>y!3s)aGgTui!5}dvP4;VgWZxC!hh|?XsGZLKs&bBR|V6g2loVJ0VHc#=?-n!#G zk0Ud@GVw!@Mt|@_XmkX5go0mV>_%1yj^NrP9&>9`_I}jasQ&k8Urr1h?aPUwG6H+Y z`Q)qE*waq-2%YGn>PK=RDgB0ZEhrZ@<0I(+mW{yH0SvOmccP~_*z^n?HZ32}U3%f8 zfit}Nfr)c(uC-@Gt5);C#K_IYzWnFQO}^FuvC^`$UQZi+$9U`H8>|JV{d^a!gV0ZX zYU5wE@5rY0DZUvAjuRHgZcTLK5dTY;4#Tm-?^=i7W4`x^d@{IZEHM&)JmizuXx=_` z6gFGrZYS-#S}%gXki5qmkB)DDZj5(wANJ)(qTbBsO1x#aQHE4x0c+mzj^4BUDo%%Y{_mTndfN-p&-$>*nDH4Oc)FI&FYzTBIa`2jqR7Go{b#Jh zVe(3A1vu}KZ-964UbB^1*|>7FYx2xTNNc-o zAMt~wHTHMI@y1AKZQqeTt-X5W@U}m#F|VRG3@VyOQZ?tau4X^aYonr#k=m%Pb?_Bpqe5@XwozU5WF8xpg-bT7PHcg)(^L{~CLYgK zXYM=;k94{)yHqE5Q=Pf&HO$ovyVlac_0T}Bb*v_^-z!x1Ot*0L{Xb@(@hn_bIo2qr zpIP1Spz^0U@72hVxTTG384|N=-FoPvEv2u8*98+FnRm=uk$z_+I_VP@u2yR!$cD>} zIotj*aD*=(UN|D0HuBkMK0%*_+GrB@T%J1nZHSGAJ$bAuw~eM77<18WIsS^Eyed?m zFuFqldalN3^fj9==l0>fcMgs9d`J41*4PsRYwVVAn{A_or`hU{4j$<;jn$dH{}$@( zwU77xku8#^v@bi|J$3Vvj}?`ebCMn^;+#!#jl4UNA~CXc8Y`F@}^>;6{iuyhM1~XijA{orQi1?t4GGjebw496I!XV_mW|@ zPHZhfTc^jk3XR`-2|CkUw`Q%~K|9-a-U-^-3bncX+P3ZXjBeg~1AZ>&&&lRdk8iaX z83m8(+}O-BV{QHS0)792-_Wzgzk@^clRPw&etLypeE+6~(s4fRw6O25we@`(YqjUM zb(E6fb(H?a_SZa&ylg9A&mX%sbI3mRVx`0Q=H&_ZJDd$gf7IFGess$opw`Plv$tco20 z@BUdehj9iga;uU$5!KvN{NNkU_;nbnus7a3?ZYOnaWOWQ<_*|$sEKe>(P$ILTc;8*_o+c9zhg7Fbp37y zrkhN^lXfL+zs6S%Uizpf@om~QHAXgM##3I;{XXm|4xSG4ej1*ed440`bP`XQB%bnF 
z;wgKGr%e2-cOZ(bEwRx%P-fyO%R=KRdpSQOdu?M1Eiz4lC| zoi&#)YHY0a&q$0?cZb>|53*0_d;T($JMHoyuF)J(_s2_Kn2~3Z(3r`a5R4JV-Gg0J zxQ$C+w>a(N{2_v z;FX=Xt169y*na;!qu~0uERbs;8S24b(ZIP@$)hh%oom%k%^%^S6hFC%9Y#KZG%;C@+HQ71xULM>Q_P*dz-vg7<<4PRx@p6(KSM!0vZK`V&_WcW!hw@u$jT)hi zxlmntBRDm>M^>C|Se@xY_D^B|rIV*HRDQ zv6Zz;_u?nqa+G)YoXEJPySZLFy1V|aG2Qjg@p~-4UxJ=X{^++`i9e+ZdM;7E2xs@~ zP>q?<@%mL`;`OYjOXu>tow&*tbkqnJ5~Ia?BcBJ3y1|@)=5p2AU_@Z)Zo1xtfgk0tP7nX>%#XLo8+PRD%aS0j~F_( zBQ)QEIzln}iaVU<=xZ~5^`Up?>#KHRW?ui-ke(S!`eOPFmIXSs$=6g zuh{cW^%!^9`(>kLzb@123#%t**Fciu4)ZvFDhSMf~?$$tW=#G$*hL*v<;v? zi#}8*CyIa1(8sY6@(zdA6#0)ydE%?^iKKMQ9C0W+zi%8}v3m!!^FkCsn8oSYq>KQE6YCr`j0PhC05 zR<+&CI%UrNj!`F1*YM*K@T4^i`d&C6IX!>CyP&DoJNXIrBFc$B0em-76GyR$SM%=b zeVb0JiDaWPQAsTpUx&j&^THj@9sfKk{WV-`q?a zgGH0~&gH)LNUx)xMjgmM*ov zWXqC9*_af|dDmSF+Ryv9j9AVkxBF|a?1GKh|JL7j*|nmN)ziIo508&;f8mkyjE-}c z=S}!}%hjj-SM1Q$-I2KREBb#m7lCWAtwE@lkRtdd;}__SIiI?|k8B4fyy(OX9ROv2m+g z!ABkM_EGO=H#pJS5L=!ed+l<6?A46D#*6LR$yh5HYxLw*8>`5J)B7zq`|nqSk6iEQ zy<&Jhj8?>lh0@BSj8ihKJIschhgL)*X1zgg7Ol(HKFjm=R>4cM514%bxOMAZu6&96 zT>n|M;e&BlO<8k=jm+dxKB&5PvX$QuSYKZaUtd3K`)Li!1!?i|T@~p*-HWuIUiEbM z`@E-H-)HRPw5_oczqY-JScAokZ^n<)w()k!oexaNh;eeZ@x`{!zL2#q_HmyMT%WbO ziK*P}`FY2e_-?GZK69)#W~&SAE2%?TDrAfqQ*kUj=Q?fVSsTcPtKV$FK7F zA@VM7ooKM?Dj#=|nW^7~x7}~gJwMHVu5T_Z&rP>(O>=pGW`4^9^P3}neT>C{7c##o znKxt2g2od9HbdIg-0D0~#X@_`ZK3{wJ>X4z)0#E6LI1!vdB>MAeoSvklJ|u!oCTd^Tt0$mH9@MOm-3R?W0)mGmlqts1_j!wbMJp6?6QEBtzxTX`qQ z^V$44!OkHEf9G7gN9@NQCid2?OW(8O5Df^{?D&9~YU<)&Eo9#{61&?|6T|Ip+|P}Z zzq38^MmM*G?B$RH!|k<4f|vFjc;S91Uh>6b#28Pj<#A`raz2hEJnqgrXyUzuJ?|jd z=JWYT&pWsT_=oa#R&C{gJQD9o9{rZ@;ucnCH|nr8MC3b=%}Dre;ZO2t$sL!a&r+wB zNKRQ~Y)5IEuXK-hg~Dk4vVUh9`we{?`SJfDjs4|tUq5)|aIdW9Nc8B@-pQ;-3u`9F zx5w~PYyUpL?-;nKTArY$ZM;5){b)|4XxU=!%id6gy+*dLDEnvg&9TL``Yyq~+G9T> zJgXK%-|?HK>bjtQYOvjo_6}D(Z>k^cj>GrkE$ibf^@96RPS}YhAI|6}HnM)=S$Jy+ z;!SuO7v8RFlmy`&wjJz_VR$&`c5Hrq_Pn`v1k*Lp#X;4WC}5qgv~=HX^Nihoygd-_ z@_!?*`)z37=;WUq8Totp-rPPfXuEt0_;b+PI4-6*|(C@hBN zQcEMjob-cd0}gEHkK$cda|EN7FVxqOP!lN$BxOx=sEsVJWcH% zE;TkRXM-P@v35bP?1C}B7DhSk>r7Vd>;7JQwn+}YA$(cbp6%^N3){1G!k2}_Sij61 zNKUXWhR0YBvrn63=kN&Tu*$B7?tfTZlz4%??pCd>vU|AP661FYtiQYQ*_n3&HIQ1V zfu!>VwBA0!zP-@B(|7F{JF;|7U~_&kts~Oca>Z0VHGwlS=53(6RsW3Rop>Kc8QzLfuvU0-CbrT5Tv z$ikIvqv#3Hej{|!lj9uwnHIL@3U8}$r0+U`qXyim)`)1*BNsz;2%34G{fw9}pN`Qr z938j4C%!#V?9Dc`Nt|9vUge7abND&j$4^jR$#0?fzt}&nSupTp-S#$jQvdQ z1p90M?xpVv_SZf>q`#Mle?Ui$Gl!}nxPzELgIDXXY{7<^tER}m+rC?ggHYc(i_v^L z&c{=&?0z}U(*2tKwgX#>zCFawf9y%Wu5bf961DfVuJpQGIz6}b|B`&?YW=OB8b7cQ z`P#<4d_HT*yj=07`ZiO>4E3o=Z~pe7zJBzbL;v>rQ4sg-+8^bp=W=BL_p-UTd5?{v zK2}zp=#ki?&vxzSu}6>N9raD^=7Xv`^$%|Dv+Q2AR-T$Rk+BK%glv1)LW}!F;9fP; z-}+>Py3$3+{$hNlqwt+_hQzI7l5gK0S!d!OiLX+uPcyh=e(NiHBg-OOlNi75dieWG z(3zg^fG%Dk7PbPKJJQ~3rS_ls<`T{!uG@{1EhQo}ZZ2X0dllPf@y_wS1E9 z_w%%)c zd8)SX&GcP$jDPN2`aC|JJ11M`q^{*W*ST}Q4#ZvNilO|d=z#S&3NFK9DDSaxL2a*G zYI1`*iJ|OgUOS1Q>?8&uSjVN47|OQVMF#h=YUJ?rNb-8d?dI%vjJ^3DUzR#tchY}f zsUJtydYGkyDa2>%8rn)gTf$p6-^@sg?o zJ?Uzj7Y ztEe^7z!`UqoN;$ObDpqs-e&7b-ydjmB|h?KWQw6r#X~5TdpGB}scw3LHqrlQUdpw9 zcA$U%D(-i19;oQ!ZT2qR_|FvM;A&6ru;&tPr1tb&b8cV7-UxeK#i##eJoLdDEm*~0 zto__Sf<9GWcWT!@p5F?6{Dd*BCq7R6tM{s@9Wn!)1aZ6MN5mLQb`gtQ8Hw#}Mz3=8 z8{t}eShE+Vja;#bKcc^mVdD4N|NGKK+n_bYCW^+Q#MI@Yv8}+q1lT=j_U&8!GX!3{ z_)KKi8OX4C$+tc^H|fq4;QYS@HT+4Jdk_B`^qcq@&AD*9o7j@d9fS9(u6IVA?X0r^ zzsMN0J`dp&&H`*lzP36(`LWL@im&V^ZYPRA#^usQBa=^2>|8%*@yjD4M`OtC&~Gvd<%t1?vDn0ymSBgDuCCy_8);`h--wdag*`BBjl?`Xc9 zUER$WHY$_nkVLj>&PsYO^=<0pIxGRs5^^0%$aRo^uaOP*9*wrg-YXG1Z*cp|+n zPJ@pOI<)Sqpdmz(1$66}??JxmI=aJ#X8!^ew-#?k(k> zauURI+3{Ub+LG_#F=(&Z%8X!q*DQPHX78hy8sBG9vSJ7EIK;(?w{yvi?<|NUAHB3F 
z`61w_VJsTsz$tAT+lpeVD4Q;yE*w4u4H zMPDP&Ze5gf$V9^}&|V!hTmlU@K#Ox|S2QyN`dteBuCVcOeDk#Q%mL1dt>azA(v=p+ zn3L$<*WcZ~asRtlZFK#`;V0qjXmHktO+fNCB?oLwoWt35;7scZM_nlaSniBm9C{)8M$=+?y+G$rbt&lU)b5e+nNP$m@n*0yobCtGOY0%{=ARW z7q(#pa2{D8oWj3@GxpFsgY)VU!1)s26oSu&0Gz#Fd1r9W8v&d>d{dQY-aC-x3HFeZ ze~owh@Woh9llT;4tf$%Mw#ICHL-ro$h)~#u_j`5(@aW`onBQ{+aTtob5S%;Pt(*qV zo%wv9HVz{j*1wc`>ynnfYEqh4ox$bs{k ztyd+$Z&0stIb71M%08W$m*B=Kc)mmZ->TOJQ@U)NSYorD3wGARpf0QTD+T{6`OoGl z7yQ)H5&Vu;f@`^azzN_?Wd*lC|^nY z;as}M0CZMm_1G|YU5;h!PRn=b9Rtu(EJu!I`2~?=KWnesA9Qe5y>g+N!0B9Fvpy=u zT(-q-XsJ}PJzXMhMgIjkOb8e>n=94+T zIg)XP$6}Go88|1a2sqTY^7?{(?{9jtGx5E_N%87To55ScAF)s!Rhhi@8u!s3XL?nc z`&;p~hb`frO&1#*;v&;^O$LYrXpH zbzw;;%mKa1>VP%SvE-L@=fChxN7(vm^*|?6JAt!BcHX&uK42$w5R|Kq4)%=kZ77nj zv6@J-p_=#EyJo{WX9TeRpg>p~E-MVyoe`h^Qn1#B!&*kaVLm?-pU?PGzK|SeR{ezU zn~`0z%OnDJo2uD{G*p*cfI%$Hst+Fz7{t)OTcR_!P*@7DpIvM@Po93 z_=q2|{j}OxO8LyBk2f=KEwmT2wqE&vdzU4Nmz@NbRW7yd@n=gTU4j@`ld!uQ&5+bGy{iCO={~>F&l`ZnIxAFY%?O*@(A^sZt+#!D_|9Sy(_Hz~|@|^_rr1n6J zsj;)epCj?FU*p=(<6ob}JL>yj{p%OowmY)ch~5y`vvuK5Tii45&>R8j>LZQ)EOd=K z-ohWx!hP5pw}D){gFRF0P}hDa?&WJN@A^4-@Nw^-skNRss6ygPr>i}Pc^xzP&(F?z;dgP!1Gq=Uy2}$FiGu1F`Cj7}2eTx^8gC4eSnYlw>*7}Lg zdr<4vKiR$%n-t`Itj9KLRu{*FuccQ7$rOKpGHXNL6GvL<}p3icP&^VW>Xvp*~# zR<6OplEoV)ws(ymD>tyKZ9)4BfxK6X=k@I6hFc#*cR_fE@`isdrugC##v&TJA_on{ z$nUEe#d?KZtAsQ56pJhUxt18VXpS@dCb+q^b;K*7Tb*m^zHOI%FGp-&#D3eIm3JWA z$kwC3-a9VItf@jidDPN{U=(~c#5-sYYUt^t_65CbzviDCcy8N*T3vH)pxFc3a|2s~ zdvEsmsl7LQ{IEUXFTk64Dbz*H4pI zKm1bri$8v;@tVEH2500vZ5q6?FMS1_Za5kHPI3F64!$h?F&wvPT}Hh7r%^r+|7BDd z4-ap%mS4p2aXy=MiccO%^U2OEoAvxi#VIC-JcLIc2To_3asN~Li_MQ4%IAJwXBB>! z7&50EDJDZcQBprulL_NC~|OYtqu$5*481f}?DQnd*}=;{Re zH8mg$)ZxSAwD}&~INM;Wa;1PHc3kws?dUplh}TyPv#|xC>$|)N_IszgccY&v`7ry_ z>pb2gzSz_aHt(bFyZ2kOY9l0b(5>iA{34m<{01>>q2<<|uWQAl6)L;GZrf_kqTMil zc*kpr%|&R}*mC3|thVsM{gng`uAezD(=mH!w!Y_~pf6vfY=B701p^ z)}WIg?4G>N!kE1#2J1HMqkjXdt+6FLm8b=Z3-#bk)pqVtT`C^`!@Yab|_v(_;)tv zFn%}v7s@YMLwGk^)}`PbA;0{Sg|~`0&J1`%_~mtcm;GPuE@*vRX4`FM|JV=i9UhE~ zfe&?0HpPB$l^xR|zDxC0fK0o{_T^zi3-)F9oU#KV(`whQF}YNxU)of^k6=%mN#25o z%#j{aLaeA;_jA=;d#{^c_$$~~ns>{(ig?HLLwo9{P(9~Z+cr9FM%%Jwew$eD^NYgv zV&MY+H?mIc8d*Z(~*7hDd@AxoN`c_!~9n8Dl4V!cGzM1op!TV7Quk4T+@LD+( zgg5bti;RCO_KAy(U+uBa#$TEpY1>!>&dWy=FD@LhZsy`;hoc7Drw5${`%Lq4dDo%5 zWBR5&;cCToA@M)Z)JW;$eWDL^L*;DO*tH-y51lDG&{>{79cUiJt6}qC-cRw~!P5Q@x20S$)kdx8B6n>4>wMEgu3;~^h6gyuqn~p; z1b+=<>M!*UyqjFZ{(p;;uNd7m=QwZQF>eemtbKd#7itglp03^ZnY*JcE&Erf9?F5i zh2{(l}v{G2bTi}G?KTTTby?ik~6gD?;+CrUaj$80!IDs0JonX?fqo}?|Ma* zKR4IN_0+Xu6x@CE*3>nAs?;_9+SK*Yf$M33>*;~(y1?~Gf$LKO*RuoH?+IMLKX5%S zaD8^*dVb*ggMsS>f$NI`*OvyauMAvY6}bLb;Ce~m`kKJ?b%E;}1J}0%u0NZ){>g;f z0@t4lT(1gT-xavNJ8-=raJ?~b{jY)RuLiEW0@vRPTt5=H{%+vn?# z`lo^Gp9ijg6}WyraQ%mQj%I!`VNc-t#lZDTf$P@-*KY)_{~EY1IwRfYn80;O;Cg)E zx*~8rHE?}&;Cfo%dV1ixE^vKP;QExn_3Xg)dji+*4_wa+T%R4do*%gWVBmT|;QFG# z^`(L9D+AY81+G69xLy*tz9w+}f5^Ms+>j9=myzsxayrDObB$M}to@mn3^cR0rH zc8ovh7+>cYf6Oty-Z8$(G5)k;e2ZiJCCB({j`24gswM%a@5&8-Kdr=8@J5*o4st=e3h0r zWbU#@Uf8jRR}`qjRK0pwy{S%e?R2?29M|D7b=KqOJpTCO=i|TmyRh9tm#CE7$z3M* zlg*L+5VBuFwqH>9vdONu$Gs9X%cIFY|0jto74||I2@}M;$m)u^gRvjz9vj8U2P3l-;E~f`_W$Cz>p%Q;X!jRm{>Aba$DizT8@Xs(c^Y1bX)90IhjY&`Bcsh2ngb@R18amU zHP#qOidWtGbd6IJk6~<>87Q-?1Ku%>%T#Fts!w|Vef8YSZ_1e@tC~Mw#pPE%@`#G- z#jUzZy{uw8CZ}iT7ngPJ+SA>C@bFRN4nK0raVN~20iIn|SnDkwNsc5!h6f74GYDapvlIA!r- z(ogy9%1Up2j~=~y_v<%k(A+?1*s#Tm7xY)L3Hoh2RdnywX8=E)mXMN>Q&3V~*`s&A zLBkFkTRrLMsmISabxv)ApN%-P?fi=_U2*kg|8}v^f-}xO@4{s&qfxu`U-8ZA5jUM2r5*P*$qv z*Meof^H}5(y(*vYG*S7gv`Srh&eDsPU9Q|Yg=G~zR91Y-f3>auRGsp3Idh>pi#@nRtx(sho7Ela9`&GlM6Fkw*|S$D=igIF 
zuJ~@W^YNJw07PhEab?l_ll;uN@6Fo%py(tyFjMF}8=;nkUsJ^^AH!y~_5y zt3FhpsvX4tF7=Drrw*uCSE4J;b&M<9Rp2Uhb#Zlf^>+1l4RMWhjdk@SA@Jx<=iqRw;GN^ck~i>izTP?>(b!=_Qw6eZ#GH-t)k^$2UB+<>jsKeDKK^Uw!|R z{k}0OA)}zYN55ezCMG^5MoMcrakEg+mC*siVpPoNm};KoZ*ok`)R+@vPK~LFX^3f- zkZ=r%JtlF<5?;Dw2`^7yqH?=1)^{*ZOGYjk$FnSP#1b`S$@C>Nm&{(`TQXOcoVR3w z8d@=Q=up*jD3knv+|a&5v((W1p}PJ!bZE-Zp{4Sxa_#<%kS-JuF+>e9B!6AvRaZrf zW$UR<%q2Y^T!fe*v3dXDFFl(J+^V}?nd7OjoHxV3P8n)KtK* z2%JQEC})_gR!$xnO1UH3n~D_horo$4Qs*X&lS-D53?6RJHhmSe@sN+tWrBVv|j+k&t-JRyhG94kGmU0A9!Fpjj=~fiXPU|IRGen2V>lmNzfk94DVF1MT!E`_5AMSnti!{2 z6i;FUp2PEa5ijFaY&C6X_&Ii9H-5tbxHvguK>AWCNJj>;(F0ZJhXELf!5EIiPz^pu zt|s9qOu-2_38!KfW`o~@RdrZ^b8#M);u2hj6}S@D;0D}?n@zVeybX8aZ@35d;Xyo! zO?Vnx@B&`KtJrFKm!a5!eUJ<(1!>4Y7J8rx!*Lj@F%gq68B=fqPQ(nHj9I2R485pB z01I#_F2f33iF$}15*zR=w%`T4gjewnw&5dujP3XwJMaT`gYTKqnQICeY#K6< zg=+AfK8o*`QIl~jj)xa@I2-5SJS@evxDjjdF#e8>rp*ku;3d=R4By5!(|Zg*!4CX@ zAMrC>WZ)w(2B+X0oQv~K7cjg8m*P&`Wm?5>BR1n%Y%#sS@KtQZ`}h!_KnjT?a3uVg zhwE?yHeeH8!E5*clF2`Z=S{CN6k9QgLTfUnU@A_;4AaRBXJHO5!X;Ra6c4+g*NyV|W%@ zz=cJ%4gbVP_yIrSXY9eBkcvVQGSD4;F&izIhcGU~3fzp9kV1Vcc0qMkY8h6b8x^y$ zn2b3H^`h*+HMkBx_2w9(tDE;SxfzOns7C-TI2+euCGN&Oco1vw7@oi;Y{nM6fY-1U z+wdMf#&*o=L;T_@eYo^* zgkuY~qSxKT1Ac`x5MIJoyn(k&?=XBHKce?NyglURjaEqEFGp?>4M+sE>FA8J4EgKXrZ6hkl!hhem7EW>e_fND&{(|8M)|3P_$=kdB} z7sJ9o*?BL-q9F2f4kjFqOX4F5nIUp#a(R$?o* zV?R`kOI5&)$(V}8Xu~bI4bR{OyaSrS>K3fTy;y^{@E-PqP72i>{oq9ai*Y6{!KJto zSK}I7hud){*5YA2iN9k5RJ=F82paRTO`9&K2P3$X_4upWw=C*zP{N@6$|Lop18 znZ__Y0Vm;BtinUsjvw(ee!+eeC%RN8496H#8^g!30k7jt#PGqIIK(3b*~mpc3NaXb zS)&?(F_?s@n1MOC4V$q8-(U|^GOxpMoPZe!Vj-@;4Y(Qi;vuLM){8c*!)E-5edwOb zalvrRNpq>Q&<4KKQmw`VScA1t=`4?an28o#j5T-}+pz~KgY=1MXvNvsfK7NBTd@O3a4**2YwW^rP}v+8 z#32Rg$UruFpbGsk00S`?!!Z)oI0BP!6sBOR=_H0?7QCo~A1w$&<+#)-m<12o@I1ES zeVNYXMjSkWkD&5+KTO3;%z+Oh6`>Tp&<_)#3fN|>#E&?D_(GRTG^H@i zMlSMDfWa7wVHk;tn2MQLiB-4{@8Sb|0aZktU@Gpw8tjEC;haMfhF};D$5i;yg3ECw zR^eVe235*-A_0j=#t@7!9ma4J#^4A{#Z1hB5A|rk>9`Cla0RZy^;n5}vAvA##2P*@ zEjD8}4&a6gwxiOen$dzV+AyIj>+4H=LiKa0K7$xm6HgNvHn5MUlP}>k%t0fXu?d^; zBDUf??7}ZlbBPyB#Y~(654>1_CAb`8n_cQgr~vsgj>Uzz9JgURzQ8W*hH7!CUoT{M z5&86`43`t9*!7T0{e)kzAAjIas5LItWi8u*X{bd#TCjf|^EVKuXu+9i!$nwu8*npj zgL<8^1N|@&Q}GPcR^Aip4azn2L=b1-Yp6Gg2W-brP;a@^iI{_hP;YY+E>6VFxD~hI z4%1x>@5a5j9}nV5{N1#X;WKy+aqm!0A_=L;K^{s_hT#~A(HM(rOf*eqI2F@y*EW~B z2QRTi%gsC_VbKryeiTrgBWdOd#F8mJlt4oE@hWD@? 
z-(Z(X?Io{729Cf~%)}gAh!uDMYp@pTH?MRGawQH4I3gId%ffCJRs z#c$NX_hVy*OM8`}=r@S`9V^M>?nH<@YcX8(*o$(U4K`Z^a9%9!dPx8AM*PF{6vLTW~kJd0x`a`9EQ~bju{hZ2DlP=;c-N%{}Z zLL1J<1-KB);p|u3!nAN_kmfJ|Lopm9FbbnF12e(z8>u;%hcM1HEoFECmYZ&2D5O7d zHnQ@mGGhP+VH%{L&+6CX%jMLgcpMwC#q=V>w;+9fBAIhv`uXlPNk5)kLhOzn7+@OC zQ2O=q>2oW7>b?v3g4N2V7ME->DwEKA&~w+>F;|JZ(}`kA5FT*`X!I}Ttg zdEhSm(3|x_`o)CwYaKvUKaM%1zv@N=?&eq^j3v0uw2R?xbhyV5&rl?rx-#sBXHC-I zv|GaNW&Jn;$3gm?&c(x~#~40|4W^#=k&mN424FCxKWc*MXog~qX&u8y@Hn2pCj0|W znZ9NC9i-1ne7V}NlcA74DN(lHpeQTSA^k$#Abl#rm2_~sKbLqdER#@j88w|E_=Tz&^?~;zs(<`XK=6ADaWK z|BF;&*9$sP7m)J56Xm~q6@@5&ll61ti6S-XkD^om$rUXt2 zG$zNZie4&~%ENuG`&cB8>-iOG<3@GI8q%Kp$^AsenE9NTRou1ju<=KqIJ+^lwMR>3^}*ebhXU2%w;^w*uQ(6%CcU){GpaQox5!g z75<;Y7V^5qxY&OEa(hf(eY>8zhi7sgJ-$gj7q!GP*DL1qTMUm@AE+M~?xC;H__Ky8 z`<&@&`g96G{!W|T6qQyT6K#%h869g*Cnv_os1$k9WBMuGp5*-OKVaaX!9#`)8$NFQ zgzCd5s`CHCe=2t4#*GEzDXlk7WGw2Bu)qA9HD}}Le>syGXDu7p*K7QlyGjM>fA{Y= zZ(ntq>oEOT;<6rGiClwk-u_C zJ}5a_V!S%ySZ=92<2=U7x>--1y7T1;oqICbrHZkU6L=aip2@yL`*c*Ds~DSjjFD>^ zc_U2;X?p2WYc^4MdPo@E6cK5h2YamP<0z7JD1>I`+GdUtAX98XBsP7jnF)#<2C z!#dHf>@uiJ3y)0wEMwMVS{Lq!nws6=_Nfyfk7*3g4)ALoUh8lqF2X%99&dF> zx5uq8YdDKnJ?pA-JG*X4RBAstWdIKSid z9iQ&_O~=xN!xNeku1VOK@MS`7;@HHd#G4YIPy8{lD5*LroOFHCjEPU;!=wXA-I8Za zJj5fJmcU{17~0JkI-id@D9qQ5jTxmz-Cec5N`~92zScuE>dz|b%%!J|YX3+mD=ss_ zCY8~e8Fj&^%JQM*GQ42aWOF#Hyrq1(IsQO7TQo`@6}^n%z+Mx2&Ft0Ki#~@@hklfF z@7>*+Go?Fi_|bp$NbD|iD!UJ2sn`3QFgo)1$T9UzpWpOU$G+D5+`jZWjCMXojhHfG z)(9@~_I#%2S)&j2_^{`$p0rg)PaJs9XnD*qhrWTqflJNtI~YEKrzoyp9ewa4x6~Qd zrL-@Z*((EcySy^+m4RLKvy2&!zU`(D>vANi^{atj4ScQgtASrs?&dl2_|u+B?kb_& z@v4%>{O5~*~Zu|b9hwV{%qR6J*JFl|0p%Xdi3Bt9&^NimNC=NGUhb>EMvx_ z(VSj9V7WPV3wIAp8#Hau%I@-PJyz?fCwYFh`|I65=)QxL(=h1kP7Q;8gFIa2#;{>f z0%_OPH~KNDa$4o7m9D;ZFdkej>ASd+EAta?Nq#K(?c^QFdy_k*j7gcDa$d^qDbJ^T znv#@ySZaOh)u|g&x2Gnh4M>}nc1hY3X?)%N(|po>#h3k8du0%M9B|Dtc;|Jlc92!xg<(_I|AQ z7N)lK-a#Aw`(cNEd|I`;Dyx^w>(XmLuWE9i!r{)xEme`J2dZ9R>cgsUt9~aJJfrW$ zeUBP$J+9N^cj@Pc`##0+^}e6*{Id?}Knro)aOb1*(EdZm4dp(Uu9Lckhac+U>DtP< zdTxK`<*Lf{=J>mnUsZY-Z|0~xGW_7j znhIywvlVa9x_z*JpUw~Vf3W{Z{VZd~V^X{66FS#)CT;Y8x&M&Lm-`=GIg{teqt2cR z>9IDpV9)UX{Ala3XZWSC9ygfd_xE^$GyaGE#$#A#8IJ2bSr3h{S)J?Ho}>ZJ#}PfP zso6bS$gLksupYm4G{$1>5WY!U%^Wdg(997NF>}N;{VZd~W0pDHJm?&A?219uGzJ_t zU|u(QwjO8dsY`girrXMHtGhi;nL2I2zMj(tBvwruAdgIQIBh@y>FLf9(T{I>{nG0| zuRBL1_O`;a87n0%Pgfq*#3v4}Qg^D&d>&)i#D8Jf#9#TGhzt+)sGt(O_VDX6)*k*h z9O6Of7Q9K0a=&Y{>kZcq*M3)KhkhL)SOl0>(TykR?aotCHdm`*K>E|zBu0b_#?M8&zK&> z8onMckHxvhF!Fq5?tOOH)8^RweApmy!oiPD1rj#6V6q<86`ZSwi4(3PFRhvo{dgq5 zYQoF;pYdEZVSj#BfibryOHP?^bWu~$lnL!0=N8?jLtiP{9yRx8a>nKf4|IB}(>tA- zC&=R~bNEN6%<|#o5;C*ANe`PRTuNEBe8OKn9?UW3Jd^Vtv9@Z0Jh~Mb!y!czi%u=_ zGj(Rs6};yQ6Xda_^xaZp%zEBg`bTM5*$Wf;l#MPsrfd$izaJ-@+v&oKS4`Y0yIsIkzRK`xGlsi46H6w3g-o&#qE+d~hJ#J~- zopI~q-i`Y?E-}6`{@jV@P8=M6IKvt7&GF~O-=IStqNzT2tUN9)Gln;m-A#NwKK9=~ z%DXt{50090&Ut+7IMU|wvgpT+Im^rL%2~@<_0!m6vVIzCJ!U$_jj&qo2G8Zwa7XZ| zx6^5@Ua3~nk^B#OzrNwd&W?Pdu9Ci&lUxn1`K}9Hce@@VcmB-vhby^5U!6B!*x^Q6 zp_@8<*x}a>2{Ae3&?91wjF}NrOD=tV%tJ9x#cYlFSImK!{Mdf6V`5K&?N3>hvOMM1lzUP(q`Z^zS;|i-f20(r4oE#R_2kqGQ`e_%OZ_o5 zKdpb-q%?2ZS!p+>J({*H?bo#O^hxQS^fS|MN#B(Ie!458Bx6*@jEv7N3&kY`Y7witPa`TvX9KJ&u-1WIQ#nS z2eO~d{xthQc3#ftoEbR_bFRyIBIl!=Jvo6&>yeh*C->-Fe{Ngu%G@=%n{(gI{UtX$ zuTS1Fc};oC^KQ<2B=6I_AMz6Od*n~bug{;Ke_j6i{15Vf&Ce_7Q*cZ{L&22=YYPIE zTMND@h%KxvoLtyc*j9K&;hlvW3qL5_SD0QjplD{%=|z_p-CeZ4=*^<j$72H>NUtuYwU5VG8eIE7`JMy0$Dl*|8Hb#Xm998>#M(n^Ojgr@?~FX?VJ>ODt{=CjTaR+}-#x7N zYoh~jDm|puW32k`9`+U-a-aUc3d|_zriv0Ov-or|ZCuv~*G$)$u64@!xSwh3uiP48 zNQTcRqBAO5f}Z*&kGshi3VG^%p~Q*>o?vrhbA2dd!o=y}AY*QSW7rq;_+usOH(&&=+nEHYa%+7rMQHrWQ{)&Es#ZZ+15|hMGKKZ$pg7?~g~o 
z>nSt_?gfqE2Dh=4eh$>-d;AMLi$ZLXzp>fJih_aGFk=~>rkck3)UF-Av*!xI; z&+l(+2{ne&BSB3bZ!l2knAUsXmd*CFt;LZ!LEn5|FvLy*2qhbvqvJfDakeSh z6Lz-*1L1%-;4krn`R0f~Ff7qoC#zZz2+nmk_&l|~U@8Z-snP2;Vl2a&lB6knmTv~I z0)L~oG3*Zc=DEG0aGn`X+7AR5xdTB~#krYj2J_a$d$`dTs)`O-WF%@1U67P&#EtRR zfnce9?pkLS2hLs77_Mvd`DI zGh!f1kB3MHWG=?ZZp@zVAwk!sc;a-MmujlsF0 zv>G|@iG3r{7szCYe51kL>|3DEOJB(AY4K&&cxv5stFeM)STQ3Dg~A@@Mb5VZd!{}cBQw|p zBR-9^(}~FijhsmK8<7d>lQZ1l3z7P*`Hj2<2}Vvhf1tk6>+!So#!CY&&Q}!IkcG`< zcU#Esd~S*GAm^SZ#2%&B1Onu-cCL`oZkni-1&q9gUCgQ>Yv$}BI{k8hIHH*ZHG3O$ z;^u^Ap=3wqQC`#9=qE#z6O^;S$UNnIwbQe;fX)KR0?b61Z-@Juye*5|-iEpE5UIxw z5BlWb`$FbvljQByiydhqA5USlpOvX+7!&qE3Z)wX9QI&f0Z|}J_wR4Mw1;zo^Pwf^ z^XjZ^0a<`Yk}`!8FAC*GE!g6#I`U6t%@?=ii`%@(I2Sv(Q$6MqWv!c_y-v2@7{_EYcCdjFc z-O`&j$gv{*9zzfL9(r{{ovX7ZC@q@nIUDQso7kaIZ!*;JCi-0$IRf;m7duyNP1vi} zuUYJPjcn1 zaRijHlHtSLJWE2_mnRBL5Hh zj5tZu0rPm*YOJAoLj}5P7LgH9k^alMIs@gc ziG23j&A+V26E1WXwmN-Sjew{*?6g#2h{DCb+EiXs*H~}7VzDkrC#Tri~nf z0z1eov#pAw@p|9FmLRoeU2h0UTsBLg$&s@Q==8{_ySdfxPqbScUF|Vx!ARlGK{BGY zmeeeJOk@+uGyq6|A=)yO+LX4QR)09g+tN}%EmAkJ$p@1huQxHZ|^T5Dy21>RO_c1`5Ix<(tRgXVh|E<9A8oIrV_ zi-=Tf#O?Ekd};QSu6U-{ll38gp*;~fHnb^7blJ6@AO*ZTR7=gl7bX$bhZ1W!3T|rO zzU*3`v}wG$PDfxpwdT@VUrno=bWN1|L?LOjwb`tJ`hCTeHDVz-+~o8?)VZ%T^joR-Pd_MW(rc(W;XsWThD*OaA4{ z@*N8~S64xJV-pQ^5`(S`6DRXpeUc5?IT(9kR(RZg&qDWnpI7Gs~)&e!cb&eYvrBkp|G>F zbjRYl`rdBZEzw)pK2W#qWlr})&e3%^Qt*6QmGX`)i~gJCWPLeN>ofZYI862~Inq_A zvom}V1r{wq`{X6>^Ld@GG@BA+VL=}U*j}SETn?epV)6JpwO^)PTZBXzy^cD6fSe$b z=QuZy%!($aq<-n9u{u7S$3@zx`t;D}OvoAKr0SraMZ-Jbccus3X&?<{$%maU3Dlg< z*&6atou;kW)v>91j#a$SBnt*4qU1as{9i3ag(0d-f>Y0q2BZ7SnF8rPB6slxoslUS zfwbAJu9!N%r`~y>!VQg-HAV}}nLK45o$H<-pr~%td5ygV?VBE1M#k(W%cVHi{VE00 zLqqFCa%svxQcR0SH%W8t83D5YrpCqkNTd+z^#p5aK70HnQa~F$bW}@>4!Wj5Z6jH2 zFqB-|IA4lYTJ3=h#v65}Xa&?z1yE2;%k!}tKMrVaITBW6=H-o~* z>n~y{vopkuM4iV=!IWAX@KXGdzT{ArMolr_j-b$@W^Y#X+=g(IKQB7SYG<@tPiyns z=D-4J*OP}d*Ot`={6@=`-Ur@VM?)ls%%}~t>fSXfbphF`L^;FVQn5_YV{$;*^n%(} z*<&eeWGD5Bt5=m4nIY$1PGpk)qz3xRsAADzl^?vQ~0DyEc$SpJZ72%W4;88>8%@ zRMA4ZTO{2Glfszdu_>HqOjDh2rb|Ybom8X3$&@?+`sXCRQs}yAq!+_eLt)PZTVxgo z14!aS5LRk*3xunI1peXg7v;6JK93oGD@AYjmN7GTI-SWFvsOipS5(q&TwHCkevp%{JypHsuJ-lA3d=bM4tpv^JV{9_TS8;a{%js8ii+84=n*%f1+&KAN+^ezYczLl7)HPD$B9gSxX>`hom8$1Sx3(OyESZY#4@r|C8s6qr zhipAb&PZBk1vD{y!E!51H)zd-;>=t+84K7Motu_ZSTbte1>0P!-y_lkkt>Twa$KWn z8qA1<8oN~{0qxE?M_!@dwB2l@fp0{Dj;gwVU-yAq=}SjzW0QxgOwxmH1?%^=5>GQH zSgE|Xb$P6%MPF{<>Wn_R!DbzeI_IZ{*ti0iE-%o%_D!DPTwhQh6KU((JqVHOF`2Tg zRrAxqI@YBZS?VEXzEmJfN$tX@aYoXB`!8qc$Sznsj+ zx`3w-n-QhbGt)(0L-noE=u(#M1fyF@+WwL+HP`0LBF34|2^1-QJKN#<^|fq^v6o=2ud%pTnI}sE-tz2TdDV@nG+3*>dEJP2~zlwZ7oV~@Q3`4(=NBM zwlP3H$$L1iKxEMu$`u5vD%P1A>S+f#F4x#c+SvdvwU%_&MVBc#`=cNhYFN-i{0PQ zDYAqvkc~HuY+_+#e!E6pX=ILdZOk}*baf%cp39EsJ0~0^ZDbXPik-+Fam`&W( ziE)F0E@q-;S(k~86QY=Tx<62_DEjbJM9s5{Gb`_rA|cczY8kFm${h#MjiS7$MVdln zOHypf21njGYNdVtdKPh^+i@XBif*?i z#V$uOD0D0*M=sK?C;DaE^%{)Vl{!M`c{{zS#krM*$$Vcx?a#ZG}NN7(!T6-O78 z5heHc*a3mMI?`gGuH1U5wA;DNu+U0$y1ScR;4*6g$~@UXqsng|i)4E?mrCq$T5oc7 zBHs!ys^t{qv~no|I2y+G8rLvdI2gVZE37qKH!S1yR?d=;ufb?$`4^?l^)2GA5Z$>+ zzLjcC>cT#6ZgY@aQTiZATWlu@j^kAzS8-b9Mhd$@Wo%ZquJ%hu4qMSGt&NsN@#Ihb za3G!3NF~dA~ut7~xi)+G(8#5n2;c~*GzZ3&6?73|cgCR(~t zoslXWXQ{6?wuurWL!LOB&NGvTSl6sb3*P?;MOzJEGd*c zlKt1U?4&08wTcUy}nB7V53?j*qhU?QJOUap;>6oMuC-*rxLO9bT>FZRJ;|%8< z)4P@J2&9UwW3WpkjF^;*JhC^sG}5^rxn7}T5xLegpW7RZe$(8@9J#NChEpBAlI6}& zdTBTSM1C#nG;3Cljnn-BI<*xzUS(Z|(&to$a|UN&sKOaw_83b3Z?^1o$4RMk8EL&o z^=&vdAkr=2*nsFpj^iNI)8SkkS@l6R3daGWFp-wAW9g<=jG zBJW{0QgqX#(d&qg5Z9)BwT>iUc8ziq4L5N)l9ScGaJ-=vY6A-#$IRKJiyX6( z$QHFWTQy@YV+Cm%)H>pm-U+I*QX_EegHe)%e66)~dC?2TX%HgUClTc_#!W`%ZBx>f 
zs!K8wmT`Bbl!5zoIL9vOPN3=!W(YvVawKI)OX$T8n?b9 zInloTD$`uOzVTMK3ewCFiGpBrC_|FHaezbm5`MCMlVYH*V>6xNWFx603Y}N#lb}!v z4X++0Cy{K|+BcSOcGDeJEJLf9)_o-$46_~DUFPXH(1oDUtw&5p`udCrD*cP)jJfGD zS8nDd=}4(qWK1uRN@JZnydYq7u}MLdA+wyk|^?x?&HRww^gccyq(dHoMwb`6E)rTEsZ65(&`Rn)9u4g$4{9tlU$0b zjx@w0*UzX-hqCqT=n6q6Bh0MU7)?F1BC1R896euWFm=>0_5Ov7K0mb-4;hB#w+N#F}fmh^|aZ2rY``=`PKK3p>EkT9lb{Pb>u8IF4weQ zOJsSHyyHwYP8}orCRkcUzCz<2b(SuR%C&E9ndQ_oj=%YV4fa}^ZN!67O^FHA2Gh8* zKpl9IR3T^!OvHsU%t_ta zl#Q_FH+foGIOkH$`9==|Gxf!4v-h~O{y+t3-`rih+tunv3FXU04T?hDyW^-<<(lCX zI$YDg;;5mqj`xVVyWe<;G2bk6 zQ{^Sn@1?WDbbHDu$cxNr$z{w0Z=Gz}j#=_P5&`kXXQ~*;Z2-xRc72Xx+^w(Aa#2$P zb*G8*gB!85TtlhyWM7D7MNHN4;SRl0#wAc;c()g|0uEY~d+?*zY6l#=XifV? ztpIz`G`(ru^lhv&KYG$$EJwo4OoEKYShyR3I-8Q(N92wnD~_%6owwkAExMeIR)9Te zM4CNmoT9lDCb~r>J;OnLW{{wX6=$fKt;o4j5Iv{0*_^KBu(Q2V6Emv5?plfsDg8VP zizBnF%XoI$Okkca%(_}5kt&^CcA70N&iS|&Za!-)vI;($2-3f7)Tea~pv$~g-BD>( z-lG~HMhs-@zQyR9H3}VJ`a=VD|4NQy4rL9IB-c>vlGf^KwzoQ!-YuWpb)e6bJbQ{# zKzCs2nqIyfNX8hLz5K@2Zx%OOiX3v@OHMm?V_kYASXZ*sBGb|?%!o|td@__^-8abk zbPbY(6V|t0nFTplD7fBDi1V{-9l3%aALUCihx(HQnbw3T(aY%F~ne7OXY)0<$E40ImnrS2HV&#rVxdDafqpp)} z^zyPDE%c4_*L}9JLGJg?us(5Uy;*LEW|(e2Q;(FYgl>su(%Tl453teXH0~|!X!e38 zShbWqYeXk!`#eb|xX!HfvKD)s&Qd-fDKE0-x}{|0vMWVsq7IgQEs;-o@o`W}ff^r4 zk;1{KfXcmfyf+7@+2>EQ@6*=h2y5lG0sRRC3I|VpGo2TW-cY>J+m~bvs95+@%u&59 z#m)(`b0Z}iH_TN2n|ac2pzHj}=0bAQX}&qm#bV=*R*toNE+@yRpLppm3Dbgrrq@H62TFV$$CCtF+>P?zi=P_THZA?n}tD{s_mJw>8Pl20sh76J>>c9ahdbZA< zgT8cg!fJpe>1pz{ra+F2aHc&gJL(H6(9A9^(WHMjw z{4kmd@_rd+2+7B|h*@a|=t?hnfYG{$x}ce7E@WIbvR`YhOrH(1_H42ME=kEJL+nmB zwn^tMq<8&sRkkT=dQ`uILi!&RGQe7kmHMnDtm(*F>=1h`cF2H(UN!KbkUe+TT8D?FJq;5;#t3nk4DpM zCMoOKU=bZaK_u((%O`C0kbF^6X9^RtT0K!5YOUv+AGj`W-|ebTK51fPSo0*JB!Gpb z0MaF@m7iI4UKX~ilr?h2EyodHp9MMG&Py{#cN^K|SiEtMZ?gHZCCT^G%yDX8a&^m^ zC1-P2YAK{#AF)P@gDVa4L~g4;q^qNWZw080({-6LqrJx#b1_(&A#yjXeV1pbBrJVf z)+J2q6V}!-h?;2zmbMF&Vn#QgbvwD+q33d572_2TF^rjOkG`T+Cg-J@P^=cl@lLf_c- z@0ZiLvU9mEhhCp^k@BcTjBDLdag`qS^1<6?3R|zffu&MfTDqJfaa)%~rMY9>&?0-C z$~{=}NqhOmiSVMls42TaThK1ps(^*EqG!}lJ!-cKs~F6UUXuEum#b;{(V=!7xZTnm z*BUNaCvvve2R$u(2Swz7nd8Rx=SVm^O9ef$%NY`=&p2s@OJjo%zR3q2C{U%vL+0S& z(~?pyWiUrRyw6!;G%7kr0<0zg{qR;#lfLi7@hLie1=0KzT_{@;jalWkGP^jPkW%zB z6?l%yLN>-qTkTrz`H?x&(z5TjWFxIt7)4cIpn>iW1`GLmR0iY)4^smqS! 
zMP^YCvfr&#g82d*vpQj4%9Y^k@(y&BZ*iATp>lf3((g5_dqM~Z#<&_t#= zbT?3msrN5xX<+a49QT3-YR1N=T&*B|e(0|yNhOGa)~r3U(T*w3Y4R!3gn1>O2sAzy zW7l=;0Q<@y?`ke%C-xjGNPh~HG%4LH_HstSN2M@C+#4@#_gTbrE8OfwuwQCb<%1!j z(|DC}O2n+1FMsv^Zq9fH=>6vq%_KBnb-Jee6 zkRgzf$r&O?q_};6TYt?EXA+;DJ!l@cym{u992D%h+q3MTne|-#q3R=zjexh)L9=zA zCfSy8mm9Ggy?th5Jef?`{Lp7DF>Qa6mm5_Y+f9eq1jl?w0yS?TcFy+JR5&N)V`2Jh znv6<-bwfhb8g;EwYBRdbA&&`js3UJ^+ua)VkQ0eG!)JIt%zq?o;%Qt8P-*Gj;#)- z6rsw|{ua5&Ojaa)fVp;voPyR$NYHvEkvkLY_q8_A>RhrG<;FOsEV z5`0i^MkqSe%wIwsc?-GXovz=?zTrNUDj$5NhR($dYRsAD1!(e7E89CLB$P6r+YKer zd-w{kq)5VyneX?cLiY}G8!NYFMct$6>|!oxKZi`sxNc(1pu^B{6*||B53Bo05?J;E zX;em1mcBwPU)yCDY3}*DZzPnWL(Ped_8&z}HF{!9WV{EVr}y}`8$QFY7!{+` zv8cyLzPNBMR^tmK^S#NZVl8$-ejD*k`GUKSe6Kss#a8s;TdR*j2rKatcHnmmj`yU0!A`)BbI_M!{F5Yd9`@jP~;jPG_Fi6B6;&r(AX4G!D2&eNq09WA&e2lMf zBj2w2IzGZ)Wbw_v6L36g5QKai@ZYc*JFp+seCO;LI0v`m4J7dUc&8x*`3=2dzN>OR zHXxDT!t00GxC&3;8w}!Gk!NBFR^d^+hs*h<+R1#s?;7kt72lM*0PkZ6-#j}VXW-1b>=s8a1-7^To;ZjYVaa{z=#TdvjA7)Db!Z-yMbMm zIsuDt7hc4VDDK94;v#&5s_wie&cmbl5V1Y@#&RsgYV5=ZJqfQ;svO54jGNG>mr^%F zzMp(K{)tjIzg3B+@GXY)VV`h2HeqC6e&Y}~;SKCSbwAP?UdK-u+Mo1~yRi{FP&I(> zZbmDv!WQ%$NE*laScmtJG>F$@8qUS-_&atYZ!qtV*|-`HU^`-l5U;o!gNG7_I2SJ> zb{Odt_h9^RjxT16;Jn3+cnK*Z`7V5XgHDH$9jlag`@eMKQv$^UPjy) zzRMq8+=6e>Wh~3!bKE|TeZVK^HJ1h* z`|%Z~o}$z>*o3b!>QtujGJZnEEWSM)&)_>uIgMWh!R7cEeP@$T;SH3};ke;8e24KK zwh>q0HS9!w4Zn|zQj}vRnz0nuV-4O!k(XsK183o7>_CTF;u=2OjN7mgU!v4Uy2pt) z6R~x~FV4lcD6FTf!bNx-pCGk?eLxsj;T7ygRU_#DFJm`GpU!&mInw5mR#1amu?62_ zh@bosSK(T`jd$?@_93f@vI`^8h&%8iGMdT%a4fvI7?R7Mg~?BFEuP0q_zYF8>>Dn_dq|s4v4-)u5U*ngE?Yp} zfNl#pU9cHBi#R{7J%jDV9+aG^)ZwVZPdIl8@r_YuQAXlcY(ej{ z+2%HuKZp24?s=SV=d-;_sZ(5ti}+bc%(+6T#aA-Cir-qjnsuVbHPk`y9KOTUYZ*t@ zb)+kte!Ws};fxzdYxo`+HC(R^e^LY-GDJ1~ai7tMDFTH<9k)!-cpO_u^Yr{e$`?PR2P{i;s}InfJxXI1^9d zUnqWxJOFLD2dnWm;-BUmz|Gi=lxL_n;4-YiHcWh${0)!c8{|F5Zzy9v*5XZkgOn|l zshEI9T#ZjL^?C9E{D8a{s6*f)Jc@5n^diR$_u?a{mxvR1(TauGh<{<+%WMl?L&__> z7tY5$cox55;H%Uda1L(3^Vo}CuW?@DN^C~f>%1O8JcjQvek*w!?!s5-@CI>>Q?MNC z5%VVJAQs_rJc+mPCwjldd5r7v5lY`CUxgP}VIAJYUW|N)V}x~h4!@vc8|6Drz&W@J zd(iYQ%i=CPgWbq}k1`NpJc>8b^?mX!%)wSc52=AijKZysl;W})@ zw`OyoP<4`5oyF z-(bM^Y$MLZTD*l{Q1Sz18;-&GSdX+_tP6guz$T>ZraZxQco1(u{YV`JH{uq&kFuXg zH>ktkkoq(AJlu!9=>H4r#Pf*VL%P9oe2T8WQqJQ>JdcrkIp=UMDt{xb;BwrLcTlj8 z?L;%K!Dj41kNuPfxE{~rTO|EXI8Md+ScO;cJ0}0Zd*WH_#;JdDZsQ@O9pLx1aV4I{ zwu@*L;}>Md z@!Qg9!oBz!sqrq=8DlUHci?^GbabiHa1Q>4=kO7JLEi+IYQQu27Oq5>8i3hYiVb)R zKcbZ1*&dA3(1ts(8DAhi*`>-b8g;lB4`UnhQrKUdh8A3k^>_*U(IeHRYH&Ut#&K!< zb~d&mHJ#VublisZ*ohwemiLKRh9_`DChv=Du@PUQB+I2HVIJPWepF|>R5NbEZ|Il9 zHsN`^gl|xk>rzK!3D)9$#Ly9QJeJ`*9GCB+olo4LLjm!KW^BhF=wIkk^YH|><4-(V zL>wcznC->WP$e#YZ=QAIV%&~@qOg?X01vLfKd=vXmbug?_yfZ`xs(^z;7NRh{TN*C zQq$0k%W*4SLdVYR6K=;A>_N{iF0~MU!^h}b;ZoP*Ym`*F)G4?hd+!|2UT;eLchjaUpwvpYRILBGI0}}=?j@M8%kZr|f_#Ey* zE_E^t&j%~-?_zZ>PU1~DsV+CG;nn2n? 
z5UcSXcB4l%+k$iPEOsO1aF^SP{(opBY-RM6n3L{8pjtG;z=Ap*Xb@b3ooO?@f;_#;7NRr zfhQ1ecn9Stx>Oje@D-9z;+SAAuEAs2i}V?s2RI*V@Go?lNxqHgI3GI^b28}`)3F}k zA@daSAgseS%sSPjUO?O|_7k_@5$wg0r@53L8M8SbF$1^YCG?m>-ib%>I|g`M>TJxe zA#Hj&cknE7YRNCK8v8NUM>@qfIJwTH?nB3Vjx8R?9-P=fI!AmXafPRla5~H5I_$-O zx#VHE0FPlOO8smT9>uSyY~q~7W!QjkP}uBJQ*aix;un+#$d7O_p2UxEx3Dj`2XCR% zJn|ykge<{ZUYB}41Haw5y%cvV+6<)>2 z%PF5QWd&*W3ev%qtPk&9#d}^&K6nks`daEI*KvMaPx?pV4V)La3^6y7x1-BVlyP_+ zXWUF2;!_mgLOFtya1VaO*jp*<@CA;(jeG$=q1Q^*i7nWJ-nWzY;8A>X2kW|%bd6Qm zi@tv&4Pzxdcd;J)iL>u^sRwcOD&hifBmW+b1y&*DUeXxi?<3x@1-)0ZF1&=F?{}#a z9w2=p?m^N#{8;f2c^&p)#2St(o<`ML_6M(F_&U-h;vOb`k^VoZJMZ|Yimvew+fWct zP()B%!59!WBs3`kVkjb2n(gtyO|l7rG_wg1-Pn8Yy?~0nf{F?v3L=P76hskGL=Y@U zk)pKU_srb6yPE*w`#jGdzxVTgA478Q+?g|H&N*{t?wvQVq3{7*^Co%#_1_{Mf;Hfq zMjyafsPlFWbv|4Q3*g9i@D)%7w?N=s`VpqWNz<{ZFav&vIx}cLtb*3>F-Bnmtbnwc z$Q71AllQ4R^oQ%z^m{x^k0BY zhQmI^F2G#)4cdQ(yx>8Y2yelB_#U=Gt%Wtz>983ZE@Iq4v&HB)M3*2xm;tMx#^?A5 z7z^*hchKz%#vUw&!cVH_FUQWNk>#znc_=^67l&^^$ z;WhXTPFaC{f#+c}WPF3IgXz$AC3S>-koGNo53j=(=(mbk3%-E*-(ibkHf)68_xK37 z6dr`Hq4sKGSoj`V{(udHU*Nbk=nY&0FT-+Z_9He2a^W6W21l>0p*q0?cmwvqP3wq< zU;)(oiN1wgxEYp0o%QGqya<0l`wfgwSP3WmOdRtIu_4?ENBoLih1cMO-|*28+K7Ci z#_!l7cnm&*zu?48#K*7`I&7wIx1c|8o&0wz_7qk_lRxMO_zhCGArp8VHo>{uX&>x_ zlpWYG_z=$8i7kN*aOR)rDZC6nLG8b=Sx^XbA+(En!%Z*+RzSVoj18!SZ=mTObR1^G zDSNR=@FN_vkKe-tcmdW!FOHGB4Q9bQ=uyL?E`=9i1sv`1s19%$OoZ964|;n&Y7E>3 zAHXW``8=vMTnrCE(C<;h;AvO~hXp*U9b5s=!#B{drbms0dtnvSKg^?s!wfk3aF4nK zR>EnuJgOY#!>P4B>V7!s2#>lO)QfKkj7HU=fk)j6UqRoaJ?a@)2qzxnQ6(@1c0;5g?T2-6{IMR@8A@Oh zEP~x|TqBP<7jA@QF!VT&x(S|xub^II+6E)xR+tOkCLWam7r`U27<|WjR8Po*N8n>P z>IANV+o5h#k2(jQg;fwZk+#Dt;0trn$?CtTkSU1?9-;Mfl6 z7Wg`%!w@*lqrxx&Zin}wRVR-c1#d%(&K{KwufaxW&;>ce4e$f(fX-b#>PnafN1pCs zot9_96j%%Ox*cA9V~!7a9&@Jx(8;!7C7=O#tKY>rBJ6I zdJd)VC8)E}Zx{@B!+h8RC--MO!maQn96Nx2!~NhL=uuIqImn}8(0H&%<-;7<3#Sg@ zT6i25z%l1AK41y_1;-BMd2kI(h6S(=0>jXM_&3}KAHWW1I^3gj;Z0~T0y)4-@EsgJ z(xcMh5_lBWL(Oxs1uze4oJT*x4e$$u&ZjK63j!A~Mqx2@ywIb@!8~a2FWLi7!%i4_ z5&93$!)M_6H_wDZXgmtNhhb0&Gob#(9yJ6Whcm+18dwb{XVETL1L16sdI2`WzjLS$ z%!R`v9+d{up?)s%hA*H-9(9BWOoTbG7HW^CF7OR510f?pl%G? z0#Cq7IIfcCz;xIL-7ZJRVJXL6a*z>U>xS^{&GI!Di@nHM$OUuJNb}=y9z_ zy$fsM!RyciIQ4qu0)Il=8|ZU*2I}94yx~>Y0v&F`m%;`ZaWi@jo8b6cu-`E7R{SZP zc^kF{Vz*<*?!f-S*>_S`_zueN;=gd(-Pjj6{2pWtN8L-^AnQKl2umU3eq;y_z#Lcy ztsbC%;bu7dLG%}PK+Z#qb2$EC{3c9?nvd`tcm)oB6n_r)z;fvI82$i0ge_42aju1I zxC)+yt*{-Acmml$Inl)yW%3EDox_=aa-1GIhCqy7cY!3Icwj{b+IU=7rHp1(og7wCUj z0$ZW|i^vlu!ZwJ!P|sMa3jozzaTOdKLC56-5azIj(QVaf(bAYX2bBes54B3&2aKGu7Ps+6l%YX zyx|_04O^i3J3Je1hZWG`U3?sT1r4U7-|!affV3Id1egcApy)m9HSB~TGto8p2-ZNx z`ySST@q4fo&Ygu`zO+O$`fm>lY?18Qyq8qRXJhN#l6u@J!47_vjXYdGo18EHFP-UT z=mES5M}E)PfqUT#Xt0{`1&_c}KjPnEADq0F_QFf>8yvR|*}-FQ_)m;^D1s+pA+V^W z`oXJo*dz{wuZ(E`?7a@EhY4E`a;tEeLEx&tV~Wey41>3YJ02Ci)Iufdx== zGkp)a@D%(2CvG8@gE!$fsJE48KpDIUOF{j?xPj5|JbVfMZO9Rxhc96dbl;9#VJ7T` zQ+MG1;TCumy6>cq;614KC$R?H3p?Przi2Bw3(FyW7cnrr4BtSF-Hbt~fFGd69_j<{ zz+M=+m$42D;4jG7N58}K@F5(byz1ZZIi%O{sv>wCJ_4V|tL}k?P}}QeZ?IQA3hSV! z&#Nwj&q4XUDi^lGO#!dk3@6s~s%h{8?1FZOc~yUy0Fz-eG(X&{vfzIB7EZ3^RhPia zuoH@Ed(}(eJA$8K7VL!+>v+{#7+aTTz*acEo>!d%H^Gar0R|lDRpsy`9DS5m^?*C! 
zYdF5XSLMSbm<#>}UX=z#FcFr+;YWMbAb1Y8Lhoa^9}a8iRT*$GybMdB&aqwT8%~sSOvfx(u4o+(ARp-J|Xb?g_U`ZRV`UM)L zdsRQU9bSR8a9jp`1eNdf*y)Dh3nx{sN0!#z%wwpE4l+GobFYl;4P@t4LQLmxDDQh zeGu!8Zoo3w3+MLmsxP7W8R#Os55GeFo?bNsrot}hdM0`RuR+aBuNn+@!Adx-msi~a z^Wo^;v>EP!z0kLhS6vBp`%)kH7EU;evfy{f?dMgGz$R#NHZp{3U<yP;0nXrNV{qID7`JMtD^iCc-*MAL&)^Lfdn_Dhe}UGjuu+{f5WjnDdbb+zLNJ zoeQulPzra!r?3}d7t%*?>c6Nryacr`LMI>(X2VURyy}LFv13_Yb!`rM3X>w(fLyP7 z8ZOJDeWMwZa3}l#J@a`Md<+|*)foC29)lIodMr8ucSDx~#y}y@gKwcr5zm1y;hbXn z1>S?arRWt*hQltSFCY)@fVpsF%&W5D0r(PXR-%7!Bg_Wh zFptEp2+JnU8H!3t>n2sR5Q!+PlSs8>A%+o8{6 z*id)})T;d3}*BDw>UU^$#L37Z4+p!SpW8@vYfoQzX82Azc4VLCK@7JY?hU;%7{=Fef{U;;c1KS1l}DH|?>Dd2shIRKCpbVIK^Ag*HNuSCI$24||}|Yv>Hz4sXF95PBV(1JmFK zs51qx(oF_!dJlyaO_<48)m>R z2!4zX!zMUxo>z^52jLyq1E%e?AhcnH3PnqN`|jDqK26{zLtI=m0+E7}H2VFv`iMjr4a`~@j1&}VoE zyx(BUU^tY+4j8hMeuOn}&9{s%SP93h@~UBQGfanXAmuys6fTF!kn%k-8{7$>!!BsO z8d=?;ci$AFaAXA0MD<-e{A47 zSPvb4M$Rw`_CVWT=u3DEj{BAP1}4B{IQ2KiAxwaoup7?ZNT0%+umUzgr{5WC@CEoc zQD3+Prod`Ac{92NSHnW6v4wiT#qbPlg5$TMA8;ScfM1~IA71r6G~R~Yg*i}TJADC_ z@Dlt1U3Q=w@HL#UlW_z;Lc>3iA=LegzJs^nJ7~HK9}4fmvAezMEVu-og-y_85AhmY z0nfl{ICU@M08;j0L!rO&sfS?)w6Eb)qv0X=8iF35>IXN#2B_=xsqXLq{0X=Cd@A7g zsWSKi8U=jnVpsq*Yx-0N7z4B5gu{HSefiWiumYML?o&6xbFd4B*7B)NwSBy6=~F}C zc9;ik>-ba=EU4>aoy(_2z;j3W*i=sWPzHxJ@Ts?;^U*#PgE^3K4EMpukk!zqrowhO z_gJ5L7Jh_`Mm}{F%mMFlJ{5+UP@}O=-3AMxdlR1;45ASdWkPr)+ic@lMp=U_dY)Xb;;1<%8B@Hh9V2z&~mlerfr z!e>w`#iz0%^%U9yYoST1PxXga;m8&~)fdX)2bkT`r@lPZr`~PlQ$NEQt$pfhm;?*K z7xJlYPzg)mm^A(kSHW~RtBp@x2h(949GUJ@y`d0xLNLRp2EmQ+1$1hQ9N;nd0M^6t z?YIXngQww7=+xe)UWBtdP!_C!J&@6nf5SVl6PlgoQ@QX3Y=!qa^Lq$&p*&aub$DU; z4EP9!olZO9=59W9Vs~@{PVeDUpFrsuKITF`H3iN-6B)z9umldz^r=j^4Q9b!Xx|Gx zg2!PEoYdE+2KVQlfj)KG5cG1WPhB|z-8>I{y}+kZF7l}hU^00B?NiNR9J~sLjq>r1 z8?u30VH>o%*rzVZLho{rYaZhVu7YRaD>!U4a)L4N415QN<)i!X4EzXn#~?Gvg^92S zI*p~?(4xSndO!}Wg2shD_NDpQmqwq$HfUW;ogoI(U^DbB@u_@x0=$)sS)n z&xf1g3#fM^x&(K?H24l`-h?fL2VgdAfRvl*AGikIgw;^@7TOEN@G5MCA-7_?VJ@iK zu!m3qPry_t4@_bOm04jgWSiPmP7AU?tSO8(n~h;8zIT zgAT)auoArYVhi9INVyML!mF?k+T4#1fW=V%0b~QeL8AxJS!n(ceG3o6EU5XgPi=xT z9^rYg1^PdVJ%!bf`51D7-=W{*$O{@iL4Dyx_zL_J@gZE_jMD2-{)M(~MQv0LM+{-*6QyhZfIpFT4fbXVE*j4W_{_(C#_Pg}KoBdBz*e zgf=hGMyUNF{SUXnOxOl(UqW}`HTV@SeA%ZSg&&~o74Cx*UgbV`2)+gXYuFp`zmDC8 zH()OepMuSUR#O?v@CbYkjo;vYm;_tlq&FE?a231=KS9s8xE^N0AJB3dvWNR&Jv4aR zrwZUv_#K+RgDhY&?1ppRWo*N@(0Dp}2$N@^^Y77Tun-!}q^rQ7lRu^2&}Sib117=}sIdr}3#ITftbs<0DHq;`Z(-mP{tXYq$8hB5KGh4Z zgW6yC)R{0Ec0oK-XUw&#(zXzhci|3VaWT|3=@#7@{iT;84@GI2c%zbbH+z6k+Z;-l$f5Qgou$7--1swJVb%m|aW*c>Y zsW2C|LeK5k6u1Qz!ggq|13iRu;ZE2NhwVg1U@T0A#c=$e_&#_N-iH}NjzK7%YV1wZ;*aN-xGA`glXt0mIgBRg5I70bV7pQ=DU^{fL;aC5H zOW}5S4Zeih9=~c0lVL9Synb~yTmmn^0`U9%stfv1(ywlXZQwu3ubRVIun1b$=NS-f;8!Oe?N_7WrG|cW)3JWlw2@!s!PBq~>K*4- zy`TVYglFLwIJU8$Z->x+co=5FZfM`cuP%enp~3NfH549$SKv!%c7k918?J{>pk`C* z1Enwz4m%MU!d0*Uet;d29z<4<57)r+Fb`IN=OknWr@Lv z!l$qsI<};pFb#aC`c*6F1=qDgM(_sgfDWzw>OzT{VHh%RP zl%*pJsFOiE;AU71_1n^Z7zcCVS2(JjU)>FBp?Q11Y6m0X8~77W>A*d3GfaUV9gzXN z4|PtX4bT@x!=3OmwCm(o7r;2U2`0fiP^&Y23%9}xumL7@LARlPSHC(HE`}Rn5v+px zr!%hLLYNL;cjI^6DF>d0Er|It@hs;c|Ee&K`sehhT%wK`ul6>hxi>4_=4gq3Lk0g$H2{ zd=7iyxDkHVKj;_mjP$D^a0%QEpTP#GaW4G@BjKI%kuU6q<`?)?Z-~L&un>;95IX`R zVLaRqi{LkC`7gi9g4^IL*a8hN@~i&v5_|)j!4vkYM%mOQ;#XJZq3ij6_3l{orHFD$ zcvjS}4j)H7Ctz1$=cVXdCC|PByLuI4|5{{p9oO85+;8%$jW<*7t;pau#srLmy>P+p ze)S$SxC1?fHPHM{`Vg}2LQn6eE$|xDyvMHw!!0lij=qfV3Jo7bU*QUP0A7Wi(C{I@%78&I3NDA6VG4W)=RAz9 zgh}uT`~)Ka)5le7hZu)(DXU{4V1$wsQtWOwS|k|GuR1jUO;zXE~ppzH(U)f zU?-gX5^{vs;43)(W%>>-g_qz5sPzih!IQ8YYQ0MPp#UbratOYLZo#AQHSB@2U#A?H z0w+#kOhVhKj2)N+-@q|%&^EXq-hkkn^bLFre?jfH&_~FFG1L6&WmpI)Z!?Bq49tP1 
z?;t<87goS8FzsFX3))ZT8899mgpIHhj-G+Nf-7JP)OwFG1?RxcFa;KYXC`9^E{An+ z%KP*gTnTT%O6W8TIm2u?{R4ChX23q^_#yR#J75tso{gNK?HuX^zr(Q~QD0DV{b~d( zhK!Gi8K8I`eh})+N3O8p6aKaUTMS2hN`FBGyaE$HgN5|ZBEJeOrVUWAgnB{U&+#R& z3Tl7BxP@n7Jq%t-o1xw^#v{xC|Ci`LyaBu6!sWzu@Ex@I3i}UpVH;$8jcnmdXu86$ z?t`_^>KptN+yt|s-b(y6EP^Ax<+)G>i=g%@#sORki(m(|{*LhvH^6*2?0cRK55p=r zX*FfQ9q=g}`vdZV$?yf7xQ6i$7r}Jc0Rw-eu4}PLunJCC$1~wWsQ(k=6F!En>(LF^ z1f4f9mf`T9@!#+iY=N|2h;d*xcz*S(T(}d|Z^R2Q8m7PoIASAu0{JilzJ#*hDGQcC zlTFxYxMwr>z-Mse7IYV;!(KRVE3$<@q5mI@BUlZQZL}9MwiBDeVraXAaRuMPQ9F4y zya+$RiGLC!!gYT!?qL_S-G$EV#y-F@XuJp6L+!oTB{*gu;{~39b9mV)zQq6r-7W}(afIUM2 zwH8io9Z*HE90r5}>LRF{MjfFL7QiuWs5fM$2h^`{Nk%};hF#F1EwY5^a8bK}Duqv= zWqbM==D{vFvqM0Y!(>C_)e;3-%RZMp^2`EVC3gqqz0#K{45E?foA z!&gwFM?ejM`=G`d^gY}Mb0Mu~Kuv=mq0O1d7S=;)CUS?pFsN6696HxPpf~r!9q=t= z^a-di+yHZ+d0%7#w}bDjfEo?c;pl$!DSQPr&!&FxI{XPe`v=r`m<`PapnI?s>JCK4 za09#!E1>lt>JGm{ufYNJ1pEMvhR~0&5}KY9P}wjQHo}ENkp=t=?S>&+mQht*I&hc>|^*bO5h=pyWZR=EKcg|DGb z9y$#d!MjjnbUFhSR7CVa2-4Yi(oyRUcwlJMwd_~1WOq!umtKwks~xO3#ei61k8ZX;jnV_ z62`+!*aP(eSzs7vW%IQ%l~9?XFwV*%9{u7?j` zA9So_Ou+~6@5`|zS1^8|-IbIN*TSbz_bPq|d9W0kU5$-{?$=NryZ~FE-?fZ+cow!n z>2RlgDUqY)J82_*fM%~D>Amb*U3#*~~&FCl01Me;91WbibVGXpoHJ~mBbsM%1 zu7YLI@^)-8Y=ZuGpkMG6oN^~+z~!(En%;%4f_vdBIOcA~C|nLpAmtu>7yJc9_abZP zcprKKYoXiy0p5v6uVEhyeE@xhU!c*0_)_=^YCeQMz<1F0VfqDTLYGJA4_FE(KZ>5h zd(iVSbPc9L&Bu`&d<|(&&@Pw?-$0Xz^cTDgOQGSUfVvnSfZcG~le8Bq;AQv?Iz5H% z!23|+Y3voe3d>;&q)f)nKn2_f@54Gc<{A7Id;m3{4X7(&F&z6GV-enkCeI@Sco}|! z;uokFWW0zig61z_D`6R&_A>nivtSFfdIg^Xzd);3kt@6eU&B$aQ7-%rm%q+*ApAt*J<)6`xh1eOm2R?=Ni;x+-4lNhc4)_+1UP7JWFBtVXdIxKv$roG$d*QgH z^d(fl(Bdm>COinAK&`I>sxw>%li_>V0l^j65V#he zg@v#gI)6hO;eL1z)Kf%$z zW3yl$bl$}M&|ouq1kb^2_yrnlK@M;Qd;qn!VtZf@r2m2afE!>k__m?*@DwbBeb9M3 zwg3hV>sK_6FZ9bnt*OyqN_sFq8szk>pd9rQEGrIXMS}7{_Tc#ZveChCk(^~5$u2Yh zbWcwX7ISWQX|QZG7mOOki3;4ze>g^fv!Ek6D!+(N>F4JJQ!^hkpe7j_B7*Ic3PXH9Llxr5>X=CcW5^W6XT`KIoi=7?Xnvia9Hw!v3A(Yn)?f zs6)HdpgpXhvaRhX4(cl&PsQ_*Y0w^Y5VR*xs8l&xU{qOZFj}6=K{#4|&eNh&L^zn) zs#V*ppmQLul7ps$`tYi7LC~CM613+spdU_;#W<9LZYXrlp9snc4B@ik(rBevEQYSl1%wPja@8)=pC?PJSp=aPBJT(v{T`4h6<4MhdfdP^^gaMN7)FIGWM^%8uqB28^5c z^LwX4GJI*9vDh53TU;7T{!9Esz?d~6Jdk4o<*ou09*dHaA+i&z&d}ueIQg5jnvTSl zhkhTrf$T4z8UH++fqEu9do~eO*mLJb@jiIYN#igpssQyPiqP0_OqBfJ( z-R9@wO=*z~2b^(v32vc`A;Rwpb4O=m$}mKQW%83g;u+Cm4Wo*oSnl{MEwc#jBPYn| zHJDr@!mJ2RjWIQjqZV^o>vHT-YvSpH_$#Ln8Q|JSG3V__e`-`4gRxAaO*DT|igE1stE1Q-Lv(qwX%15J((*nMl_aUc#M8 zHMDIuc1WCTSw2TjX-y{JxTHKEn}M~qV>mUXZK}G^In=oHA{oYVUJgfcF$NrGE`F0g z$Uw}ci%=!4q#P#1;lDIJuHtwx_ms6@s&im_kXmqis&%)H)1dLCD5->f9DQkegdu|( z**3(?b9C&Yy)BK_2S(dJ7zdLY*O^FV+k;iWwqFkgtpetO$~Z(F!phjdW?*^5i;HjI znYoxhT!KE+DXOgO5Q$r&Gs>ioxZ4IqT#-}EI1s(DSI98b&$i7J%M-L`y_I4V_iH|A zne{~prJ?-jAW@he1A`elnFE~G@Q}F7F%7ntau7EHKtYUw_}j~MXX{wmi0Qxu%augp zx_K(`fJYIFdmxu~6$CvJEG7h?qeht%p@LbQ)jL*VB5a8GD&it^R*t1o$n$`=9pcq` zRPki}lz42FY9F5}Pv&3Fb*avUwzJ}%)eVlVvp}NKc$%sY{nTwPrrt9A4syGrRL;4A z)@t`Wv!9l;s`C5HAtRE1-)9I>5r#usyeb!S8fS@P&|3%jDU690GaYjNKubAhE_<}H zPqVJlJ=IQh#XOR!(t)oM1FSWK6UDK8`6b%)C+n$PqKKXSmWVjK&wz;b|E6Dc863C<41&)!_jMz z;P3&%To<$dtECDDs@i^jU)>V8Czl4p**eaR(kv>bHA0@593<+$RgF;5kFY2hcB(Ku zmjAkwK=HsjEQ?=(5tB(v{2FZ{xYbObZA6{HbXEHWEVXl*9Pwt^cya!T&)25Xo%C14 zV+~vujyjC{s^FNlVD{)p_E-+%l#x{u&K^r}gqK6c`Z&paLS#EH60dE|mQe~D(>NXK zm`*fNY(Z9q=njXN;{G{b4?iI4VQiwPV{09}5KG`U@KaU$T>q zPi>b|((TZViKW$j5X~mq6ni}C%>1%`MF*W9x1E6f;+S7;zEU~lIgf~;C{NER@O{2(94f&|8K4MoS*11#lilo|_n6B+=w8YkCe11WJYyWlbBK~x9 zLktmcQ3w-67Kvn#ap`nUaS^sCC#vk(k8YO289&wf@Q`{$#Lvl~6+s=uMuQPEEox1s zptz(&MoLlfcoi;)7CXVN9RmN~B(4a-Fc0+V==>b!5*P|59x@ZvqEAc=NxYUG)YDt@ zYt&Bv3q_;n%cDfC7zmq3(|@9$q+9qUgV0GO>2efG6pvQ4H#0M4Iy7wNv7E)ukaGhX 
zw2kRX_GqIr_@CrZTms_~9Yg4FT-k_PCjBxsPOfQUeWdL&XLp+sPn2o7B>l~u!yU8) zF_gAg9Yt6JA9Ch+A)>jEHumDWh@17yPhvbL2UXoAsDx^nuHXq01xUh1GJK4@vit(X zBlGEEIU78fTA8ZKqYzas3cP@t{TJoeXm?^?9$`L!lv(Vj0FthChro3h{r-%D~ zXMBWy@t9o(PPDa*6?@}BXMy&jgO+aOWD;$MUUVvDli{JO$xD-})iXj-%>6W-c~SMo zld?|#+tl_yC@f1=@$37&w{xchS@8p19=GMmgMnBejt0X!YXgV>E6EK;0Jw{Mov*@T@0@K%MG$b}O-=yo+ zFlPA}GUF%`%jV{GoI&B{cOBa(>y&xLWyNZQJ4v%w<`)&Px~W$(NrB<- z3W~GHmlKDUF+*})M>vm_N>)P5Zy7kz(aQW>6bY9yX^u=7P0EZpPgm_Sok|DAli_hl z;IVQSDK)9u>{4!L>XlQj=ZmJCVkDT$DxjW_737bN6jUf}7^H!E3|AZY2}?QRy(DF4 zu9SpR;%JWx$R@w?%aqMg#GRY;zT@9y)-4mD0;XgXM&C$}CQU8H=Jo7%9*Y@zR%x?} z#L&)c$z%;}aAIwgF0WfRj3(pTMrEuSn_hRyN=R_Erb+aRS(GC)nBhK9mt->Hws3L# zxOSZasb0FR3Tjo9o1bTIl5(m&#e^{GAh(bM*4b(bleITvv2_NuTk1jYlPZUUVb zl(Ha_7iNW^RHDPe$lsOV7zw;vZgB9h;CbZcWl~36mXV&;Q6;IY?p=3aTfNl@0+X$h zg=0}sRa}~%CnBjIod#CctEQgo?8IT25|9FQm{{rLx+fvv10-4zeWT z-kwve)!obp{{HgBrx6AumR`uBcwu>Aa9kAC#1hEzGb4nIgF9EX?jn!X21&E(OnkaZ z%#KwJC9OFgdhU>Zv7v*9^kXI0=JKkK_5I0AOE<|#CnqNJFDWOypfQ-VFzYgAKL(3U z<+3)+lpsG_7EeuHQ|qrDzoGNU$P^WpgiL~wnXC>BE9{j ze_;&pcw~aUuvdaLZ5a!8VB7u*5)>5`hr)$9c_rnc{7581K$Mu&)_TvvQ&<+22ZaZj zr7P^JO0YA`7R;gy&k=KkQqoa+1ZCxyk-3VmyT%_Zwo#;r+0pwq^pldTPx7_ZtCe(d zaV{PJ&n5#w+=yM(jY=L+Y&t0t-J2~W9%Kp6gx%S)6ex+yVsS98-*)U%gv9_vs8@+Z zhB7lsd=Rq<3X@jEFCiz9bV*BB=uYOiUoTywuF~tW&WHJ-uWT*M9)#RtM=CPw$3E&9 zm0_A)Oa?53Z^>7BYn zOUb8ZXSeDyp$M-XN^OZHl6CK#kkfZop^^+*qB+XP9=X=2V;bYnsJvBRE%vZZ-8r3% zCOX&1GNG(B4BaXti~Bmp~;|LBf`~SPPvL z87Cu{K0xK|-nf?dSevA`D;`d1TB_9xw-FjDhvFWjVABmInw4!AS(2>MV>X`qz??|8 zMsjqH;{ZAk??*iv)qyT^RF5+4%6UAEz|ws|afP;{X1FrMv)J8N#fV7eUlL&2-jV_@ z8kb*Mtj#ZSiVu{`5xsf_v8>uE+4-ZESK?LGeYcpX(Zvky1lcHEm^CeqEx~rgpXOLd z$cm%tRP9W#iVMi zIM`HT%EX8un(8)2UyMJZ3S-KPbSA@@G2qahyWGr%=n!3HcTiaG#3Q-}9amuQ=~j@R z#ns$OXq@=Ve16*@RXICPT&wH&du`xdBWjbuOsHg60D?Ep!NdmQMN9JWmqg;a;#O%U zplVuOWHwjI<{af%DQuAwJUKCwj1EWA)rL;jc-Ta5J2#ln$!(AmVIfZ)6jTYR=ui{6 zrRfUD6w*!*t;tOo?o2>R^R;12?K602X75p%19}eXJE~W|o_z-m9yYvR=CDzjLwgQ8 zOREWMx3Sa^QFBQ4y|Hg5J5s9uApMDj?mK7%F=I(^uwKStyoECD^dO!jVEk6gk2=zj zn1`I2i7QwcEGW}!=UVV)wM8@>b!Qu}HZeF~+@CBzM}sZQens6e+S;{514E&W_?p@XfCsjnNzU zYb$@Q)i2i!3jLSD@R)?ZqkZYEvD_?OxcBIhpqCa2Ay8*q(F2s;$AtNaz4`?d-|Te# z#O`UQ>!);YMu0w{OS6o$_RU(x%XW4*Xp5?2BN@LEtZ4@+n$L=8fm;LTXg!T^mUbDz?si5KNUFhS zp4~fh_^5${d-awLG;~;CZ+=o|JFsGTO?3T4SP9Wp@O0Ta8j={Hd)L#;ip@`M z6_d@Ip8MGG<7C1JfKUwl*uk~x(5`Y+BQYN_o$OaP(K+45Ap> zxUI7)k{xhV?w}Vl7I7edb9J?g+(>v}-_&>&Q$qut@NwWkHbRARbsCS#g~rPYntmrl zgr!wgcY=uvMS~<L6!;=r(N<4(eSv3IY1EXm}WGgY9ze>C>WKLo$3iQyaPwVuBda(UD1#h z9~1jfF>joNSdI(nz3HK-*=3s4A-PF^3Q4vZzh79=?er*?uZT5ua?<1`dzTbvkM3@R z|KyuwICnc;4{H6snd=tL8hIY?v|y`3g`>w!XruqPn-~$3dSZ)!Bp#{?QF0~RnP}Vh zH(t!|tg%XE_lx8wC4I+^5>%e$EGGr5ToEPC%d{PL-w%s2sgNlX>lcanBPF`k1*>iI z8`7Gd!-ngWk+TYB425`D$2zmMHpfhy6DPx6C66aB7SL?Tq?0!ommg*-l`tbtoVykk z%ghzw>xfQETsw5hfjKi|z1$Z~dMuVNhmAe-nAotC^!QWt6Ony@8;Ql2e6-=&(B5z>qnhcp}H5Jpj3!5NEW6>CWp$r@oQ&qb~ zw9>H|=29IH#1-?xqT-8+B8)2=zp!RgBKD9aGw#z{Op+EVgJ#=tp zVU;#j6|R1(PNo>vbEOCFYoQ0~2`)^Z3pnp=m^$u9Gsk(m^AK3_s7# z$X(;Etw-myw%X=q5nFUMW(iT)eaa&Cq=I!$6`j)QHfJY`lW^y{c&|vATs5HInL~RH zJ%7}Yp2N?w4p0Ul2`5v1=DDG)Y$pl9@UYoL+;UhW7!(Y zN@GOtx8{Y?e8OW1p6Ca1S8j|h_FLj#tqOV?>2!sy0dkZ;QYY<<8<(ts8=^b!ml^|5 z<y+SFS25(cYWAhT*alK zymGd)gqUhES0hOwwUl)`>LIZks;akBB@Jdh2I4hxG%vq68Y&ED^X8a@sKzQ1!jeYB zYZingNkQaQ8j{zwnJ5>Rg!CKL-A<+2dev0hT-i059G&0*4y}~xjotslyP>b-OOVxp z3hw4;ua3d@+W@Bwwp0|Q%E~&aXJ_{b6LJ#_u};l=8=J^7GDC(ilYOjWuCnOr(K3ES zb4$ugX)uW;5rNjZ)Q~Aawh8P`PFk^FT1voxE>=rkJySY*-&xK(kj~S zgi5rNH3!#l*jb2UD6BoPzt-mU0M5i%9yFK{!%K|WWuYQ@8`}&ok&r|L zY;MuB9MOO(_v|mm*Q!uuFlqpm~b2^a_2Disxl zaJ@=W2Oi+}df>R7OGswwaG+xJ<*u_QFLZ!n51?xCs#)=9KPJ6tl`SL+l2~fh1dKM4 
z(%E+Vk}e|Y7SR>D&&?D`CNgw^`v`f0wg0k2F5zN4x81I->@jenj`#vayy{w4^|VMv zq|{_!I6IihB5^@Qv`lo#HS+RCkIq#weDPO~n$Um5a-?as3&Bb$FX5u3>9#vXj$_QDIXYa?O0@4o!!G$h!fmASHITCfiV^`y*BQlk9OnGIef z+Ty{B8M4~ob(5!e3OjdeD-!>GXZw3So*gYs98B5@R+lNO%`$Ei)f&f0IwEAIHab}t za^KF1n+UCn+MT<}MmL~FFA9UH`ej(rtu8vB-UU-oT-pWiHkw@nbiYKKxy7lC}d8?VduBD+tnxtP4f4Qo16y`K$rRWC|cJ#%v8ZQx;U+D?LTal*FG zlBU`@yn~--r`1lZozE6nc9g2bucbuuv|E%CCSeRNifw-q)k$8FG<_)_6cRy-^=H?Vb`>JX_}mc9l9%0MEFAG4 zJ;P5;X&?W${KNTehg81eBZFG!W359`As&^VTOm)EH#M2P%FaLj%;y8NeumjPDUrDj z^zhs9Eu`Us65hp^kK^za2KP%LY~dJPQNq$XKS*q-UxecYNczAuAQaN+jxwTZ>4U5& znQ+gKEn%is_ZC73R~v55`UPADa|LX?-Pidy2z^@lftEiRSWorEk-5Ggx`OuS>w zEj5lt%Ndu{qfjs9gm9+a2-B?Ii!xZ#e9mw7{@M;lO2_M;fN(e zg8lHgQkg!o4Of)ZZc=f)fSH>*hCPZ4i4?XL4J>vU}rEZYYbScfFe?Q zMI>@aJMM<{>W@VA@*IU|-){EQOAafQzYcT$m^LQL=h~=&`^giwW68ed9A(F>t0VCt zW%AU{TBw=aC134^dCuZ76HBH9tkw{nvpvyFo5hPro`8vOm52S~+wEpxrcGvtrVNd7 zC1pg{u4}M+Pf8Js>_-5+w z5?bzM?bH~)CM8;7mv`+O1*pBw0N90QXO7|qn0%wEC=$w(J@;lmDW72}$O+j`1-PG6 z!L~VVVg%uQWz(%=>Rm~&Usi0PS-#03_!?7~t!I5s_!DBw?=g#U{g~zdS?9VzhBU zG2Z##?_E?mm|aq0UJG#jg^k_YNX5Fu6+jxTH!!=~P&>AdwQE~Rs*Zr(U5YcgI5u`k za>^YZakWhgj$r;kCXc=|zcs4En{WEX6*KqOe_=x&-)Cghnzy^{6tP-&BYFLTlsa?JDXyCRwEAM#FG~u_ zdATAkG^vgAI}C>-d@LhP~liK^F&Qt7=| z!GW10tUj|zi+9M$qv3(O(M z6sifjS^)=X6}=TtM(O2~aByUPRCZGgD=W{*7cUwQMtJv-ITub-zkiphzk#9CFl>O8 zQO%}L?Oe5=#}<M<;;B`dT zVZ;}9+G=UBX^aqn#bS2lxn>zj>m+N0cl7upHY+rk9aEw!nbz=={?yFl#x6mry(AgE5k|9}VJ9cHB!{$?Q z#JVnyE7Q(MEUiiutqd6|tMi_&1mp5!Wk}wOmyb5eXAbmUOI@f;(VY6adrOTOcZ`!x zYo(Hh(zZ+%+`EVY*B^5rSl~zW!hBYenq&HHPdYBZnto|mg6YptPIIc)!pzID#IlWy znNxm|L`zx>q1++;GWpnNe2+U52D|Ci3B(z8rqA^rEdAoLbsZQzS+|!Dt;qz)orWS` z#F;RGHDQ(Bn}yXJ)^ljTKH3{e6PzRrZ6{x)BT%JhYi>W8?4*Q=;tri0r+ivO7a{Ml zQHuL16n>6}g;mEco$6LHVapDxNSH#+biM7|#tn&Ve~KHGAsxytsFJZg*ri(8_M6Fz zJ4+|%wDFA{DoI*xB8m8jQOP^zM3l+blaR3qE;HpxcykuBpI3s$xLE;FTb)spGHg;! 
z4-Kn-F|n#PtVMw&6StfCBq%Zp_qUZsE~OC@?jS-F+wxvl@=I$vlVMak!6fas8h`shsb_-e zOpv+^S*J07-=Wp|GKPnUccxAjNS_bt*j|#m36Ei!PSW)unI|(N=S>tsYTa=8d>PsA zf^PAT-IDz$V?&jPj_lZ8xg*5{w7l%ClNZL8>Y0}_v{G5vkmi{08tZ(PF$9vVN%l<2 zMo=P{)Hdg{th`RWDLX46jQ7D&LXSOVz>V?pd&TeT7qIag8XoO$K^zf_D1FN#@W zkZpp7;IK$Oekffc7N0>|#Ou2J!h{|6()JXSM+i{vT`Q>};vfYg6$o1~VP`8Yz`$TR~{xaJ@n{@EoTb zl`dAo#Vpq6*c+qgJ?588@buJA8ls)ZOV1pgM!GY` zglN`$W!e#gcAE*|Y-9q$BA%AoyJd_i&N2}$HK6g!I)SyGCC%?iQE{o zr@~6iXw?X($`stmsAy>>=E==Mqi%&3zBbzVCab-l37f8}yQj^y==y#YPe99VvJ$hK z!lr-po#ydwp4z!gq7(Uxp5HQ#WNCxRk^Z=ca~tjo0ma*tEsIlj<fz4d$jyBm;6J%WD5As9IuzU!#TgSOmb7AX=V{jFYog0 z1v4^__oJd%;7TNz!xU!|edtg28qaWIy^* zU2=#v{tVY{*l&~|eWvLNN@hOKA=eRe#c$M>%$`SJdW~@q?CSu`^d|4w2{R6~6VR5*wIGsxv>K$_TP5z3IZu4@WCQdTDH&-gZClFF zmd_45@B6e!X_uDLzNK={u@S2(dlMOZT5B-^?3U%F4?cyej2`EQnbh#*|7_CUj;pcd z;c0kFna#zR3X!e-yZkz4FRIKyVcNPQ%Kkd&D3ZO${yxTmGN!ijwSsugE6T8Rl1=h1 zi{qxuH(cBYAMoDzoiP&$*@wjc5KS2N=8Nv8aFf`Rv}at7xEosmdAUVS`e~6`S((~W z!ZWvMu8J#Y-2Y*TjjtC=oN#q~g#$Zj4Uo3?&Uvkc}iJ@=j1UrLYl(cV??1mSyZjzS%MCbaAY??(J0hwzZa@BR4Wcq%M$6;xDaQ=-;`v zr>ap(x(c!D|Ch*GK4n-M(I17Efr{~WR>1VIwI5J6@qah#yJw&5_d$9qPh^ywbXQhU{dwZ{ z9Ph6m%V`t@L`<==%SMe#jUN?af&r;mVij~H-G$R=TCg`q)$no?pR%SD{b^L)fZ}Yr zUB1>w0WxNtPdM0D5cG=^sHAfc-bqOKe0|hoszus}!SOk6`W67~kgoLH7!oK{&oGaRS~p zsqdXA)tMTz{K(rJx$M1RL#$R$Gf&6XrRYz1ILl+~3EXelk}^QA#BFM&veOIWhxMAE zSz6b#dUx}bNhTy!HWJ~a9}_^cV}U7^o^K)vUSns26e6_;;K>(43GC1V`MMdex#7h0 zL0Q;D@&@c(WC{{4DC2Y0(kZU}k56D*5GCmsXtgo7m<81XRD%4YIM%Ve6A@w6RhC|ff`Yal|D%kKqnj9{g!Rz)T1?`eamQEc zbtX|zla_KP^s+jN+_A)FD);vV>hDWBs?Ly3zWcyw98v?4RH>TCiF+qK6XmmT0|qC3 zo<~o4aD3_Vi86UCp*#7Q#P6NiJIx2>&g$K>SMQ<24tQTEmpDRRy^Jy$lm#~|tFxyw z7}9%466cRPBFJ=Zuv1gnnkV0QmoM|#K2MJ;r(R4EHPGRYKIMd-I*>m%9!=W_Ni(pr zE2ky!eLsmu*cKqc7(+uggqmd(870o|rTRLD%*MI|O^_=bGviFqoz2pEDnMPTNL>ev zNLZp2`@WrsIw26&%~3_?MW)WN3A0mXh9#!D)7sTV-?11fhaVz(kqtdvqCY z3OQCSo6#<{2Qh_PR=R6#?6xAlUZp1k`oZ#(9gAUSt9)Xf*w%&IM$D*jjE_f97rEOV zdHemVeu|Ek^bFLN6vKkGlYN@bs6EKN*yn_v(aXZlVvKVrkybNXkS*A07lYGXTc+mn zg#b>pM0?Dbw09*o)}+RqcIp;NWAzUxkc<^=iuF@zh0_2DTVjRs-I18MAcje0J9#~W zbtHGUYAo17v_7RnYXmYrn9{LjWqV~7^J%0+l;NOFA4&IM)#A<2+vj9JR+%WqcPWZ^DopiVb^*>RYiI2T`^HO zuH<@R5&0;mh}OB-tz^;zkw}6xoKMEeg0<`j5g+2*9amcJuuDbqZ4!B#+_)Uqd8>rJ zVX(@B_0&oq$;v-Cp;!yg=I!KjueO#t6IpHi(I&fGhvRX!!Y1UI#oXFTC8oifrL=9P zQZA7l!ikC5cs$|shdpJ~)GNw<8NHR!YA}|CR$D2@?-o6Gb`kD{Kwq6fURy znDgJt4yID({*;ahcj+29zwcm*wgs6(rDOO$bLbyG@uFB+gcGMLJ31#Sy1i3c+3x?; z1@>VF6u0z=NVy^YT3jY3Cbn| zM<05a6EpuoP4tE4U#4F;Sc2~e#5YoyV>e^E^X&CjoDyr|dt>rnthbFFFmLFMDO!i4 z$$#K0$q9c$Po1wGrbN+NSJCXJ5Ow@5lfaZjHMDEI3}3FBNVr^7kaM?TC!{)Pkw zA6(c^>D(_5sB+!^QOo`Z16{u?Bm%QPR2i3!b(7H;tL(swC-%Bn!q4U&v+S6|OFx)B zCm@g6o^%Q*EH|3~+*0fh$z!&tI2JBqxQlEZ(I#J*EYJU=IBneyIB5ULfJiu`Oxxc? 
zRb&4R3MOyUYHig!Y?8FC$_3^leI0E5VzMS2uE0&|7%b(q%0$zbe6PJnMhybg`u=bprgrm?(bSVr{VMLITim$DgO_YR+h~$mD*&)jD4?W$gP>*-ya-LQ2kFj6OpYK zaZ1a~kcgSB0x{hu=psfyene+tdalKb|9px!mdksr)GFc7vY0D8IY(Ds25|&vyGZN8 zA?x0MBPh^m4g2^Q|M$ObZ>*eG5}XBO5scG*t;B955}){q&5*Yj#No-3-~VIpO}OK_ zu6xh>QP5J5B5gt@Kw6TiIkc2`Jz4Uyq`a(l{16ZTDMTQOMuDOj7yGmK@4u&W?yV{e zq>^@eB`X#SRrj7@pFK~fxyApomy6bq^(qzbGHEUQA?xPRqY)>we1Jqq1zhx}Oi`w{hzo42AJsn$FK*UHMVETCnk}AMJHViE~oOVO&QHXa9=x zyun!3Bg#gLPp>^5WKUa?SMTO(wAQJ*_}&|=)lV-DGxiq>{R;w=GFT*IoFLd zMe@##kYwXv`XLu=qW8UZ;xwPQ1{q#D)6X(?X?FM;-c1ozYQo~?Fn+ggovu+{3oG8W z7)Bz;d#j_~d}(d3Qe>8<&De={XJ6F>{zSSNnezi#{N zi0ap?S_I?7D`Cp4dy~+x+*1c-yojg}AAYm-GN;D?&X0|hUw6s`iR}kTB?4O}mBC^P z5H;nscH-b^CkVEp9*lL`HyL_57;Np0mMSyIp#7SQN-#=xu-81u0$O1QCwBP5@^44s zq1c)e$2Jdjx5KE_uVihn`F8cN?@!NX<7?9g|8^jgN1BF%NjS45aB2u>=6eVfU;7Yl z1Otu5Ur%3(wr!B@cmH;F*_lJWSl`+G-cBm!<*Wboy5AC7X~K?OEd4CWKgQmG>6kel zjw;ih9Ml)~oIz1olL&fLr>#ruvA05g!- zx`LUS&LgB`M@3;zx2Cxx6#tGUSH{BxJvW@&raX6Im>go~ySj0ZO?FnpYm| z+;ZGWc!WDlxxq*Hd6~4E#~KK^&lAP!WDVpH+Fm!zqCiJ(Hl9ys z5AUrBr`zEOv;ps^6$o4OkRlLuM~XY_{FrU(=z-HfY(dYQn!(!cgl@_l!E>?F(xBoY zMp*X>XXd?)NSIih+1BzE< zQ3kK7*a_=fCK_<**x5oh5AW*QD~4pjW4H-6D|IIkfYNU{!6jo>P*h031nj0!(CDNT zaIrxFn*z-`eEogN;GsX&qHPGTy|X}5t?vyE(DBYzlf(V&2UT>c7#-@MwXv|$kq?h!S$40uwDk>ER1wja$(TCCh@g2lMkB$&Q>-9OIvncpD5UTw=}$;6JH@u# zrD2#bB4dgUE%a^PGB)WMMD%Te(_65TBvgN)F0N3o7Nocdyb3B;lXCF|cAZJgyq@Qs zL4M(sIP)t~DhRqq5~po*5|`up`b*<-drLZwT`w-jqhUeBpdLRnUXX_Xp;(0OUiWM+ zeqDs_-Y7!XeBh%Qw(o@j6I|r72;Kb^C+(}(NY=+=916O-D3|z(Y z2^#^i0#|$24_w7Cb_N_|M4`49{aIybN-h{zvHH`=UjcG1h6g8sE1){osv0JA?`wjF z2z{K3GuRB_h}7(5NJ+`i2*yIBGUXIfy4S);dtFE=-f6SHIJq>ibZ-(^nogde@W9eN zXsre9a=pIHJ{6>U>e3yzW%f~^?f%f3)n>q0J-5jN0j*s3)Cu-$IdzMA$M5Dlra#|X z>nqyurk*Wrb81d4`yb7Nav2i*cMZ{8wkJhQ8hl3n{qB21r|$2FAGH_lXg!iN{t}3H z?{$Vw_wAZEQyXN(<3JSEtJbEGm=O;(MnXBCMY2e!g1;gP;`VQ)By^dXmS=bg@sFV5 z6xG&;9N!%OXpd!pW6fBJ+ZGAg%Lnt@^sC<66bXSMgZo_l=v9%B)u_tmX;;TujfA}J zco7M?pF5LQKaGTt+3?!?9!El6ca^rPl}N~&j3%Ya-)%2B;X<8f19sOqWZn8_yEJ z#Cc8rWw|G_am#8<ZEp@ZIlclRovt4@u-T!IAFpYSP!3 z(Gq`hDg)XiI3i{4lD%)(NXsY6u^nw!Xzfjgo({Glh?U25G#_k=)PY zTtyMMvO*-6DRrBp7BnAN7i-wQx9;01P0<5-3cHQ3HK0sixpd_T1woP*y9}jQd(AKm zGGF^p8scpMEAfT3`S);*P*O59-dlqYFQ%~5u*JsEEiqmWQX1jt z%4%dke^9(L{d|94Rp9&k-~R6Rd>+*I3s*C-8Z9>qDi_CM*V>m= z^&cD_5Bq=M#^$F~M%kS!>P@f9w<%~RZI^l+!Z!E`Pr0Rvimie2>fxFCh`cIYTsV(`N`m&CDumz@Vj4b-~E^h#hlQ;uqctWDv*8_luS)aG_6N7w{9Io zEac9iVEn>vAnnGX(=b=is5ny|Y_@UAt$FOD%@mt3_sozbk5GuN0THD<2G1yX@K!Hu z--0|dL+)e349#_Hu2I7ckIxACuq3UJmgOJQ1rw_GYhOT*_9Efp3p(OP&%UFRCxi^# znMW6j@moXX6VCoh*$1~8(SGJ%aha|>*w=-E8nX}b@lPF(DNU=fAe(bgV@?Y>=s;IL zSbAwZiSk<3@ZhTp1OuY63z{ELC98Myl1qw14l zEDb0-2vfNu=4H=$zCZqHIX@&xnt3L`%BnT%Nz)*g1uZTjn2WX%#_3qjzVivd zSsC1Vk(37VGQ&+^TUp=&u-eM4Y(+=_jB;vUceV7I5?Y6&edP-fxOMOYcQ{+|Wusv(2wj$<%In+9$sJ2{*amOy3HeS`#}fO9G?*IPtZ~y7*->SPugA&8T{a7}!LjWvJen>gJh*$AM z?S9f`#Ox_ZCt_Vul)&@oFBYMKAA*)?w1Js;g6qFpM7iVwtrCRSv=9b zRP>!Vt;bj)9ETVKACvXfgJVXJ;Ec0yoX25&c5tqK`gs!d_JY{_$xqHlK!to=O~nXT zE%PYxQU9b6s`|wB!#MwOqRWER@6Nz0;9E;I3L5%$#zCegShv3W_KThOEvG=Ks#f84 zN{ea(5>%#88&qDi1?N}6^Y75T zGoPK^N#5SCJ{D|Js2+u?J%DJ1CXM0d~|p15q^xsd2{kCl%Wplf4cjlN~mG)>=X z!#{uehz@qA2izfNH#ppBAhq6)wJn`Md#xl{)#{Fc9k2EByY?KvjbE6(>JDP|XM>6~ zCU|P%DGVk~y|xy#fnDL3oPjHXvlV`FGjM%OAx`T8YDk6Ul2I!pSQ8Xlbps!G!xBNX z!KkgNUa70lCCI$^*Nr7oG6L8Gs9PqANPKnT2I}COsUnL7z~tx@A+>S^VU2zY)Pa3f zvurR&*mXw`tuLjF$cS3|_7%vRtR`?1Yuf4b48jguH#0Yyw*Hc^S0m}Z=*pW;W=8)LBNSFj|R zQt@c8i8Jv?y1!G~U{Dm=RdlUR7J<-)Aasp5QoDMrL9~aThQt;-=x&F!Y*Ma>?lqOs z024*_Rjv52_~FFwkU})kJIZwHfi3r%*L7u5o+u&U<7Eh8*vX0rrPQ0L6lg#WNb{i# zoAw`=^X=Ki@P`9T3T_y4laEE4i9{01Fnjy$QmdLmAI5+yw%|0mO)%CC52r;IdT@LE 
zn79{cLmAlN@%Nri!}>baq2b!>(J`I-ozXLS&yl9TaI=<4YEU=Z$8znHB$I0(7}YH^ zEDmAz!0%1K!D6lrR)!3mePRt*k}3>2g%2ze9ey-S*($>k_l}#CL0{H&|4M``t303^88iRc~8@ zl=`lj)?VHkx_q=ApR|^7y+K<_9mo<11%JoWwt01$ZjS{rTW`=y4y%T3_S)5BCz*Ed zgnsjmLcqG^7I&n zA*uTbDZWFfFfdRv%-E#w20GHZYZ(sN^upmKoO^K%#@G6bO@ZvX!Og$W}7Dylvh@%@l3G zKZpmPQ%5Q}-`nC|%YXdpJ7$sMWS&XpEV%?2xeFS>BToppf&8Di-*SUG2h-@J#oD-S zlwtCBKPv8g(#L2fooaD9H41d2k-K$vD<60E8Evh~KT%R;Kay-MXJW$7K{qi@{trT|nYS*UlG4qkFTbZ67 zbHOL+ETy|;1&k}#ZZ6%cH=q!g8(f!<7^-gg5PgH7C5sqEKK zSgTLf+sn5i0L*Lrax>z)g-&CL!>;M<^6nncRNG$CcblzstUP*-&B@DUerZ_ptABkf&zh!UBWsT zMT->-!FZKE^7!~gm1*nx(rBZ>8LiWHVffr|QZG}qM2IAAD!Q#){45sy#jo0S(CesMQ^W4uh$6N_ z7h9l%7w4xt|F_(W zfO%*B*zahQ*|xQA?w(dHZ_ z_a?--^o0rP3!UGqH+}^rIr;l|lKGipg4{6WPC$E3FQTzx3)QvD_!1VTAOgnRB^LfP70~OoxTV~3DOz@cI?CY=GtNZmA zU-*aLe*VR0-+lRQ_`j5(glL9R0i`>3nZlx*%xv89vp@dhH>5xr?Twe3Z`{Nc+z(k^ zf$)4ADp=6d9p26Wg?eMa4ILN1-WZejg^UM{t1+)}IES^IvJmaM?3_{B%wTA{xOj;v z0L<*f8xgMoR~WALOJaM+9@1B}`FA(9_emnMlIa7otej>>ql7_@FGAPU9ihI7g$rw> zC)jmejfF-D>-lqTjDy=+i~Wa9M7Td9Zghf=qaO31A}`qH+w>eyDi)(I*){cIm`BE( z^+myJz=N9ObQv*1IvR|RE$PMhYnx6+_Sbl?AAWg5^DbkNKqnK6oN)w~71+i7;tXs6jaIXq=WqS?kAl)2b!LSK;00Kd++4% zM3=0(7Z@V*l5vm4OBv~sg5eC@i3fzGUYSe70xb-W3U6-xq>5qWJY~7odt!?`&E4Cd zm^x;ep}00MIISf{0(b&IKPh@Cgv*elI#dO>L{$U7WyLg3>uX_Fov{0mZCCYu4>=P? z+(Q|e7UAVGuTwa`((xrT5edUqb&-SnasImU&7Wr&y~jVEa1#_@9nD12EVDT18HY_N zWr?gjv3?kmc{Su2(PN;Z64OMwHkFOknmTgt4X4mwEi4+rau?*KmEbVeVUm8QJR^Bv zbiYIHQkL%4t>}gxIRULXh|dV~no*3i5!kK8 zZP?Z^z5odoz>?b(3DUDr*~}1RXhoBkEA={?!@!Lqs;vX47=*8X$f4hsk+Zb_a*+PP zZJn@K_-S!`oyR4DGU944jRx><`e#51k?Y(0++u4E?tLSJOgAt;yskh3S>S zyZHVCLUSBV$Zo^KGH}x$ujf{0Uo*|M2PCDx`xr<4lVJGt-)+XQYI2c z>WB5LVbiutVR~L8t9vT!M|VCMi!;V$d$h-m7oeqsNIwk!Iiscz+~#8wo;qPFPhO^X zNIQ+YgS!;9by6%{dA6Mo={VN)Pa>5Hz|}8a#;_lpCJGbAkCqfI2SA){#2yKKIc7>9 zTuu=cQbg7&g5rTnsH|8EUZSpbi5*bfjKR@9IX`%2ZKxEC$RQTMyW$eq7v+$t|Vu>cM zU)vXR?WuYV^V%;ZyNoN2loeH{C7c??(bQhn-G!sCJ=$-grrrgI7;4Qje)AhtH&09s zHn0(*JX+(GZvv#!RUzUTVxGDrgX*kHJ!ETEK`dwWxa3Adrm3EzDke5AZu1DupbtTj z)_&e_WYh_Hh{%v%Ar`@u<|@xu!nMFbm`7o_+1S?_(p42LC2u;Y*{*v~n_%(YfSDgA zWVzq%+$12^cwvC?YyDYQq3MR*$vMA3eaOELwqDyG^H}7*e4@fQKa5{2?_)UcI8_K$s`230Hetis8 z`G`&+i~)hH4b4E+N>Y`0*QuH=ZS`pxAliu74C)~w>jc)=pB=wBLk{rdZkTmF$Nl9m z85Df`&HHAaS*FPbH{r-iAcfz445=Ra0Qqa$x@`ysjasq(RyIj`J4wd!wCKKOod1J7nFz50T6cl9JPA=3L_;IA9jrS8 zgC|%xEK8-v$C2=DX*SO8VNk(og_m7)xfSo>9nccD-j0BRq)7V+S1{Sx)t6CY8b~dstO!F)bve zBeeW?N<-D366&xl$1T-8kOlif1cEar676}PK~X|2a-7CzW-ZB#HN@4ZRm^*5>sMbq zOBb4vFzXDdrJ=*=Th6c?r3kf6FFsh_?S$CIRM33D#MGrJmF84o8q9-;K)`LGWYxv$ zGD$8jRT1(q_VMM}wVVm}Ja$hx@FqJPN^>UEdcq~jU@tv44HMeCgMVT0s)+_{-K8A-uA0KlO?WsO!ApV6g zW=ta9Y@HaomDHG*O1KVErS-3jReUz3=M#Lf&kWSz9U~@^R9ScY?w1VTUmRP~vC9KH zp{t%wY3UqBudrD;BWET*O(iZ;zroxFw+6PNr8URy-ag^f0?#TvON5xX}SL9vj}DW99Fwyg$(Wax3+iR8aDlO_pS9qZJvDhW5_tS3u?Msc}x?5Ia9*$M=vEs zYI=o3t5eQa57Mh!z^1D*`&NyF-U98U#kCqTTzj;Qc4eRINCfSC zj*`+9DMFd+4N^DMjJddm4#bp_0dK_~edBdqK73PhLluw~S7zN#eJbS` zO1FsIVr%307L7;H!{ej+7>Fr=ug0jneE+4;~$#LWq%uSa3plp$t5G<*DtSr8K3kA~JH0+*?}f>o``Q zYnipmawf$Rb`&MA$+&Rb>%vplP#NJrEH8M;gBFV4_`S-}>Z)6GckM&>DOPJg;Osh8 zl?n_p_g+3F61_#AXc7;P5pJMF7&v3uSwYjHgp-%ndeZK{MZAvO{e z&Jf@}3B7sFPmZ1(@1uKDyfDd$esTKAAgqKGeuQ|aHagyx5zk_)1>UhK%~8D!#GPNj{ziH|mN-8YB> zME^3lO``HVx=K*g+>bQVZk~X)R7+U;3q)ooqof`yq`-rd3*|QPf?oU5^CE<~`J9gw zyNZ-SPo79$3haZBIx^y^yK3y$=V!8JpPX5N#rStrv6>ki2wFN49<=0*z=pXZ@KPl^ z9Gc9B;(&*Ym(iQ&i&=Fxlh6{Pmpdo00<%E9UdXn=cOw-$-93+_CAEhP*f?;H2tGWc z%vM)-(;4cy%2aE|;lYU}5wHQ3GU`lzq{+|0qZ53zG)}OX#0So7fBtT7(C9G+c1qE`_AwrjOnEV6|?d{5hPY%+*iol6ziCIB`{TN73>ymM`yw5gR@m(B?#7pEm)_lodPaLZ1RE`~<;VBkpp z>ijz9vi3NlBu2 
ztHmkyl3AdWrqtqm>A0JHR8lRFByPZsO2AG5v!nELlPP6qH$NnC#T2m!ixXfU^*&+S z-VwWStiY%vr5hnE8vcn!nI8%g#qf!P2?^^F2Av#rC$7xq5b3qC8^g_!3_%=oXaA37 z5c)UJYmPkV2hg^C<7gG02hQ5xkJ3GRzmB5QJ!WxEHkjdFd&;+!9Jq4%aF?|%9|=Ia zorgLD05Ly2F{Nqn_XCmF4b!9WBhnDD6wc=_ZuzxiXLGT&XBYg*K&B_CE+X#uNoq@l z-9#zo!1JerJdja&-cZR;D@+XpD4bJ#Uic7CTPfB$icDowcP3?<;`~nJQfz|5fStGw z+o?Z+=e`@g5D*Rxs7jhKpRKq75TYh5JYq0%296%Jyjj899;V7G+(9`^@^%X{o?TpT zxams`H*FZaL5^k+$4r?qhBl|f435CvVCUmrCo^TMHK7nWMD1YO48Y^?`|eL8Dk1JL zD~>L?i?noE{y}%k5gbc`3>7XDkH51QKFM{7Jj@6;A?VC{)43rXA{_MIYj zs#p33F8MgU$$0$o{ck##lv#fN+A@v_#@4O*BbOWKEjtZjw$enZFoalphkQI6p3{%) zVJsap#9%8rKAABHexVv6Ct9J>fzhNtfWTQSn`H1ejwc(`W|I$jg89Mvi0)3_wr#o9#F&_~mSveF~Y){E1eOg*w=+%G_6YhZ?*j zqa0ZB5ETnCKTdm!@3C5FeQlh}&e#@aZ*ua$f=#3%RQn;m3FpFY8E;cfJ9IX%v2}-k zQ2qsi5^&UfNHgm19)B2V-4Gu3Wey=xzki2n+t@#S`OTMG-_YX$o9wVxoNEsagPIaL z!T!Rrbueh}$B6=zi@EmFR?(i({=7_a00O_nvmHGkfX}qW!tk+oT5uqig`TIX8B7oJUpsFK9Hv8sa%W;r;Za0O?b9c$h zwv-{x>MoUTYv7Z@vyGhJG+JoC$ynOvirDLsJxcTFlt-4C2q350QNE$~B}eXe(`EPe zazArVZCW!&yoD{>AiAY2I0+Njxu}|RdMa1Ozt7MA^bXB7^pA2|^y$yk>(rYwN{UW_ zKHxi=|H}R#xp)3-==8?&Pot~zVyZ38J&U6uzps5UA|RKy!+f7(;nXyM54P|=S>|&4 zPB2!NG3)az`@mYbt@9OA!$|(->_e0xhoSa9OVE}s*H)LRj4{>biXN@iJG_2+}5jmIlQoetQvMsY34{Q$7TgCYgONj5Lv@+A^>pqMYC5|VD`OuWZ#1h9sXg?tQ0^}&7^LEo$#2LeNQLbLL4)(w#+6*e*CS|hiY;wTL{HJUx9 zvm^3&BGtBo_7GpSctx0;8qk9K|Wa3vtcR>QqhV8Mb0?P+>MW%O-HiC%E8u? z)A=L3wf?JO%}>lLE*uRl%OgKtAL^=EaA46@f2s9A`F&`z99AOewrY-Egb!aP7 z3)77=(t_g4;%tK3ddCScE$owb8aR>>t;;0br&E98+*I@k!o$m;Fg)hw)-Ag@;cTYR zQb)pErEa~UlQ}LWC@;*90N3=WL@zB^orpajcW@HTw3JHvYfJz;C-j^l>E}Yt z4PDmTVDU~*cjghsWRv;P6Q5tsV0nF)#{D|$XcVV_1(_cbwCT4oN+^_Te>JPnJcu~9 zn@Z=A%eFd4f}uM+SN%FM*dERpSDux8ObOJZ`Na|d%wI@uad_6CREX{x$ObRyn7ek^ zF=ToY2-JF5bkLr-jAtKFZCY5{l-X)XT^tg#Ima%pVmiJ@??;-I(&g#j(Z^L~^3m?n zk~4JzTMM)y94Y1fw|Lx$egQ(Nk%I@v+m4h2VhB@;;R7MMi`x2bnEeUQiHk+@!PO0l z8II49X-Z6zAHl!oF)t?RttFhNmTV68NNp4qx{liUNCx`YpFCUeJH%oU!8}-M1PNik zx|`lABZ^=<`HZb>=>nMZ7nT~o2qqM-4%OG%C&Q4-$?}ODa2EF>Q=@M-w#0XRCWpl8 z_MKtGQfh3wtU&|kF8h-Y-Mn$_l$OSbL~MGWf$K7{T8yf zGwtZ7*V{)l=8SFl^g^Y?Y0a6Xuj1!}txwJE$jC=qpCYgklEef}nH;*VAF6dxgA|yo ztPC&^oak|^EAdMC=bS=JWmSu?AyhFDQBWaqR?;H&dMVYSM6aa4nu{1vbR3Ppw*S)E zEe#72KrjifoA&A;Klp=UcBA>0DF5fiHTNaF{AySKc=Qr7IH5^bd2-N96E8Uq3R z>92F?z>^@1pgki~Sjo(5wl-o=q8BAhu8FHQ0()|N;SO`MrTNqin-Ni8hVG@}SuGY& z!4+)b4O%mPbk~a(YEog%0*Bv!vsrc3-m1SYDXNr1(&gh>)S%E3U!Cd>EqrbVC!=lC zc!loVd{w00A%P_cW!hLhU|Vg*ylbrPE~sZl78gVh$X&Zg0wY{20a7q*2X?RL8N)p8@~DL)Pde3PlOUnS?t<#~Z!4d0;-=(r504s+s6x7}pJrCI~F?GF@pX zrEzD9RyCv7&!LCdIY&s6-{f@#Q4YcpnOt%nSM z%A-%~21D%Bs0>)?-d$u2!_`W)n!;S2IIZwmiBu9vm_iy{gYJ65Q6LS>)3F1VSe2y- zj!&{7AEdSROi~7X+IdiAJ1OzvsJ-&gJuF7}l~?xFHxIdFun*;C5=F!Y3ga!0GV0Y6 zJT;7NDm?$+ROmhYM;i~HaTmcS?=g!>EERG_XS*Mq!5`6~>F{`8K&P3f>5UDNM$j`4 z)CVKv>Qk-(MJNSL)jn8RJ!(i|?iXsXbg`g^MO$?;E<)}C2^S0Ijg9WEZL@$ktwAbHQ(NOxMAP1)XQ+RYFG3;(*ok-GP7v$@-cmLIzB}}~!wn;8c z#MZ)b&@HicXMCBmLNO)0K*@19(_aaE{x=tk9d`V-)8AR8sgy|^4xeSg;?-`W_ZD!@ zNwJ{(_TySkH?-sMHDSP7m7axk6$n~fQmVeSEMjlJ6eroye}19={Js9O)PMeu{i(mT z<>iuK{`7Eje^4jfRCu-931|^qxJ*IcN998pbPM{CB92agChlog_zHFR<-U&uI}fHozDfohEY3}Zg$I=0ftVhy6)x(~*m?(D~b z@RIknqh8&dpTaxJ5OrPcQ!49c2)}QetNy>X?p#L zsDq@tK$()Mx!Ua6NG!)j0>*~3;BIZ0KH*(T+$8xl-fa&THca2@m0)Qw&SC4tL4|MJ zKWevg`^k0d4g?O7C!e|S@?xH!BvlXf71|C7iK^KXzPk6*`(}*CE+kl)1Zp@qeR1$o zT#~eCuFKSiK!e&HU!mU72lE1H+keaH{YkjcdPd(-!uKw(^^&6^G7 z7z;*J9Xily-y;6xwg#cuELtq?N=T z_?NDY)fSrxSNEA+%6MBuW>4t*U;G9*d|T`3n=Bet@z=)B;dyUV34Zk_xg3vLqKfngnZhg;rOAX zsCYvs0IlRt?HmkOIq&|}+y*mYAdG&>Qo$6j+3Z;&gjnGn;fO*YEUkIdNWSrFHftp_ z8mapNgN3wpYjO`Njl}!7K3C(nM7-ofaH?KZbUSAc)6E-KS>XGoqaX(0XqO3QHK}gk zd(AK|E4L^Mv%=kBbr-WUq`z{;DVQ>@(z 
z=Q@Iwx#V3lRFG?dvpR5OoLyKDyNp3`vUvXuBAzrsMtIn^1yp*uuya&SQdqjlb%}@t zp?0E+Bdl-i;qOv!L+|+0=6(Jdi_c+W3xxU#bW@%!-RlVqqDXG~aR26rDvs$`gso+5 zV?DD3#>f}q#wgMxcp(ujwl;|xCwT0BOD?4Ki-^X+vB=u!fm?qSeJ>&HSBO6G;OWok zX(x+gilikDU)|aID{ZIej~~C<`qLe~`sdp>;xUh(k*(JAmqat#t}H?&o>LL0l`spI z$@nkt_2FvTOIYJGe^jS>c)`=2y{6y2U4#E@iWm*QW)Ngfn0OsZ2v)J9WB$%Mx%jo` zga%WZr)zWqkx{l28KYIJLdj+_qdfh75RY7y^OFuov&Xsyc&u7_jmo|naa_Day)<}a z*NWHQ!jItZoa=aSKi0>6ik>g%w_)Z28ugF-*gk_u+~{;?i~3hCMa+Nm+h-@wXs&%G zZ)SWV4^X87;vjrP-NV%HCqp}-rzc)Y)7sfl1Oq8^jivc|EU%6hO7N17xP`xy2XFlg z{oz!!ep@hF^P?`!R(SDtICB5Rv}}|ICsA9+^tG9d7^oHRlcbpX>Y8&+tdT+yw9n3; zA1`Mc4}3S@5e`fa+>G36Q-VEZi5tleM`;gm79>lWwZMI4nI>jap@LJ@yd7RzLzm9x zgksP;a-*9x22spY(CJ1jpkC^xA5r;m8T}Lybc@v;BBht0Q*gav`ax>1_>>W$gyGG` z6Ups+rm9Msq3Bn;Gxv0ZwmAGxNE!BiFzHO4d@lwSHrD8uPRdpsJ$r{tL*@OU$+ZY6 zsohuj2*OBj<%w2kj9`qeBPCjo7en{9DvHg%@)(4C8i-R65=$5>xd zwh0Qhl1H1Z?wBj5B~5d!4|Pv_%!TxkU96}auST(u*7T|DTxyP3S;^h>sVpH;wBjtL zLxpHFb^&-!of$k2pat{c)YnY7P9C;-+LqaLRVsF;f%pE48KrRwu1GTDIgmH8E6;YT zt`ZyQvtUgja$RL4St_pxv?WboA&~9s=6G?6N~u_W`20*q^YQ4hgy$8H1((4{h7LTD zibZpHy)f)fq%CTQV`?s}K#JN?Rs%&6hUqg%;V9s3ryN;}mKR|=RbI2!gQp6VUPG6x zy0S;tP$*ZT20eZZJ*FTOi@<5FOV8~VZIuAoW2Xl%5eKMa{C=S(pRThgy1oDCcwd*V za7zx(tq5HeHE-X|y;=hZ+|#;}H@S2Dws^Jo0Zm0n?ccuF{d&*Fbg~6q+~sIa!iHaw ztznbflZ7Vs{8rBK;pcz+gtVGx0R-#Qn|y!krbvQ$op-sb z8FN!{f1`voQ`(GeRXw7sR>z?pX^Gt$>j;D;-ZF)#&3Q{s=v_85?*jx^q8<6u(JvlV z>(aK{a7|{a@Bogiw7^h4I=&~`aCsC;LYSGAG2NUOeJ*{j8cl?p|8nhFUimfND1qHx zZR~3$r)9ES$ra8%b0-RDYExWXybbIIbY-AwFda+(uoA^X;NzTJlXg1xV}}dYWJnc1 zT>Tn?WYTux>UWfOGMG!f@f$0SH5b734xV%G0Mlr3`)-$H-e}g)RE2;sV{|GLKpDrq zaaOEK!H5^D0<-*~xTzk;CQ*7>=hm;T5s2ATXJC+`_;9V8HdN%{>hCaf)-Knaw@pdi zDScnyu$Nt!h`#i3d4$Oc45~Y-;wmEBc8gs%&NRXRmqTwf<*j+|&?V&@-@|oqMo-J5 zMMHmAdmaCBLZbd|xw+&8BRs!y^3*Vk%NCQgECEGmn2Z)c8Cyc}H~eT50MP!yG!;^y zcZAj|EX$xU$5Qf44012zjATDcm@<42bF+IGt*Z0Rmeq2a87+^h{e>NFFK1u6@eQbj z7oiK%tFI?>*n5w6Ua!(U;77srvlf=97~8P&pwu_Bo8`FSIWzq2z(zOpTHZ``HtVa5 z`~_klU}>Zu4>J!Ojm5*t0kpsuYp{h(=))B;P0p2ec7&rMONwKi zUIabc|A&){->FFnZ>KDM$aT4z28)jko!ixm=5+>I7D#FPNaTx-1AXO@(7O8jHK3`w zzM!B~4_nnk&bEDDw#?`<2fHeGCxCQV7y26R`XICdZG)8l*B!ccsdcH`GA91bZ`KUC z|4pw6b{$;cF%suK-RMf|-r5m|%r5rsR)S%SdLbGgSOsS^V1yNvn&UWNj$3aACns86?QkbQ-dYoz@NSY`hNhuwhIRmn&$ue8!2&qzolT98g%9xbgAURLZE(G!rhdcfNE=w5E}1;$Zd zlZ|b-0@nQw#sMqmHy|bpJsj$MNdE?Yp z7KgS$)5B@gvpl`!2S_`=$I6JJn~+vlpI{+^1y41Q2W;f88a-oGopreDIM|H_n9x&c zz*hZOH6=ZWvYN!a5x3MK@pdDSp2O?Vortgkg>2|>CMw%rl63wnL#Qs~Ct$svKK5B=iy|~)j{quEhYxjEnCspzA6zhfi zi8NBQr1tWzX0qo&qt^pIv$1MVZk1W{faW+gBUiiaNQ9TYX#-|~Cnx`vn{OBJ!Nc!i z+zslRF>|CGM1O#?r*o06c|WEWlismIK1F;546cVek}ZN9;e95GnHreQVxMwpa;xI9 zQsq&ePwq=f$Wp|qxHz2|5E!~`8Jf@+Q?Y`NFchM)8oR7Az$Uaxkw4NCII&3qS33d| z0mdM#n_ttgM%$cm>n`gdY+U-^M)}xFuWPHK>jW(|eg{-kUSrB2sCB9s%oUX3#*1v| z^Pwwh?|lEU18d#PFSs2nTMEi8&Ql3jyKZ5*j$#v|abYv+Lr7wA;#FlGyf)P!bpQG#b zvREF4Rq5kS8XULTZn-I?o?nlkQ#^7B;b9y%9p9Jc41&@(bJVNxyLYe=eq2>#^| zI)NqmXkrWaOs1$eYv5ovJ?xcWy4r>uPs>!iwY~SzU$=K}ZQs33vfa=xqZ=DN?TTe+ zWaZHHBkwYD3+dg;F!t?NdpT7?CC>p2hi<*tO~+wjZ-H|PE+=1MPD0}HA$Hvbea)x$ z;0S&cmyeq1c_tOej@}*m?cWt($zckxg8e_9SgW~fj%`N@8}S=Q2iY;kA({dKdRtCO zdc`cHts_*+l^s#r*#+dQlt@M?X|QFkAmgxlSUg#--5h0qzY^&uIcR^A32+71)S=9F z9rrLV)?W`dODd%T4B}^q49N$rzAA_sMX0&(<@oVq5;;6YNnUoBouB>jp&bB>Z1$;F zikR1YdS#bZPI?&`kBakt50)GO ztwK=^(s8N&i&}{MEhUmpsUcc(Pmrfh4H6x^H$KphPyipj*1i>ZG|;m-Z=p*`=+Qe* zpNJg1x|cq3NzfCJQD{E3{gW{+N`LzAv_EW?w52TTr72}eV-Lg4Qu=`zeq%L{vV~E{ zO0%6&JmI7014qNzkB69F*+Rx+67bFKyJDCk9uGyhuTYG&CiA{|z~2piB56{6bUInz zWd>F=svxR02)W_z`rZ4vOUvD%mi2@NaOUPxncS_}$E58Tv4-E2a8X<`Vjhm*9a?TN zK-Ox?xC9|Oatrd@U|H{VFK>+g+-ST_?$R3#QbgLkPyj~>;(7U6qgfN|H#v&;hAW%{ 
zH(F}Z_-mW;H=1spzeN(vEg*A7-Dtd<+PpU!Jf;2BcJFHqdz}#U^#|Uw-aV7}#*JQo z4X^%<#=oA!^hQJ8)J=J#!QaTGcm0`m-y0@QlC>U^8x6SGd2;<>K&We_mfT>(N5QUJ z$p1z!zfK7JMiZF2+jJ;TTT$NK!MSWC94(1(8A3D}whmI4u=U@(Pd3dd>hggu#n~W+ zdB~nYd2b84d{B376+@3o<;!L$9RcM6O`(NlE_*43TX4tgh4j}R1ABZ8&;O>Ot{*|( zG=jXhE3w~A53lEzn@{uVaOb9(l5b{S+~ruXCIdze_f3Om)7I4GE+0H=IucVN%fga_ zHqB7)To;d1>~4^c>jM?0NpD)hObOx7E+~z-gxt*E)5QWd4OTAw%EBeYgYudF#mgbM zJ}$6nu1(Q_P0vr&D|dgIVSD+qK>e#y3pUNki&?={HCUHVCWSvcNw2L1xCCKGbprBC zB}cYeA545%Nk7iX<>d~~CstW@e7Yc^SOLp+-2oRh&(A1i;^~KT(sD7AkH7r}Dlu=p z!W3}RJ?iVq-`QDdlUTliw<+{`c{L8bobU_9rFCm*d2fcRQkjg%x>ZO$v4&=ojMXqx z6?mLsHL`75RM3+{tm#@;l3aZS&}{$npCSnp*HM|TQJm3bXnO3UA=gz&F6;b3BdO08 zOh|9^2en*)HLj(~xEav}V_G!ZQY_O-Q}Is3B)G8MoYs*M%<*Ec<2lwX;6T?)ufZhi zRUt1oIC*${tY#dFfb`*&Tid(0Z7h$xyv~TkRV>ov@fsVD6CmSCE|-+e`}-%4_wfYo zAOECmnx*~ipU1)2AXInV4PA)SPs(Jf@FoGqel7fO6dG@lk3#*l1znVlKo*-EX&-bH z0TkqxM2)KWNO?(nQW2#pyI~2{of7k*6_*8-^3<)tl#=`up7V;W^ty1gR+*SwI3W>t zco%opA`ql=)Bj)ngwK}32XpdgT! zNgv;Q^SK0{j-ZmS-x(q>*pkAU8*#*#K2V;CZaLvbOWgfLDiztGe1vJ&hx)SbXbdy= zMn5ABla3W3=8Xp@;&G432_0UqAJ|_NN|6b$;-!u6iT@h`X7nW^KoPBVC;?#vh?s(o zFdxms@j#e7v*TYg@>N5IM|?D2H;82>tr>$T+qez|EdUMfmio@!hOF$bYm9yQUNV|i z3%bULcK5Nx4;3f2;88^dT9~Z%&j}+|mE9+*OUEBO$X=7{rcA=!6sPkjBy{5xcdB%S z7TH$x`-s0!7Z*eNQVtMJkCU}wlhR?NSPo$B$FuRmzT!FjnxYWo48urLWq?k zm!9*Z!9pO>G}GBekSm^odW!fcQy7m|SAc|dgf*d4N0Ig)4wqhV+Ryas-&E$%u%3zD@j+4uD3ftJP_ z7){dm3QRA+sBVetux7Lw9Rq46 zQ8_HmpY>dRwgXn`291nhXg?>~Tz+^xqcj2wPfUBH0)H(mOk!QpsVDK zGU_!22KN`oPs}+rskF7GGp^0M(Cku=&uGIlKNE`^9Ad7f5K5juIkOVeN6$}ah<=vz zK^2cYFOZf}3!z;z**;Vrz{#1Cn^EW^KVLK$Z}X^rUz{8+UhNKaSClxUgiF_R4OVf2^)(a9bS&CsUc;cgcK7oHf~Ko=dtvt(4qOVZLsv$UoM1q}}ttZiKwQneqxUQ5Q!5p_yg z&(mZ8_ph#_#5@H?sl0@W=6hqpj`G$5-+QQ$@I}u?%8)|YU?H7RAQAHLyWf7c_3&3X z-UctZSHG3fT{bSXufl}>m33R{)?2`;lkmnYQV=750W?B2YS);WbwAJu;6yh;C~p#^ zBpvS9ay#rwcbGsb9QLnBxA0t6_Q@f|U}_*IMJQd)jKy*$RTpwd)+N4Vw^}Di%lrP~ z^RNDS|BqjP_tkG#Ys+GgsG~O!0`A8JIw#K4OfYIA$<5RTP>)h`b+@lT-%+(KSLvP{!-*I*%wKgV zz=je-uXI28tF9V|mF=>!cj#KIZfXi20-*Fk{@fEdGw3Tvf94%X?3<+W{d~;*U}Jc+ zj(L{o+%S{dmc0tFeKp;RcwC#m;h=R~Mzys<3hv}io+zo8RAhSp@&NK_KTEZ<`q{O+ zFi~k;BCcX^jj0(zC=J?pPC0g~;BjR~n_dNa2IeU7sbk+5He?Z55192FJZQPPF;E%P z|Al#?Uz(HaVh$T6DrSH12-ML2h(roq>n?jF=*+HS)o@}pc(%zcj&4BSe@^C$`N%Yy zTQmMAJE0`tnUd*s;R|P(5uWeyx{72T9b2zdUDte65Xn{NzyDRM*KGx{Km*KVD*+s+ zk5hbprY(+`nJisgT>03&B@z44cQ{z{^r9gkj4sakZ~h?Tw&~g)TskoEzu#W)Yh<-8 z8af$2Rj%;=lRY7@cb-xBvx7>$Bd1oDWBkqTPtGX#*=4LmswOQbRh@c0%&Wd_VVH!C z0gs0Jcw&G@W^3fXbrzKTJ-Jjgx;~-8>-2<9_!wT?>`?^-#c-t^T@PmepF?0(gv4e& zoXb5Q3PGx3K6YddhQGmOQu6a?1q04)IZL-8WrPw zD{-udVi^a(eLE@J_WlbIYQvJ3@qM@Sot7M8wkeYOy6_DJvFTJoF*FwQI)$Ny)c1QM z<&A0~NOU!H$BhHvn3p4J>-;$uGxXWL$>Lx{erY5_$XV(wV%p%=dkn84swWDlb-0lx z8rvg|Ybr%}@p2I++@wP$3hHfY)H5(@*r+e|@`>$dr<8P_{&n}k@&O%<(Ok}U9BKXR z!@4rZ+$No~EgIzsPqwlHm_)qSeu5dL6jH+N< zH)W#8Qe#sm3Ru~{fiCtF{lx+eoR*h501YH`7-(wg%6>kilOjC;aJCoC3~<%=fLkH* zQPYUIC&%1<$tm*BR7wA*;$27JZO4O9uzgG%adH(-Wp?Np+cb8^zEY_RqR zWag(oijYTrJcUQxA=^TTm%kemB$pY#$pXxdf)14v(~Z>S<cyh$$3yht%w@svG7eAq_TP<-=R_BO!~-?EAzP_z0n3EOdit}fpbGdUcefGQCs=u6T!yp1&?POrukfbsl?v zwF@dGHFbFGdCZVq_Q4_37(4-HfxrtE)~ombDJN}H%&pB+9F^_yg(;1xAiKjjvH`J; za6GgVPRJWH%(EO1jvN(}t;AgRj%0ehA_9&zb?%Lrqhp0et924}NCMxh-L=P@EN4Ac z^-dSLDey$1HIRMh?On?F-5NP~JT^n;kYLLme@tCNi^<57Hi;MNZ0FhYM}#3=h&!V5 z)R;2%(u))WadA@{sL(p1I~~RFloQRpB{wT*MtmjZpc_9pnX#K8-@W!~yLVreR#&kq z^X|21m3j?NLZt+lUJ~;=IUFX**l^#A?}=Ifu3(AHFKf<19Hm^ev^?1Q>fkF)4dF>0 zH=S|wSW0%0UF#AL3L{!NO8myRNfax(jCOus=+c-`Mhab=Hd$&(v#`|l7`B!P^ulb4 zf-ct>PqBcLLv{Sf0t)wxO0A8HrPxc!1-=CnHfYDL?YQ#ibsqTNp3PsNZVP>0%!kC~ z^W%y_nu3!^>3^t43Je@&JSfGp^I1ex%$Ts_!l3CW_M)|~+}=e;3b7=f{K5}pE%b6x 
zONlI!7?k-nX%wQ8qNPd>;y`SorXP+G-q=UTq1v&?vrY-8Km@&6Wlo<+(VVz6;6st! z>olrsV1cCf=qCqsYt`d#F~-Ld^C7>3A=$$g689o zz-jL4v(uLfjdrNe9y;Fkg1Bq$JRQsV?|+qpdDXX(D{97eMo%~ims@GKy#F9u>K|dN z$>F|ykTQ)^m9TPY7DmJAF^xAIK}d81GFctPboe(Jsee~{hepH~$fDv7M*TIdGPdm1YkJO%+D!Qr zD4nYr@50s#7m&D8;AMcak`+I2i_olYV>f4foWC&Jk0=5xO>f#YGomBfmy+;;Rgj1;&s;`^iYJZm{dii8~~WGiQkwPhtFQh$Wwukl9_UK&eUwjD+q_O z>Gy$5H+j>Mb+m{^LUyO8FJX(%Ub=bWXHk5?OQVcRtsxSWVOj1E@ZoYz)7hPCQQihG;(W#@=W zgXYJRg|#)yUh?*R(}L+T6e46Sj)&AzVuu~v;waI!)DPWNrAFhG4VFzQt-^XpQ0_gT z@>P&IS5_n-hHg>*DRg+hP2@PgS)fA{9FSC5EVIJCakg$amM=)vrov4&c_Xk{KUEs;JrOq|Vcvb3U7e&WLV76(?Y z*xS8@FDr*jbeer@jz@aZo+3%SwW8v|{BHDS8daW7$-x_XDDuFY605ZQJj1m-XAps9 zv7e*F-Q+`8SqVd2=%7t#nDA4y{q$O%E=BWXDrsTX^IX2)##}8nOpOCK)4-Y&6m&tL zV+3>#6FxKuKaq}%+1hO<){#e>V2C<^&H^GJ5?_w(oA$k@nYO>3Oj8>2eHfamCp`9><_{-N)ihe6bLnhk2aq;oL6VW9Gck(HUy0*RQy)b>{6`BJwK^!~Z2` zXZLsJQQZ8DQglqg;A(;Q%eF|fc<2kOoFG7S#$8@^EI>UCNB2~?w-F9*>bu>U$}@q? zSQvUC6QS&{mW@7=z7#`QY}Sc=I=jftbeSxF2;TJA^{CERIkSd6%bd@Yo` zdKDNXLj*K-kDy)LnApA4|7G|f`4znLREhP|q`fXxPlMhtc`p>tF7qN)I$L3AajCcs z&lbnekLI+;cSk=?AyX+{OWAwI#VHk>s$Hthy6+V1RVpSa^!VE3uJh=NxOvN26>hb- zr89E^9ddB&EB14WK9a&h14fjvi)-{QO@4jmxtyfe&dI1$go}p2a%3J`H|kGZt5Il< znKhrMfHYm%azjU^l6-)jOlgk_LE-=^V_6BMR@mTqP?^JIUl)F)uZ-IUVkL1ab21VR z#G=pQ_wrLpHD(ni2}iCA*2OcTN^YLZHAK1Qu!a{YsOj8Ir~P;GEy}?u`_mvr%hZ#kf|ZcEb9~g z<9YU~XN}5jph4mPpEQP0@r~1fIzNU7mMZhM!F2;Vr8-tNIspn+CLH#4O*R#)gz9@W|<{gq<>go?DB5b$?n|Ic!{=| zos0QSVmrix4p`uilhULclCc_uf_zFPsf5Bl;VzeP6()lU(bw0*6Rst>Dp#q zk7&*G#}_ycij#GUo5ocYFYvA@9<`?HQjXEKn|#NMLY|UBPDAf`lZ(IeLgZQ5!KEbX z#QP>%&N0&(E$)(IBtzNSG=hHE958`}SC&J%7pb9DR#=nFFf(iVHkIX$Y(plhloeab zWM+(}4W3LnQ(f|JWQy_QNo@ew{Fmn^I*CUriy4&XU`6cwDIq_&Dz1zrD++PmdyEpN zxK!6XM?Aou&BD{c)hBJp4k3=GI%Xn=!8JcDOPoZ}vK3Zsn_f+h+5Mugt$tAyFUj-x z{6s2v?$o~|%iYT27uXe6C*EO#y|wZ{I@wv+`UiYr8-DXO*pF@Ftk9-SAJOgu`)%p} zkU4g@M1}vbwv_DPbWvBwM67&e@o+!`;N_!-HP3Uuq2J#fkU9D1I>SX(fxaY!e z!s}*8z}F;jwt2yD(_j=Cqlo6KY*L#!(#%5>~$p3Iu#-W93GQrq-IKsx|IyFU-^13Pvm(NpvE+ZTJRZ^^S^G=aDre z$Ys9YvlJH4^!Wno4prfZf-n>bUU)>39`{d~LX};)F=ka~W0*{8x_|>(yE9m*Zx46P zUo>#n!`6$x6?Z}v$S&6M+qQNw%q^FNBZRA@e6^wLZQZ^16lI4nQ-bJUm!Il}Q2KpK znXksjC@~wJyQQSq(-Y_#n9u%IZD0LenQ0dXPlkW`_V-`vgVe)z2ju9=PAO)ew8@U< zhxF-;*34L`M_Z_qc>I{@(kC;IG#9H$&3M#X)9yL!@spNu;@gg>ahhjofWVe6<}^@R z20fT|{7Ri~11jTYQRzRDOVztNaz@>ia#9Ks+}E3GuRqk`=()nYR?D6XSUJAv1timp z%(7?H1e8_3@@#J7GHBpX2VqIm^aW=V@57hN?~hNhM&lpa{KXkf%D+E( zmQh5>ZCy!=cax0cCI#LMtZ3^O+%k@LtD}OU`z*2Y^xzyr#~Iid7)e*UlsiGIt{>AJ z@qBJXe;dG;4y8&!u*)X#*r)|p{NrEA(XA4iH6WP8qgSyi@u^)mNEUZZ4dHxkIUWR% zcOF7XHur{a(@$lQut?e_l->c+Njm7;NK#;hLp?e>LEK#LFhqxw2hNUb9#7ANcG$Aif=+s7cb*YAZ7Gs!8CJXJ=iiht23gmI^LRn z_}w>u-1+kO?8(LVA3x9^A8&p51>UZmKX8Y_$7+eGYg>yM4V8S_qQy?LP9xIzv?`0_ z(RWd1Uymvv=!nr~>hU2fj z!II*tooa?@{f1fsb!eRT_IA+aqk-b$8ZW$ifAm7;$|<|);L*uGf&ji$T@Tr&%3ON1 zO1g9kYYydQM$VV!DXr#g<nIZ zSWzO*r02VNf1EU2dVhGLA34eCNW-}woj#!b_q8JL- z?E1B#Xe+8WbGS?U( zBm`Cu$Hu0_KIPqzCw;r}zAp}bAaUgA2nVk@ANoCKw5#chftgYp_dQ8x-#kSncn0Qr zjA3n^eskCbewpL`?mT7H;SZoe0Bgz!zZPZzDl2N*)pgch91t7H;%?(SwpQN?XM@m* z9r#pc(ae-gPilfyEQ-_0+K=pqYrGyWUI#wPB5~5dZ02(&4KC217LaEZR$Xr}BYw5& zgl*fMA{GEScz!|m=_mLsPO-?8cFT=ln${aPEQW8t{KncmpSnsFYX2yyMvBL0zxx*| zU*u|P8k6uIN#M2`P5G5AO_p?fU}zupU;&jdJNSjrPZG6}mnXuaSGztBhgtxk$k}x+ z7IJiO{^I0pupZ!r?f5YCgge9V=%IVk%J*2V&(d}qI$9uFr*HeI2Rca__pa_+hH|eU zm6>dHgZMh^07)ty1c=h3VyD_zWxv|o#QwR^p)zHIJ<0Fls+cqcfR12RiqVXqBpFiz>7IN)c1U(3LuvZzPVU1BI z`@|v)Qq`vKHUKAIwH#I^%vY5FskT9{?sEwwO{py(opo>HorZrLUi_7mU{Mm>01w-% z`+*ViPPJp+j13$1o4z|BD%aLAN9wh_X>Qj8oZDK6nT^`w(R*v3%QNHn*n-un9G_p~ z$tR@7f^uj}?$0pfZM719zrdVJ3J?gmxU4re5XRYR_1X&>h@oSf1KucK>6Xqx&8O41 
zI2x<_sfsmsP?s&&HD+xE_nz)!x$Id~vAv7qr+>KW_r<6}4z&CReVL-b_7k#r{Rn31 zoUmY&2+M~Rluu{P8?aDq zyjo+MsI)XqJa+0tan&$8ajuz(+{wSqu(tK5UITzSD=kP%dxB!I=#Gyd)&P+Og)V(B zUh$q)_1@kCtiP6{$&7$y1nmON;`xa;_7&j5y=r+f+Xx*;2I*PW@SPF9HoeKM~ z%%oNLTL)IXVhY5tCg5fqw<;vPJ%PH4t$jAchIK>9W_IU`?$HX;vN~~skb9!DiuEM} za|H7YE-^;C$uajZ`-gqYaeIskTHoaPa4=Gb zfDgz6R^-;ZXyeks@Q@`J;N6b5>EqE{W2ya;Q{Ho>%;vRh{9eA+Xlu9+C>SF;4EcM+ zCWr{;>p=-!|2>1gm=CxJE#;vbd!zuMVAqp2Hm@HFISu08A&f$cggse~h}ax*gedYc z$}TY_s4ovw-9k$0@;TQBcgN7evYjE2D7$<1k`H_*i^#r`gNk$T>F<~&siPksnIVUg zmmN3KCzq4xg)TWW&Ce!T5=v_xl!&P9`S0EKUhNLREPIB0YT{6(1ZK&@_hQQ~U=ve> zQM~1?v1^SShNY^Q+5wgX6(~5@UTl>em(yU5N*il$UfA)A3760lE*}yKz-7(jIu>;- z<#^E&yz{LX>-wkJfO!5r!CF^~Mqu5r3{pi2;0{LUyYQ+^5x4Q|!Mx!Gw&lmig`9QO ziy)zKZ+JAuC6a37-eYny>@k7q;Zpk4!>nnTUX!YolDg2CjED*WAkww{UXXuziuccD1h#PSnx zY*7kEGOI|lt@3v(L^LNk`>;$ObZs@LmXYrfhC8u%d5Kwl;797&Zem@vnUr3FWMoeu zN1mO|Uz)?(6DyiJl+gfFrgD2k1ssa@ikQNdeqX1S83F_#f`UiPC4MThsm(MT%FN?Z z(KfBAAz())>HCR9NwMobP&H&$8?>RDFrYEc2`f{@zNT=v`<4(04D3%zGuhtzb9;;D z{l&jAS9UpuS#66)bivZ|<9jvuEAgyMGpbkhwrc)yjho zdyAlJYuOEK<+;k2$RJW(n@+ifKOwEv&qE6Et`wl^v0>BP8v%=jsV;_Htmrv z^0jMuD{>{S7w(!O7iWOoj*`k?auTKCYNg4pP^61ayDrs%YyBAIvf<~c&qxzE_aXjZ z_+)VJ1`=Os{3P_`WLWlal6@(dV*5|RT6fVV<~kkN&emLj_IzwFF-tBF`N@yK(dm}` zy6NrRp%jMzzwaQC#=A61kqd7){N1_@apnqVD9r;7cEU|t z#du;E?Ua2|gNOy#jF5)zaep8^OB~_l6ZzxFl4`QJrIP31-V69W{#(2fIEQQhD+*76 zk+U;p2L3DAX3^B7f(SrpG{+Z13iy?|T_~gc zEuKyuGF;Ro-AZjmIo7F*bq-q~@R6_oyzjB*j4e;*V${*f;;D^GYnpCT$w$V8Fq5>h zmOzL5LU5XuN_PDh>q+B4Wp}nbnIGYVz%Q^e-Ty0x1=s8qM+ z5s3C?L-Lmz*N(KLnEU1uF^t1db5cQFrdEc#p!|T(LOYwzQP;GbzjFkYXbSkZC;krU zg(4#a=+-4NfF;uyC)+o*E(HgtuzvJMe3c7plVNwMvm|%S)hB}&=v7KVkH>IaS*YJC-9;n=uTj@RC(9{cG>(8t5>p&vXSiAVDDuWBnEL4qCUp% zRbw@_f7O#_<5pBn!(a6V z0#dZj_W$AJ;&*3gQPc5B!nocrm7jj{J*t?^GCbc#P8y z1fI;5=>3{lvLNF5kPvFDeoZEF6Odcc;l3)Rw_1FZ-+Xn;rlyx)W@~9JDr7Fbx*Kx2 z)Gg~(yIgu)B)CF>1KAT@tuV4bHA96ue=!d)U}^!3`Ymk`DGERcuOs5B0yMo*eRiyH zD=+HuL5XTRSvg3WX*fuy_a$@GJd~HhwM3kX2sIHoo)YkSR-Ig;E!DeabfhH~g{q}7 zlBR7{k);)n$2Hx@l9pNcj|72CH@fL$dmvF3(IzeR=D0TBvYuYu8!k>4`xuqSEh6!j zn=w1l`S-0mTW?wDuKyxD7c}=gK730uB_6TAhlGHG`UYO^asemxSq595Xl`;v%G&4& zlRxFh>>cO&#oGQ$903R+K;Br`m>Pt68`Ewz{W@g|NGF+zA=i?tM~GOGjQK(WpiW2h zlL^bkb{D0IW(>-H*V^tb^}(i*MhZ?vMz$}PH(%|J@g$ctdTv4%C9DhAERJP07`Z5! zy6ooBoK72wAyVYS#|@IWbIyLG&%lNG9r!w*=;%t(^XDf7vzzBa=@&hvj#k3Pv0X&a z=J8nE;8CC&R~M`Bzr~5=d;s$F-sQg+^CkTtJK7;0;$Y|e(U>$pR#vTP`5>X1HAZ=3 z(sUaI*%bK&jeHXo!0M8?4uMMsOfMU6uZDQDlK?Mf`}>3?&z|gaJL`TNz`m8Z^qH1D z9D0UDh0h;_um;W{ppX#%H^RLY`-;*UOc_tCVv0Vpa1Hs z&%gePI&`z|#o>mpj*%Dfg<0fzzVF%^Segfzf>gJ@`RC6^kIQJk<7Al@yTWd|;hI?0 zDnw>mx5T=we5OgKlILpoc#Z{)tqmc10*^RRF-nq5Wu;qAEBPYz^n@}t9i>k4&elJD z`OTMt@E?Eo&BL94u9KY|k~n$x{F&VE-WsDA(-LIW|IlC~>f3%o-k&`oH|5pcT;ETSsK{{iL$6+f~mumeOO_AJ)3ncpDAZt%mGrXlLjR=<_neo__GnDqhu`S3@M9 za!SF=BlAhs1=ktU^SMpW`OB{Rtq09-8#lkRnrY*%IVECp_JeX&j6&Nqf0}$?) 
zq?~pUYz0DVe__@NvpuoNUd3bq?@h3pH@mITTs&|)>cOnOcC-_t6@S4^XoOYf%|@`< zX?RH11%Em}`6=C+sojp@v-U(r6)r>1EW`14>}h)5E2@9e-_Ee)y+$ z9!{0nm6w5_)nt#>03@8MJ7zu?jRN^j64a6V!wxmuY&gn`0W47VJ47;$8*Db0u07Gn zz__kN^(@QpMd+Po0$;}$tbBYF%6!HmR0mtSK}r!0T<2m;r*s=LOx`%wxS*e`VXn$|}TzeK0B zItNzNpfNvFj^<*cWzjqddBe3O&1RksKrF^AMp9RXmnG8qI}*%-Y_5O=^CR@_<0rEv ziG`L=Fk{fRrP0}5}6W2w{~cyc=-KC5=uK zL70X_b(c@Kg`Qy=`)8tqtv`JI+pSxkccUu|bwsx#JAxrGz_d- z$tsBC-)u%tnjH;p5=Okz1yJVLC2gvmn9R7rGC6x59Vc1o7rh#6Ps2A5)=U{SyJ7?_ z=MK7rA$uJpl%if`Yu{>oR#bMANvwLsmM5*z8ch^ZjWKM(D2p-+&R)X1sD@ioB%7T< zrCJqB>ieilY2RYGTF*&;&FUJucj{O2ba}h@V>jJ&VwL@OKi}y~b$t>wf0a;JEbYHG zviP?<@Y;C9>PBlf4B@RG05v|y`q5b4|N8qsa@XTa6zlqasz9uOazdfaS>$95RwsEb z@Dcf!LdX1e`JntW$3M}W41Y*R(u?W*WiQZ?Gjn!i;k)Jg8ngR~YU;gOm;{F14F^(} z>dpCSby=;+SwO!MlFh2l%tEEYx2_>X;G^bZ-(-20iiUXEdN`D{Q0@+rLX@Q^Z|U#b zeBl)v_V#R99aR#H%HE`k6<56HxJqcPP$!#X1N~$7);)HANYEaf20mC3ie#EMu^M?1 z`mMkNYER z#mf%D{+Rqq7(7hsXwnW@E>CV;JwH`~S}aD!iY4)0vDWTH!YrDv46(Rm4BPB$z=x7$ zyN^1S+s?U=PDo}_E4SK%x#<@sXCEDNnwabs5jUR(jT#cut}{xsEgW(#FtNI1>0Bvz zu`Hllf44=Z&fnd}Mj_)zC^X&s$Rm(|Q=UpkxW1y5H;npi=_D0aD^Gk~r8>TGwS=dNvvB+PD4Vt)qGkMr zuW{W7GH85GN)R`XRdOIg9^oL&brgrZ@_3E5QY$e`S_ehQJCz_QSti(eb%sMx<)t;R zuDSh?uHw!*b!kHpj&iLmS$3%n)iP5w5t{fN?w95#_hs?v(*~Aeh)Jy%g@A1(uEk z^*yKY|FZWc%yC}XndbLXvTeaBRS_})mfdcuv{b9TMc6KfWqW$2!h-+-kc5SWMgpQ} zC+4@$^PcVAZ%Gi;Qtp}Ph$=CWdDnB#J^MK~bmAPn1RBc6R54KR{O5 zIm`}0#b7X=KO$fqI)^_@Zs`tGlxeoAWGHzAb>e1FFF|f}W$QQSCpRtMzIh}%fyrvb!j-CwH)cV0Zjo^I00Bhdt0xB zak+|&>kLxpDY8k0>2Ym`PV;1GJ%m`TSb}cjE5&8*wG4RUZ$}b`6?yY@Cmt7F9|_Hf z3uX(#AzRKlH>#B;Q-NQ4Q`M6pEsZWrcluVQgMBA^6^H@6()`I86tMveO-4$wJ&v{5 zShou|Zu^dIs4IsKjz#d+zle|&|KkyxRgj2jpw@$}yO8Z^`G-Lq0d`shGuK`M&M3eH z<9aeri6p*&pLD6YxQbU_ML<7VP905%EreG5%&U11;3kwa4mcOJC6H`#aAV9A652^P z3YAefZ#!FF{N+%WvSskO7W^|qBA1GWCVdm6;jVG7dC{=dMfmyAWBEXCT&a5yMIL_| z;l1>ohNF%R|L4OzX;m6GvKR@W7*Rw~MMJ2p9}WVS8talg9s;>(JL?3;LlF3ZL+3n; zLmS_4njDfk@>;MHeceK2PEzEMfZx+bp{h%MT`T;%* zyR;LW)JuBzI4m!HIW`Ex%b`rR8r0v`ttB_oZ|X5dCowYX09qZd!t7VlXKu(Cg{?fO zAgw#<5O_bMi!2*hEn2}Y&R(i;9rh**zUE&6r0HDE>$0tVU9YHQZgozZIn7mO?c(PA z)$8$)QJDuF{cxE78WXy=`N~rj&Thm`gV@CehvJ=PeG2hiEJ==WrP+i7VU+~;B{f_z z+!N5-649Ys=15JsN9Bd&M5sO?e7B%qd_cZ}Fn1vGkB?BBHfN?CT|2Y-e+HavQuN;_SB zBhUoNaLmyXvq|Z_z992rhv);2<*Cj_FetJRyO^ZmpHZ|+O9{aviQa^$wVTnQY1I3T zxef{7*>DTauMphv{-RIM?^Rr(AAPUlwA->`zuML^6aOHIv6HbhVgmD_Sqm+Ne1 zPOm7{wHBo^myw7A^^(l^JE?azvJ%Su9w&4HQ7%9+eT-1-ydItt@5 z5fKl|It9x}PWoifX;W`y+SD9N)~q(!wK*UTE^F=9*X$b7B)7X&MEDp79;`)t=Q z6I!5gbe7y-{qyH7JK_844=Fw|S}5enMmDBppxi%`Kq@SLz?LtUW-XmDo@0Pls-Y#= zUX+bv^Kf^1_qV~tjh%U?vQMfsZkP|Nx{p#oxT>Ze48GVPQDNdpz#1_tQvv6Y6xKRb zV2hQnBk<{TwU1!wK__O(-hUYlp`bbDn{xmxVQ^Zm$*g;UuqFv!Ze?1ESedU^ho2f- zuD_r0x6N{NgwRGb{m}`rzN)Z$baq9u1`*jI`f!&zVNh{TWe*M})CpAtYeCagh3o7z z*SwuPSPG@HGF{wZTS4zP{<6Qh5FCnHsz?YvJ&Lx<3`Dr~mK1Bsou2^Fu<4d{Uja_% z-|H3^UgA_INrHmU03A%OLq= zDNQqQwGBNp(mgqgg>=ZRN96Wugxy4rWFb};qGnGMzh}(Q7`2V<(wyFzHpZGwQfQ$MW0Y^cj6Wz5WmDv=X`gHa8ig(z5=`Y|AI%QGP z?8U!;gAB0As&}Kl_&2MZT9&_yqFO|BP*ZErQXE3&P}84>lG8>g!&KZ({Wgkn=4kZY z0o143OSbqgpEI;4j*Nzp*wvtwvPK?nmwYt@W%p$8woxmnXuK#6bq3~iW8u* z%RWFyOKjC4Zm!>Ms0Go^L&$2nrkxC#1x!vcL!n4g(suZ!XcNnIgeFO)$Hbue+9RtK zYT!bVtT$Kt_P%%V5}gA&Ad((ZU;6|6OoE=<&xqcZ1)nWMN~{Wp&e(W!Rb_^%?)8XB zf{)Pkv3QsSYStbm-el&zF|KM<(i||L!F!zDJH3!!tK#Qs9O5U0pw~P+UL!@XYE!ZtyInhcV&Zx

Cj6f#^o)7xYK{l6IhJlvV}! zN-EO(_N_ca<&=229UqC1Yx(Z`lW;_xztXUOaSjKqGpN8E%zr%SoHdaW9SLxiXvF2> zA6;E2PI)nG)C$bhI{A(rS!gF>WFv5W4Tq5xT!#sJ4sPRc7-% zf}%GG;HabTag_Z_#P_van=Fd?29~ouiwMZO(WavI#oepdb>u(GdR(TCH5l)nw$1wllfaXg-tO=I|yDR zh<2qdfWW#Tu9VI?K&C8H(A8f4vhMrH>dKUq$ixQnVGes$Z)$1E^{Op{sYZe)aiJ7j z(hdV4*uTq>>=P}>Ii+5D?=e^z>9z665!G|iW z7`Jn|F`@R#o>q2-rSSbI0c4wKtQ&_KHhb|L%7uFzfTP4O*-s3Q%d^ z{fl0SrwA@w9h=5H$8C3~4Ehb3#^rGSXk=Muh2b*8Y&s*Gkk8Lvq<;g=FZ@VtbEahm zW@yj*&D@Jz_wwLR2Yn&(U=#zP`INZIe}{~C@RhO(f~*J3(jR5QAUvZ8M(;DSSs)h0 z6Rh))BlWj|D<|pIXpV$ZLt^{W2KmY*Oi8*`hO=+w8)Agojtzk1i74OBema5wp6j9e z`O^K%>95xmgr@HD_eK#l&^)$AyGgIW#>1PL|)QCT^=Jl75}noXJ3vXuuYB3 zb5viN>QLr^?kk!%Cd%|#&>db;BBO9$pVY%>u6GCcfhn73YTX`bqfwa+`iB?o}FP zB9*}4j%jjh)C>y4wX11-!{LJA&grG{PTpDXJzKY+9#(9T%t17ak_ZQ* z!a(zKMOGQ$mUqP;-Lnl1P&#1Q)0evgj_{NhIRv#G^@T#O( zz2pq+A}K%sJy<0K(k^O{#-cTC6{7wuQXf7N@iz;;#WtI=avf9I>wAirIF3BDaZ@e# z47^?8b(JPct<(>iSTyY;A6cCj7qU?uGC;vbFJ%ToBq{ayl0prBDcBcd87CxCwY~%- znE~0ub%>i(i><4xQQ|QkM{b_c^t5l|L1BT!?&a1;t7mL+bAS1F{}i+K6IF0bARd}V z-ZYtqsenO=kn2AE0$)hlo*}X<$8bW_Wx8fyl9osPY)kPqXGV*G_Y+K7PY&*fX$fsH z*D)4~-T$P}s%13`XacYZj^c>67TkmUpVBL6^`#ak7yoyXJBA z8mTgaWxKl(jMf?Bgt?Q{AW?kJ)Q~9F{j7{{jI6KhF@H@R26QgLenK z5mC#i(-uA*fu2l2B>J`uAQE>qcf*~Kf%6#8?G^QM?XS{m^HF?L&Q+kau1KYzb1`A;B-3*=VMG0XXeOnd~VtWkTlL| z`xX(+OK9PcMo35VWGB$PnXKb!sv4uel;<~=!(9i_HeaCbZhJyv5{@#arQGiJ@>)kj zoqBXoH_igdF~dM|N!OI4Iwtim?ZEsWo)w3i^6C}F&3F6ph)5Xu*?3#Jg=v2X^`PED z+~>!gu}$VCOCX)o z3#M)BQ!{I~5oBK&SDK2;QfdpuoZQ{Tj_oRq;W?XOiUA#57uFN4eW_;amPy2(Kk~D9 zT%WeV_x0j(+69q+IwtpvrLsitWIjNCRtiVYQXw2V!|rN&n*|8xjsI+=M^4mBmVwHW zj9a@l4T~6(H351ucKU0}*o!5R#Qo&UZUVl?>lm$pkrm>}3;Uj4NRZt~Nq)w6+c%F) z4+q>tB%R*E$m?kOZ;N(`6ANXyu_f);RNw_&^P%v(h4DD5RD&Om&wIG9$ymeskJgU- z*u*K`VnImZtFv08c2e<0cm*X!tv~1Ko7>ap&rh!LbaU~}j+AVJ*|2Td-ptWsG17=+ z#)TA_O|{*bDL)7y+UXhAhvebcH)v$oOkGyZgQ5Xj3~8YIGCw-I)F@uFq(fc0g6^DQ ztK(Cn_4*bV^4nI<5Q;!q3``!TGRMaG3t?KE{2k?jD|X65kS1HrkhA8c8?>(`W~^X6 zN0x?+YYIa)am;6*n%z6ol0T-_Ee9}3OqQf%rW$O&*s`Cm)&yZ-Q5Kasa!W439u$kt z@T#Mi6!ySRr{V0#46Ak!K0|^$XxZ1btg=bPwZ#3T_9wR9-p&{c8L6v@ew4H)diwBhQL*)gz)#h`nk8hl$D>02*MbA~-j)m4g1d~xtx z5wsOWR*Q(aWpkS@8Zt&TD$yrS$m%vWUz1t+`bOk&{@!3C@wpi`b5S=+l;G|P zN^6Qf9ueWpb7?29*X0iL90 z1a)o56d};^s5BM9u2Ia)L+V?O_0y3f4pT+h)7p%WURkq^DJ#iN7fZ>}(UWfZUM2GA zhzI2Wtnj`?2>qw4)o-ZBf81uMd@`%Iq8k$K@BNoo*ZU`Tr^ow3 zGotZ?fnnjti1q2vA#9p8_WKuRap3-Bh=TR#4!Pdk8g@H^qQFDJ1i3vWaDi4#^Zp=2Elx07 zC_`0R+4dYOn1l}q$u+V*j(ij45hBT#X2mbhZh8NB_4fJ>z$5=+SG`eWUO9iN2{VaE z7CqG7ZLA@(6p{)WeMxy5ITEExB}E&^Ao@{6$nt9B5m$l{2&pIjOfHbC7W#x$B~1Pm zKxJA+(=ga`p-^5IiShj_Rn6>SXIFHOb(BTHkQK{E=8u}g*jwtQG4>YbD#^k1G|=cJ z4u44kU80*jF;noAnNtZW@66q3Ze{+8sMHMWOeoz9)AFO;?m##Wm;u!2_pT{-!M99x$vf-H=xi=GmUV~^ zep2vrx+KGZB*cnmYb|DiID)O+Kq(BU5V1u_?PlbSnW6;f8N zg1X%hEoSjvmc_rYp?T+A<6L}4-Z{IJEhC@-7jYRX7Y;q~%6hzzi)`cC@w`zFuLESQ z0E(=i$>VICgeK{%)V^6K@niIay0uKf0~BAEG-35kFh2GQG25?$kI;$64&} zl5fOb?3?5<{#OZ!t~nt9ZKZj17st(9ekPKSij(Tl%opYc&7dZ!C>hGPcs{LqXe8Yj zj~UTeQ>#$g?AR`S5Z*y5l>$IZ+MzlBYyDr{DXmnawC@C^T*q|C$ujfjuD8a5Bset$ z3&pbhv8`?q!w(pT`qc5(XVKbjL~=|4q`}1Y1WITna>tSJhU8uR*jae(khO;QtFz9D zNCUmZjctJO{$Mf-R>${)ZpdMNZLf%aQ9jWEb2&%bH)H2VmE^4kEeceA=yBOz_{l+| z$+gowti`gu&OA~aG0>S+EqjXtIHsCy7kq3>$$=YzHD*?}2)lC#5iWNlNN;$D$|}#~ z=xsLUJ)W8Y7G9=WM|h;ai=p{Fs8 z2;efYF9B;{a-n2`W*P00h_rosqtfTdWSiCfA+afXXC_T44k6#rI z%2ShCyStuDK}sk(O+k0`$N2;-NJ!dqS%p6|S>t9yjuY}TrX=QDSMqWlsWTg*S<35^ z1J*cLUdDKKX|EWiZd8FJ%_R{-K=|Lh+g@T-W6>QetTm5Wo}?etxMpy%E+ND{O>k_> zdUh5>aC?FO32LNVt8xa_> zdesr4?3i+8(&Lj}SbmKaBt0XtDAu+RMl#Eg>^O^Nik>W_oNT}ToL6&aSE%%4SxwIN zRa#auFyUGo*4z_7-V~SC*@d2RYEXOAu9?u8YrhWD6kW-XJKA8urCn;OiQ1a;EHkXj 
zN`#!>zQgezi`>>%R*)5UEaAdJqFQ=JZRk3rnbOx9&#!oz!UGU%hx{{LtwrEDwgI+Y^%SF#r$bf+Yk*|Z{JuZm@wUrvCGzX? zFQI2K$eu>VDG%b)g#tzv;Y3XY$|40&6N{fDVvuS@mhv*ALv)P7!X1R99?$167P@ZLeOJAA4Fz=ydASMfUtjEHtcI{k@yr{_g*Q9JlsS?cQ zGwm8Tw`teoiW_aJYy%e$lzABqz!+SVxr2#)tkW{$v*4#+Z z3w$cbw7iYkC3J2O<=$APBQel(M)%$&+)-p8t_-~EeLH$Dq=32EaTH-WN4xh8pCImz zz-R2gFl^|**mW`dKos%VZ6Gi@T=>U}Lk#;MTkCbRiKPL3mc`IyRkSg$>iRx+`h z*+f+_uvk>LD_SzuVB~@@rGWvacWn=)HX1A%GL3V#?bv2!(vXjYeFGy|>uENYGNFnl z2QQc}4OuJ3@L8bz9X6(_H?OIJlN1Hd$gB*l&A6CizaUa7Q`9$La02atfdQmBXC!_@ z+DG_grpotZQhA#+Yv#|WBmWw7{Fh}nhoKrfd1#nb{iMs5MCc4MdZ4-KxP-Bh!p9yWZ{V0NA$s%IF zmQGibfDRAYFs;51CW7VRG*Xa!EIBhxztJJ&G7)wfN`n_U2IvX}Xa+JNm6Hff3_)+Ju7i{te!h$6F+k^~lY1gxuf5hmqU zf=`%rX#cue%krBE83X4&9UyPkVIC


m?3rro|7R^eTR@l?tR~xSeOPa|3+q0H=zOcm zA|K3h>IQ8GoyJBpB~1~*g!EAuOb_2SNMVa&LyFEUw=Ge5z8s%!oR^a-`RV!bt1DvW z$lkKE5&d7smuE-kFV~(Z^atit`vZ}XKX`k5a|MF^0SIeRSOeY<bHvhh7B6RUcWmm~vH1pq+x{L%=8l|QSv49vW3gK68wEmxcF&{8Mh@{BMe=E}IptV{=KFIqFi zOI^7xmAh<3JLlUQA$xT$`N#c|VaKlEA%j_qWL8ADDMb|@o`LN1`0#|nMsdkxD(r)> zkSk>a#+x&k{uCcaJmX(~^IMAamhB5hWVwMw`#^Xwk}&*FKmBw)n5a^!k`7`noLV-6 z?nxWt4PnKbAxh}fzFACTMbZb~4gGJr2r2lLOfc;rAzB#ss+Hp~Lm#qLav%%qy?7^) z#Uu@tyAm2L0(@HZ2$L20p%nWts+Gvl+boPsCIw+R#5(xv64QC#HxTzUhq==zrX`$? z=5I_tsdMtV$zzg#}vBc33Kdj z?$Suz&qmmmw7b0IOa1k}3r39_%;nusv*$Be7bHTRcwr*%E7#AaM-$ z2SYqT*&L&mxYkL-4V0Zh!x2xRQ;%3DoXE^P?1(pkUXLK$wmL1g^W)Zx&A-&L@uJTCv8%BOoMu`Lqd^xLz9Py+0vpsgqLIbDI-X73)v@z`|4V)sATmy1RxC&}R+dEaADh{to!( zSITu-?;5oSlFwj}GC5V&medS)thgOqkn0kEe(0(BA`IWJPk2}dyVEL4m_E2=`kBoz zVxy{h(5{X9JY|Bgjkq#cTgW4P(juDMSCjodNq%K4#xC+4l^kdVF}iuiQXtXJAg7;ZmI=cM~kjoyk9~ds8WZeoJo;!6A_osq3`I5 z4+U%5lu0Whn=03BL0Ki;jZCh+D+sd9pEDO1gv&S44HG5_Yz?|JfvFReCc12)C#p+R zrh`k1+dI@7;cP&+<}GqpWD*m2R326~q4t2ZjWAm#Vjr!1=q&P#a8j}zuj1rMA`Z!x zS{3@9?B-)$ULafUmBylcuF*VFBzxtw)2R?(;(dw~kt22VB}Qt+Wh@xb7HMNQCh$^D z=4lz!O>v7QA7M~KPbdCD#prs%+sJhd5MTM<;<-Q#&qA`m+YZ}`EH3P|kj8eag*17? z1Sgjpa}CLV@cb*CmpXp-z!Go^hO`9q09HSV<2e~Lv2zf~^ZV+u3QwnioA&}3CKF9s z);TvTk$JVG5_20(Ike0oQ_+%nl>afZIi<%}uyoxnN%S7C%LuNLEc^yU5+`)|O)L`B z!#}RwrK>#KHSNr71H^0Ezt{*E?2%(|QSIoW&Se%^mY&8{veAN=U9H4GbdvNsg4^-N9TrmKp+j0O=I zZ?!ZkJ#7#Q}?$q$yv>;~uui){xIj#wHFJ>!-+|s)(JDaZWeCS&&VLQA-<+-?0Xp zuT)&fwd$se(mLTTsJMFKiFT9hI}XorbCZj)KNa8K6L)YosswYhb~G;V{JDW(UOcwy6!5^f%Zqb(L7M0 zvpd3w!4a596)}IjkBZIWThQs^;wKm@o^bt9qruYL8=1l>OY1=S<5xLzt?b(U`n%&J zAZyjtIGZ4_X8srW^t08X81#q%8vToN$natuKoCi63LJo$I>M8!F^9!2A_fzSpOrRv zt3qo|KN8vft{aQ-5;}h2U8fd(wqm{SmtV#(NMWv6xHOvPTKFHt&H_4o;}~1?m_!O915Z6t z-DL8u&Elv-(_=IOSdlDI6&cbDw8UnK@2MV(%UB?QNj|dDn*nPyVThKny%4CRfJiRk zY}>N1J^LeJ7w4Bq+M;&{1GPJLt(9_hTBin?_;MiI?tcpyz(Lsu3Xfu6nxo8VUGP-M zN@}1(uuoZ7>spK8+6Zjb6CixNZvAyxlps&G{=4ftj1tg)=q{E$iQAwwcHbybxBdkW z33YQdw)BIixR}EW^sy_1Vm*OWoMBb=`F;WYW>wX}rDzHu^RRdVM^vF>b|sM{ zR;QJ2OKBdlmGzd_p{JT6UrJZxpVSulXp*nM$!8B(IYY`}%{wfMnVpD)ERe0U< z46)?OLuL}yBr56G7{SHn8ZVzkwJVx3wUgbf0M$u6CN?5OGssIT1vFb$QLtw>JywDM zFs`TW&iv&swHRjy*EfWd=!urV4Ku;6MU@Hif%cpr49V~rT-e7r`eEOfQS?RA8vwJ|+5ysoPI(cPyZVlTU|>=t)Y zqlZE%2efa*b>*rr^p4}z<`tpnS%7u2CaRb~&{;ix@F)VSN#fnga!n=l1==xbTG@;r zcu_NheRSz{a0ea~*&RJTzV_rD*1qJZs~UzOm)vY{@!VQKzqpQGpkng=W|0m0bv-gN zbg1uQ8{x&stLuKWY}6|Pc^Ah0nxJc1R#1;Ni?1~WXCY}_-(9cjiHXbBufym&)S&*t zmaeOv0T9IN?EGbYa=AG>0$Q6h%+_Z|k~6zV(Sh;ug}9WW*Q*yyRyRDqMu_9_LNDNx z;MfdF9XZDQa>97pNYF9~b2iX!J~b?$at>WPZpO*<6HV-SQ<*9ctE?T9_JV6Vj|deo z+GBL5kIY#Jxo$6k(JO9dqRv)2j-?;llQ1=PZqUY$Kuj52Z7cphU~ahfiQe7-Tq7Cf zK=+(A_oX!RCiMC3+3~j>vK1ae{8Ua4@_o(iv-7NY8V>6(+hb$2@V5^;_MYc&iD|bM z`D-#KM(3BlET>}a>{#-WrsTgOm5{{5!a7Mgy zJ8*UMA_#nVZVGA~zR!8}689l@C;9)R0{^RPI+qDTBZ=6%$s?%1dwW=GUE{D)BMG@P zOmO0Mfks~B_x14w;ow+T2&e{fWdbX1dUA2}?dzMPmv4AVHpQ<*t>>%FyUQ2X?e#Em z3M+tU-u2o!a0+0gC?no_a}EW_fE$7j@7}lpnU$8g;P21BKfBoeGp6IS@3((>MsO1K ziMM}scX6@(AJ-}|zWo}r8^dh><{Zt1-~QqF-CqhNjNxwoTCbr%l*E0vzqxw%SLe6e zuSqgT+||dwJ?G(+uQ@NNe%rr2+q~ZXhG+}ksBnf&e-k+G52!eJj3Bvrk~|!fhPk=h zMoJQfGy6ktE&3P;e|N6hYCRK4Fg9z#|J?qc+y4vey$&5g#6>X#ssdJ=zl-qV+_%@a zcq(bDif6{Fc3*3IEbIIQtc56+6YjR3#(YyIPGs>}jZpZO*B?piv4eKil)&lo3sDcfI5jY~ zYY{PGASqiR4Nsa80L{>@SV^?13(`@ix+^91O{e4{G8@NLW~QZaq-%D9h0Bw%_c{n0 zp|@Kr!+d7@cKyoym&qrl9TkbOv|(^vb4D^SW@`n}fm}2Ru!-in98A6 zSQmg5Uj`2Qyj)jzT&7bhrmz7>&% zzdOc@*zv#Lb}t67K?x2_LvqOGM1nw(P4J*rW+FDvn5^1#Dpqxk*Ftr?5sHChVH6pG^5zq07pCPLQjYDF-^c4y`$ zC|zn{S1gRX5WpRX7r!o#9Q?qz?c|bg6oli6b&>C66oKyH@cTPu+!i<9ra z7fxO11#e3?IQZ44l0f(_$zsPJl!7vO#X|@Yxb?_xM5KriaDV}NIaNtXKq}V@C`STE 
z6*Th_I}Rd_tw##uFbVO+3fHS$K<$Px_rn(;y3BB+=lqKPMg$G~vGpTP7oC_U3R?qL z_gw^N4LnQs;b9AJJF8xEo}|S&&6?_kS}4tQ)d0&^Kn(8EL(fx_7{F)X`>oWZ!Ob z(ip6K<>O8+Tj#bOKI(27<);;b%zyz-WP==e)<%QzeB|i+2rp5e=62tW>=V>(36eHI zx7tL|IZ$kQ9R^RZeLQ40V#B_;cvH;Xg{+3B#YyaZ+azIG)uOt&I`p#*#G1RWU!2{( zJ)`F4bl)R{`pB=;3=?CxQCnNr7-8EgC(BD5+6mljFr^GTM@-xq?KTi@wpDPbc^sw^ zwU%0GJ7V%k7Z7(xI%TGm^t|n~f@UZX`MZDGnmb9wbe9|=!fy~dcTn>oczOg@nFcf) zPnxO(Aj_4p^I)AW1u*hr&52nL8m9Fw;sBsBF;?GL(MadWJoQ^>yDc@bWdO1Y!-U!D zWDS@tm{-#f;3X3EI{{L`)=|d3!jnm+8n&C{B>k3`1~tbKd$XMoeTwqk@N!t|Yo2?1 zV7!V^ZH6WnesQTbZO#qT=azV$_1~HTuHjP(&VN6!D%@f*y;k+mfEJ}0Q?^ERYnF&o zQ*wT;Kk>5oJt@$nPna+;Kcx7VtuPeIdWMt=LCVs`r3I9@{`u!?X2VA6#dnZJ^Qm_x z=+~B%YY2%SIn`HIdR5MxDuJ!;_oGqv@G77bNNU&JxyCgvSdO-p4zT|8|5J7~TV+G# zQu$KmJi`^nK1}<#R+MTPBRaG#2`|{*g_k1d2+DMt0vb|YW*Ji{`pAb;y908C|US2 zA?NuZ>Y)<9Ci?T=v0yrnF&fa8ZTkpVw&m0aE%b#)O-piY_2AoAA=fxxvaouy#q`MK z^3v9%lT?eMecAS^@Qs>z-ellE3&7~&K}GiHDzAd_Zxamoqa#GnEU$)!XN6s0)dxG; zP#(Ux`)_A(-*-L6m}Qd@Ax&-!>agNrE}*RiSAMqAw$RSpbpxL;@aw-=9pB*>r9t97 zC*yK(B50H5@<4CQh`sd+l(2$o=`qrP?17ff&O5#F{@}^8Y`=Zp6(2FZ{%qr`S+!#L zfAZu{N9`JoDm>V%KHmG2)pv;YfBFe+?V^F&5#51RJ(4s3q=92L&7VH`ucK)?k$Kj3 zx}?sKI$B5*M0W^roBLml?sK38DGRx8;XQ_ER5Zwo2{DzGu1RHis6lEZ{RMIq z7oCe&L!E}0b2I+kKXrnIfZbWCtRU zJs1&7d2ft--5DNv@z-B;!|3g?0cPfHB;?cwvr>BO6CwMr^cDQ`0ba5dR7;2ShD4*$AYi!Z_B@ zH}7ZCU#``DXBn3b)A3jnO&f#NH|O7{WN8g&vOQQ@qHAQKJUmiL`ybhOAKA-Sq6JNG zHuFLvQ;eAxVPWir?Fhn()k!gFHhf*I$Bh?-xnj6Sb`BNd;eEET-rPA|7C{Qnc>U;Q zPMvz`_rjO%we)TO^g4)B5M$t%C=Ghyvy%dIiojUZP`y#>Y_+2Fy09Kcw=`|u)bPE$ zSii1hWF=?79&Ix56qeR*;LwC>QMN4F(&T zee2cdH;SK)^aiM<0(~v@i+bt&>J((nbLYDe9y2zv7@^PTlo!^Nq0fGiKaRJ2U3{78 zZbWzmoW;nm;nyXhd2p7X@!J>w`gLqVyts@G1&vq`Kph#S2HVKP%`WEy#IEl(@i7C=XXKut02+bk}>&x;>yZkG)5=sj*aI;?S_R5C%_my+T=bEHKG2 z!q8t_pT6U)zd63;C9J6Rd808S*v{lAN|_z~1*x610cX}nzoEX+<=X8LCN~-28XNam zOtsCp%sRHX9^j8J?TO?1w`cF(VtbNo;WLA8G*!%}(IGb*FyI8xE7y$^DROk(AJ*Cl zJmyf@aSv_vx#h%GkR#RFUUt|JQe-sCPOEL%77M8fMF7MCK`9@3=_{{3BoyM%3UD9KSMg^%Rw0Add=Y`- zwobKxB`<=cR%?wG$6AKk!U{V$N#N&H0;o{v!1nFtH6>-Xn#cZQ<~Z;bO;pq?2*Oaz z1zsp#*f`ueXGda0*5}%b2Qx6rpnr9bftCTV{VoE(SM4WYrWn8=+ zlr)Mi)*wzFVkF~cvnq@!re$u1Wdlsc3BwP*8r9tde9#Qv0!)a{Df0z;iuB{sh^^Y& zaCari^0X+ic#Tj32&ZEj4te6PNiiO1czBhXS8$kpqlSpJ7JQp~$f08}hm86-A~CAUqPymIHIc6~deRkf+#2NqjDL9?;M ze0!_ph|^&Kf|Z0G_T^ykAbH4Q6BEeN5V_|!Z_cRS%BDJi4{LJR z&g4iV@9K)so>>MNS4Ptitm6om6WBeBNRuWg;}rOS5kITt292>YVE0d_f?ot|CypjT zFIQ*6qa9ogtP5ZivW1z!IhExYdv0f^jxdnhw_RN0 zW|(yt9r}!(%-b7jHWVS%B5}d&YfXK2S6nZeJ5JFYsR=t|TdwKHL%~igXWpo6?*^%L ziuT5N?LQ*My@;R({3x)juixs!(XBq})4`v%&(99+Gfc%}&R6DY`^9>`>5e5iA%mWK zB~Ln!Y#pf&%r?=s7_8~ZxZ=m&!IwiFYin$Q{nD3b1167*s2|&%YT8T2ve-B?#{A=x z)jwLL`&s{*Y=i;ui<(?k_?zx^AV92S(xZ`!bm5XF0qIb;jRYuTLhf?tEZE3NtEqCm zX5BCnFOBDYvkoK0AqOW$=>0uEaC;5)ssC9ejhq2@1ZY4m@G>N;nqUm${!$M~!i+Y^ zgtX1X0!YLBP0{vVUV@Q;DIn*s1!H1ujx}a zRzL`E{m0p{iH7i0N||N>8?W7Qc14k>YkSmoEBh~`=5jyGF`fc+|MZiR$8)cH2ZOTM z3op8xjQcPMVj_P&TWHrO#2cLcx9dT{43`p7YXb@sVm=W(-^cTs`{|R_p%rO;z@Y;7V8ao89QDCn8_;dFY>8j*b~}|mE!|x^uJRNM;Lr{j8{7mCc;IsGaON0e0 z$z@j(YSvHO7B4cYV&U%NL_x9o3-+FYG`^&EemNf)B7%&aIAF{aS)jS3>;Th%+IUm3 z9uW|LktpyCR|g z36Awm${_8h`X)b&+S`Cs-H)6|Bh)z(}E z&!L$4ez{dZev?qz%}WF?;#}ekFm)n6P2LMAkFGBVR`4!bKn=Wak=hj>v3i3yH@WKB zO;V9OHxO#05Fe)=5HNIN!x>0b9yV?5s*v2QI3#zjy(2^Pg_6JAil-Mph*=agqNbT+ zULjL*&CtdYFdktvj2buon&LGhSFpN5AEgXrY)v=6L{VK4+mYNc?#kYyFr%Jj<7UNj znq^VPUI64Bo3oqO$8R=O43XMivh+ZUTfy_KpAPWJi$PtNFFU}-`faBvAGEgFFin8A z7I6+{GGrPDqP9>AqXgNjT}5^km)t6*Jk;Po&FvkEu)$>SP=;e;!0wHiE>Gv( z*_N!6PJKT9 z!2^}k{6`FNMSUd^CGOj`(xW0}e4Q9FqB9O=P6Nr|f|w4kP=y z?L2x#mP(kqTcf+HcCQfV9yiC#?A-2Oe=RpdlDxFrNA%gjDmKCC36b2}Fy(*nf3sI+ 
zC5M)Cf4X_IUj62qKYy|!5mEP4UYGq#zJ8#`>gI4qr<=oF-7YuBOMUoy&{Ia=A^ydN zW%O}Hgc>19P?SNq+ML;A4;DiVscfRo*=C5KY@Kr6u_DbTYd>np808ewn!VxFnVXX8 zk@0^n&Pbps`tR~i#pZNE&qG5k`%;^P>7#b^7F+L?{4y4_Xx<{;UpTv%_DH;GqQ`b= zX;@6Vg_<1Xj3>lO9Ym;i@ytWKEU|ZpfSIA^{Se|>@9q4n`)9-59k=B%(6dLh&#`B5 zL$k)v1HjIkQmWOcAY{a7cB@1PJ9|Zz!P&;|7HS6`PL&A(IGK$_M1sh-5#c}w$cs6d-;xibVt=ydgaI_wJ~yqq&a?h%T^&uFa(lLo3LUd9i5?i*J1K@!*_Jt) z6E>I7hYt}nM_0e^9Js|Ijr+Chw%YsEXY46A+kQxu<8Zb2tbVq_)LZ|rMbDIRADcA*r8+{;`(#y{cnKv3# zl9G#WeDmjjCHJjb>1`Y&r>a$G;YVeWv&Do_#8z$*6Xq3@jA4)=GDq+c2KC0fLT)Ec z*`}m7>|*v(4lglw6~>)&b4@7XVuA8vWv?lY2z|lue)|Evf;8K$jn9|IZ}Kgmep17Q zdb`c=C@+Qn(92I73|op_w>9LA&}&nAUuS6}V0lbxvY5r$k2^2*;y&v30m~7af%Nzb zPmn#Tqmr`#PFYyrS6o0XLC`mFdK?h5Pi`g+L?rxq_CNGq!*XYL5Z4n#dssWExT%AP z4UZz$YM(tsCkHr%1Y9;v0JD{cD)8O$xz+dd1@FJ~A@-H)_PI2o6l7$W(%+CPU3q@i zi`jaDNwD$+hvF+3m~N5LW=7@@}ja)(k9@!%D1 z9>~#C7`>j3Q34unsS+KlC?u@Q^oJ<5Dx0_7&b~r&Sl=Hum;83NdM#$x4(vCOTbY`E zQ5v?;?fz-Ei$u3Xb%z*v@pkU^FB%DGz!&}jK#Vqsz;s%h`%i&=)Kd_xb{Sl>$}tW7 zXz4JX6Z<7U<=OFf1R2T_>}mN@9-*Da8l%=S%|c;3y4)P`NR0_hYq4|-R{G{0B}gt; z|GfImfB);IE~O2@qNLCd(qgE z&eE#69YhY|mM=4!8Fd^=B%WCb%FJ#i*|RmayJ7@CQdQXZPn0wCGSqYj0mCaG}Z_Os>xX`pwcUU zSVByy)}!p0tv5J2A;d#Zkt;F4f=#V>qb)jI&r)10F(&OUCDn13KmF9*X;krS*Z$~VZA-9#as^o_GqEWU0;I)P*kmSk|p?mv9& ztn#*hdmXEh#VUwt4hImQ;ja3TkmqHca(n=h5&X>_Y*-ltHiEH7P?W)%#4c3<)@{F+wXn3fhz3VJl3pvOPBwed^ z1G`Yy#FEXpVWli}O~b#IO5UVLOxm?CN8AS2p+YAVfb?pGl_@7RsY|yQq_Ko;em29Y zDl1#{rFMv2^<~w<9`vNy6%V(GWQDlD`n5>@`PIwos^jc$4GSGwSwD_VPx7K?UVnS{ z#&i#}MaTXkNutdx8eY%T2w?({u1}>Hyng;%`pL;Lm?K{*odl8y;mHG*flLN53-a8O zZY07M~MbNv4Af+&aR3*^+_ z;O|f0CONN2I)P8=ZWY)dB8aKn_o?9ot!RDx;{53Lm|QRJMymV_gSQ30EK+1Akqw@! zxQ2)nu{5c%R4dmeOV1q4a4QnxD$%LxzGroJB$l9EtjtFi6R`nL-*LO)8_Ks%;-8J9 zEmz2&AI#rRWi-2#=94XRSj*bB%T@~|;v)$s5~;3Z+&dQxB*-tlDhx5shW>yP(Vygd z-EgbJG+}hPxdjeTJI#ro)^;>^b6FGo4#0!RG&c=r)d9RYWBg0DcJ&SpNv)|MG65)_ zNy9Z;k^r$Lx3}jb){)eS^{EaXC+xODPnqqROtoZvON}SQRS{h3?ooIm%BR{H6YQzR z$MO%8F}U8^&wBq0^7T}#gO{*$zMn$3A2~!G3#W$n=iTc5Fcy~jT{vX=rS14>vbg1u zgOOo|6fi*nGp6@vC$k6_Y|IReuUVmZrLbyqo4a$*z6uET3;b%=$Z>=jhmgS#!sDkJ zbhfj_FQ*$N-67CQR0G%V0z$>Zq$vopN5>KE)|bNyjwjEXiJ|*Lv7B@h{bU_bC5$$*?YLHO!vcf zkQrNqp%L7I2u_4;?n*O2PPQ0Gu>gp)$jBMA)htXhK<5a0L_(ACblK@=N^=B&5Ol+L z^T&vLLjOUy^Udq(R)UWaE)trkdZXZ65)<#ts|4;5%JsRJA^NMs@9>lqKL=cNGThO)ATsr>($0W!iCu)KZl>6EBG7EXlmFfiy}1 zW7iLDQ;NEpr=42>WP+^t4ZQMW1m3P6up0CSYr z*KS`+PO#`z4#n31RJemhIhzpkqeqyo6FMuP`I~$JyWNo4)hy+nL@AMOqfJ30oB837 zBM_~u0H&h!ZIkC`OIN_teo4*Byb%Z>8K9e_aPL6GSMC`7s28EpSD~J%?241eTuAS4 z-o1CU#q8zLAYp(V&sW?e1A6){QDeGGhaUZEF~|a7221CPRpZaSW~QB*?aqUsWwVO( zB#2iSRqvl6vc~J+c`p2Fb)~oG8zS(J<*hd(C~kFAbX(<7=xuh2o7%yu3>XGP%=R-( zUHvinzb}Y0i(=g6Q6O;SkJ$(l=7Z)?X#z6>4dFI(w1@Ze(3^RRs?M{y@rMn?>))x| z3kzTFTA;NsA&ESl1s=a#DlAb(v5)j6K}U6GMy|nxNh0YsF$=m?K-lwwa^n>h!i#UX zl}{_?f=KWl*#H}g-!z0hw0oMpbi zW$w-~kI}Mfg?>!xczj#cw~cK>v$|Y|ArC%jmpDJ&1tctegf`!uDa$ ziR=+>{KJP~K8dUpj(f7Qs2qY4*3lX+1ycpQO9NZWo>c?P62Fvh1Xj7 zA$UYd)PP9|pZu$=zwo%m>!ZJ`p-LDCLlhS>;onR>om9%c^cbqCvng6)Dktn!+8;WO zz2cjM*W`PXX6cvBxv`1J`7a1KlU@%5o5##`Xl(txLv_=K)KwQu=b!!Cd+~XrTH*No zrlkam`q)J(w3qvmU>B<;ntn9XYS|4}Gp%L`WZlP%?Y@d^X^SB>C$rhnYgR*alTSJN zR5z557Fm;2nxK9$alu60n_03G#d#c*=$tdK#x-q~Uj{vO7h(fLYr>j#v@%6DLFMw~ z!%;P%1}lVr_PDPhk=uHWyym#Q`^J;J(mj&@isgL$7yEIeaFrB&8wFVhHfk>zbj)}R zow(XqWfa0*Q4q?t1ma7`tAPfjW@V8ka0@}UAou;#lBA4gi@Zcgn91)13(Wg>;+HL4 zosbj$?wfCLD`4E?NWcDz_*|o`|Lz+HhoV7gXU{vEq((GvBoa975H$M;^PQV_lCe2T za6?9p_>RpIMv@=kC^a8eGm;lDk6BoA)+7T-;vR{9wz?PQUW|C6euZWQ>Bb3fIz(gF zNTT=C5!^h+U1|max;G}V`j~tE0u7Hh-JMEmbvnj)U^WKad@|Hgi_kDg?YH9!k=$zH zGOZY>N6v&G%#b&)nb=D2-5(5XDBoM@TOFLf%Jy*6jN!s5C8CbT@D_t+&t^A|SngU6 
z4p}K?1UQA>L)L^Fj6e9>p)YREPhXvVe17^#D}E>I-&=3)7h0kn9ue$CoSkES|7bM;W3`Z?8uehZ)*9T+OImt@jq|Bp=zcmJP|N*| z3gh3owKC}htCQH% zl}fAtoz6ER4W~otDSlp33>>y`5zC0_B++tP%v+`t8jB;UEK9p}mDN z^>YV3a~Z%F-uW5!aDyEkeaix`U-n_-i`XXi9K!{;`p))M|GYz%kSHw~h62;9UG7w1E62OM8hRJ6Y{w8|gRr~hD4r2fjEhc)? z!udsR4#7DJQb<gEBk;cqp4mM7=tWHa}qk7-8hp@OsbSR ziUxTa6x0)HX5wh(B$z?n7mk1v>okw8oo->{W>^sB9vrQhz~X2zC1rJjrT4t48Fsq) zfzt_if-Q+ByR(E{2_00cGu_MO{z8n+XMTI!=7+WZ=@85WprszNacXUWLn<&T!zbZH z*zs8uB>RHEneWb8M_Zl|_!^=lLf{(*D)#BXY-jcdN zNfO_RzjNw&utrr2q=_gdkfjo?gn0Swf7@kn#qFDq%*C(K*inyOCI?%U4n%4zI(_a> z3qCYKqT~XZZ#1TIwj_85^INub^jv$AQy3N!``{pg4CjG0(qjaF=eP$Y%y@l4^WrGa3dFiS6R-IGD6+RJ}9aCfR@YyIqyurd$CY`RlTXkj+ zxTuc>T$1TlAjN#lRCGJLa@+1|{fqS{^FBJ;umCPYQbxasa<;Y1hiJUKc@~1|@)XhW z)hn!uy!dnS?HQUO5{QgGUf!@7Kn0&>%XsRL6pe395&)K%2Xp**?{KmoMz5@gPC*e1 zbpmOp09P5E#sj_TruB^XaovwEL`B|ti5vM-pfr<3Q0gs-fJ(k?U!J{XTa&d(Vhrkj zu=?1z^BzUtihAumqdrL}xXJXreaDSLooXhZ5;8~^t5klIc~yX}#yfkv)yh(;EpXs7 zXbWI9zARQ^i}$+#ThQ9=o9ZZjBn_^!669p!l;(AZ z<=rA%nw0dOiA&vi^V3U|1l0MLX23L z`e}W==Y(at!ee9)K~u^sZPq!>+U)+sfyx=f594}ELpIY-a}z`Rxfbh2+b8Z#H|Js_ zZlmR|k5B&Uj93Y?x=aG7!S?-nH$N+q|8LE+%c|q2QQ%SKH)N%>qmHODseRRo~xGl{tJf_BSO8-stb9j})v(?R#~{V}W6ccFo>u z^M#^~oeaL-3s@K?P{#pk7qjp5W3AO(o$9RER9=d()aU)tkDa%+_&|Z(OhOar*N?z; zbO7!l0kG@y&Cx0L;uFjZx5n}S;d=Nj3;Nr?4@U~6UOhA}5)@)(R`g)pkZNfLKnvB< zU6^dfy42E%%xDF}*0^yAZF_U?$haVwf>LqR-doF|7`U=_xYS-O5|~ zPt}Xg-m~x6{R$9BAJ)*!lEY3qjYj^S^^7{zuLiER-qBKX#$Wz2CW$Y8`OBPDpK2)E z9c@QnBe%Y-K3ky_EMsr#PyLxNwkK zz4oY+(`T(UKY!0w>oZHP&!a0;z1(ro!Z=RBpeG+CQt6=$9W3CTEFWU|6_NP`GAh^N-5MrMT~8+ zmN4Ub%3@^*5!=o4p9^IY8Fyn1023w{5`5=K@Q5q=Gop~2tFfex*_<=661?`F@<5)! zR^k&PvB^E5{8RNL1f!kYu*1Li(a)rvNjH;bip1*@c)mKbqGCvSBYmr&K|)YBX^+MxvJLNrtE4VcBY+hLnNmj9pX@r-6zr= z&FgDIOha|_1EEps&f#k;WjI|By2GAKVa}9rMtnM?MrL6-A!z$to#&i2zs1>8=@I?X zCC%ztJ;~%!aNj$k3Cx%0M4gIsULM0%Q`BIrBtI^!;De^>9~N@TNxJ)W@OO4hpKSW| z8Yey}K_{=lVFl&#JiA+Qa0a`7h`XmERDt4l3+J9CyP}zHAL<(lhLJdB7u9tfm}PTY zwYK%(D`$dJf#>1NOo_%hZC1x3GOF6>kt{#Qh=)4Qr;v|3#EDw0g+v)-&4dRYk=@Aa zhjF?_$g8n?Jf}&lpyiET8&e=3kws{b3K9*&U)o)4o;|AC`@^U4gbu%oPkBI#c9oy+!R7E{IQF|04NPlW7P`j)GPl=&a2o_E_b7qJjFslK z9@1IvR-}>QmrD!F)!MA6ee6$StAcQK0XXj;T}~+PKIcRXV5Fkm6Gzdjq_dS(_-I6_ z)J3Z8Wz_HgTGe>oyRp#jjYoOTpJA?**?@SR-DaivlV(GSr<^cGarh6z&96ZiRG4d& zxzPQ1kVhT0*xj7MwI~9LdAMM09Uq>!znNyyG#ez`gcbYX%~m7!Fa#35e@pD5?lm!I z`fMKET%CzHy4ph5QxE?JPHLA7U_wJJ#nim|h=E4ah)1ScU3!OoZHlkV`)4EvRcGt~ z(?fQ$dSUEyD_B{*h*Py6X67({fZ(t^S-KX?NWfoU)%e})M z!Nu@l`U%^rg&4|MrkZl+m{o6v?9!0H3i>JsPgFk=NRzr(^VRy7&gJ&`C+$_a;(D@Z z!^1`=%89@F_BjDWyg-yot?9;*eEq(B_id~gWb1=2jJ4Tv<-AwzuVY#!i8#o#AYZ-@ zrpm1b7Gv|gznF03L#VXR@o-*J)I+xB_AR%HoLXI0MZ|KDLbcWqGIvd0lKk}o=w|r$ zvzu$;y{}$T>eG#gJFJ7c-Q(kLpyzws_1!A?i8|HV5UCa1z;3Jg9w!bGq4CDuX> zGm{F!dKwClFW3BL8_ixh?sY&fV~GISs&P86Fu#EdLpNuyWH;4|mqdMGHzPgMt0vaU ziU|i-Y@*Z9>|dVAIwo@^howwc=_{G2q7-WLmn1zB&_N21h{)U?4r#E}OKvdISa4P7 z+Y7w|(jd@lJ}24n+Qa_h4jb=&xLcm`DvIqOkQ-4l&q~9r$lxqO37-Aww(bOO-*L0e+dXshxe9u>I? 
z*3G;dO1{74LC6eri9`X!&VoXcGf#I1=!6Qyqb9n*K4@;RqKEWon*a}$sgXz;?>#dj z<%;h?xYWYCsF&8I?OqH=>a7|5C1XbNvL6z+2wCor8}xj3e<#6$b|jy+m&P`2irLxa zUIgR?0EZvxKm@e5)>)h&f+;H-h*i!hqd-Z2h2sfjdNG{{V^v~tWe*FLAF8FdAF)gK zLrHVj4@N+#M)GF(}S6ULU=9JO6?F42HX%|8Vl^=K5}7f;ZF+x$K%7oz$W{ zgSz|0go`?DLY<`(g$!3EPb9n%t^XY~KS}AbnpI$)Vo>LYZd5>00_6MZ9P(Fw9jOE{ zKeaP1NaeYxvv^{D9FAa;WDhop%JG8>CH`#dPc=|;9NP=%d)>WFB(W=(=*Xpa=V4+# z=$S5`fNAeif<&J9bE$*mU+iT$$GWl1pwoxQjzUOb~Ws{ z(x1Nm!e(n04V$4U(}*vjdFQrWM$oc&=n}&Xrk!Hev0k~9P{-ni;$NZNvFyFoguYV18xUSHOy`7?%**k@lsv`h{9|!3ckq`8FED> z2bvjgd=ZIcOCd4R@9Q?OdI`fLA(fi#L+g?9p;46axC>p0dJOZMrsYiVpDj>UO%MiEHj7#4<$RMAaPtU#Xp zmdkebrW;qYtJ2XOf`yqYC{1qKIioK$4M^##*E+gIhB2f@NWk`?DTVOiQL)^v$rBWe!I9mm8YDkH@&vh(5_{na$|_d2xPQptxAXUlEDQ@P1KpA&-%)TdcK*6#? zl*=-^Ckpxm-hcq4QgC)+V3(_p*Q}XP42|Rly?+!r$ZtLwc3e(6n6No9(4~5egx}qB z%RHBWrQlf}*)(k!;)|yb^&4_drbL0R6A$cqfA?ZU1kMAB8uUcye!5q%r!BlTgE8pRf_@68+eGMy(TpErX6TvGhEu85F;_0 zRa+WV(_B$aVUXbAXBd94=+jo1N`x*+@9oUhwzLV!Xjf;iK#cFOA44R~zDdzTkn20$ zqXe!Ikj7?4L6O|Xz^S>QgtSw8p`ESZgH8JbnJ#9aBWSBgA;=o+q+8bMsg%s#u*MN~ zAZSe+n|5;4(qaIbN35ALVaGf&UCGiova>bAyufK_>kiK{;&l5qvl#{NRY?o8;Ax4c zjruNFMRify=_M7`b3X2fAjv(y1T0pDax5brwC6+OVQ5Q%FwVNibCPZWNDaLmQ_uT$ z#s(K7yu0#rO;u#tTwf3$l#)!Te4XN{6&rZ585WX2dD8D{i@UapZ4?LaSl|9EAKfP; z+%n5cDUs?+Q55hBzq@R3=u3L-sogGvs4^1 zgv~J_&s-Br+MGcdY{E`((PT#0Aogw1x~x;1M*(rDve#+efLC_cn9gQwJG-LD)%BJ6 z0U#3c6tXte>zMs~suFA%F{lA#^;$d#%4vT$a?Hm9-r#&}$J?Cwx{E`Z~VcZ0V1s5z7;m+r;qglc3F`6vm=W z&gj3eHmmysM~*Lx%8a)Vmi7))T~2TNc!C7Awe8N^f}=^3CKzhFi4Jm72|;UfPA%B& zLHq*kDUQXFO5n=KjKCiIR%|A3u1{=?5oU}e!r@PBh1pd<+#YYfZJo_J)ySNew zWA31*z4LJVp$#ScmHx6YLZNZNEKosGMh~w5O5;>z3m5+M-hxyDObjUPeD7&NT2hGH zNA3>>)+7U3Pz&3%sahdKb01I6D5+&e9h09(1JqY$82S`Gg{1f{8!Q-_1B|u0 z2J^TIT#hlhHmlR*D!h+MIGuUc-Oogp!Z084%k2*zN~pm9oH!>zNjz~zQ5?-L)sLAi zc=3;4wo%4d=8str+y6ndHFiMIPKYK&oV150!iRK~p*_@c9~=di^3}Cn-ocTZ#X&Fk>pQ+#+8P>t@)8E{6b@9x^v( z3Z)uXZ{)lr<>%xErl<(B`A$iiA%KD5K%o_xbUK6cwKS*JswR^{XZ%5I zpc}@Kd1yUy+|dDsm)Mj+jt{ZGeO4N5=y_EGi(Q%pqEX^{9{|v%M{c;Z1JPzQ{Jy;9 z1UzLt@^>R^>HNT#c`iD5X|mK%m|E)fFnP^L;R( zBS0footWQw)NBbI%Sz0R$`@LO-eYrVci82V;VrohPO)+ViGX`WKm;|GC`C(%+Bhv3 z_xAec+l&iqmocx{t24G_{$jmfEevfCWW>4|q1!oQ$x3?V)j6inW}*psfi1#lX)DDY zS|rbknz`m8Pk?vmXIH89$rVjgv2ANII%Pdq4h6Gpj_vEWGV4m9X$Ay# z-?TqiQo+6MfZyaOf{8LcerVf_Cn-8e3yXAKyS-(FX{Gnx9=|i%&}n^5?w3<*9PstN z+cOUJ;;64Kui*nSrFF=xO`2{y(tScD^T&1kNMGGS7ttd(C$G8eSpDP(8B*rqTym!f zhI!;kkasj&ph`JjU^6OHdO)n4ByUKBfe4eItjK8V^Dfv1(xPED-G;`zTXx2Kl8O*> z<81}{@STsyl!@#n(g&gz#>!rJcl9k2ry{W_rDGa{B9d|;iB&3Z^deH=MMX!v^i7Pcw_PFTO%c8$4IF+Y)x(RGPMpu4waJW5Sy9UFS*5bxCv8fKbJWHLf90m!Wu zZ|2z-*QZ=J^eYS8LkpE;QQe06G~QfC3m`lTSweYo>&@A}9i41$MVVwVr#2xUl>v5q z@^5h63;n=H1uiDVQet(>8X8;G+Iv)wJtl)gQ^pz!PMjFVI70sF>`CErF;iQ*blVsW zz0VDVGYbc+tTx7{Ni%%&Zt8Bc;|7;r0hdT_Qswp zu*9|&@^3G#OA{9&0kFJOOmTN>r8+=RLBreESoVse>Kv-snW7x>gp6P`_ZL*}-ry64u9>5z5>+|Rn~KI=oq6W6BDknFZJX`cV(!V{jFU+BYcRqh1+)}kiuX>96jV`U;p`q{_{ur&sP8W zzwJ-|+rAiQmI0@{V{Im}fV5mEB=TAjY==m5{gQLR!c`dOF|fJWYD`?dJI`R@hy!kk zyu0FJQBco|ar{Ys>m ziCQTmzk;C(Uo0hvwc6`!qEgGlsHpbwn@-?VGuGshF>Apamb&g1kP{Jf&jnWE(R7yT zuCdme` z#8L1GBM1FB>+t0ck&2SCDRBwfhF0Z_Mb99)c-eC4eAOj=DGTA2XBjUxcV@r@F(RkZG8ou_00ye$dq!?AsI_r9g6*2>uEs_i%w1c2;CS^dum7Fl z0*fGPJi7KeWWp^mF~Q|2LKI_bn@~kIP^Pg^2M#L?`afFXP=(c+tk?pA=_E4z_1_F^ zL=uzHe;?l8P72?x{V`})M@Rlc`ZP_f2%>@u@$#HUjHoP+IPgmGM`k=D^ojVGcApP0 z>_qJ}v+QKOw-%6?$n3_E!or}_&NvgFtL^XXg#$3t!8F=dn1JG^)ufy!M_w>`%>KlR*uWta^^VPwV z!za(4{OZY{JvsQ(PYWe|1w~ytv2(Q#Hy8R)w+WsE56ByhXbtz^T=h;ws4q#60%JG4 z5D+|6iMl{1FG`0dldi(&3H94%FPD@%{sG~bJ$DCa5&TE!TM7jzNKiG=hAIY_StW)l zRulGyQEAF3UG;j^E;N1Q97%-xU>tml52PC^n3LE?KNx3n-Apt 
z?kct*a%4{~j=p_;bM*2J2f6wuVlM(KEE=Srj(ze79i!z_d`t|p~F#!R@XnC zMr8OwOicQn zyE9@{Z_!V<-_Xt{H{6LpJ5MUdZpmmY*{hg-iPbOVj(8$HT1`8Kf9MwlRE4%6I; z9a(4yo!HI^=xR?A0tFV!`F_ov1j$U*)a`_xVxNdtB9s(aa*KGaqGaS5OBHki4#l8CDlpS%>#!v02X4N(0Cz0r@apVs z!$r=(!v%cthORqt0bRf!fjyE682Eg8B*m0WZi*daJ_A)O2qDxvVl<@6+4+GRDPMf( zKB%HAkGs&$9U~#Gk_&xHWM|p-PR?(X>`}I3&P0ld1&y6zbCkH5x3y(_kisPoLXcH| z&YY9FMx#^+7Xq$=e8KZ(mIRwd{YAN;aYf8b9aYgXIVdFA+2|m>>oY}X;?+F`vO5Q# z{BC5Wk4yJeTDY8BQKLnmV;EyIRwX6QZe?T5)r2BksOb^k(NK;MP!vQ=qAbCyYh3f@ zeuJ`UxwAb2CX)cVHV{aSmL;70cONwC8{fXgyqmUTgS8DZZX_d7HR{&)Vdo)f{pi-; zama(w>aa1O8wgMO7V}dbY6&_Wy{{y35f;KA?>VS(;}n!y>@9Igyj+GIMgQ0zqA~tA zj}91DKupwY3wW)&XI}R<2cOtur)?>1bP4Nm#!-AC+4NGE2SBd+Pn0!vDlq9)(+hc0^23-wvR>HrP+_c*J!k8vZ3(9+iM+?EDzV4ql@cTA4^JZ z_1__7dTWCE``1Da+rGJpM-&`AiaibSYda*CUxTNnRX3&i2uAPo!0iwd9yvye zjm9j^LhbrbdLKq3*E^2sy>GIaR!N+QkS1@dpl(cx_yz~E2{)mW4B}GV@nrq|;W$6( zz@o8*pyizm6`SHLn_FxFTImfIT;3Yf7x9v*|M4}17@ABd?}H zbLBs5tbQ12;I*^zi#I|JK{Y}bSE^Q@XX*F!w4uP>Q@sTz*K^mk5jIZ|g%>p+qwNGp zr}Y4VEk-Sa%g>7dn5F>dbSDLs{9%S3vlesAm^De1Tp0lC#6@jf04yGYs7aF8oe7Vg zg#grYE!9zm+M1+65Y<8S6P6zd3@MUzoA{YYd70a|Z=k{+6-*z%i(F@|4=)6)2I<<^ z#%b8L_|q8(AEK%pkH)lT-^38SRzahLNY1gsaG4kw&}WgNnu6Y4zpls z$Y&NW90dB%ZJf{UyXvNlt_u4qF-_{->KE(#U)3C5#J=CoY;`HC-2cgF+f&Lj-o&Ws zA{1v=Q(buIQZ+{#3L0Zt{v}c7cmHBwPh!?1H7o?E6)7tP_VdpvsF-D-wD6lVStog0 zjrmkk8g(Bl#Resr+a*zZ{o?xiVm*XREhe~IgXrtQA>K9h%m{8Do7VO@jHSsSq3!pr z85-j@82zdBx5^J_x<HCZtEgJQN~c5HJE+@@kpO15ns@Y<{orH>LKvG z`Ze2QKSo>b0x320k*#x% z9aU^&P+g_V7pGmQW&)S=rBifnH}Pia>#7W;@YdURr8CrqNf1x&670x)S%{_mnH`x6 z)K<{QHOLS?yCK>#D)YR?o%`>2H!$B@(3@R_cnsMp%F&TPSw&yOWl3L6(pE8Q;Aw5Q6V3;q^Ms^cjj`8U)w) zfElANEWm3yi~2U&k1K}5#R4IM(e~i z5cNsXuNNC_o?geO)lNF?T&P$qF@so(sSlE&s|yQI==||q3d?~5nOa+PN;3kPREo)! ze?PfYlAh-~AkkdP?Wh6+?F%8U$|5*BQXv05$NI!?LMu{*x+*sbVapjLX{z69uz+c( zGRTILKLhG#H_harkyTYx*5+2Y*1g%?`-rmD7v~pIzU^M{%?lHq4cfIXORLH23ba+m zgH?6KDyyj>?Y^W$5+jHkRKT1(pA@Q>IU<73j4vtrUM~&tyW*`t@T0c5@i@_xfsT)7 zMv4r12Fm-kWj%L{b4)>+{fO7Nb1*~6e=sl?DVx9sa#HTZOo?#Ex`}eS7uO0GihGul zpWC$?EPnT||MYU_zy0D@|F-}0moIHf{{zMEXh*a|d=wpdH+1ulEbjv<*XB8ANb2gDT6wYl|5LnBH zgmmzFRN5YCS0x|beg@uKiG!o_C-x7|I}phtP#h`DERSyv4!{4y&vC5d^e|^nJnPEn zlI>bV00iIYwkLiKE^k}#GdSK-lZ=~kQ2#5hr)2B@m3sfy;R-r^wuvb8u13PoNLF$o$b%>_ZxtUk#*<*IOoXp-2kR@ zPU$BdBrQt0`KGvRRLcEgxi9DEz8`lD4YLg_>AYEY_0TbfCk?7R*U@522+u!m)(gpH ze`QA-H6l{yW)<>pPI}A!<~vMQDR;Z-`izt<;2)`wL;P0>mSSeddc;`q6`f*OM=rv* zH2+w%?iy_!2;7g>R>@L>yXBHN9?A(t0iNmfpxeMs3LSPWA9ho>JAu)p`w(?1SI@1T1u z40|k>5JbYn^Qej9CQvwCq@6Pkt^|gqhh|mkI{JGEP@buEbFXQ5X^=|}bmhmV#eM6e zZ6({V^wUO6E;$ws*Au~zyN?L z>J)Npq!VWssSxC z#w~-(4&SvIQ}`*#WzXw3Hd$V3tNjM5cOriMOty;eJ$fc6_K=+3GJUxZ#0L|mgi-h* zLEB(}W~pWg<`h89MRJbKb_M{TXE`O9nO4hdgq#+H(vvzf)I&cv8;zig%_FI;#fFtd z#4ud)2ewUJJu?@9W1Hi(wRh5<=uldid=Go*wJol8bKkqNy8q?jgH4i|-ChwuLFRx8 z;+($&2((?-dV{=fX?yeZu|>w*w)(-1DI=CRf)zsiB`-_MjHPV!p~aXagXC!2wM-kw zQ4U-f+*wp-nmZvkDRpK=pa)lPvTd2Jat-o!a86u%gS+{+kksi!*M)wQ!Lr}j*djC3 zy3+(0bh4v)`9<^KtNC}kcP^1M8#@Xr2{NoBFISQw{52C{g*y_H+yW1j?4P0SK$y!a zSDW4f_>?a&U+LxK*N4NH?O)-N8MJy0l;M-{p}|c(!E365%HWWOMkVv z`7&GS1`_b&Aq2VwTujQbU{xkeSas#h1rG_4?VfXnEA_01x=kzTj&?+maFlwN&x{=x z<7?+Z&qMQYIYK7ED~@Rz@ZV~x1>4UktrZp#3X7~%ODYNv2{T#S)L}4OU3xO9$_qyq zY)s-3z)Ym`8WF*U*3-qF7u$3t;pKyFbeo5AuyRH3bUCz4G&hG1z%O@&h0B(%r~h zKF*30k)8F zTrZJhEy`p1_P5vHR`uV&beZYB24k8_Jd*UQbZ??)0x)Aw7<=m5$@K_uCRFc?7VHA} z=qJ_png99Qs;b}^{~tZiUT&@6y?N^rD<(=TxYk`jk`K(tV&hnjz@VZV9WO z*7=hOvYyPXDs^V*TvJ`r{Yt7pt3(*92~1wm=c@6XVumVK7)P zNw9N-os~&?D{vcjMW*F+xsn^=dJ9GMS_}4L(}wkLT(=J}30xyEX(DY}F6q!` zUPUoPI*ow5lbP>B?m#*oES@m%toI29H`q?5s`3=seGTd2F!G=8HyXOq<;j=%!s}NY 
zWSkW_a5WP?Za;(u^23Glb1}*jNg;76e@Gb+8brqq+PQB$Jy|apP0wz2CCS>ISPe>} zR!M-v&sBq8>^~QP=YRufz|_Yf<=yV*=Ea0BzmVYyx=s!)Ml9i~JJ0o)$Ec47wIt+| z{wudg*BN{lQ4W7TyTHw)+TmtkihHvAeC-*Y5hb~fxRItyfclob!4U^U5Cq-;(a}mg z_+;qm&uHJdHm#`q&1SHQQp_;A(SUhDwlyxkS^Kk-A-52v<_!rLr2CZDRaTD_pDi?A z0yKxY8?@=^i=_O*j2L5WKN_yosC$n9YK<$NSO4fv{F0nOc}R!ab#casZL;%~fw-a8 zCBPd8Jo(WlQ{>BhBLX9T@6K1QdWAnQlWpO`j@Wsca?MSDiW z_LE2sVPT}>` zXg8!mkT+J00W+ZHby4I;7Wne)2TyNrhF|1I@{Rnknmd~ZIZEw-DvL5rapjFa<6w|un0jVSEw7g~m=v}Q*U2~VO3#iYZJ%OJ>1uSZ~UOB>RNx9T^V1TkY zpJYyT*OM^qwk1G1H4LUzI{v|B=}P~dmJ_#Zoq{eA8ii-9DyJK~HQ?Y12OBg4QmR3h&3%qT_> zNan1lM#wh?AASM5u|?r_3g?Q5*=iUq;tr?#p<-g9HdCazfi#d`&%tL8O746zK5qbe zO5_2v^I!9I0`+8YP0VO6;+>6m-#3RyT-9ZOICziEW;xBIxeJMdePuD>B1jw(xhYw! zVp6!0-1K8!Gi6HNQ%6@iTCC46EWK)Re0=dl9|)YY56F;hYn)PuPARNZH)2mN&%b;k zQr8B1gF!K0u4}&@EAocc+<@-K$-L{$_4b3^><#0?$xGkNEaJ5L`~Ira{(fEX)kjcx zDhZI~e*`@`Ip*F9*+cs5j#Mu}8F|#p;_mKkkP8QQ$4k!z)~`r3fqrXGeO{!s{>MLEJmNZ+9gHn7ay+o0bpfYr zJAiBE$)BipIks($1V%AK%YHJHb|=kRX6VKTLidU24#QGgta&h9mchqD?3FQ;rqUGU z)KSJ;+$ID|fojbvm#v04rXuh1C|9bgjs%!)YnJAwkj;!ui|TaUpQ-9t%H&NGg6X?P z<5fcgQEayh-`1p5UoOGw!zKPkv{#rwB@96`-@TE;Czr4V%6%IF8J+>R%J?t~N zy5fW;i_S?HGMt+p)vvph<5KuG&_M~o?KpC!@jJtLHFjaTvko#8bK4mzcf!@ z$RJWz@Qnx^PgvC#w|a}4q=q)DVNwFEx7|$V^txs_V)8vG)Xe%4UY)m& ziv}QEYm%T@C0n}7m2hr&(P-j<3SLm#YI%Az)YkdzX#uPu?~}qZDniVuPjkYxt}#cU zG>em!Tu+ynWE1C9--VTaYBuJe>*y@ZD0Ew>?;ZFM(*Z`q$FYX+BVp+T1)ZL}GD~Ge zU-h&%#H8YcpA`DXY8B|)NWEP%PSYcM#TeIpJ4VCaDU;XQ#3nJ4m;n<7$ms8KUc6Hr zcWY9d#D-VT(G-vYz<>1O#p?X}@^DFc8JAREyx6B6;KgZsrzQ9NWX`jd)I0Ay}nC$ElyU}~k5RE`{;k_At95)@mG|HY0kO+ou{ zLS=jk##t7!B#3Dqz;ST)#85EV7Wdz4{lI=QOHQid!OnmG)$g^GFwrzRkoT^qZ|%;c zVB!}HtY!?%?F2ITP52oIFbW5WyI^OXP4jlj14hY-{Mqkke)ROo;>|r}0JYA+WjXcS z2qXisXy(D{4K`z5QNvQqveH1pS4qrLfAz4yfJ4v=5W9>q3env&B?MM*j*ge%F``xp zJy2kO|LyYN^8r)eiiZRV2$1ZPHTF9Twu@_U+AR(9_?i`=iRADJ6ZYUY)w|3kM$VbE z(H)hkcF8etrUV196ls`Ww@c7%2F6@{p^SJ8I*EJ{oFc5@Gx!cFFE3@MFv?mfmbCBC zd@xO=47GlJ3=0BMx%ssi;x)CG5xBUu2=)%iP)bNa6dXJi9TDIeH}@?%=l%f^d%X1( z%8UY8aHUaJzOxQ@lZL5PL%Eq##yU6FKc38BCY&WMxG45Ch5C) zXtbcu?&gln%!>Wl_2k`WQ(!zFPn|^Yo*T7va8oyIW1>!iqzHNWBu2+Mj5Ff|$kF$r z!b?2V9JP-|{)OX2#I#2)2K0M>{Cnb#fmh~vVzV}0A;_E*@5)Jg&RwB$4kG6ghM23( zap1MVEG9J&&0UfBFufr?A~idatwLNR(MiTT`_V3dz95Z*g{Sbj*BxSJ`}GPgO$U)> zHsg;^1##(oP%r>D-5F|oDPW+#JJZ)=s?v&(juE3)XbrohVmn}$%%04dT%aVSFF>607}OC=st;N4Emp}uDN(<>ClCbu=-+PkkCB+6gHp=$NI13eNX5gzJeBKDdn zKzZCmJ-7e_D%F>81K)gXTZ=pCZwSSOUF?3Z(>(Lml2cY(UHPa_AAW21*|$jdHPvf( zul|W#{^nYS542+JSIhyF_oINQi?6v0EY~N$lJ=VfP^I1Mhhor}-@^E!c6l~U zn!=o)9(Z^Ej5{<=)s!LtC|Lo;7(=c`jgC2YsM-k9lMilw33nG}bJvhJ2hb`lY-zj} zvkbLSM;jZvI;gj%z#fMTnYrkDMGRZKf3Eif@Z9&52XhTK_4)@`F?teZ*hV`kawM0TQD30iIu`5&VHz|UyO7Vp{ zs`OfPr)cGwpr@u(ryHXPkECTWy)ARw4Cd!?&InyU-096ML=QMp%mlpIltlwQ7AY3c zY7*tnD%`uMpFDP$w5mb=QNMKLF~31VcK1-Yb5`Z}A1~h$@TyXdXYcr)O$)%Dn2aua&-21j*KO;IPBS(ABrGGb8rg3dFUO3c@b08r}MY3*TE9 zNrDF=w7pwiy*CcTB&ir;Z5lQJYv;eYsfK&0a+>6i1YxO*TgIu;>yMdnQn8EE%QNNG z)yRPyqtwl|Mb{h#;<<_R;m(!Prr~HQ*q#d6{Igt4RoSwnIx`u6I&&Ir4Kw%Navq6~ zj#73=`?5mRCQ=b7?n+GbmZl8uq=Sp@2?*WOaf%k-vgg}Q8GP%K(F$W6dsU6}Hmr|U zW!aNGK-TDy_wbe>wotuxpOSR?@t8GA>E|N?ea|%}P|d`PFUAxinblAQ{TWH$&8-x- zE#hKN`sXyxW-L|+!fs^j!mewv|8*_4MT}v{Ai;TXRrR(mXqT4xe z9zdVxpH&!}vZ{S$+MH>yMOYe0CWg>$^H6F-J2TlLwe17MH=e7m6!|mUJ4)h&l zXP<#`!7mw|`O|w4)_U`*-8ocuD+`GtYkgC=tHygEM!k4vr4t-@=C&_LD8~^Q!ShoG zI~KcEmdz00zO9FenrNU=E!Ql zXy#Fr0AVXNk7W!A>)>qx4hCL&xgKZHN!{`3Jx*hk9s^}2_)!2>)sC&$q6K-`@7|u{ z<;N)@iAk?&<STsr_vB{*~B4vlSsR=4Oimg_X>xwE~-yVyCR#xt1{Dio-8 zL<|=o1GaV7gim`k+%p3H-?w`mLhIP3v)HAc6G-Mp+XcS0^g^-vrS-qr8xy_Twl;Ah#qNf?y6Afr zvd=yV8IJk3?#U5xMJY&6$(Y6{?woU5nnc0UNMpqwIVy5_0h{i(zqbtuaNM(SuIk#U 
zG-y|=}1Xhea#L;gEB*TQ}aZ$afbu;rcfY3O60y$2hnY@=R+1MPB zQRF7)Og|rbT!&EkwWyhxh}PRUt|)Is@YvsD`dduC#uBtQqnO$WXG7$O!ivHJemeYI zoNes-t1FWYa}c}!Uv)N{Oo6boalG<>c)j2LlqT#k;*ty|r0I-|dXh0Fzp!CNKTqSG zt3pe|ktRdPRjOPBgi&I?557EkgAWise`TqChe0wYa>o; z{33NIFPmD}cMH;W6X!XNwy_THeMF0c08CcXATV~)K^<`62to#zs3%h6;RJ5t66>9U z8lfR`yW*3s{Rsy+@iP{zYN$!K5(|1lvUL*H*Ot;miY6Vk``~*e%d*$h{A!MAT3x>} zqHTF`VaBMOZiU>pD5~#})=O_If<7ebFP@k1)XgVYa0BI!1{(w$hP;{&ES3{bhA( z1?k9D*YX=r6FEa+Q81}QF)KzO#Y_f~&2Ej}%#PYVHzQ!`sWn(Nl6D||$h}Wx&FD(D z+@Rf4h7mz`CWZFo@eY{Otv2o7ipEkgtfM{>08=b7e~CnE^7PNVMXPT0ethmmD#6Y2Wghiy#Ligpd2{eXJV%INVtCLtr*-3yRc>|;bn z^mj;;D~g3|#=-pK7L51$T8tzhY1WFaCI|GJTS`emve2YyMSPJ#wF{~b(L|U9E#Rb4 zc`0(cT(wLy!@Bu#6mM(7!STD%1dD^~t8?Okg=>&AsCkK9?Fn?ezEiE}69q#Wdz41Q zn>M)QwfGcZTdq5F482EYCc>!{ranM6tW2P|u2}(WtHnK9WW5v7Xs)$2>CvdI_jV>Q z@vYp_n(;iycY7Pj2G?)&47Cvpcnp**-cW*6k0ux5cx8-^iftdd6GOG_tqmQlRvgp7 zV{AMni>R{tV zQ8^hzO-WMZGO_em=NPeNogj-H*2l*8i5lGpcKZtO&<}mQap9n#P~;Y_1SW~aH(5^^ z9GR~C78l4-cO|s|oS<$toqS zhg+M7QcT0V42z&HoJ>6A7@j~2VSk`geXvfRrN+F-sN%ntee6^f6Bn^;Lr+o}DgolE2iQBDCOZ_8L8SC79-$v* z`p<$mm;TTwSxTP5#qNz?8M3k9OUPWDSK_3@Nr{g`u6hH4!L&U1Aj8^|T)B~7F{N@> z*!}b{g$xa8CsD@?AGZUx)>RV{#RBwDU4V@huzZYv*fym>x_$Y_fnbI}pmO+=~bg0{4Cym>lI z!bFMyF}m~%k3Qxf3N5kd99j~P?YPq1oGy5Y(dzQB8f=o21#9dfO!`I7*K6Fsl#}+f zIc&5iuW`cIzbL{`Zys|Hi4?gCugw<;bn8i;x>zC`do07HXFscqhDo7>1OD9>0e8&z zUH^WosYN7s%U%Z8PATf>KNpZ%FrPiBvitngzxn!1UEeC__etJ%b;TAjE9%9u*TXJqx?{_JL;_MZy3 z{MRP*A5*xQ>LsxWyLVQ%>+m|M!M<+t) zn73Va7BrJ;R0M%~ge^C|6K#5FWJ|x2_Qyg|-;?!Vhr(Ol4UvjaGWUuqzEd}Cc2FPY=5}M{t-^Fah2}kN3Bg|zpNjNj|5`tuakdhq|oW{MkbkPx+j-f&y zo4tA^iJbnd69Ld9L)Cna8;C=4qvR7Ynt#ioNO;e|`sOB^bq_|4#q3}G{$KuOtq9AD ztIL}fWam<-3(%$+qp><3K3I=_oruB`R@dHHqNci3wkr%3Ef|htHk)NxvAZSUe>ft5}KSkb#tXAkQ<0GyPp$n^~Ln}*ms$4eDgLN zN^dhwNH~w-Ms2O-xLI4}WO>+DEZ}x4aI?X^fh)RG$CbN{-!`^-^9&tmbx6&kQA~1* z>D#eqgw!$12%qdXJFktV-RUIiY;J#}7nYW6ZDUUXveBWNreV+)gd26yY#`{y0Kjq7(Po-Q?v1|PACFB*cD^Q>KSs_E zIQao5^@wW9AEO(%UEKbM=uF5`5=(KkF70<+1DkI z=6D~Nf8+8Wh~o@#++_YEcZ}HdRN3AdXVZoGCr`5c5GyI$QAM-;;^%>8;$_cB9*;2I z@rW}&HH)p;ELXFAD3v-kS!TZH&Rc&;O$C0MrH^HJN|nXodis?L1^M#r_eAf30E))CgsVL?fQbcdRFryh>_1 zgVOB)a9tWp_Z6vzmZtHg)>^Xn|KIp6A01t|}vzw4zX$tj?dok4ZBL zF2tM*fU$7DDqDY|mfJ`kW;R7;tuFu<$2`3{mj`+jU0uYY()mjpJrhMBk*X_{ z1UC`H%yLEnIlW2W$vAzaQZ5(iGe^nU``n`iAqV!L^r9-m9S!+g+dSz@VIfoMRynZ^ z2Fg(~G4av8I|5+>`AKQ3Bk$8*Cvww)-knA_G zdA8W-dcR&SI(N|sM3R}fpDBCGFHThXYt$l2xyS011iHPoT%zMr@*ac`<)qf9Bx4;N zbcjpTlhnR=57OQy;{N(8DcveajZjjN5jiO?%hv?)eX$&b){*er9gH1Eqh6zO;E_C* zXWP%2( zEc)RCyV{Y@!t)s|QH)wcp7deZ6BOE=1lS29b8crc0G!;DS|KggMXyNP0CF7U+ur6! zIHBg={YK@0{mm@Vg}5I^t?lSU*?H%vC-_#%Ycm)H^aytQd=M)U7GEv<@T6pwuycBD{Ed$2);`S|IFd zjoAC0T?u5QE~nFsQBTL+z}#XMb)4KlL-@!SlMig&fAq@|5xiQLg^4D4s8`(5xFKQ4 zl6%_=Z*`Ab8zU}{^Q^55iu1}Ovn&bWVhNzXTvE#-Y#*Rfl^FQlzx|?BO6Vg~A45zn zf|ltj6Lo}rAu8n@dNWX1`d5s2YdweQO+fT;n^vYslvE_XHIb;&a#nZ$=-imSS5Fj` zARoVbpeL#uD3q-WGk4iA6o+zxhInE`)<{+F?&Jlidkm3x&9N_1nssB1=((gbmV$WB zu!^08Ez_KxZn!+O%CnR*JoBu;h7V@+oGv{Dtw8?RNxyE9p28oov?edL@^A=UxeO7nr%i{W=a+u&9@b@%BfI}4VFjd}lsEb+EoH~dn#_GD!i zXe$!Z9mL=dOG8C0?Zjko)yMhD<7s`ilT1wv{iT?)ZD1l#4)^PYK6z_Hnc}lPufKcN z?KH}#xR90NujG9wCv@lwN}C8mo4*M5O3B}7s#m!6t4o-lpQ%)FC#QYFF(@$zvZdr! zNq;I7O|tf?-Q`*kqMxut$u<(a zx-RydAg!4%FSJ_Nt~E&A(>yV)Tf_YN)t|^6hCDraStQ8i`cMy6>!TBUc}{A4{bp5E zsD4mCygG*qcRIvMPr^%t%oaC$LtHQxsN&G)SLeJ*94G67u}ZG6hkLf0Xmq=KpUwb= z;WnYAn}2k|nFjDeP+ad9};=|rR*LfW;T3V!@ptmCc#Vcl1Iupg=lj0v<^HWI6kLg6X= zVsh_vS?gQ~H_3?hdV{2|qsVQyf_3FLwA>eJ?*LF$8bKlOJl)NTu`S)gU`d$+6@bzK z*%rv(m?_-4=4H`i5;rMKiq!ym=09-cqaWQY5fu;&rU<7H73{TFm_Lg1=HCcHR&Wb! 
zz;UK?&rY6S@lpI)o%+Bp9G|~@e*G0+K8KyZeEzt3bh^r)5{wc7U0%76f>SP;qnG@J zkG*{S@?-v9J$}(zokeq3=dCA~tJR~ct5P&i9WQt8*H7{_-*7b)=WowXP>6)!*k=bnxoK76mD1 zSIjw9@#Ly?{gR6^6eqmE#On>IaPsu6KjHZ5*%2d3e9jy^{`%?p3!XcC{$fQhpFL@v zUA=tVf;WqSd!ObogXih*yq9JYyYv3t^kF()e{=7H^*1`^o0s?CjQHOdt&O;g?bU=^bB$bV39Dl%4#3@yrU((USd(b9BRjv8O4Lx0V& zq~_)OcVFJwy4yH@zi})H%jbL;$M-&1pYP*zzK_%SK8AyawS2$z)nh1qiAC$F4w%?y z7p-GHb)FfGa&1vt9hUvZ%6_#vZX2MqkFMG&!`_+;C=kTUbt|Wa#XkZu=-e+)j#a{# zo<*lXU*Uf^Ogk!S1k=^S8{$TeigaN@@rcPC?Irlt8n>%fuN-w3gW|zr)SXr1QMYJ( zI9m<|avcSa`z~8uUMkjhElZx!z2vYvYjrxUYTTN(`20a@*jn;t)Z+o)%c`}#-5R$B zty$}!Yz?}tnGUMfVAvXLw;r^Xd^c-7c+gt32J_ZV>wsSlCapaVT7zC|-a6>B-{o?_ zBao{_YkSsO_FMhdsN33kuvaWb{oQ`mDf*pi+?|$_Ub{6{H|EVx_IiCjEM~)UFeqj_ z)Ba>$?DWcMxihb(?bc{9>n)le=)#0NlXBkQ?hpF&W%FXi=k0GMI@e_A_@J5=(|)(d ztrpY4ve@Q!d)2&{RXYlEhQ*-TofpGuxXp><-J&yIXnYCG=KY;=FyYhfac5b~`-6F( z$=IEahefYHDISdbqxo#N+^LF#{=8Srmc#AwU`C7it>8Ko5qxpVC17@@KoLMi*ZGKzK2IXkZ&#KQB<)GLbPs>q%hl`9S@zefnJScSn zSYkLG?+&Vm#m=C=vls1{R`c{>Z#>?c9rUMzV!J=>mhpA!U%vnNV02JUJF|j$n2g7x zIBC0_Zl_V}ji>#?@n}v{W?Xo$nicbLG4ECBdktS)VLt6M<%6o-tI7_|D*^BayYu!$ z1Em4+x5a0RF|dcBFNWoGuNZiSdgWlZ=#LmVF!r$O^cTZo*5fi$Zr#?{YaIE2vpdBB zf>g!zJCeI8V>m~K3Us4i4*G}d>vtHsy>iwY_NRSr(5WUBFC*rEznmhDZiDB~auAZ@ zmd9*Z8rp;Ys7mt@b5zgSfM#&ho!o+X*`uGGVtz12l;$%eeH$%UOe9FY1+&^=O6SG) z07;A+&Z=R5r$606@Z6SpZ(3FH`*Iqy?$0G!gRO{;cBrez%Y4^wDnh&PR(* zyjdV%#hd{c_2UEncKVE}Pu0;4-3;w%$U}u<3=m0Are{9Je|IoO5$9EjVo&!=%m6a- z8u^o~4Evo9GasUWa>bbMAf?kPiBU2QP01>@D^AScyTN0m6rn37i_y+}VI`Q=mQ1!! zzi=Z%R1nsfc?9Whv6tq**H7}GPj$z&ijUEr80e&aGaE_7amtNtI8!ec(-Gedsu4Dd z`NT}l#|%DlPRan&TTG_o&SGbt%tR9VWI@)`Kg*i27;p=;W<9fUQZg+KDe2Kf?utbw zV4wH?DfXwgm|`Ce%e`K)hmq{{4~vriVw?76i)o3W>rb=6iLTYEquG3T(x zkTs*vdYrRl~zF>4%4RWB&Ml?ZA?v zg|6bw`izVKmQ`)-H^R>)Z3(Mm@e8Yr{hn(6Vt#Oj)A<-fgCJBg_6c5O|C)_g7`rg; zfQMS8G$cLx7yjMf@Ahp`LjUb3jTro$jND>qmtXGajG3itF$o!YRX;UnF)gX{yG$oW zuX;F_BkX{9@{eC@U+adx1|UfOXnr`rHtK+h>tJo?AQRNe)~4ag-O|t@ON8YcxbTW| zZOVkPNeo{0ycuuAr!qa|Tn`HtzDM78T#fG)Ly@~E_%Kr^a_=cK_M&joPrmomqj5?ZnIjQ-w zz1t^x=v1f=U~XqRp4oH@Bzj7l2U+J|y|G*6riL5F`H9QZ$qp3;`9{1MKK&Q@a&#HOToFzYH?>aNE z4-_%)Er#2QP?^Cv?vG~jmus)tFm1MZ+!O17<$JipV5}paICD-8L1$Cg9Ah7>l; z>~MS|u+re!YDySFi;YhMtVBGanPRLyD|dg+9Ld?Wrm1TN-LM!etf$pv)>Xx5%#!4>slJf`DqqT_n%>X`af{aj44 z3>%X`ju3E8tss*zkqACHn~f|;f~htK{u0!*zg#N_=Cv@uf)#;MsOgOA;bedi&?tvt zF+_P3M^!fj&M~NezNtK+?>mdx!~vYA>tT{?(GsbNQmfj`C|quaE)9bNE7Tw9XS)kg zM}SINeX}F(U36s~6E|IIm?QNtVi0i;a~(>Eq2XQ*aEzQHj#$DNe0YWUo9nk z$NO3wH2gRUg8WxeYJWBa(1{k9wx?x}1b#ZIs^N_ZF#yT)kxg`X#5xSHk}84{y{4e( zrUXb+%jr(BVRn*R9*#$%=Z3>EEDoV9u{QJ5srJfwIm3&A=0kNYP;_Po90S(fu4IfX zG|fN;)(-|PN3V#AM#zX`6sKjB0!n3Ke#tgdW>j{jgC=hT1pjppMjbzWa z+uxlFJtVKUc1&jSHo$j^PfKVC%p>!Y_BLHq?yRftv(DpW>~=zYc5W?p0U3J?)gmbhMPwTlOJvyMzXv zg^0@6eKiwJMynN_TI1H2PU;q7d;`QK*~UcmM~m?S+KskO9R!6jao4R7+0vs@9zwh; zJn!p~z9a&3&ter5I1kO{Pu*aABHyUtFM`Y)vJoE|hUP#8em6r)Iv6Jakr+fs zWM;cYCgFu&i};r110hSr=iD$vE$DB4y$}(?P z@r!L3zWJi2@~dg2bZAI9mstbAOdk!X|=}dEIc2HNH?n z;ATW))Mh8RAg1M(ilTk%^?RksAdyB4*rYUrtsZV8@|;flXR`vXf+Qc^*cBrUSOCOJ z?<**dFjPLM`7NMTW}(8;$TKWP((M967G&3f@T-$+3>P;s{i%c$c^za4e6N&Q@}3&m z4MMdrcBK)X{id!z5G_5ejO%2=)~QIwZmR4i<2)A}k`9L);3Xs9Ngv5c1t)4(CgZ2B zk_SMz=?9_OHt1E1LxW%5AY;W^kGocG03+Bk3)q6uZ6JN&Q@K11aLt3cux}3}dr!uSvvYp}U6cnG)PEb(l)y6*IR;zhW zh6Zj-Voru4gH3P_UR7RnJ5O$fNB6gOQ!^#i>zRy`@r68kJmq8>@bHZG;8n9iCI z9xj0K6JSCL(3EU8LK-m{Q|2EL-&sbcEM)kmSS7PK-jnJWoP+`iXQy9aJe8~ivzpPV z&Adz&_b0rBNJS&Jt8O0yG3qJ@3LbbhzwoLnN!UqQIU5`uG!0+M4!>$A;`9-CO&Lt` zdQNK6V|Y@OInt|SjveF4GZ!0!*fa$-dA3d@g*}R2#I-Pp43OGLe6Xof2@px6kSMUc zyo#pIv2S)ESh3dy0MbPyRO5&BKy?P?g2dDaQcNg08O~%wWjYg@0{ta;HJc!ZBV`{q z*yD`)gFitNAPhoENgTXv58h$(FNkTCG|C 
zp^et|0>V~fFk6sA8C3f)`4U!v%YatlI>tn-Sn?UrkSrnqj9xSt#aXAC0Rw}F=8^(e zw5OVN2Ou}zxRk_=3o}`JR@3BGI*(^LmCtaZJ2rz?coj0lCf4ixHwh z`05B~YZvb(U_SXS$zil7@`o<}$U(V9$Df<q zrQFUsk_QA-KwJ3hc3r9~avX=#$=kQ*D*q?Q1+?4!E`#oNq~vZR+nqI>=RVM;r=ZYkVEvJZnL1h9ONAr;b{+zPrlARdPK*Pq!U{*D|q z;LO3OV+N&0b+mbkRx^Q&XUhEOR zj&x0d(msJ9sAnWXYg#4!w8jb=wUgKzS%lYUigkrn&ms8IFo%_((V8T=6L$g{r(qhc;wt=y;IR<#PR&=+mB zJ0633on9sCLWHoENx>&&U@`tSksCIw+Do)llI=cW5{~S3-YOH2Hv$}aode?i5m;&v z`3XLndHmPO=xplk%o=l(^C>&vi)1=oAc;)@NF9poM7@W}Ylae2Kt>hOtL_M-#BFv_ z6=~mWM6u(5%j%W`I0%ha<~nGhY3nRFYpg+7xD*{4gr@>b-V*jLkWN)8Mz+bsodTI@ z3M(bvYqo2nVpUvPOySxrZosxz9h0rN@=ugU(gL+y(rV`yqtqe)rr;J6YSz!=I$GSh zLrj}E2@d?}I-E#@yg;jV00erT5{9b2F83YUt3-=Q-WI4 zw^<^4Ll`O7VQfZZG>PF7rl>=v7~h-x3*1^DzQ7W>yj7K(Oy8sjyFzr8YK;QQF>8bd zhvj5RGG|#f?dcv-TnGQWEThMo+iXizWClR4Z1^jO#|W0RA&o zB`L2P0n z%GOzcv_Aq0*tQ8%Dy<8B41aWED!@p!dB7_o=8!$J3J`%MS1sR;M!XnNTA+&b*J-z7 zF1nlGD=8?kU~#vG+4Sfb7Uss+you3?C9p2ZkcKCv(x`gcY+__s{Eo(uf2Xx7k#*Nz z5eV_x8hMC@hT142(oXU2x+-L|GCA`zuV0stEJ3snnmWoS- z;mLw2N3Hg3qtqS13NRV|g`Bu~g6;9cdKPq}rXjKh>CC1q8p+-~ld;}k<_sa)Qc-(P zE&zYYY+-91Zd=g(%NUMODt`d-+AH_^R2MU3K&d`J4D?PZf>uDt{4_)L`oryMpPX{? z?YV13+@D-bIW=h+0}*LE6;P#kl2PbmSW|QZ#(mXHtv?(s_U5UkwmFOS(5*j2OESnY zlUbjVgrGZJ2*+3kg)Kc6uP~aE1#$0MqoDj~zkH<(nh>8#D0mwsQ+f9h}_Mc>0&2rY#2c#aKZS3#xoMjoS1)xTyBu856NOEb0^bF>KPYCMnlJ;3v{Ru;QpC+W64ny2@gDvr-$>M>C}O#OxjU;NY5)!nK0|Af!W4uhOBE!{Rn76|l5v3}QY-@s@MM*s zyOEvj4?+kVz6}vFry6q$kJ%+Ps41rIHMocRV6b>!@RwH3mIQV+6;|6$;e*TtQ!K9n z_(e8FZafV^+p9@W;7b@vbUe)Ts}l#3;H<1wDK`?$USn%tGo=t5n5EJibVN3t@%7)? z4w2b-Dr=tOoftJakxa@XX#IUoKu;<7pVC2*0InuYG$r&+tXZ-zRMim`Mz8`vZmgyF zj&0HbLM(N%FjcX5JhQ3>VU{YCwVbV4cjF$76Iq!;a>JW8u0f_z3&=7hMCnC1ujI#F z8Dl0IsQ|tPF0v>UV^#;y+OJz`HS;vQz!5k!t!-cs;M6FEr0YQ8wB7-2_JkRMPmPt1AM~dm&@@fr#>EeyVi|mc?Ye-ryeL`ZWzbSW+ zWh|3Rbv9zxi28SG`6cN*zy*!)E2&B`5;Zf0>CX5k3qTvzpE+4*1rVwvqoT;5MtL#x zz??>jTV_@R^u@d>t=}xeRnpON?GEaOVf`acRajdu<&8RKlDv*cQj%j)+cMpBuj+BiX%Y&p7q*!p#}}f2wdq?-Ud4lJxmuT^(R$z55DmAZ3P1oD6SB!k zGH#7g3gDHCAu=pydeDs=Fe(O3v1I+(`o1=)H$cKiABUDf9kt)%jr2igzC9ne!??kF zqOo2i#H9DoN>YMm0-QX*k*x4}mhN?Q?nNdy7?c|T1_f6wlWIq4cn~4Jy$gyOLBg7S zt(}6|ET6m{e$V{_)>n?HIb;DdHc(V6uor7Q@8bbGi#khPPNA)_A$*BY{crYv6TFFp2K=HWB)}TAY*+O!1 zXLV(7+oiqhkwV1qsnqpFb)ln$Ot(1p9*yCX$Wnv(sV~hL?~>ZlHcXo;P zs1A{BGUpmkGH{I&g*8)!5efv%aNf*+d4_T*7j+*lCZ7=0K^>nVt>XI)hl4MnHs z@iN5+e>fa2ntVkfg;5MR^(94Gj?;y3P(-HvP`P!Aon;UsIk360gE&Vl56g6R;Wt2Z z3VhJl8?&8PGluG`lx%}f3r&yl#)mPyX;q3_Qy2IenOeNW?X7c=KrDZn0G~qdFd7@T zbs-$VkJc>?opeh1JED~EH2P5r6w}<>*5!7^3ad|4P>>CmzoA1E2eB|p#IE;s zI6%9-{8s6Km=)2YGAhP@k%QevQbH7x;RuxoYqzn0(AFs8z+DXCkQ*~ZcsRT=FpPzb zauy;`X;m&pkmE@oQ4$L6NqTUA#IOU$9?f1D-H7Gu*|u-i^M|)%l-Je*_72y{sca*_j?1E?y@S6)COGhJ*`2}|g3NQGZkuzI1c=1&=~ zq)OEF^s)zQ7(kVbgSfloPgKmtDimewP5U&wsG(+U0dTXV*k#A+AdJOy-7a+-eCX60 zN`fAwLhlZ7tqCwTje!XX(MZ^4Lse0u!C)K2sKR^}!=#W*l@Ua08UMRzGMuMe8v-w_GZmaAxCG#bSI)_{i0$Z;dQ;Zk6 zLZlJcC?8L$2s)_TKNs%r;BdvQ(^8D-LBGR5g!!q5OyLW%hBV?7&;*>ttgQQ;*(Q5P zIWu`6#uCmbkGU=om7=7_EXzo5>U|3F8-Y&aQ1uv8nX1*4y2>A6sDy^qPaA?dtYw^~ zm(=UaB~V8O-5`=gi(Hj*k~`8GKAaC^G`VYvak`P(c))Ph1*lTWHn}itV3LFsJArbv z^pKh9sf!N6=xx}pjhbT|6A?Ldpc}U)!35gytm{w`WS0vc$v1m>Hr-fzG|4lQ!YzhLmNOq{{?Dt1ubEwC0Lx$gH|L=vS8H2HvPqGgz1!0ikdGR{R(yLW0S8iD(bK z^7?E8XyLLUwR4<5T4-uyVJY|^fkXJm&ApgKQ9+`S8ValnhpqEPJD zEk6kch5X=nl4Pc;faKo;WP8NYCsBy$WrtWWVykADop|Z+|F~o!gBj%AWN~;XpVb#D z&5kykSqwCqx*)16uqLUjWuvT1tQ*5`v$+manFz%l)*NaNwim zc+9qB2=88!-fgmk@<=IfifQQaQxbEJZQpHl&^GOnWAvJ@)%mczt^3JLRWzanbKpAILzC2D2UNA9f-A?B^!jI(&FLL(WF<}QD6X>`+hmObJYJc(*U z1fL(U3(bgvKg5mRm?>d^6n`nb9V=9rKr47mPE4Zv)i5_DYiam3zsr&w!AnOS55E&1 z&?7$Zyb!QSr5FVn9LjKP+#|ab=Cl-i7RQD|6$v(a54^}KNluruqQDVxSXa19aIrx0 
zzTPJ!%qHl6!`E(LHG-hgKFcYkFeA2_vX_X0u_%FwF(qZ0>!IzZHm*U5?_%v{!CZ=c z8y|$L>I!!-Ja{ol^?=LYXqb@528l9~C?@6L?48Q{V>bQwE<~-fHzp#f9Zqg2WtH*F zeFX)n+Az!|Pes5bvo0a&3wcJy)AkuDm@eC~0fnj&?E@PdkJ9{@KL>?I8|{bJ>j~Bg_>tsV^yPh+;&0! zk;`JhC8_GJS)EY;wL(O8U3#9K#_=M*Z=`WDA zX|}+o`_*!texQY^D=9#b_%!=A^A>)XL9QJnjV5wKv%NMgZ@9@_@0!qQ0)ZeO$fV>< zl7EtlY`wA#<&32&$hxb4MkUpPhu|skL+ot{uGadoR6C;)%cY{;BS%nkm)t$83Dd?^ z41ee-057llNrg1)jN67b$P^};2{a7o08}JEAf7^Xaw(LpYwS9GNPUSEsTa{|%s{U3 zaI~GyM;7DvYw+R)@P1H=x$lO>JTcD<<@MH)(n)aV%@(TVnAfLgqd+MZ~DD4xr`MjvSSGY zNX$WX#x#wWtg6Q=ZKYC^C4}@yZ=`at0vJWA-I-e!2)Mw->u5uN{7nx`Pkzp}UiS+p z;55DE!P)snf9t3(_RjLim`g}>30MFaZHsH-tret)X?JHa$z9Z!Vvuc(?)ca!0W`vnXW>C$|!t*Mjost2g?-joBbw?pYYl~t^K9*?A0Q z_=Ek$kZ3#X*>ae78CEvHh7E4|Zir%}3$VStb|a&DqtJmxTIAhg_gA+^5FDZa5sUi^ zW9J3dB7FnxIa}whdOsG1GuJLwJ0f(~)RoTv1Q`KsUQknAHiZIW znqbDp@1uOeV{V~p8X=#+o=>Fm`BjkaR+ zm@zY|UH2L->=^Tru6a(plQO0n_AeC$F5YiKYtvB!ctAa@^hi4f1a(5V_lVw1#f&UIr- z1#i)CnGzt(dWRU%ZAdnjp=NZ43vK5#q=0Xt8XZyRx_d%G33hQg`GfYTEo?Yj5cp|o zy$-WB9AG?ZU)SxnOLOB*qWSGQCkmzA#X1#?EVEd7_a5Dt%qOsieJyLhJm^0Rotr9y z+m&sEGm{4xE@!}1s6uTBf*FRshTZmXuIHbyUg;b194x+_h}!_fq7N(sC84n5S&9A+sKkpd*o;W>5t>ASx@<|9 z1Mv>DuCN6XCKjv%DX5>@@fvsWMv|BiITqh|`Dq<@TSF^>5!nEh@@#1{{ek4mHl5J}@h7XG4iZ7yhS#HJ#KnbL^>Y?0}%|W4I;j zSS^#%kuRw2HrtD-OEMg)9rrLP!D%dsrANkMHxAk69!zFh+oOm$C_Gw@2D1}{2q33x zN|rl<=!r0KZV1+ptn_D69wanyD>mLsLTmT8<{``sHQuT*B2XLTUmfH?XJhy_>XCO# zFjSN1K)jmUKt!E7m&0c9P!$7?D56oozqx(P!$nt;5}*QM5JTS3!5lh-*&%f9Lx+@{Y>ctj<>R2+kzlHELUz)MJtXixzuM z6RypOA1guDU-D-|p|_1x^csEoDg(CUTr)51fVKfO-32>#x5^K&fr-?vE5y|JK~O!b zC#F0$Wzvfj1wPd@D|O(_9l_zGT%3YG!5CGFQA#%Y@i;?zqG}0}+*}cc`rtp}GjBKD|%4%`~*#c&SDyXQ~9gGt0n%adp<1V#XVG_|+ z(iDNa;YWE+5}}zzWzo+&VX9`sMRrLJ><3fOo1q8SMMmooS=(kc5LVshL3xO6BlEg5lk=U^W#e@>T?dY^iGgnNg9_IVYU>c@c8j^DkvXbFHf9zEKaseA6fF zy}8@asd|465j9FgC&)xzL))N1+4*5RhxD~k3Krg#&U^rT1jrNqAkkyi<&KnNvm9jP zHcQ%=lf>TSGxS-`ivg?`4{7h>C|#<>jvm5sIV_|_DT1}wrXkAG)X}&vZXYQ2wx$nmkq{G)l2R6H>;;NeAzmT_IXtHHx)RdGGW^U zyy|@*g;Ya^*izQAB=-j_BE8xldgmW`5p8r5+;)T^_TaJomCc>%|gVUuz_u^Iy?a=nWnbb^B3fsKSRL^GfRI(uJ<>JSDSdkbY3VH!{i z1A2hrs-x$9d7pYyhZ-!{l-uTU)36o6^W?s>B}l@@MlIsxs3Imw1YFN9H;mk*Y;Fxg z`4}bzB;c@&#d zg##0&cVHE%@5Lm^6dHw0X$P~2*N{A(r{l6V%h@_4|M z$6j)JO2%5i-SFr$l@&Z90v?M5xm6s~l@?JNOUv@YW=k+Av6LeK%P}rq?*e4h^sIw4 zHiTe%JrU?Y%!-oXc@M6Z+wr`f6#S+-OhJxyYH)oOMSg}(-a8BHr0sfDn4jPxAwa4H zKs{JhHj$6P(mX&>L}XBy%-z*aFr46n=YkFZ$7=6P1#o_zoswA_IVT`>1?hmZ+6Eh{ zhIWn14DTOcW-#wqvPkacz)4TSYj{tpkU;z?TE+Nyja;A*P$uVMazHRUl-%)xISIx? 
zwKvI%EGav%8_VT8Z2MdsQe#Rr3iw+q-k3M)zO_BL%z}JuuAi2pU&gKZZRs8iXDUsa0XsI!-X-lAdqI1CZDxl#Q)wCXQ3k)184n1d$VF3Qsde&B z7T3!=*|o6WO&n(gptEww5<1E(`aGRX32Zc1yZ3vA-v;@%u`lTusl>qU3;p?{#ldx+fmQY06=#JYcaw_aP9SL8)+H-I7PWP(KMR?U^?3h}|B zuzmns&6+_lhd5v!JSKGH&55U~Km&kenO{GofMgwHU9<3>V75UVl;l#^tCzhhto9ax z88sF7m%Cl76Vct{B(NHF(+U)qoYFKyN;Ti4I8W!srecatm1ujWi*LRSvK%}TTbhPMbA1*=5 z_56u&S#vdpe_SIG3}^uR^%4*$ZenZX_XFl?)VU^d#IDm=J)unRX(z z_BuLM4Hwfu)=+!}5MT2{1_=tUAocaUxZ(mVTO97Z*85b*%9V_QJhZeiuv z@`vmx+od37o!W+sw5V8S9y-@ct>#*^Owm1KQ}VnGa}oipX$@6>DM!oQAV3*0%?{sa zE@=R-d7-D#q#p0XN(`YbSYoezI0!7FY%-yzo_SY19HgCE%s(t3CwQP~fJN-GpaPzT zXxka8Z0LS3XGhnf99Tnzkb(#%nZP+wfw8;h+)#>)25cOm#vk%MJ z5*v(pMro_D=2~U2?y04VatTtpq(O&^v?dNGsM9Pxs&Pr_?@C*0oC)Ur_wcVmF13JbTo)kjyG&$L2F@)9L9Wxyokqz40Df znpL<1AT&D^r{Sf)a6T0E)WKEbYTKpu1`t5C(AD2SgA&rJY)FwG%HM0w4%p_7JFOSSKh>XEN^87tl1iZb4kPR;g-GL{ODN)I*Z3 zdKANFJVl6{kT%GtwPdr2#npslDe^ZOe{%F%_nPDyPs8}92u$!j{2U*f63Eh&w_-Cx z%xRc>dr;P7$HLQagbaw>D%ELNO-G?I-)!NU-MY9cZRA+f`8bP6r+O>$dVKfFyH=u{ zvY-|T!L(Q3*ET$^N~N<^XI+4+U`{_$k%Gc)+G1t^QJ%~WMQLO;6Mb4+Bw4zMbpZi+ zsvO3zx1*3_NRCtN+rn+j6=K_L3j*Fm1AA4UjVebERL+Yo;OsW<>~dt!n@#kDyKqH|-7L}YgNo%hl$;(y0S$@yp~Y3m z%=nXGL2}rZnw`0ULn_i@071IkRzlRJNQY1>w%TILi?zx$l z#F+yKNC`E%ujiF`L?}>0up_&eu_wU%zz)fU6y6#kv4By#nx3BR0-@Yf!hONr2xy#~ zx0d0Ew9R&Hk@T>_Ix1mA5%*xr9>yD(P#yz4)hfCg#I7HAjc#gT23-hCjzQGVq(bdG z!3q&p>Y*_ezHGM|gPOT{+&zaKvI_N_cq3TYrHmBUTJ7${EmfK`#KT&SHm8nU;q_)0bBP!$E8 zBSWgbVFdWZE1xVBq45|qJ+r+84GfUmIhCQ5w^In*KRk5A8v~N1m+9C3XEEs+WNIvN zkPHCw!l;o=XgoSVdkFHhFw|H!YFjyyWQ0=nT~Ndbr3KvPR`{k~%)Hp8(RpE(d5v*I zm*d1?2P|?3p!G#K*B%g4kO_|l%JqGj29ub(E{f zYod=T_yn{L9OX!Ia{YzIE^;DlgTen;JfzSlK|~ZX(d~#zoy)-6d#UTvg_DD@eh5?k zd~eJ~Qxh(ecIklKvaeRChxmHZ)CPsYl34BZE?=hHfp9=oV3QbD|2TlgA2ZLwky^wVo6y8H z6IykXyk*9$a(=VkZWZ1jHYRZ7#~3LMoK$dEQ8q%!_Cp6K1ZKi8HQI^)gszjl);P;| z!6VgiN4s+f%M5m5*|Z7{=%w3WDng4`AZzktSX>$=G0GXbU}a4pdVz5~nGQ zX?w(YB6Kv@vd=iWJ(_FTeYK5;&4Jp%kcgbKhtf~&3P&?W;@>D=LBQz4a9ibY;LV)a z6=+SHVg^Oed0a7^$*>p^U$Pexd$Ve#zsh$ZIDr(!VV_DI)vk`v*!+1tkAubC`WUZO zA9wp7q-y$rwUk`svTAK+m2nb>LUf*M%VCm}4sk%p!>CU{1cA(CVl8nOd;z(^(pb_Q zW}KxXjfB+_Tgvq6aS|*;K{s-pa*TMS^(#u)nGfgwcor2KipL6tAc?~+0zrXfb{ed) zjbnRZQkE(((S7qCxso;Lz}n+blre&gQf&rtS?IM^P-|-l)EAD4_>n>m_VM@DlZiQT z%<83%!Q7G^TKiEHO-Rp?8;7;0N(*>=Z;E))$e=noQJW+2+GTA7{yoJTlzF%WOO8#Z zSwpI@D5N4TI~lWTknkGOW7S#SB8+H2o+NU1BdkKK5`+rNt7ORqYjZ@`L887g~Qq7eR$e8*? 
z%>_HziB^g7(MTf!dtyas))#%CE~x5AfFRrVv^%E_N8Y_zOA1AjW{5;Kw1%?;z^eEH zQ0(!-LX5?{a6so!WL^fo5 zfUipAljbL*#v-cqC}CQXcOZG6vKTtfeSGK#tL>4D9hS213PqL91(e8)*pD!fN;BAo zC;RPjk)zBTJ_Jk&t1A?YqmDpV12@4z5nFYM^#hHeQ9%n+{1(sgQ|eX8rxYv`&{NA` zc#Ri9-t3vf0PhPe7?vq9A|I0oEkh9rsaA3cBH*OhYgdn-f&+;5hewR#R0iaol!Ar} zV4I86G%xC6UL^w}%25V3D6NU0nhWu&xmQw~O8c6W?De<3J&ELvy-L^>J)S|DbNc$+ zjwShs`XVrWUdIgnP@f<_h}`H7uGF!cNf|}=+f*N^92ooHNu{;g?xtkN2s+AYOe!J) z4k>`_!fEB2ob=E+qZkh}08%?)l%%nG+;x|12ndysOO>~y4q-!vJgf_tkH->3#-1Bt z7)ONq!|^?U8X`k|R*%m`dg7 zWuPs42TBFe1yRCv(2h@u3`bN}j6X}xX}MtCq#lx)kbvZx-OMrd@Gjx=HU9D2uwxVw z;va+48Y?@fEi`By@gs{tqAx_!M1hAk45Bx(jm^b@4EckcumjmB^iq`X_WLF6>;PM3 zaa9{s+zt)vddFaS#QF1BH@815n~GjF#UpG{PP*y0Vp{dNv4rSQz@TzE4b?BSmY|&y z59qz4J>e{pGtqqxpk4@#qn*))s33O@VDh1|otbL!x)8cRP7H(rI_vILi?aGml?1E; zgmAipx7sG(%cF7ORvYV&Xlp7DRuzzqh>|#@ezbK%%4KaYrR)%QTuMubt{#2kT3a7o-8xl4~?B8hH{mf=D;TPp|l2m;YeQ-Y!quHLz z#ioq|mNs2{uVNPK4tN9pBW08KtNuXZOW3aoQYtI3Rk46OS5GNEwug=N>PWkTYp^C{ zzIMh{`VDP2!X-)io|Tpq2yvp#-~-iuMkM_JzJ?9z6Z)YFARST~OJtz?3NtHQSKWka zo)wel;nS9tUSL4Qauy|?xv=d z#3)sMsv_YE%%XP2nx>_+1WpxWl*hftli;ztgu9gYf*`_*u;4)fhsW0erG03JCA@|l zbb0OFqYdsno3N1ZWq8?QtvtZYNpADr?B?W_+%wd=%^V}bXT^quj53ucUdE#+~ z7~gF?E7(o-DDe=7hvqbJHq87GMxVzy(fJuCu$oeAIBP0_Bw>@_rcs+dl;U(%;7IKs1b5xW9?Ab{4Q z|AfoB3qRLIs)fFwaYlyJu&Cmz%UF?8AyBZv$G2X7{c!ZIc-{h;65hgkJfE1)3PgE?q7a&2%5{8!8 zg5(~o1rk?Q)nOJ|Do`Q+i9xh$5_AG0LlE*{j3s7I3HVkv4MGi>gTX>D4G=jc4H8Yq zzzzn7?14*?kEkdq89R$np0GANQEQ3RkpUokB+Jje5Xg)UI6=+P_)(wIXL`D5W?xJQ zg*`MRmbp8*K4;Ak`bZX?IGu=}d8!s@rXQ50haq{{I1|F+uiY+E8RLK_%y%R01m7>G zd1xFc!UNHAMG#-HKYiNmL@QZX@B>96PDvpl_z&AC@Nh8cwh_g z2q|eIdxd0$JWvc7+abq9Qa#W~`?|XJnT)Ocfkbotm?;7T+?FS^VD+201l76Wp22D( z**{VVWDo{ybhBMAk6W^9mzEtF?-+!-Kzf?V4Md0^r~`%GpD4 z?Qv(q2xm@tkAQ6-_ zrwBdTgn+B@s8Dhr#vwuPPVzWt~Qn(bQhR80E ztAeg(TPC*BVcycFDakWSz$d*?$(O?gbat$@R22$wSB4PjSu2_|MQjlx&s?Vp?E^|7 z@{_g*14Ufz&XZcfNvk+zxWF;9?sB9@+4$50z(!S51`8Dwk`JV|h-bFjothn}PuO16 zhu}rkz36`ZVST6?6oEWHIh=w(EFueZXN4$CMWT3;pw(;<*N4icu~VQn>O{TMA~&jE z*DN}*{hs!}T~N0S>6XwpyD=&-Qji6pM;)ZAhDVKe6biZ$aZlmlF*1t z^x?Ib)N}>34so!j)*=BO=)-9<)7#-@$bD;F(ZpI zC=nhMfm_+c2kyxbmdbUW6#zNGgh4_~B0}|63G4|XBe2t6s)?R%+>H~-Td33QRGFa>vfrCLhtW=JVgLCBBu7t@>0D^Pq zBx*A+)p9I|s(`qHQ4GWhp&4>ZB{v2J?lFQvEQTH~WYCLOU$9CXCzn4J*1#CD@ZUon zJ8ZFp5N~6eT0?v{N-_)@oDd5z5+`t@#bJIF8W)RmP3@@M0h7sNCJ`L%KobL{BA=Lf z7q!JRk+)eEk)BnVn1lynDf3QG%B3p>rm(W`RK;4oj&LCS<)RA#N-Zm`g){^x?t!~p zp&bZYY&$`?>K@tnQAKR9jixokPp5Q;Dm!H4@Nm!tIB}4U4(eU&qO!-8V{rx0>V8^X z=YV{O>%t}|MF~OYpjv>Ft0rw>c7}<(O`8)a5<@AKO`))TT(VLSiJCy2a)`e(S*yq6 z8l$OflKtXAFlbK)RHS4Nqn=`Ae!!>oqMmdpkyFJDF|q0;;j|9n-gqKThX9KSq8F5S z$53k(Uiz67B7LfLEvdrTcdJ;V9+WaZ?lTw;ed-culhODW(x_zkSc*Ob_9ZGKjNujHeaJc@Nu3_-$OD4vjZk?I zgi15a#L9920VSK5_sNVkK0J>_mK49j%2}}x#*&^$u8g*e9JZd=ssy|Q9w_?>Dp4(i zm{+VEM_`>+JCc_n$uit$ zWeH#{ggh3rOQW!%wp18pZBT0BP$Z28aSx2-Kr+FyJO}hM1s_( z_8bcRU>E_V@{eIE-4;VUnT1>sIH~T{zj$DytL%TSv@H5sOBK3b*TfCk*~uREkeaku z;6|uQcO$|@`Ni&%?zW?`s*`m^5=gwLR8osFXOm@QOMxVJSpYANIug>P<9m3b%tE+2#;UKvk<8 zKLBE9br+gLqVyYEh|hP1PgKEgeTnc zt7I6E2)CXsR(E+;W2P{9B(VcE=2CH&iyni~*E%UZ41jBRmJwc0HLX<4!9HVTof9v^ ztsq1|<`7owR^+5B1b6a6AJu(}@en}%VEq}3@uf*D@^!XEq6LZpaxj35sr-pNc*IqT zSQ52^&UD30S$nu?vV;TbxWa#gD-$ykN75_WrdA^O3{{Gg<^LjM=+8tnwegfxtwAE= zN;UP8mvj|0hiDgdl6114q-7U+9S^I=b5@XFlw>%>X)759PSGxcg#9+=Y>lg=1+{f- z37N35a{;nftYWc&V}nCPJ4N@A+BII`(gO5V$I4_0WBMbohxK#SJaTTdxx)QVAd>jW^zf@$cWbX0(*Iz0UW5Et9}s+S4eD;_mu?cDG-PApf~bN^L?I)aY^lO6dW4W`re|F#p}0Sw z1SA|)qw(b0K5jLhzH9hlG;DuB*;Oh4&{ka{5J~f+zP9h6Z)DEJ={zt%h33c*hRGd4 zP?!s>F}LF?+*suV9Oc56uC_)K9P&;?#zXuzNjqFGUJ?1hBAApCz~0XEb_E(zK>=8q 
z(n%4PA`62fWkYTD5*1b^0N!)DeFv2U|GCj-CBwet9QnTAK$x#%;TL>8nd)XnBB?s0r1y_O1 zWzVB*a+Vmc86_q{qlCY#$np_Lm35cI;7d5n=q{qdV}Wgsp%DG#UYGH!rAjJ7MZ!73 z(~yP0{PJiNB?UoJsihVJOu>X@M080ojOTy3a-0!NfhD9i55B_*D%r+{*Mc>QR0SIZmn+KSlCXMcGQ4AF#&_krSf`D+24O zWQlhoH4AIf$q5*!eS}RmwzClO)tpCuM!k&H2AU&?bkh~6iXI7T4{NI3s;l)ve%=`g z6|(Y{h`dh{>EOf;Q%X2*@2b3w#H4FW9cV_h2A20&xquoxh$YoNmteAY%}Qcup%dNl{ioP31|z6K99ygJPd@?H4sxy-=gW( zGCdcAT{1~XV>`WqStrs}<#6#kQ4J9DhKPBdn3moWqtR#VmAa7c(bidk_2L|F6XvsV z)GQ&=(`DQQlz}l;CJAT@JlVxOB-nxS+7s}Jvy+4>1g)A&WxX+<{RzQrWXrs#i6&G9 zFFYuo;12y32%_i$rO}+`siF@+bZp{q>J^%Zz-M@RegJokSYi2e3rq)srKm70zCuRd zJ**TfVM8|Yqg29Tz+hV73yA_45XcV-z1mo(4jQk90Xi|laMd3cI|L2W#Vn<$VE4EY zW{v%NNZiY^kW3@tr?u#@N|%OFJ27T4D%~Q|G}S+ZY3Oc~-XT*k(sS&n6>%`lxMBw^ zS;2MD4H4!kRDh;tuPOX-`V|txUZ$39u8fkR(R#W# zkfYDzR!EC$D%od}WHaR5A)20Ah))nkW+@g{5Z|EC16^}cmIjZ?yKGqygwThGHS3pg z$zDuPK%ExgvxtD0hk>J@94Ek<=fPka*8zrVFy zE_$40$pD=pS|NhOYxoVYFUI^K7S;8UUhgjY#=nx(dL5PwMjA2Hjg?Ssy+m`!`^Lc@ z&m&Fy7Pk2t;UiKJK2>Y%cJvIbIGGjhS~5a3#K?MhSnck_fzrUNMZkDq(#-U;(4W1~ z2_dEDOYo9n_=0#a3Oo9sEl-nKQF$kSCDGE_GlMfpCu7WYNnTEha;5V%G) zKF>=bbgOpTV^*8OkX9pH$8@2Rxjjh~Yb{eL$$;m2$@d^S?L-ongFUS)k?!cLh|c+u zm%53?K{S-+w=oz9?#WN?hwIdGEk0r26F!C|4-7n}V2o{fLF;S=%p=B#g5)FV>Ff?H zG=Vs(&FpxA#RFLgjEJiaQ~*?HlO*D|k_nC<09h3=sxOXa!=?V`>@?!?rC zYh&cI(n4pD6Yd!dI>J7wA#AI~l)=~YaA_UPN^G$XRKzSBhGj?iS+x_>Z4e=ry-nSO zSoR&E3ch5YGvtvtjd%crvWHo&PhNi z!x=iss38IRh4_-^Q9c?QugVc+JOg0cOog5RQWpzpATlhop=I|cngf+jw*MSR8k|rDq|xuB&Lj7A1(;zjOE6wY4Z`P z>-7l_QfE(Uf^n~#Q?N*UOi{1)jx@aY8qRXLt)e^#Lp&`Mr7_H}svKqj;&=6GuSnZ0;kR@^+2ZTR@P*4Pc8(Wax;AtfA zr_zFb8caM)4Fmfa_Vkb!iqwgZAzOLoC~L@yW(}I3sx1Kq6}@cZt#Y?fF5BdP?x#u zUU9%ikvn4K^-0;mU3a;z@vyQcChrw`Y9VGihx80eA{|$m)eb-m*oOz=!K48IVikGZEF*X77%P(2XrATKeE$~==oVr*CkA66aA9%@BtS45Cx z9$Y0lN72asBI-z>Fg1>+cd&nP1jiccMscfO*aOgTnHL^NI| zP!GPd;wq}Vj6U*O_a=$k6GrpQs6ehGZP*olqs1Y_e!d(i>tm?vS6qY_9V zo>E|*TrgP*w%;jB*mg$_+yH^3tjWfGuS+V8k_k94JrPJ%NivdHaiFHF(_FtH)dTf{ zA*iBZ0NDXz+>0)R_LIyo^6D%GRF&>6Bn&F08g+q*ND zo|Cgd{BU=U0n8~yJWEIwatByoZ~&X26#N3fN`t|5*-00Uwiz>y?Dq8?Q&jLPZlvJ-kJmmsey2RXa z11-zMAyV*5yh&9qph!7vx z5+5>!8e35v>LY?p3=qq_72-%o@k7=hZ8Ws(AB@-t8DqklX^g+-_y48sP5j$9l0DDd zd!J@{?|b$rtDoBvEl?zN>-ho!kb-!CA^;w;Y#78r!U6#_08*k_{pMfa?=Lc|P*sH7 zv-A0^Ef%UGv$FDtjEpNI(bgM#Ai+k|f+pH8su$QeOkk)`#Z@A?;3imsrn02jamy0- zaHLe}9K?b|CoAgCe!*{+K@%cU!;-mUlC>gRF!L*D{EGVS@n%hIsn`H}wlEkVge|=c zorXl3PLS9Rbu&03QBXgk+Y$p=t2ObdMy09v@O-bi2_tCurf1kef{h{XB7T+2I!z6f zsIku&KRPo&Gw^kWNo*s?P+-zGd+8_IQb?<>EB88HI zdRe$2PA>y_c8Xi=0#7B)}OogVulVoiDc zfy|iCxeT;oDdp2I;ru$Ohc({uc}kvY4$jwkR5Qf{esLhAOR7J6g5Zd2jo^7 zY0ip4*}@>9%3>3zAC{9k2)qlj(c|o0W2Q-|O4ni*Av_@XNUiCSw@e2gu8SRO(#x4i z4rM8gHk1rb&fDls*#oh6iri<;LpZ{Dj`tLiN5=Ouiw?^KmDf%VZ&>dlS#5nBm#%xtdiR7)N)q zM8XBYdJqhN!1J(NRq)yDaxjcq&sBe%)bZ$$QrUYlHQ06`VJs}W(TPrnq7NXq_^pav z^ojz0ZL<_+B{2N}xr7$6hDn1i*4xA_`YrI1#MdyjZe3dYJ@`P25(ZH)t^NRltIYX_&r7BYvRq5$CRPhd->bXN&xNpGA;X8{M?P z9+<)p0^ODJP4k+5u=QWZC4-n8JnaRVO_Wh(6yq%mwp<*57=rvW^CVH&`uJ5gG|WlkL1P#cLzLJ$E6ULg;KlhurG&xlJAxJ z$+UOOZ_r>*?Nz7Ape4;GgcNojr3eepF|^EBY>uB2({)(o#6gfzydkt~=gCu3Uv1!; zh(iy|9^${~F(v{bpbrZJ4aF3M&4Sksbz%$5J9O{>ho-m3uR*xt>m$#YQ8lwvfzfO{ zM|J#`K?Cnkd6qV$gptLp`$Pv(fDrIPMg`UoG(gB-d;(GmGSEbkC>a!)y7KFRjAFaS zCB){Z&H<3!)7WQht{@@pKVp=aN2qqth#)?oFJRqd3U4Z+h42L3j#*iQn0zXBT{Rt+|z zLm^bdhCnA0(jr-7Kr{xP--SfaOYv>7H6bBFGWA1No+qT8V`vwjTCf z(Fj?kj?jcdgA@r=t0SKw^o_?F5kq-6lu!WSbx_!A7WQfd@BC1BB)7g2D)G9s{RQvT zhz8~rdk*lR7q9Wu@5^Iff+(r>SH3FBx+OX0sK$DvVL6m$uNW0jR-yzG?G!!I6rrEw z$^8l<2NHX8F`7PVEN`SL)=nzu$EbLuz(3}Z%0@78YQ-&uEOtI!S9lR<&{o`MTC$bjDvko#(nliJ@PEIcsFv(aMz9 zcKISXMMTiz$d>G_DW|X;Z0nTyRDqfIaL}fQz#1_zn4w@{VOiNSg)7pY;XKl-mPMwW 
zsQ*#r7|H)wg!dI5MTq90f?24d2D%_fApVVBT*S)++YCkS6AV}k4vHvrosb}ybW?R< z)j)o)Gr2gFrUBcnvuPv=^VUMA0a$`YhWWK^M z7RWZ6YyJr*z(ZW&@XcWn-QT7|MERf!&{;M~n$%;wW3&9kv-FDj&xptz#`LA9i?6Gj z+K*pDkZM|g`V%Y*66h$CAE7?Of($5?H{DHVQXlR`ZCI_U?3HfT$tT?~3;>E=EWh@J z<_>=M#>&9F-?Lu-75&h=pN7@k40``T<{T}cCyp5#7WG|BmmsgH9EAp+_}5V{VNV6h+=gjpw08Kh znuUJBgn(G6=FtV%@1=~8VTroHZ%2o(nUyxue2>gH zILM~(yUCFTy9%C;Kw`@u%Q7nrRkpc_{t%jkT0;AcB3MbSJ=o6Slq2*H3|U!GXJLjI zNYp?(;Gss$hgOR4_Jmi{n#uR&+97e}KygC|JcUUGIRJ-lg)Jk#Ec^X<*VKf_f%3bN z0)Sb-^oFfKBP%JRq0wYGIpYtGvAC{Po%NlOXT)AZnOWvu!@scb!2GL&U;E*Jpb!S2 zE}IKq$7UtHK8L?+sL)+Yf)w+?6O)wkUPDqjs|L{q*$bSIN1-(4mSy898kN&i8m$+k zZ{X_?+arnk3@#*Gk(rtHrx??~Gh9>HtifC~Nf)M<@uN{JGRP>nFW~Ory}wSSlm#d3 zh*UJQD1KnoU#QVA_s~^B1MT1_qlpf)a6}vvdWYHnNZa06%q)f*y$%Bl!G?l1A@!#y zQ#joRm;5GZU3-HN-TslO5vg)y{3&v8MV#Y#b6zq_Yn## z^dB|q0fm31gKLqsUc&5N2qOVAmCq zky`B`yRpgVSUd>&7rGtEU%DcWy2WKh`hA#0Xk+b|5jwEf;Ng%650TVu&k{zfeTIo) zfs5S5VFV0-;Ipz3i3iq!9;aLBV{s`&JCtD-OAH!S)Z%iD5VvawbL06Zg$YMQAXQO2 z@R~jY{h(#+QqyiNs>}uv_IBXKrj=61V96`gAI-E=#8XcV5k-_@mc@%~#70HRpd9o7 zoNSFWM>n(boL2V;1C0U&vm$xGAXq5H@hjsErTB$pL#a;h&~^wImBH?c*=Zjcsjfo@ zVO6ztUNyllZikoQhnSKTl-IDsr)<>R{}@CU;wWt2Rmn0=xgwUXB861*A&Tnt;aX*BB%D};g@ zVR<`v!=Ne2vt2kf#D(Oj?&M2e91Sk~km zhO-dOV)n!4gBrG&ZJKirY!n827E7$=;y@+%%BdK&EWD^%ND6#1Cy&+@@*axv^yza{ zbFx)oE2W||$oA!xiGFAfjxZ$P1Q@`pFik>UIb%d@p>4tE){D08}c*yJc+tTM_e z6_`w)pk(zeJT@E#xYj(78U?8t_LA$~4yt)sEzm~rbrk9d;3UBMBJRjP!z`~vX<{xQ zrfT+5MQJs-velo!q>L~9OnqdXRjB;p!3(Ae^c2;TBJzm7nDg!l+y#5ZSkjRd5#3mi zxPqu0al(Qp-)L4E8@mr)k6|O9n2MqEK;YQ1na$j)0`_9<09*dYB&8@h0Cb*JOajKWPU!xVf=Qv|H9L zQ%SM{z#nMcrwf>ll?blkH6_z+HOdqZOsk|8QE;2{8S@ z6#}`#KS2o#AQX`hO*es>9Nv$uRVGfWTF4|)+?WB9Ml*1f2~|X&%7UeP#qeV4)!_C^ z=8&NP0p=uX6P-X&;^wq+qxdp|HS@2ze{$kiJ!FpJ9(oQrrs5b2Vxs^pzCn?I3xd_3 zP5-$5vE^PmsdXs>E6@2Zar5eg%ygk4W0c^ns&3Klfkjs_>8bxO*9Inb7Ds|2l< zm4!4&I5s(zj)7ck9_BABYc;M?p`5_rp{8d;6V!z@96=4;4r^E$^FK@n%oSR1=%v|m zG~KY6LU!=eQXmto53JZ*_%g~F%u!108I$q}RXwaCPt#r* z+Of(LS4a0!Ibjx)kxUFN=mqU&>ptF=^h~tJb}T%n;hv?Uc*6{&!yv%SbT+W?Q^5;$ zm$86qajF782%nJX=rLG_g2?uHasL{6%@D~=067rZlRf{HZOndf(@-`-IYV4w#|Vmp zgeiFkr(Wp#0bPi{(qkwpiy$m|40(_S7~LH{)7B71&aqF#72#w+-yY?k=+z}v+Q9F zXH8_z;mk*LWX6Sr*TrWlUMI&l$czdLlzId9vg%YN&Ni4#jAWU`k1j@q$&7=1?LC$z z?s0V7WD^PAAKdBjsD{!(k|WfmxXm5pF6s*|EGTdkiX!m} z?1K?{TStd=SU5z<4FZ#eFjbhM4{)NUOPIhFb`r5Tizn8ahC+_*B_I|An@f`&8EjN| z0{>chd&)LGChw*lS*ZXU75Np*1r{GsqPSpIL5ic$-8Za&NS!PXmHK1#uC|NHP=|R| zp>B0JSMiAw;B)aELUG2cd>ofLctNpc7$NCbxN{g>)>*hD_DaB2a(OFPh8gh{5=t4B5p3ieV} zJPU;M8Z|h^{{wkh_P_(8TGhqi!NsBtszKBm#SNX5BA+Wr5X>vPvccP@RA>X$ba9(T zR=3#UM_2%~?pRT(A$ea5fF^5`qF@t^!&$=vh`W z2u70&Gf*5pJa)82be<{}OI)td=T%G0a%LmFj@|&0gx>)i8{&&uS`qtLmGS+;{!t@P zc7^UFJ}Uiigtig=Dq1#aN#V@lE;!g*I~C9Xp-uwLx3L?s-=M@g!MDLYnQf`5)=zv- zz_w$#t7&>wiN?n-C(glEgLOpdfDAB}Coj~+F!xOkK*Ncb;A$9e;keO$;j5(03fs^? 
zNbp;;Nyv~1yT~^n!Ue5j7-HBdp%CT!@C{tVPUCcf?U2!jwy`i2IU52a56&=PTnWvt z+Wi`VqjADA2DwKFrIO46sfp1*sn4)-oHk|ll$gY-f!K>MEjb=W3lj}e4BZ8*&)5|o z?crisIeLo|2n1N112Q#QOiWbbJTM$-AIh8JmgLl^tieK15Y>VXX!8T>4Wy0N4^KIq z9vWMGBNl<0p(mg->0+`_6BOh}XkQ!k0%Q>m4ptwrNT*Fyn0!U~+j82t%!=@js9|ed zz%Y5I;Dy2Rg==XIUnLYrGATqlwwW+n&{t-*qRuK96_PzZfnv7sTSrQkR>NbC#Ca9U zp%chb&?vfx;g)*>ToNmq*cWkFtbN!qq|m?(LL3fch20}am8fJ4Egvcd_f+J>w;yG6-?R1vEm zdQQY(88?tF4nYH0Rq3~mPibER)%r2cKebIfx-qCpJcW}$V3Er$BwFSk&QpOyBFYMy z{vsvA5Uv^WzhTH;AQwo@vkha+m3PZ9z=yRBnCV@jLzF2VZIE%9dk`AJ%yB_p-a9C; z80_H~QmgJFIl}^qCTiEQ94qjsWeGXm(a_QuCSSz=Ym|qJ3njv^GTNBdF$?h`TOl^< z7(s9hQ-n}R2^;><9TAcGRpk*0e1Hv#^bG8YrZoFQkhl@ji}GiE)vRrjA7_+u4NnzR zb9m?!pR1K<6C+fl$47RN{ub?Olir7A*roOeO=~#la6tk?!a8gqJ)yK98c2n8KnLuA zX)h3~AbxPwwbuwn3?sE?7EoQZfUrMM6(MU$^+&W~Hxx$_}oHTU)%*+}|y{ZocIXx&}|!APYk#>k0=kR4e#R5b#**2WAQM zdo==koW4cVeaIhl4+RRooFIqxvm?(`*-`<+z)A*{IpSrG^E2PbSLow*4skZeU6bg5 zI51U3#R;)PM-WG}_?81_2|cv?w)%GC2(>O=@3OuUy@0l&!8m0SkX%FQ3JS~2T3|p< z;KMMTV$?f9jfRV5<3yY0h`tPo+(hMHY)Y1c60kf0V_B@%3TO;)*g@;0=NcrCN@Drn zI!3_=Iv?@3f6NP{38O*g9SkP&uY?4^$R0H-Z(dbSrI*DBi17f4kw26jr#xfHWp~yF zvH}bYJc#)GU8utm3<~Ld3ud>5O&Z=Cf5&%gHKsucf}}F!Q9O)v<9&4 zdz?}B3Rl!O+b{&%^btD^`8z~6!m@_!N~i2?U#UPPj$w7lmi0EB()gUb%SA-3flOq}x-s2sZirwZZYCp)df(V|{ zXS*9P`>-HIf+TQIFnTd!YSi$JqxPISk4%J^LHS42tFAxpF&=^Fn3f-5JguRvDOcf~ z(5*!4kDfe7U&FA7FT(~dG5ny2K@WZt-Vi;BhU^hKup&GVe9#*xMjide0X=bAB`PTJ z-KS6KKWredbqq5tXO~WyKYlmGcJdyZF_6OIeE@_JeT}i-Q9b}Za=Dl?Y4+qzNNon85#2lG@Xrwn@{lL<8Pn84PIu-;Gs%C zvjtNzz&4l~+fSm`1bNve=V%PnHboeN0=Ai}<~WTR_`)#QPi# zDm`vlp@NUm`(O=Z%|v!aYrD6BsB~Im6DMjzEIzOHps7;ZMFkumkgGUf14aue1!)3z zcM;rul;O>7+)P-z@!;8OZoEOmgYAyLA|iM51bVE60~gHVCQ2T@i9HQQeUJGGf;DPe zIE9PBBTfoct)L$u^fY3$tkn2qVM@R%farf*+1wWw+T3jHOU+O_E)~dMzn`_Zq38!9r z)Qg}N^K5P7`6j>nd#qguw;OCv!mYGg$70QjUOYru!%{6y>}k2U(X1W3ZK7eSmY=Pa zA8#BB$C@Q{;m4?j;5wk1Z=Y|iJua*@VZNTRaS!jmlNZm|UsN77_fE?AIXz!1zs3I( zz6)Q4*Tv(Lx3${a*OkrZFE*b%ds*A~R!p3@G?KMuJO=P?`^(r2{oW#zK&C^=( z^rXTj-`88k*R={_C^Qo0qEM@zJZAsR^U~J(<41+HlOjFG+O%_0+<&z8M>E%(PRG-ik^9h|jwpw_(RkkDXWF}LPv3LXQGVB)^&-2QMT1c^zM4<2 z=Fz=wuiw5J&cAP8&Bxu|oX^~M@7}q0cip=+_pa^Uow;{&_ip6gUAuRE_ipOm_1wFU z?%lq7*K+T6-Ma(#?#jJux~(nO@4G+$%flaE{`8mczy0S2t<|sob>*9T_kaGymD(dJ z#pg$}tIJ+A8jlL?S!XbaIv4F}yEE@iXSC{S*d=Km^#`MF)cZK0GQDo}GV0UX+2C?A z>_zW;Hx4bStHh~)y`r&Edmi<8XWWlwo%XOjbz9>rp07qpDc`*OCi-weXDY`?9Tl~s z$*@gVgjXKC{PE{s{`U9q>R@!;9uB(EynVK)q&kke^+xly`rd6*+1}-3ev>U}0j7xd zETa1DQRjj&NUGhAhJ#TrI-B;|@1u5~4v5Z%CFSZXpejC$wULs`?u?oDj@y}FK0f=U}UFLXE-)0^UGHY zBzyo0%|G;dBPP#$I_S;5TGD~8IArX)H{|IssTf(XfPwZHZF6&e0d|e(%&(Sbjx@@e z#Hoz={&mqmiEc3|8AmnUT@Gee<7w}&SJ1z%4bnI6DqxO=?3?fkgM3BxdP!n$1ep;E zcH`NdjAooy#Nuo)kC^6Hy%}#Vau^BGfPXQsN8aT1+iyDt&?+3y)OK7ECD=D3OH!Me z$VNqy>Z>oK_sn7!lOd`!bbpIn^mkV%6xCV6Tw}1Li{?q^LKmRZO z%m3!T{qO$!|KWf9pZ@3n<$wL({`ddm-~FHe*Z=+h{Ck$v^TFJmPur7=|ERAms{Ga6 zKmYUBe_@?l`Sw9!_2J8Z`R@B4e*Ed@cfb7F`t5I@|NgIz=}i}Zx%DjigVFr|=UPN= zay9DA-9N0YEi=w~FLGCt3042{kh+h)9~Zsh^ZLfR{pPMi#CP?^i9;pfa58k2{WlI( zkHf2m!R}4x?yTK;@4x!00RcL5yIXG5`=ABV9W@+l$b10)7nB4Qxh(X1y{;=Ax=wpK z9kkDTg=ueoH68IOSFbvle^)N8^S8m@BmN#cd@dZ8T`IBZkeXa&&%wt#lwht}f9$sE z{J^T!H{79a*Il#jdLKK(_N6WK1u;@itVZh5Pm{uIa6WR~@er=V-_v6q=&65bU-CF( zfphK2n0YgvT=d+q*Pj47KaAb{V%qD`XIE4B$m^b)4L-VA@0wD2`l5pk z0@%A8h%X#bui301zQBJFqS_vYkDs#!P@_~T6n7>j$oEZWdc>N+&6H>>(gH0 z4F2wYFO2fAJ-dJ}Wk_eeLR@9v`ds$TVA{F5><@b%-Qu>;?x<^_lQ2iK&Ui{+jz1I( zQ<%!vy*PvQu)Z8RyK^-i-Xv8?Fv?!^pZA&leKt2*a)+(0wf6Drk9LEWS>9M~IJ~}W zO?&6PkG;;l`$i{OZ{}lGuzBagva-(*Q*=UY$$SN#h_{{6*J2@6GO;GHQ8g+_K89lm zrm_C_hP_L**k{vr+(o9Cb&kMOLUXSj*yYI7r_+(9rQI2V^ z&$7+}8q0EgWyZO6mvJ0GKoyKGd?vU)qM>anO;79T;Pkt 
zUZLBbei)24HdZ%Q*WAU2)z0Lq-@Y6SZ`|y|pg)J?C|CPgeDgfMVVU(w>ubr~dUCgs z+&v0+m+LNk%&o5Rtp&Lcg#O^T>`;1E+`EUav3EeUO8q!_x7kI2hu7`t!};Ze92nRf z=y~C}4%$ZDtIJ9BVKBcC z|Kd}lQTq}cyVpa~glW-$TtFD2TmcNX(xzwz(G7tHvmPUY0M#2^52oXhjSv-%e%swW z+H3ikUMTnvTYBL)4c=tTs;!&B{Qk>3?(4!v*F|3!RuJC5{`1<(@S}^qjlP~=IV*u} z9&!|F;h|c8)xy&r{j!^nMJz@)NVn&s_AnBcXEQcx2c{c57-%VAvULZu_8H?DdZFV* z(6Wby)|(&}b*7r|DhN7&%(&_dQs}Z+j#z~Tof-F2SB}*R#+>`N7I_fESrArTt93rQ zYPDRe#H>Ge?T)eIEhMP+8AN7&$XUxT;+~5?yBZ9^>ZmWSLxir?3ZPbd-X3kZ zZnx{U!1^4#j}oDu2M*nCSO4eX{q>FKbZ^)HOQ)XA++g5FovjqGGoIYoYQ6Ym3`&17 zfvrDv7Z(>x3K%YdSydJV_uJHf7G@#o&nSm)S3Z+{_ZBc|X*%U{ocRQgnNR4M(o&TG zlCFKvcq3mM3D8KG2RF`Ta~f$A$u zz}W^SMFh-9y)YQ1OXVr|nC=&)W;)IQnSL5-gm(wK1G*{Q2v%+-kV4X;qlVqkt$CPr zX&xAQG!IU9Xe!Nyo%xg=7{P2V<$=EF-P@Tnb1D7S2zjyFDG48%sjmX&1A;GF2~6eR z0;)4VE`fTazLhY`7CB3g6rW(;WIj2|j+JucrsHJ%Aw3v+OqZ$WbeTM-zQ9v1bM||r z=U&czz1H!+a+Y`L6&Si1jrYX?~l$I(&wU+^_Z@Qp3_BG zdbS>&UIuUUm?=Te89JgInKCU$W~{TwlEW1}W(&+BhVUd)1D-SAQ+loqS?sefWGd8~ zrc2~0_n0nUsoCOzq{*WmbMJR@1$6QyupTW&2{M{_pT|sxke(?|>DlIdoNQ#v+sGBT zajV3QT#@9+mkE!YspRE)uGGu*Tp{GhmeTJIE>nf*FNnoGke3@CI=yPQH293-(O-JlGT~h<3E9P@#hi)(& zpP#uwx&sW9F312wH^_98&!xg(Inu8W&)b&_+X5RrAYGt`q^lP4Wa~B@nuZdNCR4sa z`{H#nWg3_PhXq=a-4A%aszZcelw_&8dPuHdWy;hxnzu} zv)nwP(A*o@X+e6fgnR)WW3mV%82FM!7XiVR9I{we!<9U$+`<$CWNK@WO#OVGY|oE+ z?djQ#RPPJqNMPym5@5Q>1ej|$cBHu(bTg#}Shmyv%$6GG%-8kfd_Km+j~*Z^2O07r4I#UmpFdax}ZOW^Txu_Vcg zPgaa`^bDBQurq*meuq0%m23&6BBR*e8Sg*$~O9n~@U(*v2v~ zvIKbOm?bIcez}#LUh}M^48!dq>A?(nviJcfpTcRL(m0{V44D$&pUs|OOqHDZq6CRs z-^=C5XZUM|f1mM~sgjYe^p2$8?+WLPuWau=H%30HrIyW3El} z{G#mvnOazuOfd$@mo!E7mae>>Gvx@7DaLZ-D(qot^zIMmEyL)vP}?zu*GYN_7YIB) zNqbnNJJr^MGvSgy*-jWGg|QEC95~lqxjI1G1kXNA5F%+$uE#P@gYoU&neo`<&07 z`KHDpQ!4>7Z%E^nDljLxb#jtgXK>lhjE0^w9}pn(f$`|~^aps%cL5nA*tWXbw%WwPuRdbLPVWWOC&2GLM#d z1mig~lzPs5K!EHApp@yp%hTG90GT&(IGjgep7Yr=RTBDUsw6<>jjdoa5iji<0q*l8Bf{=_V^gZ>T12>rL3 zN*v5^bHq_D1Im(<0mIO56u^P5R}{#(f*xM4T$ffprAgme=W^l(K$VdC%9;NLFJ!^y({!B$_RVRo z?*<2IKZJhra{XnhtiC*-cqavtue2YMG_ZHsxtRJ&4$_sudqmY-G)n+=0kzB3+o!bP z;^fA`M@g^d%DTiCI$PL8rDcl82f>QZ*8_N}@Ij{NxEtb5SW9=pY;GUvtnh!vPDjE> zpARYNy5gh;pWCw`;-&n1B&Lx?x#>PDKTSFW&-9)Enf3}Wn~dD2WgqEYO?;WD4&sF& zo@>gVg^qId&9#Q)T#ZM!s%D+B7z;i%N~(}6ZgPezptbi_MXW0_rKo{f0Iur`z=zIE zz4u||Lun;#aTHurMBq$oNw7Y+1+^xH{OJmR5N;-)xRsI_BFoDdQ5}6YpE-xY#)H~) z%1%u0i`fc3A>ea0v$~_L(uNQG=H#+z*$n4Xz0#lJ!jlxR=&e?(gYWVb&yx;Ll-I)D z)LTrF-_>W=_(`SNb{reemB6%INn_~M2;ZqL^9n}kI#o{ZBCcB(M-}<^jmBo82pJBt^yxZZNSi_qvV~k{OG&Hp<=NTpw#)|!g8?g3jXPlU_r+H~4=I zI<0iJ#g|*HK&m?z?+xoj$kTZ4GeD?XsT_mbSu__cORj8@+huT3@VLZ$ah^!udQ0@N z?cYj&MBpXg5OIvNe=_QntF&cU{-Ow^eO@yPY`y)a$m5a<+8S~j9%hFn3_2^H27Zn( zMINW8NDF^4Q_02|7x^A#^Cfd7&gqM6J0?u7$au=cIju~~m@6~z4?f?)%D`r6eipY{ z=^FYx>A|>OQi^>nLkn81>wGbmDZ{T^(QCc#pp&00C`rI~nMbnXiHEo3{75CoXR$Ay z&zP1%DYxH~L-IA)o@T>DK{_|k)UdC|X|koJ8iX1iH8bOTIJFvh(F<4X4uc@)=J@R5 zs*m&bHFeJzq>Tkj6ATY%UEzw3&##G|>B@&>Yo;-jOxsTx2Iy9fFP0^i+;2lKMA9#r`TYx@GA*n5>o2d$T*}!mybG~z z%`%)%`l~oK=TgDGt4^<#ZCwn^G%Nr=W(GC@m%+6AVNw5(k?P$BUS6f;SlsBRgO4rn zZ8T%^M2`hB&B%phzlc!p-^bI|aNJH8yDSU4CD=9q@wpjwS~f9SR8Z`OQ|Y(Ct##Mw zlvg^MI!jVF+-z1}F+bTQsgKe8lvhmgvm{ln57ID{SGatke5BA_%jL5z&BtiouAGKg zJ}J5H3M|=t3Hb4Hu*BmfxlKII!EN|&!Io8n!CF!api3$NbV(h6E~|pawp<-F!s~9X z$-X3INk#O+l9~v-q$*atOeDF&{S>(kT-G7vUDhK4FR6&YODZDpvQ9x|sCkjAiJ;4> zV(=xMV(=xEG5C_&`0|r}5k=A${c03ylL0O&lLaSr$%2!rWWh;IvfxF(XbRk_NCvp5 zNEVz_BnwU|k_9If$$}RZVYF`5BMnE964R@xMYq4}K&-U1zX$f1b zPs_7CcW3cRmR`)3XQp}Nj#o6@C95Ce=@*;3ERELM^1Nv6(p+JgUNm-TuCQGHacv;H zR5;XLzkgVF(c|m_o@uV41hnRCrSxNq1~3rDTL}xCyPcGv-R-0V^KK_4$agzwfq#}Z zg?B_Ln5kQUQLSsRE$M@FeipQAP;7bHV|NW+El+#yuC0HUr&2kRg*{1ndQ#|q(OLGC 
z^p8CyU1Cp3FZlPgTy@sh*`+j=BEj@S8A#wUQ+Z;RG6iDuF#BlF)nh4WJdNX2ROjq9 zlTrGQGJPOMNH$AgTaATIz3IU$HcN9G2XHI5acsAs(8gtDh)6Aid!gZ0Dc;vM1rA!P zzQt;|maLgdltGfY?-MgAsILsGOs9V)eN7mgFchh$)@YF$4@*e4$8>r;3X3;lNIF-3 z%oe%xDe1S;BNV=$9-R!xcWMS?-JgN@5y^nUXaty;9s9(hf?_*zp!B*HUS!?PWzx!+ z12Mw4K!Z^pWQ$nt%?xcjyJjU>RM7Of=p{1143Z>6ox_f}(Zdhzn=E!*m&(!?h71|h zX|>qwi_24zDBW)!7;q|^Ge)H>G)9+OW!%DyWS(aWxRozV^=vNdP!mw|b@eHJ!Y_Tt zrt7j4L$GBD9?zDg_}RQP#r*U`kNZ0L%vq4-x)@C2?hiekg2dgN0)45A{wPb*+o?>c zu}VSHU3(jBy>|<2U3UxW`|TFk_rq4hf#6y+7us^{Lh%4&DoXJZV&TNxFf9DqOawRp2G8d?ufcrmHZrOK8;wk1Q9HUxSF z9Isf6FTX1p^O2TjZwYqSEJ(Njh!WsOus_Y?s1d60F4si~YoJ(bEvACi|_VlBS z`<-J-l;gr8%c8`PA(w7t%3i;Cg;EW0Lp8-fT+ImBHFtd}*v=B5tVyo9Kj2>)jN`FB z1Wyuls9kAFExMO-(-qsjdF5lLXEEEVqy88@=BoQNk^?|hL6x+?^}5AT_qk_h^I&)$ z(=+AaZhx@PXzLrPa!Oua;ETqEgfeN#m-ECQK<)YEOv%^&_PsXL8r3+3(e+xzE(#8dLpsGS0mK zAom6k2alahfW>_D(_ZwzWA~RcY34x#$~MQpoUKLVfm<&IiOofbp34hg*1Gc27@o8C9`=+x~|!si z{z&)cmla&G{Nmlm_r?}r!c~cxt>_vYonj9S6Pw_P2=Hn9_^E`mN1XF9dnS*=uHU-9 z0AP0X83z6ZJQEp=3q6GM~|f1 zqyLo?k3IU|kNxQ-u0Uq4wzBYX(M#b+em6XtuMWR4Jb-~HmAH<2mG0J)h1$Wf$C@hM zCU|i4Col1-irvk4!fc!PZt#eyH+ItqW?rZ6XuJM_UW`q|!BNe?l#rOTm5m}|yR~EI z4NIF!I9zr8nSDFJ4pt5muqu(t>fzTZ2p_8Z?`PjOxK$vPS+A2I-rku%9RLT~U`)ds zlHs#p8@H zN1gdccaE`t%!>MP?gl&XQgM698Q)&pn{pZPb55=!4wsou>vDEp7nRxGezj_lheT7F z5o3@3NM~p7Qjo#zy_N9ExF~~5MW&Y)skDzs zL{RT-VW}$h5Ht9EaGBJR9DBWa+vl=c$MHZ3M6`Dmg7;o;`xXh!TPApKZ-?^qPC*fo z<5tTt_k0xR3fUyy2s$866dksG>dOmII-59Y84yiDd?-QuJ-8PO1fYO<#kp;mUu%)o zAW4s_kpM7?->@2pZT9U;O0onk0y(&kNjKOIjZT$>@cF^#Hfhn9I`H`(T0I1htKjq4 zT9e3#_Ng$-niFae!ukW(u=6W0VTy2Ro+3?|hy>|9{=`a~G%XCeyw)2sp`Z&?r zUEluJvT!Fp$cYbaVqPge zW{HnW!sqEps$;dW%IIRx6hQ0_{n^EyI(lSJp{@Yf?!_OE>K!f{cClx6tOFoMhFuM$ zco-J(!pFJr(Jj0;ObA;MB2y67fl*Mf2}rXpAMIk##CuRg2D=EL4)XCBLKKD&dm%(u zV2c+!;X@dO5Fa6qiV(*_2*D2g4+9&*8SrZ%3c{}?_L02PD`I=0 z!*5?)kN`t}123m%W`O?%%#hoZuoWsZT&A`bFZ>1IrYFK5-svOTm4>EzjxKH%o9J4?dQTjY4+H>*1K#s^_xs}6@dEs zt8(!WV0za#$o~pVh5c1otsVBGtiCP&m#oZLyO7aOWIvO`RMrofn22^E6Sn|t`=wn0 z`K^}l5x+wVw5?KCGJ&La0l+N)+azgMKz>7HM^sK1K@=NHyiB+P(gRP{axOr8yVYL1 zSC8pInKY1g70k96vb4|-T!8pqQ0whpFX}{kGC8o+)9pc^bu zi2VYv*y^z-v0v50=8i$hS>kM!J&T!a1Zgx^HThbS3!{t#&2e8$}sUWMrBx~?S-l5 zTyol9P|2CSb2P1`BM4YWKDaX}q~o3=>mp?V#{X~`_$ph_vW`%e)VpV2=nKo1&G)6w zxcWjJ1y7Z*sJtaCPx(^2fPe9Y2xTr_jtWw-?37(N1sei9@uuaKkMEo4a`$i<6WtJ{Fnn z!07I$wqXIKhG{x7`@$6aR>IL$W`nchO82I0PEe$5LbArzn;7x2hX!?wJJaTe)SNJQ z3pv*IuiZ}R>U^3-Qwa-;7V>H|YxjF*>CJf_tgz~5ncXM`XVv4Z z3U(x6x~LASX$ofP+63d${b^9!<&^(&P;zGKh_rF5hJ;npw@fpUo+(zc*Rw5p(12$I z5>5}F0VV0vpe0qlnxPBox(-PuA5R8sNi`Kv%Wh*dTO`JiR3@x&muv^R4OzUUnSBly zIMGlT;{H)us-(q>;VaADyIp|~Xj#7a!6k{QFQ!s&ee!l%>P5{Hby|xP16nA$h=$LC zi`Q6^An%Gep*JG3{#CE_zBe5qrYeuWZKXb9D3waGJswkB>+_jf(6#vh>uS0neQwcz zwnqz=qc!UKNb0F_7rAVYNGqLfK5hja2X|7T!7z`tVdh$js|EVmoNQevgvE9DB8{z! z6q5RxBu!TTPWK|S12RcK+RLVlH2ffb9{pL05YuM(0ENTA{|By#ia8D)(YIW1|V`IH>TPa~I(4(00hytZ^58 z_&9B|p^8JC#+TaO0pCY4A`zCL{oj1g;MzNtrX80vA}0q*%NEGLq`4ZOW<~;1QWf$E zes?t~K2f0zE-neRuWb-IJg23jkhG7-$l`GBesK460@Ho>9ZG2e-TSVM5M3;S{SWTD z=C|%UPMJXU>7W(_C!>2%#q|e>iM!}EK?(&|KOpeFKiG_iDywtP)ls*SvlAY7+m~nE zc6c~wb38=2FCLbnTwa0j8i;dWD&#QI`! 
z{TBc_c`}b5%bZ1kI6dmzL_Q8Xh!WKaNzW+^b^3e6(OLeBn<5#sH*}n3**l-o5EeZh zapV&Ydzb-vqCw30r~wj%6jiytq{4o@Erxr@x!vQlgoh8092qw)lzZsMQeWwV)MI4% z^QbzyX4>0H4ZaFT4WUm|+^mKIj&!(ER2q*IMM5V!_321PhiGky8DG+- zG)D((Q0D6}0;&D=shio$J9pMs!8(qRu>2A2 z=JY7G0Wz;T;n)G*Xj_2(2jTQUedM>_h&Mw;Mc;iLS{H<*`R%tm90%n)hr+pEU5#}1 zS>a4xCoiMUx8DlNe{mNsEho|`O?U2Wgvv4X3br>()@O#6Eqpp_&RXbCH1u^UNMl#< z_4)p{-}o|7Nk`p+f+gs){UPVREv*(alGCa})qIr-ov6LqnM|TgslH;5{$a)bxUc4f zBNKn5Yl)3!%~Y>imNA&hV-Ws`^75?%b*O=0+UbX;c*I%09)E zVUHc>w)jF<@7&qfsfrv0F%?>qlOVnE`5Cy&pq7sv4-N~P?(|AWH0Of)g@GOcRYT7HJRy& zQ_R#FjM&YaVk7v$LDYq!f+9Wq5L+w!t4 zYqnUh$MM(VOhT9eJ@JDF3`b@N#vN=JH*zNAPq*@nXmQF0;^Qz53vm7Ye3^pDZ8&YL8zk?s%6~QAs-mEB%X2gPje{l+10#fPfT5l zxg~9y-36#LSAy|tkZ!Lt3_)z9NHiF_O+`ncJ$7R-aTi{gqgF!$AqLoTOvd=u;Igz? za4mmhFnt2m8mc_IF<9&j12n!hIJ#&2G~{YzE#}w4_MBy~TJqt5y#dDZ$S>j|{1JAP(9XrLJ{-OqKFg0BL)cTHpfE9uPA15;@IuTO{h}K3 zqpWdXUBr_3zcypsSwZY@0cUdS$=#K_8fK9xEI? zqlngF{b@^GqT5Fs+-kHT^8UCN^+}tH(B*r7w4?h zXys9G*mm%^!(>YguCL4s%&lQd$%HLWdQHsT4JS)=-eZ794)y0iy0JM?Qd!<-i7{EI zeT_R!*n>Xk^?C|oX#ZF1Lp+xElzO% z0lr!P#n5`Kd047MyBy`+sMhv>(0`UA&Je#QaC{yl6F&9ig`FSV&+Zp@&;8ADFr@p7 z`wRcRcPnn?d-tvTmVe*7!Vj+Sz57@9)%Wh<_wH-=_4jn8SS*l=P|9$? z6Gs>O^MyGfkAJA^K$aotGRQLk;0U%mWI7yep?T+EiX%F7Ja;M|=T*g~+8o$#d>9g_ z5Qqe>0Xq24yXpoKA$$PW#qpsU<0LG_J`jbVDx4e#tHL$py&i)Sb}X=K6*Xl*27ja3 zQnW2Vd|vq2D#gSvFv++&Lfe@8ntl7=u2rQ~_pt@A7wBxmR>KEs@dO%zixJ7_k=e{| zSd8%Lm*WLNW<+qbT}}oastm(}lY=L!^y!?gGzDQC;>2Jw-3&UI`#my)-UCg>OPn0k z*LE0|`PNA97^@nYTV6=l)MQFRS0@b3phL&nyoH@5B8a{jM@rDWv}_VFoG1?$`3>N1 zV0~+%usWIkH0BS-J|i_Qrm#kl=wAD!cN033qB5)>v|~z3!}^gJJ>r!j)Fox`o4(;Dj1G4V7&0M+cSh7@#hEH}VPwP4*!AT&YM3hm82fJT`?U1TFA>DmQygfc8~ayiUu-O~gu28c551IdELM!9zqpPGVgs=5YN5r~eqsM$b6XrZN(C=JZ1TKEOLTuUg#EkB3`Y9YYm2q8KxP#XZc$^<3xm^Co3&~ zL0n%hTI(x83|=s8VKGWNJRT%;LGd8QptM*U!)9p5=@29QEI#lT-Vh~f|9*s#o{g9{ zhVp95``!)GF5-D#;?JOu$pP=nAwrfe@q>qsx`4H0;8-YKjTv(gOS~l0@-hO}uMS6y zCsQ;$OXTXYNJ<|N({!L zwE_6wA(KUlk!kov%`u?odc)yhqSN3xGhTn;$A_S2ptWa(a&#!WoeFOk9S?6e9*+Mj z&rl6SGrRWyyZRh#6qCY6XWMhwT=)&&fG@_Br)&6%Kgr%vK^?p9ffgrUm*i#v7buh^ zp>xK*XBP*{55KubpF5l8VR%U5_itAZ5V3dE~V^A3t(9J!_7C)Lb;g3XegI zW;%onc&T{q$7hHina^5|>4$tqEjwmBn{S@XG6nh^&7 z=ymXXMQZxwEt$|DW?+$(95P7>mxo}aWy1+xoL!%1G7b$(PUPX3dazpsPCRJOcS0-I8nq?!M64lZ?p z{}8I2O!Y9s2hPu!YtKaHij}DzMX3BpZ@+Bz%`6x+Q$&kIDi5hrGFY^We5Wh_a+vKd z%+wbX%A|F{5|;gBKUcGbLL>9`6!r9Vcr$RC!77ynde0>PYRD%QmInm7%yI`Uu=B^G z*p$5fgtO?Mxs}NM1YK=kx}SoQC;ZObPbQ-4R}A1!iT>)RS&v8NklB|(RPJFAfO#z% zZ%`4gv7YSLpVtd7e))Ng%a89?UGz`?=HLFwWr2}<`0(L(T)xufp>FtliON(;Q~t1M zL7>ocq5(9;%e&ECw}sXkBOL~>s|njS4(en3Z zxV-!azsEPcNj+2UPj}tbUDxFL{I1)*i~923U3d4s`{%#W7(W07|K2KnE{zpS%@q4O z^4Fl7c!s#tA1SFYP6(IbEU2H)34t_QXa0Zyj#7%4d~}W z>z8|09nD1D+}GDjlZh)~SvViF7og`V>iXG%>^-_4TsOx4;h=a}ue7)}s>R(_qjJ({ z9URsgHRlr=6}l-4SmY3zHKBefaBN-u?6EfBowF2j3KaS-Jo2p1bQF zu0)14VLD>_?nf&kJ2~*^cFvJ9z60*~h2_Bu%i$gn!MbZkATAGktI>C!*!-Y*$|QYh zPAQ^I5V4oh?(WgvgJ^rd$;Af$K~xG|@gUlZ8*V-nd%kBLM900eoxyxn4kX{mIR1_3 zp12nONxrW>xx)WOT4h+LtTDvd_-&frBf15cN?D&!RI(6^+n4g@Ql= z6nqDIpxuA(>+JnSSZdn%XRY4BUB$ZL<#(QYc}{(cQlx?7fUk74`rhpVhzLRSz>>G3CU2NGm07=F{1OsJD8)`Z8V3yMNn6 zf7-2$>Oz5+kudy0RD}y2&5sA&`NhlV`QPt4Hocr*!RhlG6aKjEVI=S3F_`l5`N37R zOz7iAbA1o7jH$&hL!xvyLcgR;YlnOeBcI=82E<9dJ30^O?@C$Rq1=13`%!m%ug8y; znHf9CG4(`l_4k=uz0?Jfe|2KS%KymaatIh=GdUyaY<12aYX9uBp(_~-ls4~D8(tSl zcB`{%{av_Ke)%bFo5_gZ1BJGUe|30IeE$}w8}1LKcl19MDOUU~;el+AYIqFqDX+95 z0UAoE%eTC=t)GzZh(EWN5)?n0YD3rFa%>gmhD!;Kgs#`ZPnIzlMR6{uO zrhEY4JoJ6PZk{go%X~rWty=_lIO^UM5~4@By7s#Mp^R=Bn10G2omGrSZ!}Yazr))G zZK|HVAEqp-`*rmQj(c*J-mkwx%4irrv0oQLemr2F0JnSpu`1s^w_?D`;Fp+g#ICfA 
zYQ$c95*MY-;4mq1U&N2MyCkIK8XKUYnekJ)5doH}W`NZEX>q>l0hWD;MmnvY^7+a3IR5kVjM3j9m!#XU1#nRddxa~`E>K zY-w}wLe1VW0ca+gGFC7Dak8?~cyc+O&eBPs zs(eW@pH9E2A~P>k7p{B4sJ1UG1>MEOkp4mg&!^j8*A%f?GoDt}u}Hr7=gxe6Qw4t^ zpZ|ajjVPZ@qoSm2BMPCyzN>e=OX&>GNry1)O4Y!+3dHoQOV~8xI=Do@P(Qle`QaT-2*f3?deC zzc{+e+B_6H=VX6tFl0vsZRuHT0KB5sN`N z;FSD*vuU25WwI|>I^y&Sn{tSP7z-^ic(F*;M%DSO_5q5>!XKLD`?S|RzJO{Pp^F)* z1qC`dNca3<`-88Sr${q*81Fh;ya5QdBAL#IJvnfB43>I@S3J|^C&IyaW-BeKnu)l) z-eU0$cxaqOf-kE7%+=vU!tC}Bngv?>2OW>!mZK@LJhRv>u%cvyn8`kO3*hefTzmWW z$7x{P>oMDC*#nVHFlDjarjkWUScl-v)4=R{kV!(qO|6f&UQMmLx6%xa=nK}ZUchf5 z)~se;ZE5%=D1Fw=8%4uUGV! z(A4%Zr}{CM67%uaFUqFX6Vx+rIo)_6BjM>;vk~A;(B^P= zi|8divbe?cB!-=4%XOSl6wMWWeEIId&%dBq{maT%pMUt~?$*Xn+6LXe3Xm=NL+-egGi4ZLmQ}!)MRIJiN(OCae(>A4Jwuj+E zS+SDVmZhku2eJ&v#%-lHZTErQruiOEhG5aK9{GD~r@vir7as44gJaG)H3@A?Yf{~ZD7h<=x7}ETO z@7QgE$>GK%J8JFaZ04tJGO43)Xv+)1)!4i?t(!YWRhz1vxO(J@U|gNK3LG+P>*zf0 zJ0@%!i;WhzBwM`RyO=W&szNP0oY@Ik)6*7#567G-43Xm!M1EnKxY0n4D0$XPh*M~m z@1nS)vMQK>u*`AU76YlUE{$uuz;=zRFJ+OIXoMDtiJ)M`)Pm;-YJbxqGD8g>GaoD~ zo)6DKQ|dlH@XP2Ib(5{bZQ?8sksJ*!nMbg`_RFs0wMQ}^>N=Tl1+u;_2v^-dw(B!p zH`dU`$?mAEB-0^&{5`YHqo`>tRzfM#sYZn~M1IM256q`cd-}0JXg7$DwSZmZ0za&Y zlsqoFqpeLpumj5eWB(I7ptNnv{q~zV0AQs3=JRjA{rLIayLY!?yua(=;v^Z!?ar3t<@{#R%O4rr^ooWcA)JWbyS;~96VYMe&ZbSb`e#V!4MNXLT#cU8_|4X}T{;>UX;&KJzHF(A&?n@5LXuz%BB+b{tYo zW(-TF3tki2>R}sdysP=Z3z9hg;X#F8qluM-Gr~lIpV50hdoJCt|3cWK2XLY=wuFW- z{m_i~d^+R@)k2G{8T-ZEI{s#3_#K{iYmGXkmv)Q$+cKK(6&tPUexq_owpO)V*>ALp zTRgT{k=f&d2XmJhM;p(WQ%*-TV2$m;l@R4RSQxZp$~Fh!7XetUw#`ylWu$&l%thRq z#$Jmq=Y1Rd8MW2^F*uyIVgtLG6(07Apll6a!6}v4M{IYpgCag$piAZg$!VJsEBxRJ zba@uv6FbuqXfMwZCKbucaDqS1VUD#f(U-q;O zXYx~J!*uXs)rM1czuNx6ZmPY4+MrA=gDvNR#m(owAG_UfZ!YDar84_I;Kp#G&&(Cn z`yuI$xoo>nb&Foem`a0B$LwLigHrIZmz%kl`y&onR!FegDvMc}{LSpWd5UNd`U?4+ zROSL!^3$5LpLINS&0AOe8W;lk*QN|+E5UWo*}&@`yo!)1EZij-uUUS;ZN_^f>x@01 zbf3H58DP7}JJ{c_#`OrRJH^03M-jr?2;Zgj=%M`}BmFuf?sZo_4t$3FzE(2PZ@Mx_ zFk@`S*}JkUyG1W?=PgZikoDCBUVDwLhQr}HS0)PPNJs-0b{^`gk=0Goi=KWkA7}WL z!!ok*L9vYdE)CEt$v9KgZfDLG#EhU`ap&vTHw>gC`tlkRTfgg07UAIy&-Tmdux4JI zv!jTD=WWJG6F{Gm6VpI{Yr2qAZtUby`*dX@;5MD_8FQqD!Bxe;b?_qmthuWXyrUfn zFvNW9gG!T=aVQ0-d>>f398TQ9P;T2*y}uY!I~v8h*9bg|AU4unVoCBQ zcb{4D7yckCPB9Nxeu-A@sf2s@P2B(S{*OG03u3d=)$H@tNT2BnntbnG^zFT$@m9fS z1&5WFtzW*yC+OX(UVOLmQ0_thpUO>@rJ~;5O93bxx0n0N3;&1J@I4Xw0E`+k$<5{y z<+_QL6U)^IntQ%*FH@%Pag@1w&c8$EfJi7vhc0MT@Vc@ej#`TE}FAy!D`s&n+AS^ z#|q*Ct``K}Ab<@fJXi%V#QWFCn`DvB$lF#3wIo&WY+1n;1!dTt%p##@gpbg&R~L1H zIt9N!*a!1s(I%$IgYg2wbx&SDqcaf3$mvb9KMbMIx(;=MeKUw&m<17;7g`dWOauy=UX)5uE>WA%5dsar^& zqM)VnV7w1{xNa)`NE=@|Y!!Gk{yA_|KKO{qtRepvUVB)ml-8fJf;4os&`wDC83Zxr8 z6lX+7kFm)SzdV@|%Izug89XRHGndBQBriw4!jyV})xp?cak6Zb>oDb0p7=563oC25 zgD=n-0)($j*EI+dQmx_U=Gxc13Af~=%m`a;TE&JoaJPKO2LcP!%gKG|5I;;rfkwH+ zZ|k^txQ`nr9I+DWl~NNI&l2vNHT*nxYeimc6!*);!?JmAX7gp+!{t?Bk2%akqH5jaac;dHOhk9n|X8lU6BtKCB>v9MV8G{~uKs zBiDF^yDyK$EjHet)D9~9$t&rn7H+SV{j#db*p)Sw#}tM~aN||0eo!gd*7v&Y%rqFMLA z+?eJ=5aasv5`5kTKQLJwM5G&&(sVQLw&l7JZl~w|Mq2~@9SH7k_)xgf*LiO^IP*7R zUKJX{p0p8*n>ilS_DN(-$g3)&`N+?ewr=rjVI%A}G>lN@;{A|ztf`1QYN_j}p~Nc> z?c~myiRdF{9eUv#N%U}k`U{c8C4urBn&$5mm>r1bt9~iY|MCIf7CPAK%&)|vj@8q3 zteTwDgBXM`FqG5+H%Kjg!xC?QFKddGw4FiWFeFom&hcDlRIf~1r8Pti*49c@-`p!0 z0)!*>uTeUiM#i|u&FCd~@Joa&zQqdzXbmSwjnUAt>B_^kzed&a>6|%eGA&#v`aqZf zUgv8A&U^UKMGtNp@-%Wm>|bP~>y*tG7XSK3w#qs@dQd;!fM7lcoG!dB?wjJK07FKt zq2laOR*)-X3_BnZgB0%Ew=WDEZw#ofU!EChxLtD_}!A3VD5UeAl?$1AP)m~$M1~BQG7IGa$X`(W~Q$#RsE)j>Y>)voX>YiQdr#F8% z5Poyh)$9DK0@OkeJ|35Fm!JTe58><{-F@hCogqBguf;4AfS{KXv;{(Rv_u~kb3&!i zD=gM_cQbqV|FZY~@jX}n|Nmn@&3<|MJq%9_^Lt|$hGAnEhWSl~(PS8g`8^UU)f7@x 
zL@cRRq4`lQtxQQ1p-_@)BovjR`kwPRw=+DR_h*lD-j~ni^8M%ayrkRnetTTc{c%6% zaUMUOzaE>IF^+re8GGy01o>05bKDd+Qv$*A%4cb%&)7?UbF567ecCc>f=rEFjW6I& zOKlt|xIa^_ac1TWe(j3%{B|nhDf^xrd&0(x|9|}S$!vb`XZ(vXOlO*#kw2NjW(6Mu zHc7<(ojEy!AFue2QrS10*)w*)YsmIHHn6D;4So${G*4jPF=k(SYX zYTo(nZiC$-HI>bb&Ye3u#XlzP&Hs4Z*`IypKgH-iek^;>k-e?S?yb^?&BtCdr?7wN z?0sPNPA&V`y&t=n=sR^5drNqnQMdjKXZ@C`&iYZQ>~}BqBU+}mFa00;H4NKfCh?yG zn_o0#f47a_p3U4_`K#d3?9C?gJ2DN$TV%LtUPCqp=Ff7QNo6mQ&HGfQv0rYR_qrB4 z9J~Bwso7pC-*~%Y+yTtnZQTtBW6W5#PqUZ6?1l|&Q}Flk*jsVz766o8pZz73`eTgx ziDTGVc{ro^Kk+~Q|FP_VIf)fAYbyJ?G8bH#%QsNWZ!MY-yLfjRgT}ODFK+mckS9*) zS$c~g_IiU~ma%G_;@vf~l`+eFjV+!JPEHn2?liG*F` zvY|9*ia5$PGmr7k+2;2`raRe{Ki^G}&pg3^C_S2r7aRA0xZlWK$=R#lX09&#M(FfZ z_61J%9sujLSP6T-kiV#6cW&jE^XyLK)8~iIC)^ z0Olnb?60n{AIN9kxd9ZrEf{~xiXWBlJY%;DlV$_<^FuYEk-(-IGk3_mw_l@61b+ z-MFmuY3Z}OR?P2nCV{)3#@umBpRqfoNtE3}O+IaQ=h>a7PWH0#m^THFOH zj{RBcOk+^GF()IF{VB_MW6)Tmp?MTx_kraX2C&o2B!6-p!cam;EzRr?9hnY47gL;ktF5?1w3= zC+ychnW^pD^QXAbGVM+V7MO4Q^SuLhY0N)sV0SHGXKHpGi$GrXh(+@9NJOPqO>y1$ z>7Bo^g6^^pRt@Kf(nDu^!&!Rs7j~HAEITcgjw8PwEe-fz@nhHSJQsgn@Ic zdfLy3n{C8p83W>te(~lXk7MugvX50uYcb+xv11gwkqG;_E`OJEgpthNMw)3Pvmep% z+uh+a7Z1ZQLfJo0_AiY6;}5Pp#>4Y)UxVFbT>N|N>#C;BV=wkE(5Pyp85K=0$H%8q zVxUjOowa=`gf;akpWMR7(X^XSxyx}r!Krf;+S&BrHjrjK{jTpzD9i+ntcB|e@jmiTxSE%7n-`xstB**`z_FW=kn%3=Rv z*}rREhSx^+FOmJb;mMw}e@W~gE8Z*L!|+OD|D5b!5i2!~{j0?OIm}88!(;9M!y~Jf zha<4Ik;J?Z_Ag=h`G5Iizb~M;-~!K)fi)e^dpzg)l-FwSH9l*7pDwe`@0qgC`mYbz z5a^#2Sep^S#@A<$9slCQ$x~l`b^1(P0B=2#{Yz*6@=`?3G%-Ym5f_~2k@6ooeG-Cv zdIozrD+bnP;JvAR6MTE}RM%Mln@n=$vM7FDz6 zEJYZb6vc}~-Uz;t`6DhkHi;{^kpfKOchhzm|iCi3}vEoF5jCEv+&r!H?cx$~bEd9wUO z8kXn3>q?}mG7R3&WR~yT3nJeQo4R;E{XI+WE$z#%>=_Zkn&rS0|jDOnQ z0}qUv-?3~NpWoTYStNIEK3A+8)!%D_C!ZgsTU3-r<(2!*wauc|`N1e6E;#4E_Z!do zfwdWOXXBLaIOe?J^}Z=`=G>Wo|2?n0lDvp%ZEm0aEZ@*wB44;qX^k2GwVQaAcn8+b zuvEyJ^7;bVHjdaU^5x#i_uuo!D~cs_o!9(WJCfyF@t(+c_|E)sJms<4)1PgJW5?qH z_sY~_n~%6) zXTn{_0DsTaz}n6PF}vad?mU-#sXDU7a67WaF5$=)!|KTPHW^2@*u0KxaY}Gxmw_)1 z8;>ba}|3yvMWQYK*ibDk?2jq4l7AUh@3)p$(u7q2bU*(8kav(5BF4(B{w<&P#qv{#Z$eAJM}H#P7wi)n z^0`pQM?#C=5jwO`s1tF-+ag|fSZK(8p+#t)2R{6`@H4PHe7A@TjtVWo`g4znIHefN zvHn=(&-+aH8$T7g;ys~$9}0CA3C&0Q1aQUhvr)&o_XU@6P-ynMLeC*z8rEx|{!>_f z>JZwYf7g(wq(Jzwh;tAdsMmph7NP$U9}7O~n9w5Bu>!sy>c5HlPoeH4j88VkFGr5s z2O`hL146S=XEE~UV|-R%d))+=gmwvt&td%`s3!~CIU4;5$2de_oWil56y!gJy3){J zC)Pg{<8TezF$>${T7a0R5kW$YFrnFHg$`{dv>oPs2g)I@DEwxBzvO1PU%2bvO_gqmCr>!-?2gNAMfL4TT!P z!VkfEhGIX?LmdT8#d0Us8?jx~<;WAtUx)DHR7YqhXlH0FvP-I8V~Ib?Ey`I zitBDib}z)eq5Qne*WU-qUMiV!KPbOO;>-I(2S5ix`E@*BJ_tG(Is`ftIt)4-Is(dH zXYlr;p!Y#XLsOt*pktxqpsCRD&G&^a&x)k~lbQyFxbOrQb=p)cap(~+}K_7=c z0nLG~f<6g-3c4D)2D%pdG<2P*5!t5P^}lQ8K(PJW8~}&0j=k7r^T0?kC!WJ7fS+j& zjKj!==0I0NbLDdOf|*^_-n9ntv=Gbl%>li$o=WD#X6u2UpUruPQ3yQ_Jq3+6Hvp@{ z&3gRI4dO6Xn*DICqmsFy97dSAf$r2%WbRN7BL@C$sGqq39YzQ=6dDF~SI2SGaRXeG zIRRLHXcF|c`2y5oq#_=9fRA6Sk;{d6ILsZ=VI;r67seVx|1sZc_*bfIa^rda%wg;_ zIfs#f&6ewtYTE37&w_i#RM&YHx{)t%7(1W^(EXc) zfBZ$E7d8vsxCP3E9hb~*Fb&)$e6xEj-v6?QpX0*fknLPuxBI$%=5k}*0iipfJE8f| z{m|Q=m0=*8mxxO~;O$)Vo;xD)xtni+pNZuTXeDS2Gzpq~Pwcv$f?|=U5LyJi235_I z{<+AL0nLPNfU4$+J}&aaKx3h4P}Mw@z7cucjbAL5$3YXI8Bo>rth^@HlLK81&4(62 z4?~Mhjo^*$c^t=bcl8$i#&L&n7+MVV`(60fdY$m6Z>!4yr@?0(e=M$5hyCBlpPCO5 zC&|$8z%!|GL1;_S&Id+a#F87_gD+1qJT2|za;afx=L@b0V%e^_87HwfNyS69lPbBE zXeaB5Ml88uHucE-?a@xw(*?0yPn;RMug4p8CZL_Hr!QhzPZILTywaD-+wtcE(N5Mg z9Ir6wjaZJKU7oV64IiV2&`yr?qlo2tCwITMU$QP)k3Y-7$LKM%ljFP^u^i`b zEd8?P31B&RO;4kp9G@2uOMbH%C&|^9?qAXfWI6blC%FVEA$lBb;|PcNH1JRQ%|*^;NHCC?CddD^nZ z>=DBL@jU%3d4^f?q}t@EhCHJ!c_v!&Ommw@ob#(A&s0mEIhH&NZSvGWo<){C4_op) z>MqY8qMlIXc>?X^aqMZtavp9%9vSob!XMKy&NbO{zU|jr>Uqgh&vr{ack+mOY9Y@n 
zmOQ&GdEU0kQyY2qS@L{n$x~>_Q@UTY74dBA@lT#lt+Cy8W_^}}A1fPS{c@bGv0a`9$g|2k zKRL~FJ+lmJ?CyF)j(0=ki9|iJU(tx=c8@XRJJ*N&cx*m!;16d3AG=sfo*tGwGjKkY zd1W2)I-wErlk015_Ai6GxSBMyY2EimcIs->j}hq zQhyQi;#Dl)0euC!9r`lV-FkLo`7Wqj5C85C?@z+7Vm&*t{4MC4&^MrYPgW zySAt=#3@wv$BB3cG`fyhz6H7ydKg*^b=RNcSnjSrXR!Pn^a9kcF6x7ZLY=0%_9vNt zd4O&2oAvMQkI_KHE1@9`MZ6Tc8oC3T2X)t7Ph8p3#9@qX1 z9U|&<4ihT3PdMVk(2U_?`FyC~eIgEohCto*$K85D(XIe0*ONM0zZ zt?!SC*mYc%P7>=chQ>`0v3Hu#)cb{orV9;+t{%%{*L;O&7d=t%JD@V(2D2Uee!v4_ z`A(?2eAmVa?l`zpP?;|gbq|H6Lf5&eF9G!@LQ|l7SZ~;T&(4Rw4NW(fyY@5hDbx?m zhwgx0Fx9o)hSj3oM(7r(Y*%c?uI)~N^IId@M?+Jg>8852yS85NH=vu$enn)Pa@YTw zz96`K=uMM%%@?&*a66!KKiY{nVVhWf23i8W2~Eir%d?>F_M`b&?(Vpdi{(3@dC*f( z)%ouo$ge=z=U`CTpQUAmzXE<}kch*f(a;^xBB;CmY$+$&=R$Wtk3&`KjU6EJqzn`) z>rF!(nk@Wc=o#o`s4-Y9PldXxHx0|vp&8J1P}O>~yYqKo9LC0>e7&yoNiIKbE_WCm z66%cLFM|U zAf5)D0i6k*4V?>J0L_Ljh01<^a##W&iUKzX_H7--Y;X=w4HutY-0e2fpaPQ`$xWe9?c`d2zs8?&^#FyZWM^uDWhB5`m&!NqAvNqi4kQO@AWYZ7oT0{-+qDc4Nv1NJ65{z?D!b0t!*HuLePVNCwZaPc#k_hqS{Wd7#C9G` z_dZg7!*L7h?mU)wQ*e3ErM~=)35T(wjL;%Kp$P#(FI0pdDD+y8(8TK^k0Y4JuJhqG z+KsFv_J&{XI; zQ(g01YbrRSnNTNmuQ|+)?51Yy+AadzVQ5-&-rkkVixhbap(Rk6KddF)?naDg=iNzY zrOrZ^n(DfqxGrLO0#wWcN4C59;1?(OWavidh6=)O8ZR^#T4<{4dSiMC?hJH(ZxLTM z)s;7r#PUkeXs8^II5T!_mjJE^n$Vx;apl4WiTu&fkx-f67)-az86(>HjTI`d9~_9c zK%>Wr<#EttXaV#T)ZO*N87w~sy#NhN6?s*!ADrN$zvu0Uaj;n)ZnM0p&GHB=zj=mU ze-xI-xT!B1%X8fMw}cV;!ybE&~#`fwD2$F zza=!q{D9SAT=-kWLlKAH7V#FWHxHVHJlRmEk63R4bU*aAukcIQkH+}%IIpbG7=NLO z(BsfR)Sqao>vms_<;S6w0!1FbYC>;T7wTO@=zRF=8i{xv{E(U=F03UqB~)k*@`XaP z9m0<*FEqNc(58rs5wEBs{9@!g1wCF{__v{kQ^YyY>^jI-S7=-Vp&QK~$T*B*)V&(I z;TOI=T*qZU=0{XB(LUN#SFQkD47gY~Tp_qPa0za>B5)(YopHxSinkZfb9_}gf7DMgUo0^DvpmMzz&^{|kEI-#<=wWbY zdhwW8Kk9b$6MQmMx8IeJXA9cf^*aUQS^(Y~b?dG-9P15*%Jt?VUI+c6jd;Cs8Y*AM zRBtPMCsg`_5Law1mP>yG{4o}O3F6mJ-pj9m|G0(!8S!>(SAxp*W+I+z@z*2X zWbtzmzh?0dAuhhhcbykv^@~{lD`)j53y3zK)Zh%LZY&jH@D+-@mACiEAO2pQF~X#ElU1@80l7Q%ih2 zLewWeH*SaJGVX?0e$Lz75)Vc!Kj$5Wn13eCA0rVvL};WSmamIa5zFuAq#@?tW8sf< z#Pajc48-#D{Y=F2bz&A`{yh)=n2(r$UxPog5%cd|@W+FQtBcTBj+lQBfESK>bEN_A31F&4i!x76Pz-Yvg;Kw0uiTHlRtq@N} z%-(J=AJY-HMm!sF8^rSv}m&f;k@b7~!kMrs9XTz7r_x12!f-jHr``~{BUmovE;9rN2 zVJ`hXN##zwUb(l*rR+G3_p=L!@sV@C{HD$Fh~fX;KFwV2FwVsCI#TWwV0k9{FYW6l z?=ZZ&i199CYOGvremAf*U{34X;uYVFa*Uk>KU_wd1FNm)qBlUkF}qUpsyk_)XBQrn>L% z%i#7Z^6!BwR^(5D`$>_%0j^9pF@MUM>c0L!a85P8FRInk5Bvnh{11Rr z?Qa%%)&4FAuiD?W;8pv(4ZLc9_kb^gerl@w?Rym5w~BlTxSLSB{pB{epg6VjU3qYI z75Q**F^YT~xPef+{dXw1G(~)_?}n;q}f9p^cD9=5q& zst8_QkJ;sK0A98JHsDq3@1a@$P|f<&HS3?JS^uM&^{>~g|256}-_@-DGtK(XY1V&3 zvwp80Vm_leHETo1hJ{BHwZb^dqPtbd4R{b`!@&$rZXcYJ=-QokL)-cr9E z|B9u4JN{kG`ajjI|15a@v;Lc!^#}G6^I!G#P7Uy?uXmb&SAG4`0lez#m%iZT_}iVo?gKB! 
zSeXPhB6T%#B_6+AQzQOyT@5SQ1GoLl7@y}`RQv;Uc#=nA6JEC~}KJA@r$kpDz zh63atMgCdjUzk7m3m)H+KW+n$_qT=-$xz$+#v{sG#ck!~%3F&)4^=6(k%8i^bjjl^ z{)#jF6n*ej7XPIife9Ym%HqG#56iachhxuC{T-OfH zjxqB10hKs;{O|efD>Xr!|EDINFg%}3QWM5{5LxSt$>aHp@^(vpOaxDupvJeU2{ZG; zM(~8ZJo%`aaKBdO?S8$?mi#aiPgrr4{avYAyIrX&-Jw`le-SmG&aqvMucD*Ot$Wug zRb>kt^VEc=a2yR!ICEIBt_kG{A2`@VI^tHOn&3ciSDw)BC9U*663r9(5JZzE*hHT2 zOB7G|kEA9J_R#&xCudkx_#z|U!-h8ZrCnm3m{ufVNA6#0YT*`5cx=8Q6 z>-w^gK5}RAqT4+2wSvELH4%$06I*bO5KnZ6^TaOgWar5^sc=7?T$;y@@x-s9c;d~D z@Bw=7>FeTK^j>Ubja|VKZwDWUlT#!2%CT!56DsA{)hFdt8N;1&EVk~&VyR@}zxh|U zBbFo`lZ`7_lA%gAo?%I`4YIL;s7YgMb~i(1mhLKg7PeSB}YJNv|J}V-~Tb!@K2}EFM}Q$E5MhOR}Mx zB^^H?8~UuW;SftYy-hY)SkgOSd+(S$su?_?EXlb?j-jsQ|C9}PG}(q92v71lu4c#v znIEo|4b&<}|HzVhYUF5C?u!aJ3Q~Wu$WhO-aYCp%Zmd-vafXe{IUq-sIB&cwM-}gp zqYBx$d;l+E;|ll6hS_Y~eU8BkWkdQ7*^t1-J>accDH~#{KuZ&e8d@Gw>sc_{X>4t<7=-{yRzOAc#1Acq}d z<9AleVYWSTn1zk6J}HM`^JCbvZ2ar>a#)F3ct9P0IEs%yjxs4b)=xQxH3e{;ge z|GGz(S7|E6-s11Bl;r`w{2y3o&XU;BlgNzI+Gj&rpUI+ zTe5UHiYJf3L*GAwCr@x}l4J>6Mx_LnoJG{+Ij+w?ltl{Xa|vbc^V-+xTT zGNt6DPI;Ux-Z~(QU$Eqds%0$bNnWv67901-;(nI=_zqdLu;k~or%!-+p1i(eU9z0C z%5n5wdC>1H8T0y1dC+aGCdv4ROg`JKE|Uk9_)dQ-4_a=O2Zgd^=QcS6@ByCrj6BF( zEf0cXpYN0h;f5xEyH^f5!IFQ|>K@X{QU+DaAs?}nAz-Ps%ItlX649D@PEO1{Acwrl zQesei4dE%H4#?~fOG$S8`CggrWGNG!nX6?M%TlK9kg?J!B@^`7EM@KTc=5*O~gQdQe|H=%W z`mSF51HE{G_tPbEf~g>i@zhU6p4z%#@Y6i?GjQKv{v z(!$;M^3>m4*B+PSeS`Rf5FI51p=dPW6QVSR38O(s(g~q#!nDN?y}&2T(#Z=n>74Qf#*fqqD;wm^<;UdBWk=-AYfj0VOW%|?mzM)A^i9}?z6smVH(?w4 zCTv6BWI`G2f!~BZ@S9|m!Gpi?&MA50z?<^MTPNg={m12veGT%)V7-R=% zE8ln?Wg1V)8yirj4s;vxt;pYk{4(&a1+5gc67Uy;UIc#NZv_5E;BN%}M&NHWfo=pn z5_I5iB&X!p?w*ui!>0DvZq>`L^&gX8>t+-Fd{lnTcSL@z1?1*8<=2`}r16COS_6s! z{xutlSy5(7>%mlVGZ&kx_=GjPNrInEcqKn1ici?mvF>MbGo^yW4`&nhAp?GDvM{aG zk&N$Jci=a26JaisfAFkmCpRPgho=cs%lN1zBtFpo>J)-#z1TOB+^FaHd_`MGLS}y0 z-}r=g^!(<6h$-rX5BCt$7i>aTH8K6fCS2`&Hkynt-AnMuPWb8|iN`Z^^8ks*GxlR8 ziH`#?Nhux0C)`E^FFsM!Mek=5v31tepz9_L;S&wc^;-#++E1L}yEvF6y~-wLw{82F zB$nnszDk{_qU`T%;u6m%XUMoRtU@FSl^%O5Nxo}tO6L=`us`a=zgS2-_{tonA`-uX zO)Re#redFN{MsrKe>sv*tZ3gLlW`>$V(MlS_v>oF&xfcDV4T1v*6GTe#^OI>I>aWv zsh6k?;S+^-FrO?6ZKfX$s+84?ve)!q=1tk8wUiU2LUoMh_P&U#1I!U;r-I9rI zD13%Z{L0;0P7;u~X(RDF*+eW>GkwG+{xU?J_#X`?em0vVdM`aJOzR62l?hXQ`d!NO zZk2_p#h%R{3K<1^NZc1}Qdl*K`-x2&uH%*LC2=>{B;!G13T2bV9w4R=HYur+nEtP= zOGfcY6STITq-s@&8_p)(Q$gbXudT@tK52>G)($plt=`t_*rb>Bwl2}ydeTOe{hdwP ztvAgUy_E;FR-Uw9YvoDUvt)XeO}e19vS|gI^w%m9msLYdYth=b^@pSh4+)>d{f7c1 zA)x0p=z5s-$0;q6Px@TH6U7!1*Uct5g6kTNrU; zX0u6u=nWRs8_cdZSen-6lLw(2a5gzSgiju!-JQ5})WoXayAu7r1w+-z33>~r>vt@K zO`d`~hUXI8(^2>go4ioJV<*_;du=3c2%EeV*&ngV4-HW#KY^P!oK0R6#V5a{D_Ej~ zSL@mp={;$Ue&>qxJC}p89D=QK*jF=X&Go_KDGi8x@~-ZAYbXh5JE$h_(~cI|s~;`0 z*9xg{(@=-ivRL=N6-`KZo)u zBMf{Bde)T8NIoTB;ZqiOEfUC|2MRv=51;aI!KHnC$}k0LSd+WTje-8KB+=;Wpy_NTD{TEM@o${>k?%SUmzX|Uy&!-V;+Gy`5L&5_% zwBb^Cz~(967apkepL{etkjK)-cRo2WJdlZa_TfE6h-V+(lj-hE2oIKM#e&7|wvFMz z#a;Iv3-7k(lZ|THq~JsI!hJM~r)4cLj8N0&>q<0wh%MY(ucu3Og-UdVmJ^;<#MHFs z^TLBX?bTswT1EcX3wfGte&4e^?MVLTXVo-2cHf70hqAQpMF*Z#(>`02^FB|zRT%T8 zn)a{xpB?1s;=-0{o<5xL^dyzk@bvWipI)z~-_L1Ecz1E$u)S(}2~pEG8+m$_R>PoP z!(gQ*C%p!8c=|Q-meT7x#y|6^3k5!Psq5W~;awIs^)cs@_2FI5vZ+tZ z7hdF3pLT70G`uU6O|5b@&I%9ygH7G*?w%7K47xkt3J-eqN|x)DEYmBwCh)Kt9<;kd z(!+!2^>njm#rW{x>4M8Y@~Qj%Cs&6DaD?pc@W2{R%b&tK2Lg}15Z-w!icdY}$?Oa7 z3~G9G`kdGE!aJJ_L%vd{zT-c!L3-x~oBCmmbgr*TI_I-U=QbRW&Yi|J_EDXy)C=_0 zNM}o{q_fycd{*ksm@B>0%BFrYgiqzjxD~BVZ3CMgr1Biz2?6OGc2k`z!j5!x>J|Mu z1=kqv1;vOIc3_>uf0kW3tMA@9+YHxCI$N#NvA^u>-NEYAo7&Y`Ebu;iT2D0~rH$T7 zXRS!V7)7cKDZ~yqi+BNNV-_8Fg-`vZAaJD(Z`B3NB^FZQN7alcu7{3S-fwZ<_?6@iIKqA) 
zxz(<#H%ZQA3(2{S&z-l(4)~Nv@p73|cGvmkB&qDWtMeV@uT9$C@wzPDQ%MikNAhXk@1=*m z`D8br_N&usqi-~Cqep*;qI_%GrVPb8M8TUV7*wHr>C69)@i? zy~9F}EMn8I1MNr1`6q&cHKTfVeh<|49iT^p+v%YuG)vJzdZfIT9%-nfM*<`Gj6t>Z z^|c7A(Cuyzhym>#ZjdaW@qP0fUyABc~JqgR{(K{A;IDyRw zcl8aWwKm5{f!3a2Gh$Sd!Dpm8C&;w6Ka$UwVWo%5cGIJ1=)=FV8S3lwXxTP;IK@Gp zq=(J=U9v)dJ0{EM5j6G@JCqu5-guuL#RkY33qtvf2VGsMwAR69tUg4Kn!)^kHy5Th zSXQCXbMEeRS|{gWJbA_|sApd-MYKtJIF!xU>J$&rIx{L=3%F79ki~&`OO!!uHW^QNb3gn)1xsE5X)xlg94{TKI0=bWGSEFM{U*;KI2Ddk-=vC z2R)A-ZPIIDLoJ$saf8hyx@N@ys{`0aY~~=~{m5nxQ3tvC%m~N+L-dU;k$mPTj5@0` zlikCY&>6QlIi>6_a(9hZb{j=La~c;?}E`>|D(a$#nW% zZGd=Yjy4z_MU8ys5;yj|@4BiFKi{oYb}rWkp9lDoFO{8ZmZc@|nU6YlOUf?W9%WZ0 zCx2FUL)NYU-&CUPF8(aJUD@SzU42~Hoy*x@mEBp8P_$jyoz|W6fU?`PTiJEF{pD54 zZkjJ2HKlQcb%-l-kP|d+jdT1B8W-tt z?4xm#^SP%fT#cDq@7?s7I!c=AmpPBdot{4gjZy2mqR_Zx^ZPw~=5g$P!0UBE z`$y_bKOz;8tPb~ZSGF+DyQ*c%^cb( z-=T~85RgF4G+_pkE*t1v^(%dl_Kl694@S7dPtpgQzY5<)A8c8&@C%+fsdMz-XvhyZ zn2v_@JIN#((t^2T8lue|(-3X$n1*O`$26o~^CkrSWDlX%?f;E@7wK_NAm2&zO@HH= z(=Fu7^LP!DYuKpsWecB}N3QiFbzM*OA$1l1dE^?lw|t4VMY**|Eykxo%m2~}`+$78 zx`upNP(i-bW4d3{bRgF>9mq9J2Xajh{dP^8zb4mkhEE~6R*L^na&494vn%A9(P^DZ zu0^=MKu%GFnmNbgmdG_T_6n10Ssuq<$+dLnSN|i|FvA8}ajoGW(`Dsti3{nnYOnYh zUDnk3%wD=I5L~u|jwotPsHY<`Smwe7q({xXSD&k@)2FUZ`{QNhJnr99LwF{(foE=X zni7=OYAw4d7+e;vG+kZDBYEa76?3eawQv>0f1LJ}AEpsYS>|!=^uK>JyZ<^({D)mT z5AfHtdpB(2nWyKINj%e|o&NW0&UHbQiu({ennYjsOg1SWn6=Xff2G<$8)}x$}CU=>aYN&G75@B+llU zKk1dP%OCq5&-_iV_GL}Y+ja10#M@gm9d9>kIu2Oft&fNU)%m25XN7ohis3+Q>&V68 zK$F0;(yw5h<3RAL$s!K)%mV?5M=aJ3PyblXrMl$%51W;aO$#zr3qv z*&=z?M>vB=&GO(ZAD;C!&hk;Se$%Do>QZn}Hl*b0Qd;zUv!3tM^UZp`SI;lj^ZWGt zVm<$=`JDxPma=eqhdOJ#UZzwpb4xE%s+ZA%K+s3);9Ap1>$NWNXrtCW9zCBIGM&$w zhq!5cR#6bAi!H5gt-htabJsx-a4BX!`wO4-m$p2SM!=#;YCJ@f8s4Bub#*k!R!ftt zH8iRGb(&OmkS47?K$A-M)1;DpG^yA|lZvWol6fyp%H2bgvUby?^j$P5rHUq*cG4uH zl_o`2(j>WpCea-<@$Pn-IIxW--m=id>vf7X{SC#Mbx5)1V$#xVQ7X%~DV5gkN~LXw zQdw7_R5ny9m5o-VvT3JM*<7Vm+IK0HzTHY?aF0^iw^ym`uU0B=*_6tGeM;rs{fdLqMe4M)JK>J6@zjkJ0=n{~u(-$AP%X0uM{`S@!Bwc1(sE3KZuW;MD$e2rE=&St%< z=hy4sr=9LsXmuW&b97|LgT>hZoo_YqB3 zTZ+hM1r2=GwP-%;dw1KX)K=&Ae@ktrwb!OC%UNQlwld$lJE`pl2YZg%avfOeW2<-2 z2dJ&cIjDl#);L2xqqY)9%2sMy-TrDewKY3JqNpw4d2s`^`J(u&UmYoVbYD!6uBH#! 
zIx_arhiYBpev>|Hc7}Z`ef9>M^Jl zXfbg-G&Ho>tfy17a?9^HzFQi);sl%B>G`xj^xYHjH9zHOn)1Nbe&?;89T<99KB zQ}7#y-$wk_%AY2D= zeYMp#Amo*{?tNRx+fpIqrFs84Ovqd796CYB+i?HNUwQUJT?>6eUbXMSKc#T1_tSVu zsdkloD23OxzZ5Gek*`b2Doz$l3KT1K-aIBLX?0R~(1T-570KDXPEs-*#tl+<-#$sn z@ve80LYsTNn-o@RV;$Mghw$uIwSZOTel3huK`6_v=*c=w3NV(IUFH46FD$h8joB(J zYzRC&j4Z0GBZXx(q!3$q3v(^RJa9-@c!!&Z5_4Y#F$Zj<5Wz4D)9`D?Z;HEf3Mo8& zfE32E?3z7f(J+>M2;9S2_A!vpLkNKopU2rm76p*9Yw-ZhSO%DV0#}XNm{9h)aGw1U zuBzeF6SEbmax@+(`~fL+mx-kCj_=%3vZzJNHJ2i{1e3#L(H;F7L(t~M5j@+4-(0P9 z!GLB1;R(@7XCXIzu$tYeU!~y6gxh#cyDET%RnU*{DrC`h{kjj=T`c?iAw2tUNNU4S zWKmyh!Zfnz2ju;Rywf$L0GpT#%8^b-%M8?!MW-vsqB^aG79pT&!BRE{3r!cH<^?$x zQrM?SUgX8-30YL8jlnKj8^z}c5}y+p&F75PjEhBC+L-O4wD}Wv@Hr_9CjY|c%+rD% z#Fa$yIm`Hn)e>e(4JlGw*-k00-0}PeQk>QC-V7aiSBh(Nq}ipo zCdUh2DX!VEzetL+i+oP0W5{+XE;ve^W783i=m_s?2&M)g_%sAllZIew-YJ>v8iL8E zA((gSAd;P$CSWkr=#!a>0&*7+qrjL zc&*8`t~I18xJN@=3KDSunbDxUlbJuDwUcY{5r+B>jo2*}BgwhwGr(~q?kKnzlja7PsS~)M`Or%NP zEgIcB0NSm=>Re2_;Hn>2eK-+K@}AePZMd$~Q}`G0;tho|rAV!+k-QkO^u{19IT4C4 zqS1kqPsGcpAI-MmnW5R3S(sac`kg@i-bMXrb`hQ#rE-;K0?h8${#IvS)l7idLG3Sh zwpaTTo^99ukY``k%4DZ&WwMjCGTA1rOm>V`COc9qlPziEp39rGanI$=9qT`*%k9CX zAJXMsU&cAbyIB8*mp75m{YYi?>ReCf3Wvh$+A6Op_N((FXYsk+MYKqr`?ZsvposW0 z_b1mEi|LyB&tn6M^K$Ff52<2o-8zGY*ShF&x;(2p?Oeqi6E9HI1mGU3(+kopj^#UdS1EXJdX#dJ)uq|_^x^ajO}1zIj>1)vpyRt#DRXr-X7 z1+5IUEudLJvw>CzS_5dPg9UZ4pbi$)*@8M-P-jaYX#Jqw0_`?vsNXi!ZyV~j4fWfG z`fWq~wk0DUb=!uzZA0C*p>Eqyw{0aylx=DFpq;m&owuQ#x1pW4p`EwcK(FGNw$>vQ@tNlS?qq+Vkg$&oe~xc?q45pP~=_FoMsUqK$prA8>WVQ!0C>UZxK%zNR!&a@SS7pT0mM`MkMb z4mZ+!PWyMz3fe>uT^51Uh55YuI#=CChfpnW>%50?I@7$&RY`W3Z1zU}m6n5Ldovb>92>6}oea-kn>@ zk=mf=p*y#%(o;ZhSzIHvn5(3gETj>}xCNcOB_@Q=``Ak!3%luH^L);t55jIXhx2)z zSfLz-kf!WJ*v$sl*r8!JZLNC`gx#$4j6W21<220Ru$x%vay#s1jH@pz>;{;>z?g8@ z4R2l84dP(8!up#JhV`G{7uJ7*&FgahN+yTYzFa%phbby)24{)d6-D%%-Za zZ&u^C5x*NCp-fl(hrMAp(yPO6Bzs~R8C3dJ_+sUg9HQpT)W%>QmIEOl(1$ZWA5~A& zd|wY8ubj2|=S`=N-PX>8iUfqY^*}MD%?mmYsyX*}FSta97`;mfWgA-eU8Stp8H4ze zr(H2gl)le$R&=iZgg&&oV_6-&H`(7Xjb`}#M>=SR-G4--8O{EN0-Dj}KXRC6H2Uk8 z(~JiH8?Vs}%)b<3uBLD`J|$H`;bJW)TVaYtFk@IhO(--XRUMpnQ~9zuAbdchS3&!d zZhC@f7+DpV-(Q z#gs2IRkDKToNA+AC|{PgzP?$xR_8wYPvsio1LnNvKfa!hroM+}&{22&CmyDwZhPMQ znJ&5&D12G@vLK%x;5p9j+<((Gi`#azEAN)(v#)th_g8~Um9JttC_3V;HY_8%uP=n> ze2p;(y0$?3z9(JaIX`uotaM8=#v|w!AI2c)7OViv`E5Qu$a6#TL&vGPO8chSG>jI| zH`LtN*6066!*BU3zgFyl!1Bix9*Ot6!qXPeB|LXLXVaC>Dm61dHzS(oKHoDtlZKnT z@BEd9Ww6{=L)6@I?e8FbKg-?iU&3j@YW;tqNXuM#8uJTe(`B5=M?;@wxivVVkA}X& za*sIXKTAWOV7YKuAETl1EcY#c{pWOOG9s|kp%FOan})*Y50$(hZxXy~aQD329vuq)Y2o`wy>_)?rjak=Cth9sVC_ zU4t_$Tw1r>7b8gP>YOpdq;-q$-SjcfZED|SA{U8`d{Ec%;xKaQuB+!Pxzt!qE|yo2 zi)9WWj9e_(Nj^XoJ&xgoe2{Wwax%G8?D0KHE-lW>+^*)fx4v#CAK(S`DfvLwqJ3Wa ziRFIgaZMqYvf8%YC6}Z{lPB}sYr$og$t5g5$o<{%G$S7v-5qzy2N8%1Lq4Fsx5LRL zcpNX1i}>C{E@rmY)sRaA3tB%`RiWdB2g#+fZuN6=G0p#`L@t>f38ToxVn@`U$VD6u z=_40&L=LwIvM#o``XHsx-Pu4c23ij;A{V^}$VKe)Ne*FZY_%{2^PN)?m^!vfNXcO8 zc#Dww1XCy2gh@YPX+VuI@k6H0L=n(tqevc87of;GrXrl%q~Do(ABy;yT8tu~J%S=d zpqJZ($)QXw0TD4e)u%vR%GAxEmVjylHI}IsP=6T2)e3d&Ij%NT2x*Y;QI(LIZV@J| zMVVijikNAWE)V0X15~hF2Nj)j0)*W}fiIZ)0}9kb$j>N{!SV*90Q&KSQc! 
z!SZrIjYD=mO0=@P$3gW0kAe!2KY_X!l#f{6GoUnsZvd26S>Ae3azJsgybYk(LAeWx zh2^~lO2J^Bw`C~Ld&4TE0ptQI10DUbJg);4$^t+c3jfaXwCc7%%oi;0XJ~O7qWVCM z1oaTh9|GzsP=8|i;X`=7R^?lunm|3n@+X422GrkKKK5s&+yx~al#f{cG*IBmriHNl zOyFS(dO{W`4wjz{N+l@6S$+;Ek)W7C>1O%)pww0iDaqAB>SCKPp^D`fR11@?Rtpo) zJHL5XNUOufH6g7Wconya1e_}6Op_(zrZ z`!VyQd|cXf??cMR-i|eolJBjp2W!dq5cE=8rHg!D)HWiQd|#ls)ZeFTF7@|u{x^q_ z?{oC;O#jjE$oD=E%OT$du4Cc$_r!m)ntbQg#+CD5!?^MqdL40wdHx=3c~bNLtqo(( ze-z`xYiI!P)-~)F-n(n4xt$=y+z&AK*HF!!SwrE@%wH|>`Rk+kd~L7g8mhT7Yv>jW z`3R#$Y2>41U!$8`Mr5ta`t{}HL_U940-t|6p3e`M`20I@d_mk;zMx1E3`e$M4tYDgrtlZBE zGPSLoD;pztK{leLl9fTtyIL@Kv_OhqFZ=n`_>ZJNBnx?Dpo%<-Ga?>cg@Z6+J`l%h4jPTr~OA-E~ zZ%Y^LHPQu`nipVdUXV2N_hOMnx(M_4qHg$J)D7Q@FnllShVKO!z87HlUeFES3%cQZ zK{tFa=!WkF7`_)^_!b~yUqHG5!}kIV--6rv6%5}Cy5V~PhVKR4@Vx-T_kwQtUV!0y z0fz4d7`qo@G-Lf@lV+@6gqM4CqaR^k@xDdbEZnJqXGBZ_w zh;*-ZcYR8_ZEp8v(v1hU8$k)X%PgcjriOGcW#(D9%tUsYg>=O-^V~f47&ogJk|140 zTh(%NvGwAM9{R$D!3wH_m_TQuRVvW56-U03G`58ka7 z9=y6&ST;~CEW33`Sk@1L!ZQ3_EUUyPSRpKXgPA`>vc)DWE7K-Vmz8qE$HKB=PQAi1 zGxoy?%d&7*v#=~hiyXSl$Z3eMOoq0faPqA1Kwktmd!hGLtb!39sDs|$;7n)XfvhlY z#(L4;g$Fi3@>*STi7pu(7LxH0Lvp$<*`!O3)Fo3${--WE2+4m$^Fo~N0m(Lj7slw4 z)5CaSye@f7m-$=ey{4{(QObMTy{AKqwV{upix1Hurag2>q?Hbl^fJwQ8O%nQ>9o7r z@YOU-Cr?z~)h3~pchfPEth}3oNoD2TxJ4lqyfF3au-E9ymg}M4)0N)q;`ek;%=Hit zU3pdid&za?SScxRU9pilM(^d3B)iOcbvDUHw8J^nbKwh;U59PkB>M?gIKwf>MY3B$ zcwx5dvv`ud+|?IHvT?jZVS)3T&q($by`T?&ha?+|GYTJo2+7(1KaySQz3@57PImT( zknAi+w1dov&_U95j(~H*TLdFL;G0v426;4s7e2F~?O9&)8g^g&y zi@pm($QnFgg`cVv7@Z0){AvU*{4tyt{?o%BC2O@(F(Dfk4H!`Av zqH-uO+{2+9ns9i48U==XJQzR$PYyMr$u|Zxq*ZJK1amC7?BH?k*e^Av+Tz04%fSX(R4fIYCc zJiP-UZT-4zVQX4`l5|FoF9WF_L$-&d`Yg3}^D67%zX`KitgG(I zSN=&iYXdbJwmE<=xRW6ArFO7F1(w>;kD{R>3CdDCiKzyX;6*oYHE?u5lCYU> zPM*a!rerqz@-0H$M-=4%aKbXokKY5OA z{)s|C9x-<}XV@>ri!t%yCp>A@do3!d&5vqj-fkj^Y*GIEq*J4I*CQjiY#_ z5*J6j;*JX?ULm|Kzv9T-^(#|q#mn%AdwIwpUPeFbW%$Lt+qd{TN zXn;l~H27NF;^j2{1i74meI4)l4~kIseX#GLNbZo(4|*A&A|2wTF|=Rer4G~)@lu^~ z98a*z!X#}_3g5egr`V-&7j4iR%3eG6ZYWX ziM)wv7h9Ydq2$d1-k!f%&PPe!H1UVUoAoo;7C67_CvUbJ$eSITkB(}$EDCUmXIp%& z%gaeFuEKhfHvs3PByaG6OFYSSf9w)Qj?_JHIY@Ffk6m6TM;bnTEr8_u1Qouc-Ll?^ zZ3(fx4|f3#U0gfl{?bY0NWMk*mgHvf6pvKVEwNph!6dKcUh@wmudREzk>uueb!3t} zRON$sKJ}A>j^-Cul0$*~W$aKSLQ$LuoaqQ!L_z+mWQl34&Czh2_YSZcIjD_ zLV8}6kZ!6G(wpg)Tc?Ed9=fINw2=OUZn+CD7DD=Xosc2a3mJ}Qg^Vd@g|tDW`!n6r zjdY!npdXft5Yl0`y5&B;>V@>O9{cA7I-snT;xUwz+B_+lq(5)>$rW z>(apM|6*HT*_3dIZOwFITQ3DV{m!=Dn8vm~3~a^zm(Q_nWIEe6S!UZ@w(d=5+vWzo zN7%N-;a=C+w(!rrA3FTw?v|s+*|r!yV!bVO)0w}rZDaQr9VHveKJ|S@Hcs@sB#`BI zyUh29|45hDCbGP465IBR9SgQI7ynz%bhLPP$eGSq0$$G4XzBATST;}JEN9|HI@s0h z)h=fSIoF~g~ zY-)bWLJ}tP4ug$mPm{x7-Tis5k;Q1U0?6Vi6=bn{CGpBNe}9)Oc4aQl6E6%MFLqS1 zkiQ#muIDkc$Om% zXecyKm2fmDo(Vh;o_Qvs>E?9LENHD&j;3+C=NeA;)In3^bWdka*VJ*kW&kPOI88Ji z6$EZgK-XNgOy4U9I3lJTkZOwS#mf7}l+jwVCpxi#2hE8SQ|0>^} z(%lv%e*h;Sp|kwSc5Uc<9@;*9|5&lyo`*;)_=pI~r8(Lyao;ktUVmB)%q*mohLoadXciI+Mg(B@ zA&xKN_#lo35!|8e3T-l%N#!=utis&0(7Yy)xQ6t7%lt^=VSg^=($oMdk}Gspjf&0n%^*GNGCW)dHxN zL)8S;dZ;!+wFP>u&})a@UFg}MHvqjM=#4;c40_|xo4BN!CtOy|bG)LOH|4x)p6dnG zJok&Ld71{*JfB9@Jl`hOJilhuyg=xM(a_^iO`@SDXoEiQ5Dh(r2u2!Ok2q&(=y^nI zLA0ARv=K3J+1^29T*Y}m)6n-XtL8gjQO%!qUNwIn4ehv~n(up2HQ%p6H9xRXH9w?D zH6JBqYgWxqyQG?*1?@a&=R><3swSw`L$wjAEl>rb7pfglwLx_Ns)JA+f$A7k$DumW zs9GR2sTQbd=s>e-f$JsJ0{6?R1)3|W1>WaX3w$rA7WiFMEeLE-EeL_C4ytMo@2=vCFHx9jtCe=crS+&sdl4_yz zWz|C0E2@QV=T!?e7gP&U5z2Pal=G@Z zDBDFS+eKb9^s@%lBA^!my$I+Gtrl_##eC$79FuDlnnyce##7p}Y)uDqA~ zWtEraiptCTyvob>g38P9qRI;u-U}7pO9wsBy+HQ@-3xTDLKUA3S8np35S<(URZfFlcdkCr{U7aneC1X$?hw?EZ zwbb#lYN_)T)lxKPOWiN1mTE4lmijcPmijiTmijfRmIgvE1bRB?>7keU0t;IQy#nZ! 
zL(c>~z?TBPv;}&t&})a@U8HA&-T?H5pf>`&G3bp$Z{m__8E)xij#pI6a7!=4ExpYB zqG}n=dl}Aq8P0nd&U+codl}Aq8P0nd&U+cods!Cr@}O4$y>jT8pjQvQM(DLb?*{a4 z)39w&wLx_NszXp6f$A7k$DxXg9(Gyf4Z1hz-k^Jf?hU#(=-!}vgYFHwH|XA=dxP!` zx;NwumOdIQiKg5C)9#-KM2y@@L-A97yh<9I>k z<9t!&gBs_98s~!==YtyOgBs_v7RS)E_@HU=(czdL$I@^NO^Xkj79TV%K4@BeP{Dn0 zd->q@^1n6yv)F}9rbaAUO)liR)cJz-9P4HFmUZ zh)FCgNegdJVf(qs!E8S*31ne~+%{#lyN&)P%yzcgQDL^D`GZA*>yVXvE4bdZxm^|J zC?T_3xFfXLWj&{!3UkWu)%;7C&ym|e-cxn|oGITE9; z!t4pN{UyP5l#y{^suvAA!)Hx|<+I7_L|uKUs6!t{yr^?m_OUjRZc3>bxu#N#)R@G` zSr{=8MH@y8M8_$Z86=9EX;=dvuNJFl*d+{$i;j_e$XYbguxln!)Y35Ylf;W+G!47O z$Gb(`-S~)JA&Ox%><%CG7SGTyMpzhTuuc)hYc%Xr>^xF--lJhZaH6B25CfF-E0qvO zB_vo0sk=YDS9I*?J#s^I9K0u_3K%{P`x~zCKl#KxFWl7s$2(ZPn(21R`&hkN{inUa zwu`Wh5j>{R?bEmEW7ziDL7}&`+ZP7MXxaAVUpfC#@YH~NlmBV0~Fa;kv+KVX{6gLHf~4Cm4WLYvh7X$ zkZ1np`Zl)x*5(uY+4jy&oIALQvz`uwtrNU-J;#d#FX#K^LxShCfw1L*S4hu^KEccJ ze(7bw^J)L`je@81y{0}v_p{}@1y4UEZu@#v!X4rUvRJ!g`e)0vf~UYKnhqu0O$lGzmwip}4D800 z(b(?cDrw64a{L9&c(-$)plQ7~b*iArv`*V4%$hK({wmB$P~>q%PAwOXr((@d;drL9 z%`F$o-7AIT0r>giSS9u~kUj9LYmx_V(;aJ?<)JkV^56=(;}wKd(;ZtI<-stzBNPWj z{4%kdM0dnObY*0&JOpdLp)&jeq5hEWNQIaSoea8TH$-=YHqjji@Q=|QhY)gg{gq(0 zW`&@uf8uUNLcM>r4I!Rj_>H}ZhJ{sF>7mPAKUS;YxN>wjdNel6=99g z+4DeHt+5Ui`b68 zTXLd>RTE!$L6$YbGiyEkMB zOZ1(+yX1>E)?rFd_#d5Fxc~Z@GqmBs5(|%U)`r7~#7$TUuX|r=!}kuo=p(EcnyL-2 za$w=tKJ&52?}R|7{eGEFSW?j0A;@o|xv^-&tsCr1S@=M}rtRABuln6~3a_^E#^}|S z{#Po6R~w22$Fy^TqhF06ju-{8yh0EY`e&>ZUJ5uPtZCO`$=vXNbf35^zuRD5-$))k zrs4l*ee^223cLUG9(g>vp>GL`pn*3JY9poxy#G%Y;kKb~g*IZb<)lA(TyOni6M1a% zf7{F=)^r`NA&;>jb2oXcw|+4|9!J`-h)oVGA|7M%ux zkRPh>O-wo53@B>hLlx31<=T(Gklc zo^lhiNN+9qJBv)w3JEOoG6o>EkzcWXV!ujLYQG9*C9C|{6leQYZKv&54k_X8mG-MD z&EH?QU!7$cZnj^YTWY^jqa1;woz++vaiyNoVf&S-2!~P4%KX#ztNm~!*nBv`ex=E5 zY_VTC&NWtf^ORQ(E3z+(ZM0tMh5xDnV0xC0x$8e z$L^g^AYZrRaKNSj1Jf;l)N$`@4fz_#cgfeO=w=~7H>ob(K!VVryhWLvu%(>`Z@Gbe z1Jz-u!Ix|u;XzwR5u{8=*!oOSNyEO<)4W9o36n6l`0;-2Ev`HdK^QCcCPCebCaUok zOheFRD0}FU-hycex_x{vXbkXSMH+(CuSgw`+G()|JPzam^+Ek9qr9u zMYav|=^eV`c1%~#r*~{orgsF1wPdT_NVfWc@wA3)okMk(Pmpa>sjdY_uoM~=s>NNn zO&LhqrVJ!)QwEZ@DFaE{l!2sesjv$n+nTBF9T%p%i6vUdwzfG;_c6a1Sipevhd64n zGC6E2`f+Og-j@We9o2nuL0GG%x-TvYYp2mD`vzg{JQ}sQQCPcASy3O}GxHEI@h##tM6Ma81tb7fH&L$-*8 zgMl#yZ4|ERW3iyoAQoiedWi)gkOCCR57HX^eE1MjfwIJNKEMCPd_GPf7HDwHeUdhc zUvaU(6?@Ju+98c`umS0yZ=9`RSegR}}UKUaRLJAmI z$`3~9TCP`6z=ygD$`M{^5)0?iXlloz#Zs}*nXlL$J-t>ectWG+8pVQ7X!OFq*O!U~ ze13>%;PXR7L)Huy?J2V8RW2-Ai((KBe13>%@V2}@O)N}@x4VL=frP4<8(k~>#Gyf;IS#(18xnYT$b4F)dKff#$ zPC>4YIBTQJ%y!cx!+`Bts#JhW^f###L;Jg>(kw<^kxDTy_$#SYj|(W3>dcxqq|y+S zx>OoaDHXLV)Vd9Q*I9JUh90EXXbq~C3TAco3sOP-y*-nq{8a1bDyi7fGICTZ>bE}9 zO2vXvD#GpafmAeZ5vru3QS6WKBSnK<-PMu-L;Gf_aM;!|BIU!9EBYP${%m<*l=9cE zfAlSj?yz((mh$Vm?|4e3KGp}XNJUjN`jZW?jEo+#um-6J76#EjeztP6RATya+DfUU zd|+FoRFZEy_l;CUFe{Kn|3^96svK=mjy5P4YK&hr!?5M+lTu-QXTPhIA4+5FtdC|( zCBC-S^HTBf#@Q+sm)~VE?v|9lO2uvFBrmDZM=6i~jW%Z%#<$nIS{C2SI*4y&idTZ}7`RxEjXI#;FwpI8S zi+TU?OuZ2JG}`47i@~c?w>GA?Ps|e*`}DpxDlDF5``_EbYfqK!u(BOhwuAQzR|>BU zaJ%8x+8!hi2%%#=by^{G2*WShm~XmVT!hf_^(0Cg^CN$3#{9=o8|(Oqi%AHohyArS zc2=P935%VtSX=A4nPB(JG}e=wV8?nZ#@8mr_`174n#rtVV28ILPjJ}eW*YknXG;*) zy0X{}JvDQMNH`*{5~AG{bM8Eta|??zVH++i4peNzBZ)RPRIv^3P;A4!v7gROv||q| z=GuD2P+R0?+_7bf8Fv5-yM@I%#mIY@8-G7PrWk*ZD8}D|ib?l~V$waJm~;DK!nlwh ztQd7`>{)D$V$|KL7dx#!WiKiFyG=JN|88EBlhV#=MMn1+uirr~$F zIe6SliaB_tVkRy?S!QlLN~A_H`d+J;d*4+i*qf{lm|*X=X~jF*xDCUOUBdRrja@5Q z+>WrdbF^{#VFw3cdl{Nq@@G@T=3^h2Zi89Zb=c>JfFqAJCDVgwcmTNxNn|galc!>`?Kgh z^amF2;Kt%z-C4ZvTx~qw>`g2_E%53+7Qb&()1Wr~C?99Lm&RX{?MJ#0Gn_Y9#eCjjuHjn;nhEoK2gvk=R78 z*o`}~pLFXDq&pI;o|0~UF-W&N_GjTY#7EUJ{f&opSCZ~vMXJYy9@-|SzC3-)d&jC4QN@H?@kfp0%{Rzd>S 
z(KlFp#|BrbjqhDY8(91|k7sQcB8C+mM@46?BD*Sb@CGWg_#XpY&uSAIdvhbmkK?_E zlE{yPaGoeU*KnGQKBfsDogmYHp$Xv5t%hwPbXphD^7k37=Jw z(GC+CJwy}!a*~Xu9Ver@V`Ma>gp6v6$*6k~nchPa{%s(mQ2wQyz~^+r??y5$jOy(x z$+X6^WO`l&nLbix?^|%n-nYEk{?*)j>tB$bQ9~`^eoj|&k_0w)&J!Tp=PT8sh-f=RR41wfzx9B@23fEripWE2(6}xYpVz~ z(8S$lgHn z_fN@Q{S=m@(o$?)Cb1+pv+o`lKA5MzL-r+DzF9~1j+p6sviGU74dEgCl!~{{k5|Wi z0g6tmaujbd&>X`ISF-mR|6JYY3O`U}pNI0eL1o4xse6Yih$VRjsKQv%E1RZ0(Iy3S zJ~tviGI3MCM|FxOUizVVR|$tV8ie27;c|^7ZC7ILRAOa+wlYEfa-e_G7xI^6SllH) z4jxu5lE17R7P{mw%ZBC*$d;C#iVcDsO_PrB22&2DNk#YUzLG8Fyj4xA7znEqHU$pa zKM>+mc?+9#@yqEm1WBW`uA0s%=Y;^%qv!u1e|6U?Y!aL%h&JhgB8@{Dc|4<4aCLvU zAw!7m{@kZd2=%)^>9!E+eecqJAymV=BtxC=9d8vvMa32#UjN@_Nk0XsTC_<&cax8W zmz_I_NqDJ^-!>ome!lc=Vbu`7c|I)rYRX;t=8$z-nEX+~){Qe*vi*9u-P&a5^};Qd z{DM}PtWDMg3fEZjO5TpY(r@Oj$J`b7V#Br*D*2l4y>kIV)YSl@Vab@uo4hetXkp1= zA!3U*IsOZeGU3&s4XS@=lQZtk4UzBE_ey&CPQ&KuLkz?1+T`-#$qvG+nV;D`l<(Re z&)gv29UWG^CA_+}Z+C+5l26xLUBa3M?)EeJl6eS_5=-&JvNL4a6!YBa4nuyH*l!(%d~LVhcNp@qHW?g-IM89p9mWX`L$21fXB>u{ zEt8@ghD7WA@5mA}O$oLR{z{e%cKQdCrOqmr5^iZICrfTvK6{HS>7gl!md}dFlE(F) zy{}D4?d(Y+%NpU@n=Gp{%k^Y&hyRlwwJ8TTJb06(l$if;iYy-Nvc5wWk9JxAOO|-| zGua9+EPS7{Xrn8i)`^JCM*N4qwHGLzmtNY*d4NUDxxzPE^YWjM2m+fUC z&b5o(6?C(d`5(BVSC#o6Pfy7gxc4P7O|fiDQ!E?P6wAgm#j-JtcSMP4m1d`TVwx}i zP)zf&?3*U0!S%*sF?EQ4E~X9`#MG{d^(lw{FhNM&Y`;;?b$9FIS?wc3_`^r zx@*oHw#%bbsA#6U0!%{1ZMrMimF?PID^v{Q|4tR#m5EPoq2MmwO@sn>yr&2S9(ZRF z3Vd*Ng@QGF`rO86R*%1n8}Z6u5jT#)S(v!71boj+o>+nl`slzXgCWl{IoY<~wE^ODGN{9dZk?wkkZ2Taz zUH_fKQl;tI)YMOQ z@aw4M&leniZG*=ehhID3uEyclemJ~w_;m#SZybJIQ|j>BEQ7;u^JwZph|x6lh{@qM zWahVaqr-25kbNNc<4`WNGH7Zc#OpM*1V?WGR)GN-r+i0r={c1>b7iTg4ML%#Q7D9? zzCul@Q0QwC3IiE&5e%-#{S_x|>U+Jp_vJ5Jtyrk)tFmG7E%~bkYwGKQ^SJHZ%|f;( zP5q$rUa!ONHrpJP!|wz44pWEU2iHy8!&2{Z3*OXD{@$7TP;37JOZ|?w?ZHF5wGYM| z)*C`_$Iy$Lgy8nxeOH9w%+I~CZ5aq(D+K5AUW8~fO?~RZQvZd{95H&(g{3_=gQZPD zYa>SIp=}YP)4JOaiqZPc2VaQMI?M21#OM&qrgkwp5G{fjJ&&f%wr;v9M*H+0DHo$P zoommC(XQ5WO=7gO<;fy3+R=LXS23D!R+O2hxq&dBv!Xcbk6^7uiz7zWqXiSA6iSpr ziBc$03MC4ZO`HzNB5s9gwu{% z!OYSo>{z;;?dso!(|%pHCBo^z?hoe*r$Z*Obb0;fuG;iDe5~teL4RzbaI}(}U>_~- zkIfd2^2L~hqb*8EtHjdRV$~UKdQkUAvxQ@g-G9m!jt!#D2*+BwR_zdu4fC5#`U9VduaMGCzf8-dGA+Y`E8nh$$Ira!t(1h{o2O$CE9eXs*!%5KWEb6yD|Mc zD`SFA?CSi7u(h>6DpAm-;`q-*n_+*i@Dssj0G{l%8Pk;)0iS#4XA6woEdx>Uo0Zmq>@T)tRtz1=J5h;2iruq2OrJ%||ZipmglxoF_znf&n~&*YB} zc_x2+$TRulL!QYWAM(sfbe738kuz*l+4KYQOr1e?@kZImGr1EHc_!+#x9l>8&;b1G zXa>44_sKKeYh{-kmGaCr_${XyRwWSMNxt$-N4(6+Gtr6EFT3awC(@i8DZ5NS!lMUa zHb@PMlm{sjzYt|_O(!0AT?LkhFNj^ami6gkSC(bdQn4%5>f|JL>3I{~g^xgs*cHql zj9tFGleNnmXC`)W6Hl?rjaQ>CSN>q^nu1Pxv2)z~{ZC@&D8{G7PPh`yu$k`|#Ln)M zVrT1Vv9kra7CVEvU);`mzJy^XOgDSQ&ax@mjL(Y2&b(T&a}9oTXvSA2u~UPnZai75 zlB`up*2gD$|_e=N5@45F!{Zqd2 znC@PtCG~9g$_KmDa;v+{cKbb^d04jT9?kiy+|$^%Ctd!+?ZGOm5UA&)ae=vLI~<cjQdJgnHK6iUIEp)BL;PD z|Gn^g>Vx^pu{3BUD3Uv*SzO9(fldQ+iBcc*MHu=OkVYP#3Yr(Gq%nr$gXYQ74j zw&xIf9MrkqGuWPDe)OP#IHUOCTM6-QAp9is)9_BD&i(&MJZEc_lx#S1w^Ds5k?yHS z?NJ~0#?EZI=X{O&(A9GF!Dltdi-*}_UVd*AcUup0e3sgSVd8Ki{3mF{eh^^;fLmp>{9l6GkK3=D)i3oG?X zkB~{}9ST{uB|^LBE9DDdMEL^9Jgd(6h3>&A=VZ`5Kbh1yKhiz_sZr;QRH}0x(>=eG zsB>&*)Hxj$>YSVOb-QYHPRj{(PGhY)$5gD&fs?bGf>L!(-Z6Df)^T-C5`EplsLs)q zsdGZg)j5Hu)j57u>Kxy5>KskII>)t6o#T8;o#S{?okPy5vwxwls}1Vx4EnlkQfL22 zUw5rhXM>#mn7;l(i8>qPY>=}-UQn&h200t#Y>=}-&IUOfb-O5>q~I&tM^W&udjHJu|U1I0S$$EFK;N+dwD~l-pd;b z^-J8~A{hynzpB z$s729mb`%vXvrJ+fR?-=)q!Py(Y^VUyrB+1)4irDdBar~mOawD|CzXL97lhi!LolB zS&o`N@N;JJ5t*&)XwE#`(&E;aY0i=vEN3;22fDDF5dN04mC~HVfY+w8oSo=r(&l8t zT3QTx#y<>##-KF7WJv8!EawTL{$&0kOANwsMCLHOp{BnD|P zm?;LWpgGUbpC<<4`@t**1x{f(zwxh*pd7k)G9UE{%HKHmIktD|ed87(XwZCgmVLa& zj_sXQYafxR?7185BOPgdgbk5oiXo 
z?Biahdl$Q~z01wBZrH~KLu(%0`-)j~w2wm<;sX0PJGxh^99ydd4nX6ovTvKj_68_P zv?==?ih3nlA^QmaaI%lUD=@QM&1O_+#*}ut>a_z+XW2hCUG1h&O%Ee<9xVP-J&SCMY#>mrtT6 zcROaVy>TMjn~t;E&)*O4@=5gMZvJ|v)9x({Xw71KFQFMC9RdD{(QNNi1E4bmslSwTticac z(>px)>*(D)7q-u)WFeBfx!?C0cXR)qGk0_Uo@jYu5Z`mRWBVpBo~zw&Z^!ndX0#5K zimf(2V7UKa$D=$m2)t6VAR-fnC z{)nk;Kf2WRXG~)I3tZU#laty0^Y(22TaIjhs}tLQ+aTVsqx-EU@rJ%syb)3>-k3-C z4;aN8Q|bQCkn|9g|1q5%aFy7BWsdBC-*fCh_#}29(T*M1J(V3e;=&G8%wz{n&teCz z$n3y-4(z~(Dt4fI4mtI0GoO}Aji1m1!(~#VtyF3>(*r-0NR7AYfqxcDjW_9m zXGK!ub$Z}8gVfkebDuLwP19(us!D2_Lvy8Csc9X}{ljUg>1CQb|CH3Ug66KMk(#_{ zuJ0MCDVpZKRw*^9Y3{m{Qj;Cc-FQN3{DtOjJ}x!>OmjnwQsWbv8*xr*%AmQiXQd{6 zz0?#^Cp86DOHJ;_q{gudsd2FToFugk*{)rX+H9Y!BU0N{WCHP$kcl5@?yh2~$v|@t z)Jo0B*_)@OX7CKBq-O9=mPt)#Y3?ax3D~n|q-HOg+lVYZrn#4qrB7&X%L%EehvvR> zTxv4Y+?z(J=_bu>FO{0AY3|48q~@35qNqe_GSb|Ea;d4A=6+QqH65b4PYhC14$U1q zD>eJnOU>?eQnM>^J$_7TYFlqNo#p;xgTpdyUNtJabf+DaUAogcgXLWmS>AOQmiK4W zH0e$rD!X(ivqWl`L-WigsUe5vJ*biz4$-_%ilv5WH1A2R)X+@x#!gEOXKCI)PDu^b zH1D?>sliAOK6gfHxJ?hLDy4=DdQdtkH6+o4Q%^_@(e$9paj7AU9(>*?HLRlt=axzh z^XNg3b5cVOJ^06RsbK{@_)?M7pr!}c8l(n0dT_&8siC7@YG|#K8d|EQhJs^KLtur} z;9Dj&cuZvngYKPsTl%2!zA;7mz;wU-SLuWDzA67BeNX`RU+iF<_3=}wy#)@zw1+4! zus=KU0{hbhFR(uia-BoX{ODmdzaSq^@C)+s_zZS9M`VYOy0F7WenCEN=NIJT8|S3= zUC&DIJD-u>cdVD*7wV*26SdN<@zc_+u~X8mks9gNP_=Yxph~)BGfB5PDy3WPC#74h zC!|{~$E911$D~{JM(LKRLb_F6F5N09lWygeO1H8~q+4ml(k*?F1Rn^}t>AOgt-!O= zEx$9;E#G?SmJc`@aNNOh1;-g2M{qC#esjVk-5jr!ZjPOlZjPLgZVnxnZVnuiZrY5} z&5jD`W_!7Gv$agR*-|RqY%Gy())z}RO-0hpa)Wd;e|;olhif-D*0RF`+=1HRfAcHy zCU>BweKSpw2e^D>iE?!um8&DnU`O_e>mg^P>w)#s zb-z04x^Jy?-RHD)U2{sh?p`BZM^(J;TqRw1G)dQmO6l6fN$J}73F+F{ap~H~G3nZn zQMxuzAzibTOV>Keq-&_~*IG-YYc0jnwZM4%5R-Jx2Wh$^O;@Dpj5HmQrcf@mjF(9*W2I8dNQu-kR3x?73{uNoevU`# z`8ggHK3lm;crA~+%|5yVjbbKuIwE#Ze`I?@OeZJ<+KY;$qKkdZ7@=rVQuS(D0 zU-_q<_*bQ8@UNA2?5N%(j$uyo*nsW5CE{3XtvJ?9kM1#wV^#EM7H@LL3V1U-I${t< zhg`Hr_nDXcDvsqTVK5aLt-~Z(aSWQ!Ny9$gh98S#kj7x1GN!}RMjYeMM{x{KPi$Qg zitZRlj*#GIa&&_COpXrlhJCb!_eqZ8@q^UHaSVPYj~-R_@JcaSrbu~^GHb3bbNoA3GPp!j{@^XzCfe;xT= zGl?BN@4}8^VG^_Wy(51)`CgdHj=n#I9sL-ymc{Qn?Ag%=JYU~6I%$s%8N~0>cs{bFs&b?qp*Iw%9#Z8O z8VyMm_=Npb$G)s6k~Ykz?WZc!_R~hy?C4Jq18$PEL2ee6>dwt_j^g2*28*s#A0tUE zs3ob5HaO)t_Tn&;sK858wASGB6 zrVgmI`53576zH)l0;QYs|W z8efuD2FWC8^H(b2t^D01tsP-^@q<5rboc{EPr!fp14xHIfb=;u|Fsh&V=B#GS4z_1 z4dcNctGO){u;r zdvBGIjJy0@C`0jbli`c_@kj?g`123&cc%=+%S}cu_eGPT_@c?EJS|2v(|p4x{_DgD zWy$-9T;8h?feLOBBb2VE2=~s1Rxw=ZfC|sF{7;k^;mTbzM>yMQ^UE#6c4CBJUQ!{3 zKUKC9X81M>FSD#YC58|1PO5Ml?;?i1El;J!QrjRo)?BpPu?Jr<2w{7y0IpJVy0Q(69P)Ez78#b>y>PbRbcaeJ2kiz6$rcVY#y zNvvor6)Wm%#R`~{Rg@dVihO%k;M#Tfft=q4X@MeLh2*11Hz2*LNOvIxDiV*eLy^iL z=@qFG(jG;sgOsO84I8H9v4UdDlcVza;hy7a`P^Mt3%6LXZsMd5Xu*%xX+^^HtUfGk zF+G)^+9^NYAe{0wh^LeWsRlP{%_u)ogT}6A5WfN5G}W~5CZmSG`HD3*gmz5Q7W_lm z!wp>1>Y^?99sl*bv8YLf#BT?&CQ{j=fvWLVBn>1t^OlYF+cfJvT3MlEzz6SX3th4L zf_)f&Yqk&T=#d@m!`l2`y{#==5Rh7}Ele|zd^B7ABtOAG3JtWde6#6Yo zF~Ai-VciK*c-2UXMmyn%sK|rAcojAqgyI&XP~2WC6x&LLVk}BuJi0OWuC{R4D3ts{ z3;%8qO2%u2QvT;?5=y5af({YV5Frl{umVzX9m4bo%R*QI!b}KjKv*lnIuJI1un~kk zMc4$Ab*vRi+)ITLENxV>7U>HJb4D0O7)mh0P!fQ!5QOOvrbk#B!ZHz-hcFYuS`gNO zut9_sAgmr?tq8LrY#3qX2x~-GJHoKMeaWanD0Rm|sGi;p93?VMe5Iu<*(xx)Qf#;Z^0QVMb+1)f+8Mu{0t z&0vP}yq=`kxND`IPYomm?e9*szdOn-KCh(?n-6!4I$T#8s0}1ZnIf8g+o{#%FyM&3}4tW!(U)OFTD8@ zHT>+%3=?Wr^!#*IwAevg^oqoa0vuQo=5-gP+Owh~7Q5*}UZ9EXz^g8XdMg8b4 z5pq}1qG3dWgD;Dai-`j(SkbfTtoS)KD|T_#7Q<)KfRO9w%!-#fu;N$L+TwsIta!Tx zD>UTl>{xMvd6u(q#7K)XFa#zX(%V{o7LG(hJxgN6$1UI77V>Ti|{5YK)H~c zHAP$eu7xZTa!ml;HVfUtp>c);54T~z3voXI>KnxRQN@b?3)>BC$s{!^k<7QB7xHk1 zC2ovd6%G&ZGd?_M+3^2_+z~eaTj8L)R%m7=7+PN`92zwVd1_knk`~StN;a6uZ^D~? 
zmSG3sh=`;@omolZbXIZ%iD5jvOS2+7g(vOcD-G)Ch;*C-a}e;g-(+cZD}SnkMgTX`H;Xv0?Jg`iqlynl4P< zS$=-<&it~;JM&5>@60NhyfdwM@=krxAPXs>^{6z2* z!A}G~5&T5(6TwdeKN0*y@DssL1V0h{MDP>APXs>^{6z2*!A}G~5&T5(6TwdeKN0*y z@DssL1V0gcJ@|U?_2BEl*MqMIUk|<>d_DMj@b%#9!PkSY2VW1q9(+Ccdhqq&>%rH9 zuLoZbz8-u%_4}LuO z@!-dU9}j*!`0?P!gC7rmJoxe8$AcdaemwZ`;Kzd>4}LuO@!-dU9}j*!`0?P!gC7rm zJoxe8$AcdaemwZ`;KzX<2Ywv*ap1>+9|wLM_;KLJfgcBc9QbkI$AKRQejNC5;KzX< z2Ywv*ap1>+9|wLM_;KLJfgcBc9QbkI$AKRQejNC5;Kza=3w|v4vEavo9}9jg__5%} zf*%WhEcmhD$ATXVek}O0;Kza=3w|v4vEavo9}9jg__5%}f*%WhEcmhD$ATXVek}O0 z;KzU;1AYwnG2q959|L|2_%YzefFA>X4EQnN$ABLLehm0A;KzU;1AYwnG2q959|L|2 z_%YzefFA>X4EQnN$ABLLehm0A;75ZW4SqEE(cnje9}Rvq_|f1;gC7lkH2BfrM}r>? zel+;e;75ZW4SqEE(cnje9}Rvq_|f1;gC7lkH2BfrM}r>?el+;e;75TU1%4FxQQ${` z9|e9C_)*|Tfgc5a6!=l#M}Z#&eiZmo;75TU1%4FxQQ${`9|e9C_)*|Tfgc5a6!=l# zM}Z#&eiZmo;OoHGfv*E!2fhw`9r!x%b>Qp3*MYACUkAPpd>!~Y@O9wpz}JDV178Qe z4tyQ>I`DPi>%iB6uLEBPz7BjH_&V@);75WV34SE_k>E#y9|?XW_>tg8f*%QfB>0iw zM}i*-ekAyj;75WV34SE_k>E#y9|?XW_>tg8f*%QfB>0iwM}i*-ekAx9*Tb9RX&oO? zIUQ&aPp?(>@m+G-+w%Aq(KK%TVwPxXk&y5d8=rUd4YC{^#RAK9)X=#D72h=f_>~U%XfQ|HRPt(_$!m_XYcuioqJ*&lT)i zD+W6o#bD8el`4Mqf`<%Za6iAK!DEnyd)&^;HwNy%ut~nr{e|bh<%g+(p8wXC=E2TZ z?zf|*#n$l4a{pXfYE;5a%J#gng=JUi`>-jI2Xrtzkq7iJG?53=V0$7DrPC9E7$z9Qv8Lf=_Kno(@fK}O$M1NzPyVAI#A zQBw1U@=zn+B{UZemjc`dq~(J{~1Db;>f@<~A=P-Kn!zhXpEAnTm%m}*e^}W8?#QgH3Qo(3PZF!KslJw@ElaB?Sm5=$Ik&pS-%g21`x*ThsYo`K8)PFK*Ote3Z;lrh)jP0q^f{Fu5VhrobFvY1Bj`rZji4JrH-c^i z-3Yo7bR*~$pjUui0eS`K6`)suUIBUq=oO$>fL;N51?Ux^SAdRQ7P$gfyKl@W_6-@t zzOovzuMPWGv0ul%Tt0B*UM~BvvP7-;K;XVE`=0TxkiG(Fqw#qx_Mx8i1>l#)dw=?r zvhRz;en|IwOU2&hwEQHm1brF=DOIx1719*^9C@Yf9pODDz3sfKv$s;wL?!LT0_Ww` ze6JT>yS-yB+H(F$D)zQw@2Vm-LaI|FRM}po%Jw2Fy{NLisIt8}$f&ZtsItAYxLM}< zTvt|(jw*||K8<&5uMgl|-Rt$-)@OY%Hx%<9=lx^;k-Ue_f6Pu>-pYII{71NPoqr>@ z((^ZQTR{H;ZVTuSd!lyHU(am;{e8H>pT9FV`12>c=gz;K_uTo5yob&|m>Y$cf27Qd zYDX1&&?Eex5Gg2Aqx_VqQ7_Z-ua&t}S8@0|MX^7}mln z)a%F-Rxz_bzRKRO{oa+k_I`I2Gx6Ln7PSjAT|a##SeQA*wxnN}uIYNoO_+`{cr1(K z23Z`2FLPO}N3qFb8uq6YIn@psoN5QIIMv!NJJohva;j}_cB*Y{a;j}MI(jg5$q~VJ+d=TFq@m&$$ z8S!B{UP}y4rv}QLPEC|Lof^O5bPCK>sO6DRU=Q;$Wt})RE<1UBTv=HQ#JBbjXYH&Pt~}7)!@_*LLnfnNoF75G))SAkyzeiist;8%fP z1%4IyRp3{FUj=>@_*LLnfnNoF75G))SAkyzeiist;8%fP1%4IyCh$$*o4_}LZvx*0 zz6pF2_$Kg8;G4iVfo}re1ilG;6Zj_ZP2iirH-T>g-vqu1d=vO4@J-;Gz&C+!0^bC_ z349aymEc!`UkQFC_?6&Sf?o-KCHR%#SAt&&ekJ&o;8%iQ34SH`mEc!`UkQFC_?7=3 zNnaii#gYAARoK!cG6NpS;lZH>G$s;vvl4eVGI)R{S%^nK;|7dLAcuh@y9>I>f};#D zFbBsl!#M+@AZox9)Fe>DHQ)hWftc(fNuf51^5==TYzr?z6JOe;M;(21HKLTHsITUZv(y!_%`6%fNul7 z4fry8UCB$ zzZw3U;lCOFo8iA1{+r>y8UCB$zZw3Ufo}%B8Te-4n}Kfzz8Uyt;G2PO2EG~iX5gEF zZw9^@_-5dnfNui63HT=9n}BZuz6tmy;G2MN0=^0OCg7WZZvwsv_$J_+fNui63HT=9 zn}BZuz6tmy;G2MN0=^0OCg7WZZvwsv_$J_&0KWwICBQEMehKhPfL{Xq65y8rzXbRt zz%K!Q3GhpRUjqCR;Fkcu1o$PuF9CiD@JoPS0{jx-mjJ&6_$9zE0e%VaOMqVj{1V_7 z1HTyf#lSBHelhTifnN;#V&E48zZm$%z%K@VG4P9lUkvhG82H7&F9v=w@QZ<8 z4E$o?7X!Z-_{G3427WQ{i-BJZd`!adH3HuVd?WCUz&8Tl2z(>(jleeo-w1po@QuJX z0^bOHBk+y6S8{^sVk}H?oG!Ls%XpG5?iAz_p@DcoAWtVL6WN|lP!@B3dTqb^Z{+FK z>Mt4I*+8Fljj-+XS;zIwU(jc*%CP-s3*yL!x}pd#QMBSpPK(x_-oy>}4|BsrBR7n> zxM7GD#W=a4yPg|5>$su4mK&OnaDxkCWvu8IkSv5`UIRDeKspPe84yi_XbMC(v7%T= z$3i*=qRUy)dPs+`q68@TLxDqq*7N+SI-WmS%k%FY;rVx1(X&lFAIM3Ed46{z&+lyD z`Ry*A-{R!?E=W5dZPD`s&2_wdq?VUoU`5-G@bdO1Ufyz;m%AEyxub!XTV1@o(8Unt%M6)284!JbQr9dtTa&T!EM8hB&1W_MW^cJLP9WR@zCz){>h3)j_{iASkbK}UW15@9p*J7jl9Owz-xd~)9K_jt*q#adS2tI z<24TGJi&@4pc4g!Fh~bM+8@#!($vYTr|Nn2SIp>N$E)4U7*xxvJC5*blyWsnxw?!Q zmo)O~!UkTQ=i=2lP{@M9%gnd}3bU9ox}LkT>bNVTmb+4qa92tbccDhP5*oP+d2t~x zF66}(1-UTD0l?)4ISx5`m^-H$xpT6CJMX)=bHd4;WA)rQQpcU1TJG#R!kwK>+}RGf 
zYG&L9Q7c3XA({u#9EbwiiQ0A`tBD`TILr^EHSz-~4g5foiyugE@&mE;{6I_{KM-BZ z4@5#P401t`^M@RV9Ch)$DJRdHtmk?6>o|6x6v7LX;Q5jBi02xz9uH^N{;I zzg zx|taZ(H6v_Er>xw5RFzK3avmGq;bQQ`Y>ZjEw}!Y8LdaSb+UYikNDCPFj;qOlN-fm}4? zP)ZJnqLi#CB`ZqFO6s{~s*YRGUR&-R;g*ReZW%kwEkn#$+sG|w9WAGs(dFV6)Qu)5 zx1d_KK&KEodC^1;%Y$6Do|_X7ax==ujPfx( zz%h)Y8HWW&Dh`yVX>AcVg%xsBPysjj8@P$*a}zzpOQsI;l1V);xrYOKB^F%g;GC)F zM%XaIhA|mOyq*_5z|nyt{U9%j#d$T3$V0qv;vg>^)AK^;7ec4dj-vnvbP6F;Nc6k_ z`~vu5cv!^q3k!LEUIEX~G4TAXe4d|ii07vrZUj`yDH$R$`wF(iKa3y4TH$=T*|Z z9wXh00SJ4M8e*k;9|VcwMIoa2F+H*9S#f+Nu@oAK1y(JstoRu#v0&hoB@maPIIp%! zDOm<7XT%<(7 zf&GJ$+YM4OoHHrwjU_vbQnIyDN-h9ODTWWYS#fJ6sYDPe5roR`Sn-EeQu$X_d|J`3 zdCSn7CF+s$ilDCaPfKhLu!3!HPdcSd$UnRA3R%h=oOo z1$?kfA&4A73}(f5^u!7qf2t%_*s#Ke6*j)I5^J>LgB3nl;j+~g|IsF0@t#!yJxEs)fr*cUs4(q=#l5$uthxJ(K!}@yY7ee0(eOOO`ej8-%5K_u$ zEoe6@dA5?2Bdg`eYWa7pB*{w3k=1QVR`FOM6@aS%Ttzkx7)`;A26rotv65Zjz(5*q zKNzThfeILS1-BpWo{|hW5ywjQ!wFR0y{No<-(V$wfD@T|VuMv1tlD7J0jqk{0tCVV z%MQ3&f?9Bzl~~n^(t(NqpQ{v~EucG5z8NUreXOJc7Di!7Df(p8Q7=}44L&P5svC37i)yTvb>(Ul8;~%h=Pd-~Ed=E)AT^=b5R^*zQwe`6;g3BK%|IwD!p|!3tH7_y z0cZk1cd(L{TTgBzRrfKaj#NGD=f3opR*%;J-9B)8#RWRgqby=OuUq*$Gp}9Qv1wjQ zJb6i!oVfjHKAmTA|1L-VLt)S7E95_9_D!E4q+{UxJ2YTw;FC=>U{YCBH{iaq|8oF_ zp_H6nk9o}{qEi0eWt6{17w7khpff?IDxIF+qZ9ReKZW`Z9i-pg)04z@T#@GoKy|Cs zFsLboN+Noagw9q_7`L-^Co35>k*&K~$>(~qRh8HViEXOHHa}1p?Xw-DeYRt?&vuOV z*$%DkA*^KFL|&N1O70rS_9!da9)IQcXEmi!my3Pnnextgm7?U{<-9O?X8N|D-4-Ul zp2&Fe-|U{0+2p@}Wu{*wynI8L;)bf0%08p+4I9XR?<*_Un0_^Iu0i&h7(Dzpd1k0a zn0`H2bC4{|!H{>d$d%y#o-n=sS@0gR=z%iW%=FvskN#1Z3NGg-$TQQfH9&~kFw9m$kr|L z%y`d9KQcdNQ$vj~J)EO6FH`pZGC%o+-{WLa+tsXtvd`4jclO9L$x!Gyve0@f`UkSm zF&5fP7CM!coaV)-D>=n0vvg~sNv@vF%x{*!*Dv%m=`4#?h^G(>w4_eOPwjD0SgXj`@9SmzxJr$I8dj-pP#r3T9(;Rq zrEEvFw&#If2^uQ}+Se+d7^*UqPYm`z<U z;&;f`-QYQuZr3X|x zOxYp$?gNFsi(9FO+-Tk)ELP@J*wg z9LiE@r^+u>d^pqv&#tJ~BZ_~A5Lf(;sWu-X=VJzv&y{@V`ztZXkF=2dJSE=-$oCM# zJw-PkqlJfwVI|UBO8yO9s(uo%v>+=@ zTiJKxU78x*``Kcex>mQuB`gbv?1ZK!ZJh7aSt9#-5@>4HwbWZQEmKVIqp5|;nii=8 zy+c3HR6Jf%hkNe!(^MC{{E6pe5KZ$RIQa%m3ul(t-m&LtY716-q^T1_m8CQ-z2`VU zI7Tipyyus>?eSm(wN{mv=u97z-W{d5ygN;CdG|fV<=rug%e#jamv?6VAjA*7_K6nsfhSFGLf z5LBMhSGhyAwBsJGt!NQRit=5Nq^MI6Qt(G3r3(ihiaYR7+<}MU4m=ch;Gwu9gjs4# zWalhqabgH5*{KeF-Kh?J-Kh?J-Kh?J-I<07He_eIG9h87Rhf{m)1gdA*x8~?NZ8q? 
zOhVY{QYInn?7(vM+H}}GjOFUJ>ARVw*{V&S%`7LeUcEMb2eX{QV)9xn%R^Ra(<7Lr z!|kiproYZCmjro9o4%Y`1QxK@roY21eVDAEO?O}kdM)k`Uq5a7erCB$be8Mtg`Qdd zh>N?by;N0pM3K!1R5|G?XH4aUs+?iSX`K zTwx7WzRvC+apUXkevk59c7LbxU3P!_V0DqazeQO(Wxq>VI%U6O5KFV{w<=$1_ZKQ( zYWL?U>#yv`LiR0kPV~l~JT9!uG&<{=xW|_X>&A@?^cFXdm0z3CF)pla$`FOut~2YU zO8K=d%-Us;U){{CS2rDfPn15kxgt%JJ~hz!y8K#@RetpjE8U=%U-e?8c(}c)XQg}k zJ+<g7F9eQ1>T=vmoJMRiYN&%}rF z?pUjwx{8(I`+2GtD~s?JWzQSs)L>Tj4mdunY@b0+UCzo*_=~bnkPyzw1_MNyvVx1e z^`5f*T-j&1zKbo~JSosS@QR?wdzNxvYd~YZSJfledOpGdg)|&&GXWgKYC@KQ+vX(E~rgcz8cN5 z9GvG~ULXe-x;Oko4z^Ag<(mw0a3d?=W+ zX7ZJPG5Ly@kgrJpb`#w-GP3<3-8E{Fmss!_L|)R4l_%vT_mx@fOQQmG6#;sAsnRgX zOH(cK(qTnr>7*j_2%6b8`H>9BgzDu-&>RiRk2rlr#j>HY_ciYq4m`J6^L}pMV2fs7 z0k~KZxk9&Y{0&j@T*7buEGl+h&2Vad-#JTEq+fljLGyb%(N(;I!=NLTqM~e|;~mZZ zheL*^H2WtM$^B!Bz5OEtpS-TwKd}C}4Wgo6<#+VGbxgD0HCXn6WC!dXSd8z_x{Ci%4Ovw~ zEBmrP*JR({Z2G6JLfLy*lkHOZYgIZ=r9)LZL#5@b3*VDRFpV)#dTtA|Nj+bgrRM_s zuKZhiE~an&5$UXJjM=sirY@D98Bk4Q zeIVP5M(LS!95KDO9+RGl?3p%KdUmAePg|sCo!4H!Papk}+0vD*Ic$3?rSLnQjx>OLVwJ+aW42pHDXfawCaR?cYnbhCmC^zqR_Se!=6}a3 zgRRp1Ay&CmFU>#2Dlr3KhBWK1tnw+ngatS&b%I`yX0KwE&sn9}n_1;{gEV_LtK4mr z=Iv*duj{3Gr&;CumD0R_vC2Y!QCV-0=E3OEYAJ9Tt86t&UT#+Tv0m~*`iNDU7Q!mO zsFbFyVU^e&d)n))^8bv|jA~}r>ZKV&%pPu)W_-u&j~OH%FJ^zLQt}CBcAZV~xnCvu zOqNPBsX>}Ki`lnTNi)N6xzZ}ljAQm)Hfd%dSXQuH<D{kNnoXbm5y^3&bif6 z*nVcusFcDsGy884o8`=IHcFw_nZ3GPT6BlmPnJrHJXO-7PMfr-4XjpXKLwTptU|Ez ztkRX{tzvcoRuEVmEK)9oUS#&GrBY~1l@#i*Nud_7jLd!=EGULT zF*MC8g+9UTx4}vQD*>#v1}StlvwsO&Q?PX(w#Lh)h5usqA4;W#h{3`!o3wBktRZIq z5v)$ITEKEyrG-0~{U2aCz$(Bw59cf-rGriY9Rqr0wKQMP?Ek5h=5Jx>Xp`ozVvgmg z0JE9nXL>2*Tjq$XmKNe_BkIUk%(25DEpRi(dsb;dBXi{Hr3J4uM{%_j3=z9g3VNA2 zTJ=&;HFJDym4YrZ$Dlz9y1^W`E2W?xnd6T(DG=C!Nu^R?nn4PDhdI6l3$6tgf@QZ# zfvwE(BUqDQO_fSPyj%(jW{!V?g}eoJLuwf3F`Vz&q@b`WDJTkJF%a9t9RI17Lfov1 zRZ1actZJ@73fa%9ke`s9tV+pGFsq6ymjb~2l}!pjbqGLp2tY&vf&SuD$V_jRlQ)7=4Mq%b8||ixrGL4ZUd`&39NRoI>8zMjl!6V3K>AEq&ZXI$)(a9 zWMa;QO`0=mmF5hwsy86!53wL5N0m!+pJG+-LJIksgEE`rG)Qyc_3yw!K<1#V=A>Jt zfbUq~ zTX41$bPMQq&|RQCphrND8>MO0tU6OKO}oLW^_7yeoK>5xlH|jxTdF1Lc~*VaDEVw* z)jfL2r;Js9I!{!8EPftmfCHQiQ8Yim=+Ghytq=afj8sZj%=CDrvEQskAt(Tw44DtNATb z??Y-5EP~`1B*%hILh^1_v#(r=(6gEkU<-93A`Q~XHYpq$;n0Xcixd&eY7QU?N)bs& zUJW|3QkpfJ)tHp_)ZfTzn)Q-@Bda-XmHgpFhe7iHk=69rBndVp)H4b7OoHc^!D<1k z9jq>d2EsRO(sUHhbd=uo=yGYgj@67q3JyqcU^;5xbX4W(2{%W`Gq9Rt#A2pz}Z% zf_5PJ7^{5?`OiT9vylH>(1oC_pq)6k;M@ToaHj)z`WQk8T-*1bE+CGSC2n+MhiSYu$_2kW1# z_K;1IX_X}NQb`UnNb)jPTL>0vn2gFICt4->Wmb#oDZ`cwTQY3PuvGyTG9)8IaxTt= zIHSSk2(v>k@qe*eJmF`&!D>73#9YN{uN$Npn^-LxTp2FNa6!HgJhn~XSIygEwo8m|0+ovR4QpBt&%p5)hz?d3YG&b)Hm$~R`(=WNnpXQ7Iw8+pmRVM z7$v{ctPZa(eqXV=gi6W(WmdNhcU%OkdmDG0j@9jlcsIm75Jv^ne#`0%U?ER_$de!P z!wmg4`XzqHg(`CZ(uw5Cf|3Q{61_9*mX^aMipVX3slzg$`>3vyUm zk^~DaO7pR#!z90#>VD#?{9Y`!O_twVy>8-ny1Hs3IhtuD&EK&)bUC#6u)1y|X&z#A zy#~_UQBC@|o;0`NOd*6$lxB;vAM@cTEVD=sM~l_V$l)05jzkWllcX7&+8iCWlA{me zUb`adJTr84f9%0r?w0#Oq7JKi)%~|;+)WN+{m?klM0-B_jx>$*+)6+(-s_gdUCvh)i1S@Z{VW-wE$6HsPq3p)H^~%eVbO)V_n7iKl+OLsZ#RcyUaPQ zh&1eE&X6GCjG7~yYc<080`^lP&R8RHZf4GRYRIWo%$Yq`=QJD1;Y2GrJd88CjSfH1 zlM|g*a-!WxPP7=v2`8?xhxQ3{+nl(M>v3F<;d%tu9$a@>Nq;;X2kHqp{swcl3ld5C zqu_HId`E{-e*|-$HIV*5sJ_gciuEnb`4J4@t3WqwtcE`3J+~rWt%!I-x|K9wJ&lG@ zJ!uTwP+BFNy}g%eN#DKr3&A?qOg%Y<0`5b|`f`orSmYex3YjZhOM3B*z&X{sXeM#d z;bcZ!{K_AmCN6(vr*v2J;F{^g71Oii7IDQx%UMMFMuT)NeDZ4ne#@wpv~+;ByXU+^ z8n8mzV&cLa{Ub{LBjIr%+?7!q2x>#9o*Z3kBu$~^|_vYF1vK*{0 zFE7Ji@hmy2Q!hthS%jz|=GrVWj>yXrjPkO?av58NW9j4Nq*`8<;*gg;gvfWy^($56 zo-z;ny&w$Er|*Sc%`Bk5iFRW>_TR2_AMd8WP3tlJg#K=AA;pLZX=l4Z+SzTCc6zMR z&XG!K=a^mEIj)y5Igr%j#;NyUbjV00sUNeG`fb>AxOnlMVc&{FKeYHhm) 
z5dAG0;d%o!mWbOG3;s&0^di2Hzi2f`FBa;h7vE(ryxF|yvP&-_W8Kn=IRHoVwsR8T zQxK*|eP)>nz5|k4Feh+D}NQ*&QG}7=udNB-XfjE$f7dyf0Mw$mQBaj^h ze*%2ie-ZX~!Tv7T-{l7<@yyv31xI0P7uv&JN#LiT$5PsrswcJ422zU~r8dDzYLhBS zZHk@LD*O!av%t>*A3dLQNbLe2H%TpSmRj62wG-gq zM_!u2|0{DD45SXneU8J7W!GXFM@MFPG06z)*Wbko6)xm)}I8X-% z>fk_~1$;o)wSbR1tIh-d2>4^j4yhYQstc*@NbN=m8RzO;je1g_fYMDu>87A`(@?q@ za4Hl09PsnNF9hETz5{%xTXU35i@lz(jZBMo$Ly60To|(;7uZ$Ws?r5mX=Ei<8i}fu z3v-NeVLA@XhbctYRAIcbDpKKnWmTlYaW|%l6ppzuRitoOnLJYHL0y##yM%T^N<>j7 zCS#zhlxPK2sPb|PrRTAM*DU#iB&+He5*)eyvn1jAT8FRIuF1=XQb=(2<-#R&`p6J_kxq9GneLJw68cix$q%dh zI3`R~SwnRN`3n+U z2gzUJSi{j8@|Wmx67n)@IAI}wp(W&pi9+%NYIewS*3f>4gh1%LjfA|x8a~P=Aqhn! zB*{!daPx#@RgsYFa`G2{C~`CTVG1~tP#*PY_j*ryu?F`bKkq#`gJC|KlI1)Efdsj( zz50UpWWw++o%duS5e?UF#QvJx$|a(4(K_-+U1Ri+*ClexF;w+)68PZCz8q;|A-bAH z<1aSwN?qg2y@~DA+h1A9V)}zY_HXJ8vN+}nK6)>X_R=*P zdMv*qi$hpr+3<@(T9tQe)i=^Ai??X3G)k*lS>vfnY1Kv6cqv3Q3UnpXxA(Kg+jB+Z z=T`dm7S{O9G|`Cl8(yMsb9Yw|ecOM!X#B@~ahRYhlD-{{K6d(cN`N>#1D&DtZOjJ^ zp>JbdlegP1ACTzVp24I)(6{gPNt6VoswH2|4$n~|u@X%0$@D9_pOT=KtB>v`K^^_w z(d35|RX9_XNm5jTS1P}MQI`28f%l%#za&R-|;ibI`3dj#>%2V-&Xypm{L#V1% zsK!2m9sa2r`(^C#8YSwtCY97%_Z4fmMseqAs&jDXYN|7F=W41maOY~OQ*q~Ns#9?1 zYO0k*pEcFds!XIR6Q;@psxtnnj66QWo6Z}(98pi_HqQ`;W8FTl&^`E6b%E}==cXBC zQG&9Z#-8k+;it)>6!(l`x+fBHn*~Xsds5uqo9HW1J%+#2S7m_@J&SUBzI>PNiAiW7 zy2CHHzy34b!!H;Ag1#Exd;3$mr%;4`PhVNxGjWaX=^Q-z0e!VVvGYn{+=7d`!*3hu zD_lW-B^y+VYA^Dc1z(86`v8pW){gb{H)_Z$`z82p%5=LK(@wEJgzLu!^8LRr4 zsrs3u`WXs8F~JUgCaHdgseTgGZkFn2wCZQQ>L*qGT&wz-r~0{4jfG2%#l#HV;q$8B z?W*6os^594-`(&QFU#;d6I7CFKUDSmit4vt@9hElT3+v(U(?qLls0vqztXiqsuW#b?`CO$ew->0tmT~DdZSxG)v|?TOI?=SsU)S_>fA^TSHLAb= zE$y)Zg+4Z^(iW8;tCFJqgdGTrX;}zsI@pVSD#{{QQ_(EZWa&AvoR+O(O%2!@gO;sfO<4b` z>A13GMcM1D>0@>2_oTl0XXvvJ)^Ueu>WN?ZS6$P+!Ih`z9FIoV^fxT6Et>v`Wwv!k zcI!x_I5N1-*H;|DW44DzY+^^gQFa15@;%lrrV)wjv1{LvhjHPTbw{Vg&G8mT{rq)D z7xa4d)A@pH(NZnwmps6WwX|9I+$r%*E1&zIk(aj#^ z^VZh$d6DkBU-DUHtYz{DpP!qs`b}NS{}SCXFY)X%eD)jc*j!u$)$>^~1Lqs~tjKac zD-4{UC?T1Zg1qUDZ6M-UqLcd%vSY9G#`JUl>>@rtwVwO0Rx;`DuVmO?hEj&-cr*9= zksZ_HqHwV|Ru&D_TF8C0Z*qqP6ylw1dz0=b{y3 zrDwp(N8+(~J~K_sb8sK5UfDXV6|JB7h}L`Fy4D}D&SB114jzf;JhX_*Mt1!73aXsz z$$9B$MYdvJhbr`mxGkzI&N3ZIKe%JrPPd;v;-XvfF15Uy|fby2F&Ggy>N$|Xm#=srJQ?gBSrlPPChqemN?Dn@6&Rc z>E1QM@&7>4K+Qb4121=+hV`s&;WVh{%T`YPl>(yjwe)v6nd+_E%*lg#J|FKcEBKt} z*~vXa<dEAmtW_Af6L{=iyU5U%br2yRL5C!^7_@wLW}UHJGDXb5`=uN$54J>62>u zsG6RqrZ=l-iLjmllt4(Ma6;Qui9%K46g%0i3MGrL|Cfi2iEj#d7~1TxciG8)=!CPA zS8zq4hT%QcG$2QyS0fYF5D6eAo+WTE@(WzrK-Vs@AG zM2mZkR$3*i`rj$PA9@Wo%_2)PuVri{OOgkhPLajg^Tmf@u-(c&T&|j*Qq2cirJs0) z>`l_DbTO?}dLkZGj4ZjsK8#-X*aPw5&r~7wREEc5^a2TwzxGA|S&Db%r^yn$w@#N< z1$m1Pb;^pG;pz(YA8vwQR6#=6uoi3j+yV&e}FAp$kQspk&bP{F81qbg63S&>A3g zH(1-VO6c0Q=t-Cww6FwKyiE<=uo}8tC17D`Y5@Jcb#1>^kzZ9qN7c}M#oFFc?X6Wq zXH`R&i_mo=bbhLMwptJJ)Orx6)&rMX4~P=dwu5R&i7I&nzoPA9C8S|$NZW>0{&&+w z+uzlwWh!+tG*k_A9BZd)potpjBsI{9YM{r}K!>R+{z{-j(-lMQ-fE!R0(9-Ol|Z*I zPy>x+B|}fM_9!*`YVB@+T#a7}N&s~pG0akHH{RBcQaXS^XtLRb0cob5>+bz2{Eit-=9tZwULj#2(Qm{%(QBg|3Ce+qM`^8dmdr2N+~`}JlV zC2vOcnQF+J(cYqcP2cs6sB#HvYLX7S9ksu!^ZS{u z{SPWXMU_ug>B7G2Ysi}xm3H-I93gLFMUQr4Lfd^^du9J%6nPVW1-i*w7*gHdGPv27 zycMSN{fDmoY=6%k@>XHL2qbUa@9%CVd-0FXN#0h66uzwvDSTTUQuwwyr0{KZNa5S* zgGW=z-n@bH2gqKl=R+OY>%{82WbY7b@9rH9BYQ^&YtE6kt)8+i&CN-d=H`8; z=H^7b=H}>JaVlWLyg9m4;REDt&2YSDZ=mKzR_|v!H8;{dul!0gFoKc#x>Ju0todFu z(A~Fqjb@j{ZTMdaC!%4$aM=-uPEEH~ofFndT z3UO+~kljafqm77DyRSbxrMYguvG%m)hD-73)ZUHSJaKB@jSYX%T%WwT`m*Nw{dMvM zaVr1vfeOv_k>Q=;n(G7oJ_DM87}bMl)q_Z)J5`G1H8mqSeba_CBUwE%e$)GV?rSc` zB(zf9sl%Hco5ZO$g)>;Fas=?isjG;G?$n*Z^-MEDd%OlT!&8Et)C@o9v1V(A?{65q 
zs5|xTy6H#6sei1eSH)@8=k3xAw+~z>)eN`vNWakxJ5{Z;{ycEd^#I*OS)A7N(h5z_ za&{X3!%r`k#OWt{R(EUM7ue}%;*&ze=|p$nQq3^w8~&MQ#MR&OmS)86{`#ky8wv6A zelJe%2ok4XO%7i!PQNo(oIbdI>;-Y!vVQcp;&cs6HE$Twiql;iABz{KuTK-F?{0kb zF>(3KIVXBo_#mbp^LE=orW>`7%xMJbV8pXny?UFdNcdj^-uUI)#s#rO5c$zqK zPO)-Es8&9W_ZbjpzK{Pzh%*n@-%p^dZYfnrnC1*=(giuXXj#o1?kb-s?46a}Co9PTn)hCx>r8W|L21 z%E9pmXes669#CB-`FI>V`$4&U{0VmUfXc`8gyZTC-e>cN9KV%M?qg?LG4hX@eDw zjI_ay21nZ9LgOQCXhuUMZD>a)hP0t$e1<`Lx@~j$A#t{&KV+tC!U_p#vT5vQOtWmt z>0Px;HYE*}{X;ed4VfoolfRcZ+l9SQ#M#Tr!u7>OSwzdEE}~`Xo-59d%Hr(Z0CDz# zGDOj&E}~`1y_V)Lo3fz!e^$9<(jb>iD0`=z!L!&IRk`5_K7|k~}3O zCE2*nQkEGzw-6oiGG=ngC8=skEGTroKqeYgB#tnALy(K7l$jvKBg#yWVtgS%I%a~D z_)piJdsMyRxS|2Nb5AN)#VCs6E|tOuqT)7{a)Gj|loeD#rCgk6l#8=Khk{-UdZnAJ zki%VKUZfoUz$QoJmCF(L401%bd)E_k#Q2qMRrH0}TTiso7vjgKd(#~cJ&ShGgy@?; zkE98So-;eC)~a%%ZvNa!6B5QkchQ9O!P@EcXZUt|kuFTvh;wTu zF7}}-5giX zf1sE;+I#-^FX|YuP{)+N?%V-AtuAoS>7x$ZKIe*uUwVT6I8Xe_)~0Pn5evLHwu#y^SAJM@BiV zcKV2OKe)fiqE%5nf7(y0Ff9Y2TiyG`pJ-LAAdk|j&FtJiO|-6A0jb)_&XZpE4O*36 zKx@(pX$__$*7&0cTFkU&tb*2B?X!LXwIzJ0@>Ol| zI9}eg7Ij%Ii+|7fd96jh9{(?GiN8CtMO%DtsW@K|F3!6ai}SZ4#QDi3qGPV^8?&zC zkrHk3MjvW;p$;t$V zxEN(`@VIC{(XrdT=uadrNLh9zPVN^ENlZ@P)jK36tM}8BBqrT+;xJh~F6Mkjp33R{ z{A==5R`1PF@>IIgdmb~U^q$8I#~&=wb-Z)=U#O{;B7A6l?k+qQ*uT=1M3*0yD0 z7NqFt@|;Pae(8GeAM-qC_IUp_S$VbWxU59QN6$LY`gLHd-;Qs*MaMtTGE$%Jxw=lK zY##XI2(~%#{&7;-RPe`fmE%$AF_nK$@orm=;?cH(0q>XSHZ1&SrrQP-$;lQ)X3{en zruUv4!`3O@llQT8iudG{vYAS!M%k|L#~4Z18GMUrzneWl;rAbkrE={FLc*#9Urlk#h^b~&a zd?(XV@cbv!lHGx`>Eal7_<9VLv6!SK)GP^SCdzhB`S*SIye_$)Mj=QIYQY-!}YpHd#cS{+yPTFXx+_&@} z@~3Up@~2@I`BN{}S>=#F^*70%Vx5NDQ#SebWQlzHeyMzWTop_Y5}kF*zm2#_NnB+jMm${XiUrme&kdWp`Dm1!$+S)S7`$#EIV%AS}v$MHp{Ar(*DR;u-J}Hy#oIm@dV2|z9J}Kkw zbrC)(BZBPnN$D8)_$i;A)U&zVXUDy1qI3ANfoo6YTt3vRJ(cBYj@Djn>HUmqZ%2-M zzotEvI#l|)_GpH(Tk@%Nc=fiRmzk_xDpE7tL!G%=Rif)pS04fmZBmry6CSV!V z3q-x5dJ_L|egFHq3o}4TqjbkweD|{%-<}B1zF@USX}0i!&a+0lZs9lfsI&$ z9G0M$!?v&sk6Gog-R!~>;AVlF1FoK3coN)FcHt@LwixB`OhrFzOwkVyEs(>xf*BrE zA%`z#7k*hThbQ1N$t;JbSmf}R*#(_L4$oF}!lPB4wI2U}(8XC;#8q4H|LF{sUaj)e zRC=XKr>JzKN++pws7fcObfEG9Z?IGOLq3T48FS>p7Uf16bSk&bphNjXK4>w@9=sWP zFbKor1Z_cy$OCw598k8er$5hwHvM@vwCT^YpiO_C4sH7LG-%VGC$Fa!;=%?`+7=q0 zeC2~R)R%9dRk{n$_tFNfh#5}atrdamNP@VKyq-L&yRe(+F1$I|xQ9M6seF9DP=xDd z<+&D6s668W^7_udN4HH}dDB7P303xlc|J$kT>SaWE9qVIg|wj(9epR>?e!d`)$GEV zzE7T^+eU-Lg$sB~&|T=cy3Iki1uA=ST^RP9`iRcJtDro@E{yjkB~dp^8GSZl`8P8I^jE!to4gqV{fnAM}O5h5cnGXRdc`Q5)DZAoccfQaYuj88`|TRYqW#r-n)Fz zK^H`OUU-r&h`OA6g)RtF7CX4`KzXcR)ZCsvqP;V9d&QI5I}ZfGga2RbVzB4bZ?qp~ zUoQF|`n2ovtQA!2TsO@sE-q9W@ket98{VS}FiYz`U63}|xPvZ8Rutz#aoO!TQ`$S3 zSDwD7{XF)noTL35A2R<#H|JgUDW$o|cyiNc!tms#x!GeOBif#JPn(%)a|gY?(DqpI z9)>G4R56t&tSEyE;-OFBqA zK+3!9(gvhpFy69uq^x3>HdRV1h(U_N@b^b8ka(G0dJYm4;HUhDjmIW05k%E*(RPg^Nqruw_JAhQ81B z(uxVZlSq%lVw4|rwy;YB(7|Mn<#|@=aW8i1GIR!jWd~*(@V;V~hJiN$i6B@-#%@9a zd3|&>B$T4~E4wrbiAhLALE<#K^f@H5ArTLWVVvPzHBc;kzbhjXmzOgD0Pwe zs9>S^sBMAnqtT_}W3ohi9JyG0oERZK&JGtJJHy1s1EJ#M2aCiffeXbaYZvG~Nna{H zu`CgvbSxI1j7NwrIb3wD3=>_+p`t5yk?3k(D7uCh=(-**72TmrM0fmR(VZC~y6xej zyDLm|-wPGOZ;=qI7YdQOKqm^83emPih|$HuO(KLlGF-S5!-P9KRJfgsgnMA2a6ed} z>j_*cde$xxJ?V=@5AxB|5iWYh!$hwfDtcEg61~X_MQ`o`U2pSJ(L20E^gdiH`a&Z_ zUwpXe%M25J_E6E+wMg{cTPXVd7U=p{FBSc%OGJOcV$t6gA^Jzd#Q+Ht1CgO(AaRix z$X+N0oC|aV1Bk27cBS9nXS>qx@3UR$_xITzin#i04?9Y-+nr~COtbDdj zfGYTZ99?@r6t~t-W*K*u)eN8@h*i+uD%uBVAE50m44~i>ioFGGZ^0*MZNc`t1w;!j zxIBEYh&+Vbf~bZ1D73Xg?X58TfaSd)3Ze+27HU<{S^*Wmll+m%FDEA_nM`IT$w^MY zb=Cg#ntDwlqnW`_iu(Nupn`q&xWb`UPHr8_8-{8X!5K`v@ihz!-`h8e`?XG^` z5yCOGrYq_-RA$%_^_tN`>NUeD>NSIf>NUMA4V(f0z9&N}Ku9f{ky18K0bVcQ^#EQs 
z;B|45%`YBNhg2O>hg77fL&^%(Aw|IB1U$CDqXHfT@gP40)m37=T2(L42NOQ6rLy5n*R{S`^-O;2;+5AW%CNdjiti)0C2*e z)L$4Msr&UiVZ4fV%s+&6;INPxeF!UgKP&)u9jb;QrBv%xuGla8jgs zs|J1K4GTulhogX8z$MZ7ijPtO!C8;yFQ5j{N6gT|N1#gt^%QihKV~|QkD1QnW2W=? znCUz|W;&0LBTe7#6UJlA-**e+p5F-Ly03-tR70Us7>}#1{X!UbNf*W=3^S$)<2D9y zrZ65>H^V^~_pz4DyO*@Al+EdN-?s_lj*zqP6Lg+_Vx6a-p!4)okv?V!b%e5OK>g8I zQG4H#x#f~M%Mh83+Nw3Et#65Yk!1eb`0^vvW~nc(Ms1aJ&ZIq`&Y!f0Yf-zm7PaNk zyK18?0$E1wWb=3Q95qXygKYlE`1T`YN#bSmZ_v7sC3uo#t^>I~G}PwB(m^+@6d_b8D`2g~i_a8(STNZ{LAr?djI+iqA$CgM9|9+i}XY*@Db^IFm2Cp&8;!b`| zpe!}f(v7zNNd>)-|RI43k?J61y{pT`LI-pEC&9uYl;|(02BnNZh()V z+k-!}m=!(v9UIx^E%bJ$q01G$J!<;tDtbHBaN{z1d$9JWZuGXmC~*45+r7rFizr$p z;n|Y;PgBcM6z%2P5+IxZuC4P#JKznoplDyN9>L)mC_0eZw22OvaDrTK#Q$+DxU1M1Qnog z-0Zdhg+t2^hj%6%-kETCXTsr~31NOe^QL07Z@b-k~0aqD)vf<@`su-%KZ&WyY zqr!*jvyO0r6cSetl^JefES-ShhY!iX7D|0CM7z>Zgd1&C>#HZo_0a!|@L)7AnVXK* zLz9aLW?@*K9}&S&B%q`zQvFHr;nl1^%c$(tjDS}&qBhottcUlPJ!-% z`$*8SF#GT%6w_BO0mjrdC}CKGb~!>ktI&jUsF}ME8l~L3u)S3Oc!Jyj^8mYGGz=8m z=~9ClLagR?%>~5jYnLaBje&41L&daCca=f7oEoUxMUNszySnH{ZC7tY<|GuW^6S4K zHP|=gJwvhijR|@bS6*NE9*PaT=;eoYCmL}dicQp@cvk=MQ2+5z-c}S3^AGXtNyNjG zhzE&4>rKZ|ndx}z4_t#Qz?!`{s-A232TIsjcH4QhUCV>{vfNdxOvBr@@85~RFd+ht|dHOd9w)8 zoMhbR7`L9KZ((U}u{6UhO)yIn#?nLvhy`-vlR&4fQqwHP204^AM$J3 zWi2;pA-&XeNFg`*uspZr(#{ta+c` z4xr@Ge439zo>FspxS*7pf2IG4rDpoiSXwE_%_f!H+)3o-QEB>na*LplTO23JEw4Ms zEkXK@f1rgAxt3UGxg}+a+;Uh#!E(#bx#-QsT#E^8w4eo(xt3NfdQ-!-T$I4FOA9FH zTLvb`t@bblir!quwY~)gUC;twu2mA{)+O{~`sPBe^?e(;HOyLS-O=uAK}&SBNK4{q zk(NZ;%B`sisWn@V0>x+ycErSJX!mgSNEf5Q(pqaFj5EdPGDv+w77N7aVEvTkVss@e z=o6#EmNffItv~1$FNx7HhUzp8MvO^<+ZAJU4zeXv{{s?Z%*L$$ z6Jt8BK#DJu8dDF8U#242QmCJd(1P`x>Eya7*Y-3z6Dpw1n?OOUzDk+dQ}#^j4JN`Zn~IW9EyxuJPn!`NYOmUZ^ZK+MdEp{V)4AsQSp3$eCwH5tnHj3S;m`WzB(lgoGwOC{Ang=cw zTsXJ@jkQZBxFC)7tiWXJS-~mRvqCl2v#E;&mk2HsToJgcWb4^ADb};=0CxfI1%EWz z`bC^#{i2|;e$frEJNO{?#Y&C!KWsGCb3Br*=Xin-vgUxSIY6HS>F3VXSi41QtmlD0 z5AvFq3J&DW2Y-HXvi1D36zlorfFYmxeSilv)-RWW>(p4kg21_ei#4AqvVJ8EP$szY zi|%1w8V=zfi;5ysq*v(RJ`#N^ZZ2sI2 zyA9Ua24c6edOJJp1}gUU&nUMWwCb1f+)5^KCzUcUj?(8vvTGO^@xg9kl*i4H@$9Gc z`GJ%^Z;;aGwK9qaqf{|UktM_z&v$FspN!{KG^Taod3lx)xChsU1I>6|S)=wio|oA! 
z?ZorES{KA%w>W>81aALk2}}aFpKSMm(yLk*EX8g~jDC*M_3Tc9jkaERZXLt6441O| z@h~dy;JK|7&mCsCyhQ9OKPq;f*U_I9Am1(yg8-lNWXuDu&zFu*HZ`*nbI z;KCqygKO^xmAGL7e9lzuUK3}?II`f*Q#3UUh&s~Ww=bfsOx;OK|Jl$t!Ts_!lH$XWlMis zC||5KUi*)jZ^Jm+m*8dc#Yf8&??@fil+xjVK2@@XQ~%j{jVqCt@(y(~&@= zjy-;8i`=1OdHk1^%a)aEKrieRPtWE$&KhMIhuPGc?&DQet!vlguyDitVeuQC@q7~w z9f6fb_?4L2wcBu5G%L&?yWJXgyF*8?yN2sH-*Dil_%*dKYUNK*r#q55EVZ>R=!zTIjzXP9U;+$v!H~`!btN*6Xm$;+ zy>K6O33}%N)aB+MK1=H8hm`jx$sOal>L9RZnV}Ab)j*eEbIhgJ52}MQxJ&<>B3}w3 z@}&=*wf0N`(upj@@H3#{!wVix< z+7$WnTlVthr9{3Q;V57JT$C^G66DKgo#o4o8R}IvI`t};3t3faBVWEv*8~5+~o6ugYDihJJ;53D)0sQ~UR+q^>OB{Tg_GhP0NY z)xbSCN$z?`NHn-44)eSNm8Dy;IdbW|h=EtF~?(x;|?Dz8YO02rL82 zt5nn2L)Si`o<2Gz=rPly)O%pG;e~qg>9gyJqa%bK=+l>@9(dlZs0W5_J%Xv}E3_(_ z>N>i;DRrf-2CWnrg7L@-P-U)I$91nbfL5&Mx`U{8bQRN%u3*~Hl}tOj0<2H2g3;J& z$1BsV&}ueqvbqc=xX@~7k{eNLoCdXm-fuo?g*;nrj^ich8|>}Ob+0MH-t)QcoyFLD zIoDlq6nkfI-9MFJA2ru)I*$ER$FOA0bz6XKA=ljjY$06tBVaqkT_wOanY-!&Y_`B= z$6cKd@f*0SuR(kXcQpXwXLDEALcA-)&*H9bf(Si#H3}kpxvM)N!UrNG?&@BkXt=BC zKuH419`5SlJ?18Vx$HE?%F%^Y9!(fs_ z*i#C&GKD>ZU^-LSQ$*F!d-`<3o;7CURT&l63Y$w z&xtnQaPwQDP1C=6ne2&WJl9!lFOshg!)y*o%B(wYP4*Pmowg@?!X<8jeD%p;l963FP@rYujA z;$_~xi=?>MXFC#2OkLF?q6uZCbW%vyLTZgX(bNQ7m?mBOxMkCIk{s2TQbm%(4PU-N z_Ow#IJ)`VC29R`ZoB8ww0hR(;|X=wQDr zUa@)K;XC=-;Q;Fp`P#S3CihF%eynwH!0+k&(OT)+h1$eu{N6C_T!p}bc&LkpCAdz$ z*1=@a5Pg|CQ+qjk3UmdTA0!@nkp@O)>5TqSS3?D zy4uSpH&amtI>|+&wb7`@EQ~Y0C+pIwGw*Xi>$-w2Csi-g5a#lm9Oqrzgh5@E5&F=4Ui3v%zQ8B*^O zQ;UzVz)OESNLUc3zx|=GAP8bUnIZRP&XjvAUF2R+X7qtFrO$n~+!y+y)R*{z+?P5- z>N9{pHbcHKZKixC4Rk4;|#^~!QMFH1eo`Mw4 zT>r%!q$uI~M|4QBle@Vf11XksH~+O4DQ0sww`!4MK6f)C8z~NPH>(aH#Rl$XTPjj4 z`ZfzGG~6wHCIUNSw_5fi#X9cRwKN3g#%>KJBgJIyHr|I6A>8fRhmazj zyX__;du69bUF8jBr<)+(enUoTq{^q$8`gPC{I$|;Kl%#Z29q4c2rM3<18BfKz~(ji z_Qsa2FY#+(CD?U-ad|EpC^KICFB(YFqRHj8`eZZ!Yl{Yi`k7&9N~ihcSu{leIr8mK z^lxuKHog8dhCi#0L->xy)yo)BKGL9x4zbMAA{Ve}ZMZ*C-|4;14|405EOz%vx z%|*60l;?Ms!~EiirCVI(-y`(+JepX}sBx=4e^dVbe>wc(5bk%F2>d@}9W3J;Xc8Qp zSBxh8!u{UF_$*p9VIeo*sNubII^L@|m-nj3EUk3(im2!T;0N(DN?d5LNfT zbTl!*bo4y>YnY}Tc(JzbDgSmDFAprO`;FuOSwnYd41_?OH1IL(%8>>%jPIcSa4-@p z89rISFUAczdw#JkobARh_E}ojAPt;naZQE1mrt#}i1!LM%xgpw9&!VBq1xqvzy0_r z(x5Yv2VbGpJ?K?`Vl%&}mu57$nkF6GwhxgYh7JSrWHA?5c)-w8g2W2wBJ*Az#`c%- zhEhuZgIhZLb@>l>gXha=VlnL`O`_E){UNc~HHAn_=l=MD1>$97&A$UHDwpwI8@NA? 
zQcd{cn3gTm_y}R5F5mGp z+58tTtEoHlD_)N1by**y6!lvFju(YqdUY%IssXFr;%{+HBtrZxRevj8{L#()lTQ3$ zfEMHJ8aR$vy0hwl<)elT~!{ghGCoz1O6 zE?%5;**X(1j%MYr>7KeotnRh^YZ6}M#B7ESw7P7@@0h6;;hmb+w=?lOl}i^F%6C37 zPCtVeyL8Q%f!}g!igm|pD_I$aSs7xPv@RxXgg(qWyIEdoEH9W0xs$G6R)t=J&9$e{ zYl()N8Okd$rj5sxS6mp5qPVM*;RuFPjU6YHSHc)BZA|_@<&_|Ib*I;^YL-V1qX_Kk z$mp$%-)!vAC_4vaq(rZ3xH~1wzHyZ9z*7D&^s1-HF9^M=(y!lzUP)};eg(bKYHI99 zujCu!enYS5j76#Fl~m*9arAPTv2iVWxyaOb8NCu{YQ2SCLGrR@?o*HEm?G}8Ok4TR z*~YJ~a-Vj>&DB5)6(V8iu}p}p+@D^?u6llXr|Fjjl$yc) z`Hms(Axd4${pn+>Cnz0p(x3j|J~LMDLaEN&pITGRUX+?>Ja8SQD!4y0Oy~bZsc}o= z?nr;?fbUy_DITS6;QnlokhT1$WmyT5{_HiKD@5rL=FDD{UT4ah2y6H?C_~NtIp8e+ zIjTh&3%R?~btq#wcXytXe0M=E%J`hS>z9EtPI7kx>Q8WJzs)51?h1SPZUorFK>OEm zccUHTyI+7k477hIcUJ@UFwp+<++D4md^ZEkV4(f|+}#treD^yrgMkiA=I&NW6Mdz- z|ATVlv<&WUBa|DbhtYE1ZKD-(_Xbo5PM^o!v*VIY|x7|wl-(>>v?ak`Jm6o=D;v^YJfW5ckeTx{m9+> zT!+)UxqF&ioT1?EX)|z!4N&qS2;H3YR^Zih_c8!J!RX(-VDxW&(luup2_W?f;;@lw41L8cv4+B2};-Y{a4Dr!GO9DS1 z!f_Cn2635yivgDby&QZq&@6zvfL;as)zHM0&m2t$dX&#>jd2f@&+@@$iSilj5P7bA zW;RtPD4%(=*kE{&%4abDaIb=KMAg=$D4)f^LsULXWKxQ>%JI26<#-VIaQEXyOokid zaMw*r*J&rEr)5k^PtxGzXmC!sIN42yli_%$0>ssk?@auH+ zIQVtmaNvlrE=?ibYo`lm*Lec}b>P24`PVZ3wTypl8RcKg_}3N#|0Bx3){`CtzZTO& z;n%{l+9HJLT5*IxS$iMD9tLq!Dr{O1H-#A|bc>sU?dAKE1o^%b#QuZDI)YAI++;(q z8%JOqEN-04-Sq; zsc3P$W;b2@HJFaJhG0Y7YxrHK=FjHgccC^T#JX+VP@b>t+tSe2!PaJZ=sT(^8v50= z_nBCh$MB^_&{^fr(x?FYZIVLO&;nDGbL$-|B;jhUsX1nm}nExaLkJAQ4DS_^v(7I<&N zYZEiYsS-E*VwyPBM#d$AsS7Oa3#Qgg!IV@Wm?HNHrf5nxGP)7yMxYykZjA1=QsNJb z8k1fT8!VUS)rt-I^~Zk|FFQ7*%n-|M>kAf%b(!YVo5UYHX#aM2uI2rwqS>2H(ht8? z_lccYo?ln_gJ?z#SxsU^m8sE9Y)*s*uN7fe?uJy3ltMdIj?mqZ%8^RC zsb>^xlp}QcpK`Q|?$8 zQLl96s85D+G;p7CGWFf*ra(Dr zE>w7~F_1B)JFFaoe8(W)G01le@*RVG#~|M^$al=LS2^Z#T%9ml zqD~k-s!kXzRwwj*qfY4hTAk1eqdj$;n?@a*2S0XoJS+{18v+NPrdD-aVw!qqBsiA_ z=MEffp^bXj_mcZWytwS*v>?1VfZ<|>y{}ALh8Kr*z3?Yq9K`x{ZmeJD$@**+tj`vj ztNuJML;d-%KE6}^xrb^Qr>%=apIN&&$EN=c;!A#}3A^12}d7#}44w0USGk zV~2aowtuL<01F0ig*_&3i7;z=W47o6x0~U{PEi^*UOXU5gT{*wL_b(_JXiEr znc8ec|1e{Zujn6PxUpLFA2D8fTl6s-I~AgDzNs}(^o=vzP>R0TaPyETRT;1TC`u|* zbCT$5ZrFcZ^uxxB5u#t5pqECgP?*q{%$#CN}RfZc2MZYLxha~!AQ!CtWpviJhT;gcF_LAsbWxV*I=v!>O>>>K5 z8Tvy+$;WsJ>afeyI$87!HuU$4elZ4mr%A?+LeU>&c8E&^RsijKvcQy7ni2j+zi#?(bJV9tbUWQ&Mt(&n2 zo-FL3Gl^1?;f6x=b2GJ{75%(St+k>bSiY1+zdloY6HFEwZiu2!l<}%8dV3lAXNtZ) z=CT(>NmpwGo-k8$zUXH*URogfLB7{T|0rYU8=`-o$?~qa#72K>v*_bv?EFOZPBQdA z5q)7h=YS~Hn3|!Mx{Q~KM8Ev{B0JG<*w{WI`umt#&x`(5#*Pc3|B(Jxo9N?Vy!?mg zgN;`mMPFOvwLwv;Fm{|2rNG9tGSN?Ey!f-|m)39~N%R{uw%-%|os5_N68(dW*Rn?Pto7ac<}?#zs%4VBKmil znuA4ei-ESZU_)=WD8cT>8d0)s011A!ruO$mKQOeBCi+#GT2n;7F2nW5qQBYD|E}oY zYP=jNdWV_XeMR3S1HDtTsr9<(J6KzrDf*5YFB(O^GUKJ?qCZ%h=n(y5j6E)*f1L4B zl<1$R|G-OJl4K(duWN{@5LS;iB@_s&YZ~@H5LQ<re~5mxp##e6TU?rVU( z=&L&$_r(dTEscB2gw=JmF+T~bE1>5jtQMK+j_Nli3oD1f#+Tv6mHCt82Uhm-1ELXDD!2y@u=QD33DXeW!b;F4c?m1SY~%;iLEj{-1iezZ zu+mEgs|71zUTc}K(v1!YSGwwil};JLN?Wb4QqT!2`n1A|cJ9H88exUZJ(vex1^iQl z6{$dnrLQZFa1ZG63}J;Q1VXq6ZvuTi_uy?tiw#%IksiR-ix1*5_zxm$&Henx0=@ow zgkc1t_z&DP{0AfG^CIG4zP~BUwr|@G~9l9X=NUgG<>ME!sR83dCqiVY99l^#W z=o44dRqv=eGq56Bey~>m(+Q-nqHAaMP*^Xd2Yuyyqz7Zg`n(D9g9Fsy#)AS4GA!mE z9G@;f_@1r_G!)y(51=X^=%J1*R2OClqsa_`8OY$vJ-BAtcN7`iYS%j=gHuD!0(c|} z`9VL(yRV<~0g}5W$`4@t{3()QRPB!B&$)*RnqID=+B~^}P6^3n4)Q}KHG3rMsMb%8 zgU5#CD5~j`Bj8yeIV=~+i@AqjAz5C?J+#k2vYQskt}UDAA=$Bc>P4ilo-RLhqB8Vp z`lVe+p9C^zLzN-@eD2{ZG;jTE?x6>}{V=+nTOSO~1nC3yFg>XEqIu~(ScQOL;KN7{ zGmUyR_wWtK>LK^g3vwG^RT{;rG#q;8!Vk=1#1A$0^22uo`5{d9KYSnh^1=_yV#E)L z^+orDA22<2vx4r*6e^hQiwZFGUD1~zRKOa|io^p##d+>wM7B`z9rth(#K11SicH!z zE8-wN09+L4Lxl=xsjWgqD1?KlJ&A`>0uHJRY0hep<+OJ45a}|WC+Sp9-S+c!+y*1Fu-8( 
zqI@o3M>>#xn6X?rPyXxGW#9Z;{%eu7^w*MrUyjIsy|+~DDF3x_<(D(%zZzPF$H>8B z>Ay?mzb-fAxgv*ASi8f&9pH~%k^kyf%6~no|E3Z-3@@2DNBZlzA6h0qdZ96n2=7!$ zXpZ#gwT9WRBL@Zd$g5%IB;*j*aL@*6dz%tP;hi%7cZu}q-3HBV;hhRw>Cy70gsZ|k zHRe}RkbPf+h9C!cKVCxi=DP5|kVARHd+#Iru9k( z)FKBTQ_Ti6HQX5f19AvdOOK-bFW1VCKBsz_M_>5~jq;<6iwo_ML+Yj1+|ktF%kzWL zRIs=5GCAi$b&ThHU=o|0b2oIaAm`@7^eH)44RfUAoFhynlXEtr^yt4b`h}c>4k*M` zWsq~JEJZokfFC0)+075JQoxim++=SdDK6k1 z=?h4)?g%N);2xRtNpVshDUQn}#h-JJn)Z^SJKUpo9VzzZ9$n8O#ml)z{VAlVpL_H$ zhZJw)9*rI*#T&T4`BYN$g!@~aO^VlXf7@n~;>FzGQxB5jh1}oH2T1XJ?(doVN%3s% z?|)>F;>q0K^R=YtA@{dO8YxzAf4`GVin_VKeKn-0o%=iR5GfAcM~a=&NwGj@c>i7< z0H!AXJ_jFA@>>~vK*?`K@Bt;i<--S*{FVuCJNYdQ-gfd^qA56t{1!~_`)sHvJ)TMH z`s^sJ;j@DoMW0Qi6@4~_2??hadNz_)?pgMT&j!%SJ?jl8F_T});3Q`9OMbgABEQ7y$T1D~ z*dvP^%itcrkwT8m<{ta!l4Cz|kC*3=V<)-C!H3DQ67KOwspQyV?(v3fa;$)R9Cd^o zYv3M#mPwB7y-c;|j{Yy;Ne#;2w|fCnblt5vvSRqTxoYwWMSNH(~>YQ*$HJ_mYxz+{mmnQt~-B zGB=r&EaygE)sPZjZsc_+9L^^t!+E4+@DM4f*+)v!(n(2NDk+I9B&S9T$f@BY!J4mk;J z{^W2rISFMu*_TC5cIn7TOC~vKK1fd193UsF_LGwp`^d?%405tKot(^1BPVsKlQGHUWR!-Sq+8+0g+bb;FHlqa2MhBe##ll&S?-<6TCjom}!d>nmaMkV<7kqdNs zja-1~wb6b-9=-1%k3NBaAGzQJb6Vtr4NWzsr2J!_6_PXg1>_8T4$h?IlQZxwI0N5; zGch^jOw?g=CL)`h2|Gm21ZR;mfjV-=CzG7A@6ox-XfW?$VIct%c;YxqzImIYLfX<&)DDdE|6iE;(JCLr&)( zCZ~1TB#ZmOmaMMKRNCJGTqY1aa(vb zN8tl73~ysE`>Ktyui6m%stvNQS}*&mb+NA+)K^(m0V%6ELdweWNm+3oDa+3#Wx5KHQ>BnHoJ>kbHKcT?fRy$fA*EgUq|}l}O3k^Xv?hm?Rvjj#6^BS^aTY1f*O5|P zCMiujNJ^6qkkYvQq%>w9DTTMGG&q%%25L#E&t6jM1v1@0rVH4T!S6=F{1$%KGglZK z%@79rG{PWQyBaJ7%=$0`RT^QSoc3V`%z&yj!Y$Tox)lxxdVHOKK>{Q_N>4u+cazgD4(v0 ziz>=STVZ9|Hb>ZuhqjtwV;_-$ib z{I*3l_$?Pc$fMK2{~DxI!~d$zL6SF^XDBX2QYu|80k<6=GtHX;HM8gcy#XIB|&KD27KEj$=Fm!(NPD$FL8>tqcb;+{th-!@UfLF+4Cy ze)597{KU;peiGy$KiSI5PYyZCPtK|2CwftSa>+`5@|eg^u^>OSnIJ!X(@B21OoFpD zp8iWIKV5GlKmF8Je!92mOP=hjDJ1)1rpQn8mgn!2pB71|Uw(S3{=>N>-4RJo+5Ke~ z(;;X!w2kacaIBY|o%#(*_9{ZjK1We^Vk19unJ7PdStUOMTX9-`wYP-+Cq46qF%lm#45J=CWJH7Z^l8zafy_x6i@9fe zoaASFH8@3;fm1*il;WnvDPB690t1_8InMI4uP4dRzO$E~{cJ5i`;D4aJ7}3AJ!{iW zO2qP%$Vq9o&eF5n{+-vQXZJK{BP^`h2>bIk!v4ICus?4j+n@JrOoKMT{=AK_KW`(f z0oYgu%>ivJ2UNvSH65vKWc%|r4%63V6D+vgByr;cu)z2em^%UWF$5}~aRSk!+&yc0$7dt49BYW;#e;*_l0*l$`eoE*g%Cm{<1uI1>ObUfK?dI;YEva zd}yuV297Ju!LgIM@i$EtGmeDebt{;>x^Mi%?eUcmX^5vetpDh$E;GT!kG893-qo|r8 z_!ktxHv(vbfO;9~0@MmUHQ^|oz!Q$r2|VFw8T8bIqjUmKI7%n*grk`UgyMm0p}6xK z)ExLVYW9XSsF_XrHM2>-W|{^yvq`^Zng%szCZlGry{Oq^KWcV6h?-q9QL{7m{G&Y7 zEZED>Bc{pEw@s3t{~w)RYjUGgVNLKwX+=#=wA;}*O6?vu4%6wf#z8tM*J!qrpMPZ| zKTmUzpC6*rc8yVVny!&bD*shzw}MSbx_-luM_c*aXs zEf&wXHzg#Ad2zLRH!&|-qtdp5%L5k#&K;bKMzyyWTx+r_+pJM#SMOD22d1d9eIV=w zVc^NO(Wnj$fdjrnmEhnYjYEmxqEl3dT$5FYoB-RTs zRS=f0DhN(f6$GZL3VgJx0(NJ&yG4&84cAYK?|N&{VOa5c*hPas3DTgvJT1zD zcAM7;elO4p6lg~V{-juoKlwp{{-eR4c53mbpbdL>odU&Yi0=k!P-dl0eAfd=^A%{f zMtoPHKzks@HdlNXIzX8qGP6j_hlOkSuuv^3bhp+c=&^0rD^O7`-fqso+bvqW{f+`1 z)8Xwy5EBZ)a1F`_?)-2lM52}t1zSm>G2m}gpi(%tj}HyY;6sCR`Op9j%7xPA($Yo( zg9{{dgoN~tVgW;3ey;`{0ooCWJK_mNfRTz0!a5hdy9E_uz42#M=&JAi5*7OBRRk46 z<~m5HqcTCxc1I`>q|FM|qAZBZ$^*X$e7J>E87K>0>8w8#s2pksI6no>Y~aiehj1Q* zfioMVWm9QT+QX3gFwhPI?QpLKDr`2CpX zPkzE5*JYy*U@7zmLN@vrbQwQzJW6zs{a7){e4@a_MUUrwLoy#Ku=F*_yraNtj}x8k zF`^TWlFZo({2z!^D{wkQDirwZ6C@Mt$Y+8b`Ao1QpXmltz=nLL4Xg(7#U?+lK#x0( z8?K`1bIm_Z7Styd_>aa^i!d>C2`B3DP}{Um(9F8_Y1`1u!7I}?qM0L0Iilyb2ikt2 z=WbJqbFbd^9=a2(pS~a62?Uds=#CGV#zS|!42R0l9e0`$_W(TuA4j4fzpqP<>W z)f9UH&8X4e`U%aWEEj;b3*wV{w!E3^o z*1n-v{3poU*;lcr*9s``qbzXD7LK?a5b`!C6sg%l-dcrXU&|B^;mBy+6i?v@bmLlu zd|Qo>FEE5PLf!~7lB=rwSr*L!+2~e@LIJ;@TZsVSsKT2tqN_l3=HLr-D}Y{8%#@Nc zphbIeHrnfwiS|}yBdraM(+1KwZKM`ylYn+qp~#^X5cs-5?HJ{mvBfM(s5RM3d=>Qzwtxz1(;P4?Z<%J`_go-_mD1#*5X>o*^ 
zCRyJ}ldP|zN!HiVBSDeQ7od1w!Qk6uMrafaV`M3iph{kOqaR7=mqY zYoYC~b40$1YFJMuMtK5iWXK&*Cqv$VZZZ_Wu0gQIOpL67^=4v}z$l@NVrCQ@16Lv> z_!w-8gal96F)t*z!LAh{fu1)gBsei_qkn0dkf1WQbP7AG6?(-GT@$!-geuQs?c z9rRY5;A>?=8@JW;cJKW{oM&_NOG3z~9}4!h+6nPm!?G$N#489LI|8wWp#mCjtz-` zL7fnir0}(Bp!?yrbkV(WTLzG?)#by&7G18eB}|^tA#5(q6gD>~tojRuEjtxf5AuX9 zF)704*$S(_bA&Am6;|WGJzK$Z2Zhag1wY|{u=z&?Z@pjGoWD=ltV3Y)|B3Y)!>h0V5E!WNt@Y_Wm9xUeM(HeU#vhZv4vct9s?hF(;!uz3W8(eanC z*+)h}!e%Fpuvy3uHm7C^8ygh7U5>D6p@MfkENp^u{3A=)1PAyQvpqZPsz zD|nwQAtF(!=ly{UHdG_-DERH!LWBi4iZg`>rvpOxaDfnBu~!IBOB2GQl7(=1J8lT; zRW^D)=AaN^3!_#cLRBb)7lWv{6d@uL(pG>E%J}ff3O)%WyRsxu!4X{$Sq4H!L1+~S z)q&6$5E={v8X+Q_@!3M+d=Os}-cA*$fpx3EYs+}S z)JK7^CJYRJ2x}t2P>8T5jmgaPN5AO#%ElLJg&9dpUw_Saf~$=72-EVX`%Zu-<{YYT zUm;8@(@$O{Osmui&N_|YT&k~GE=+@7yPMz~4Fe{@S!L}z0iN8L;B2cm-WQxBz52S^uSH}VHlNbf&a)6%Jkh@55iUo=A?fqDAQqecd z$x$P`6i5}@FM&#XP8vmX@|I`3sh<$hw#tX>%9N($=_hPoqFwAeA;JIZV*P~F=1;#S z39YvJ2}cZ3-;rH*iV3G6@Pq!97f8Yt#e^CN)Ye|uMiMfcql!pEnuK!o6WYz+X-L9a z#RNFrWDSY8)crb-#8=kFc9R5Lm;E)_IaIs7jO?tbTeY0*EQf`)BsK;X*^*crFy%nv zDos)(i3KAfzxz&j%wqFdtS7^93_CI$!7wt#UL~>SR_c9pPQU z5}Iq9LK&FFyADIJFNb&S)bg(970Q)BDh84cNU7QU3n2<+SQh_+&k_Cwj~xDmxj>q& zP<{j?TOi>ae)>ojKOKZj2O-lt0b2l9gI@)H8PJN3@OFb)yxm9+KN%n4C#$mf$&P?s z0nY^=iZ~gHIyne@$ZB$A4nGy{WNJ(nKQ&3qPt8&&BY*^jnpy^=N*zD7L7}7^!&z`H zKsN7ar%-N(pao((!SBuC9i5Kwj;;{x0nw5|8I!{+Dvt1qsw`e%*7Axg3g!O+X%I*t z#0ur`R!}Wgwt!s#y8-q9?32w~!E@UMq%a^w04WAYdlbr~9Ns$Z2yd;+;;oCdy!8(X zWeSkso~>(u)C#x@a3A0y@JF+FwUEQBq0ZD!fZeir^&*85Bus?rnCJ+;7x)3-m%;-` zf(HOac8$#DT{kF{`+*b;Bp)Do!IQIAC?Q{04#Fw+J2%E2W<8lbrV&BX<<6JQl!bcCM)<(^TU#m|TU zA6nFOXnV)uu|k=r$H8Nb2GTZ#5+2EP$YDCTNp(Rhv*`F2ppvVAWCl_dkjep<0nP`U z2{;BYG>aEPv-oLF+59vRT9d<1!$fQ=oA6?tnq`lvLn_#^f*rt_!d^hj*yf@(w>Ll#~oC4rxG21b-A>0|8zG zTku`8`Kb#O%DeFXcmc@=NWnl_uTTy{(G?2iBdES%sIxw(J2=H>TDw9C>8I5IsRBr4 zI)2((h4LAYih-m9QYzpi@S`9c4Za8XUOUMqb+P8uab!7*2WmgWk z3@|)EyG+1ofa3sz3_D0;2QusewY=SWg}}p?AQs*^V6oD{I|nbA)ej267D!MqD|ld5 zV4l(HjzVzC;d!ScJnx#t^YE(kl0twQwi*Qz1`^aV9|(RJL~T_F|A05p2i`y+sl&9q zdWS-A15y%@(twnw<6%T5ya6P59O?=nR{o}Kk zkf6%`pQLkvi|RQ4{v5}ZbJ)Wk7P+lgR<0{*SVUn(6j+g)7uH)uVO&H#>~QIf?^1a^ z%qYQ(IP=CmM|DOQJ{u0{jBQ9mn-SWK&}JOPCq}pqmFt>eJ>=<-r$gSPw4*xWKdvLg zhjirjah-&kq$qsu!Rzc%-2_-<0xU8C-=GuF`z;Wm_Y=^2ir!Q7z7$`oCHPV;g{b_9 zj@D4Q3?d6eR){k3nUBwnkfDJT4Ww;QK7xwwLX!!~(cKBSGA1;mK{yQo*5kSW+aX;* z9zHMN1rC1vb#f9uVHO$YQqVA^^N>zK!<3JxjIVZO6rwSF`x2Kfa59zg6&{Gw5{T0h zh|>~?)55>6%@A22!l?^gj8le&2HicXGnkI+3}}P_ zjWE>Vg+>@)3d8szorbR;4UN{&Xbp|lV7XxTab0kGl`goO%D7B}{Wu&xqzi^KQX_@! 
zaE=@}H^n$NrAKteXQ+G6V>r^JY1DX7XZ0R#5 z|Ft`O6L|$Uh6l)L<3V!z-U-r+1($92DoH!`UbhWZ%Ga+^dB`PSXH@=Mt$ZEh?|O!&JTvAq?~n%qo%Q;ch90G@oUk+euq__rzJGZ7r4WGR*F_m9o&MNt?NQil4N> z$A^KmZLKALz=X`JgD&#wglWQ-^8NNH!;t4wgGjxP)8+I-bWqfr~{3|3P^Ic1md++ z8G<+;;!KEb5L+RRrpn(Sh6B{IMkhH_y`P-bQ{^VA{vHiO;WtqDjkPq;fL5XK8<{2s z&p;Dy5_d-}ai6awueaBd*WFd*^}{sK;v%m*>&feTATEcv4B~Axa4y7H4f}dgAPcnC zlMbAr4pSBBh^B#gwWPyZNjlQJ?>r#?G;l>NIcKgU=Mo?;gSZ^x zN*Y)MF|2y7p@y8hN(0wblD;$-=}W??K-XT|?jo;ktR=7I9Uw1nrh$(gA+OZZz>07d zSnt6c-zy_0$Safvy2DuD=|iLieS0H2VFwF*6Fs}gXG;TbVI+M{1OJyT<=gcWyzfjP zZ{q&s0QtiYR3GH~V=U<%*W2}G-zz%OJNChS{luLKUm{FDw=cPqxQkq*cQe()!o7>A zKI5o7`|N&s_MT(9EnW4xEpWcOrK482rM*(OrJ+iWC-axi;pbPw#Q zA&sPxG>%r0#<5z`IPQW6pL)`y93xG^m81zaY%~oum5-kiS*voKPm-mT_&|My*Eetqg{WMJNF1sFik?HY#(^>mNcm% z@4ZEK{U`m6Pm&Kz{mQrG1Nl<+0%Gd8Jm(=|;(Mp<`a9zNKRo8?H!ds^B;TOrUw633 z*X>p0>t^1D>-TV1OTH=RX8+ndJmZh#o06XMpObGAt}if?ua9|lJx9Lb%M8gkdBm7)ulHnfU?lGIhq~>4(WwJymB9CjEkZT^hpFBqLK7 zU#tuwk;NAgur#vVyJsFTHNYhfQ}cQdkao&fR9)A*wS!Fgj;fn_W_Zce2~-sh7EM#B z`Y^grA{SBh7hK8L3Rm(q=?*xj%dOz6BXZIHv%1_eEDF%&mSUy0F1Ms!ms?z^%Pqo+ zG+k~!R;1~2v%;AAq|DT3!kM}k6G{m1jTLFSTyn8yKZ!B-^jUQCF|DH>s@A299gv=~ z9h9ERs*|3|KO#L<@}l(AFR0p>_s$`vo~n|bIzQn0lWu-h(&}Q+JUvc|JZTJ;m>WrKij|PQVMz&y=3p z(@PRbWE=1C2R44fO&gi^7ZO=Z)%W_7|4mGT!qywb`&Rz6bEd6UdSp8bC?GRS#c`x( z(kAJV22a0UdIZh-hxEv>N7^p^d}D6cvrO&IJ&H!%Vg7y6Bb_35)N|r>>Cuwj!3^oq z9YRlu&=V~5xP+cALHdQBBGmVPM3T_>1J@WdOK9v6+=$RvEHrNCPtPKXeUUmjqM5so ziYOVF{DU0P#@$Lql=i>$fE;m_-}pzA^`~x=BhGUNR1xLgASy>(5br)A*)tsVE3GZK zuWML))z;Vdg!al1Prrg4DM(lM4*pm3PNqT0y#wEAuMoaN{V=6LX?%zJ;Wad92@hFy zcsJjXei+y?zA63iCK^;AuXLB9=WLDd*$QZ(^k9zV2;k#{A>g_{%p zu2t{GqsROcQhe5NDc;r~#aoX_@s@fi-h5PwH@zsu8;?lw%3 IwZx7yQH|WIw@}S zpcFT9K#CjQFU8^H#o>`oTvv@0*XfkvI;y3(_9`jPy;q8BctMJDJuk&ME2X$SFG+Fb zC#1Nt<5FB{gA`YCOo}V6m*N~prMUbTrMRpkQk?Cu6lX;}*xwyzMm;9fgBX5s3hI&e zNwEkz7duuX#g00q*pX@}cDPE4_3xEpeJ@C{UC&Fgot0870)I`GqEFDEBO;F2MtG)` zqDxLlhpy6~V}tA4q(hlP32p|qaAeIp&R;(@tz z#1g=Q&P1@Fb2tm#y%+x~9ck$8za<@M!y|C%h=NDx(h(!(>ZBulX`FP#q-Q~wd#3(b zIx^%TbEP9TPf3?_q}=;nzjS2O_ePEM;&u=>u?0ms(#+S>9C4tAf3S%!NJj=e^h4=L z312aBWILPqnsmg)pyS0TBMZ9MGx0O&#RSjCPfJH|5yeX{TKTT+7t;cmA&}SfVrHdu zw7ph3+EFhZ?W~fHcDbaZUda5A4MR2p*(hXqW_J|lqMrXLRZ>0TH`Yg1O7&*QFm~## zkYP<$J@nP*L*{_27_t(`wnJ71Svh1CkU1f%gRB9^Zlv3g?m&Jgj=PZWgKQA8VaQN_ zJ?gK&d*$J7Wi}oWPu*%@hR{jOFe8E)W_OctWlC`vGc3ZFurei!X&)$4Y~7RAC{rwY zW>}7=3Ca{jVTKJ%+M<~7NY~U&4JBT>LYcB>IbF#Nk9wy+rC4rySKUz}?{Ot2OK;9= ziYe-dGNrkH|6C<<=*lC5O60rL@SE-l_bHJb>>-m9+18KtScZLD8WhW5_rwe(!spR1 zS0cLjPgX+(dpuu>Xz!NhDiO_yJgG#uvdM9~0V|O1QzGzP8>mEV_rCp^5;>I`4)&+L zu0#f3DXmk&?=6pg-)?v@C-yosxV>-RQ7nh4;mz(zxk?yf&i$l>8=I7HnMa|wROiwa zcEdZwZn!B@|KRVMnRiM{^w_*gn4M}*df)z53A6RR)2f6v3t-pagDpykvv11@C1lSB zv;U(^Yxp4XStY#Pv+>_bIKFJO^A{_|mC2i_*5{3ytW0+BjLG>cqNdrkOP79SP(nqPHF>=F&$h_|%j!GpY&-cd972}&!yM5`IM8(*JhS;^Qq3?g;EI9D;&SOe~FN_6; zU#tmJf*USYb}7Mim!^ED1mEj>d{7DY8|}fd-Cx!!30>W%-M@Kzjc zk7mKUy1%Sc;%zfo@NZ|>gR8s0_*RKmy1y(?;^Ew>U5Oh9TlfBAuM#)XeJf9i8-}Li zJU^}rWm>wwT&={l5qt0}c)jgS`<0@dJ)*>&put|x$8(f8%tn8vsKpnbGb*ZMucGER zDQae=5?4!uFV-sh=V8%3b+tzjO4@2Xz?_Bu1p=JDuTa$Hi#28?C~vP46nRy*PKhgTQk302wmv0p2S4A+sHbARqKxnhMHyz& zWkvDBZ-t_G-%mTG#Fbute3_ysd;l2p`FJ$0=i^bn&26P*)hP;|<{5YJ;b?q@YnAgZ zRt_t&)%*4jikwid$i^x~mSHysbKFn@?(tz7mw)LQqY|)N=sV_pdzKQg9a=`hnDIM) zwN34eV8;I%Scoo&g@k&4F+%wvvWq^1`wBRTrqlmaz$Ql-TX~sieUBA9+NXh=10rl!<;1F8qnz zG-O#A3n@SlHzgM5^&d*CMPnfw(2Ad<6+5`17+)X@c}8I&&-FAGDlx7}EMz~mhcvK> zk0?_+JUf#WdXL*+VloYB^S$!160^1E<3uH)pBL}D#e2d)y^q88a$zANL-Tdi5z0d7jl!3ICyScmM9yEvra6SR!hNyJ+Ce~@=EDCVo9qd<^ejn zpq|V^u<-eogSz=?LcN8niB02B>^)yp8;SRM|NM+Dxte{sog|dAzRM(`lnwrwBown- 
zpOA$7No;ZvyS11kB#27Qz0yt+GtPCz48va|iMh=s@#8!Ak7b@t#MW!9cbU9xR?Qi6T*&$&h7#=WsWk{B<)FPprVR}kx) zWS?9o8eb-CAPL*Jge4M$ZL);iC?xr!nKsdoU||~vY?CQ$ga2NzO;Rn1f^A-^Cs8|K z8#8Q^B-ERQZIXm-?h4y<3ES)uwkZ>~DG|1D2-{@AHZ8(7QKC{4*An9wRv8gCxGQWk zCTueznp7v`hw0>V{4l;l*rr0*rWxwaiymJPBg7>}$R1&_B4M!(Zm}o=i&=%mJi=mQ z!eZincJddZk`n0Cr4;p)zDyps^hgef<{l_()&h$K(mAudj8Vp|yuT?oI!P!8^*cyh9@G6Y2APEXeS_8NVM5@5`qQ*G5|n*G5_l*M{AO zYyM`#HD8nAT34gt+WEna&l#?^4{rL*aJ6~haKGVN!{Fu+!?n7L3)&3Vq>Gzp8Lr;D zzM#o)b>!k+tKsU9?`)Cbn$0e)XQ8{jj;V%g!3B}0S!hjR=9~7=mwM;SGxWQ8i~AdR zJNoM&XQ8iyv4>vJvCymDoOpvjkcNKJoBoo)pCt}w(a_I$+XuUNnL$h(_zi<2J-S_n z!NJ}Dw_$MDqx+>{(95QG7`(8=myG<^(0!GL-XZqTuRV&*z`mkkDwARiY!eL&j$mP- z%r|6U^J!QNy!;#3IU0uVxv)8Xx^lyyb%H&tXx*Y6EbM{au6GQ+yyB=t7PfuaOoKga z*Yb!^7WRCubc2P}VI!J7?4>IY7Z|P>m(MlY!`gc2WW(j*yr@)r*dImOFVYus4A)uM zMbBf@FwnVtfyBc8=?O9$26psBgc>eNh!$=b80Y-JT`n0I6_ODl8L}s*vap}v+R-p@ zY(;Gk3pe(=vJC?jen*I5U^gEg1KVi$bRjI}_8KV4MdQPxMdLHqhMobrK{X(r*2;Zd<(L&fEgl##-2o_=K ze`J-x7unbKw!uG&pw$NdNFTQ9`GsQUxa4ZJ_b-8ltK}lSgV-YwlszJskE^RD zkrOP^5>NZ@UA%vp;mXYe@*nb!$bU#%QCr3$)?O`4mtS{ZTlSXxdV^g$#3CLU+&D{q z-C49H*^a4+N-Dp;=ZJ1u-T~dRjZN}_@kaT;T?A2<4=8*a|2~;-<3Au_lb5_Nm@nKu zAQo;P5DT{th=tn+#KP?ZV&V3Ix_Ws(wx;bLt(5m4J0S1BMk6YZ$oso#L=|$*Bd6_v zyl*p&s68U@E1(ev@k>G8H&!X{yDd^9NDbwXcouQ^^3NZY?n}!d`7DCRS+GaE%EPQj zSa_KA&^nPei*z-9AxI~so?)YOGRmu$rIWki_+2`#)JeyKdESXGo_E6AGn^td;I~`2 z)UcH`B}onCE~#M;YdBMla|B7^Cq_1aXfaVf zHMSF4P1Z|VO&+`TXU#RtZjETJjqKKb&E=oTB79~RaU+UF{GEOAspcwSw-n7~X1Cta z>c-fY3$(h?I2Q38yOpigRnKA(Ke1aOnyZ1`nyR^6?29qYrLbH7(dq`Xzxakl>e;P$ z&DA~=b0$$NGM0Urrn#)MSfrKR%G2t+?2E58R|UKE8_i{6w`OQ{XS*w+G*{_#7Mam~ z{is&g)xB?>R>udE6vYRV6je8H%r8Yn)5sjAER~{Qwj0EbHN*DIU=~@(*U;OIeD{={ zV95zd%g6E)l9qP`e$6&S4H*6`ZLnURe@NPpb>$Zar40@~Vm55;dH)Leo3UpY?{fd+ z!P;5N#pwmwS@X55H0`Wd6B@a`4^F)!#`4I6_Q(>E?iA@qi9PZOJ&P>oGb{RICKidG z>>f$)<4xAr@kRK0KcD;1pU0IV={xw0jlS)2>B9lvny#9i0e8>C3wOX+C%kY6d@FlM zdcfT-&&{s_?p`>lRXG~9s^TWCs-#(~Ds^jBWi47&d8<}caZ0Olp4O^dXSAwg;N0Na zdavlT7tFo8cWW=Wy?>gm?KSs&oUiR|zWCfq$zk&sej+)7G0Q4BtbB^y5mhPGVIHRL zrc?efl22UPv8v53d=A<>xX(d*CSQ+eZ>RRiL)_0Goa4afpq+64gZ4B=!=zN7udP;V zt@b|itS+}RoJBVF&UjImd(1`h@Z2b`48hN_uCfqog9}j#;m}7aN#R}OI+0t7bX|=c z6^U5p3HZ#z=SFQlf_;!3&FEycQ1|Hz{xv^o zuR1P2HBEapkJwEmaHTEH>x9E{>A}4Bmp&k^+QHr9Zk|j{&j~NRCBjQ@67QI4FYi)b z!ZPY*rlZ*tUbmZ?Kb-Y|cEQ1(Eti6};>Q#-z0xD?lGLI1=RPcH!@f7~NqN;Q^koT8 z*iCQS$+LFTdweCXX1Tawx0HuqB?UI#>P;r^J8Ps(QT*B{Z0FZWq5Jy6Z=?caUdj7* zllSJNKJDYPq7mXXLGpG*&paD>8+U+XM?7e7vk`YPaS+GFINpQw2!cXq;&?Zr1rvPkL7d??sp)2|&eDO1 z$)kufypx)~K%C(;5Hbj8m}9k*2kfTXIJ$|W?Nz#@XgXyQ;v1V0n;79K=Y3A67*M7S z(T^iRG=n^kPhOK3pGEwdy1$%XU-$1ht=>O=O1*!qRlR?-MZJH-t=>P}tlsZ$Qt$UQ zs`q!DRPR53M!mlSTst^7xCU@8a87U);L5?3f!hwQ1Y9vV2e^E2S>SBotl%u*%-|xy z8Nn&wq*isqc#FDW%&l%1ZB{poG^rbg8`TZ|lj;WV8FfR~X>~*ADRn~!xOQ-Ea1G#G z;GEzp!0iTC2Cfub3Akc#4siM4vcTEES-~ZMGlMgMGlEmVNp5xhc(b~GtVvxz+NiD{ zIjOE6I-{=lpH|oVPO0m=TGjQPE$aFXaP8pS;2OZWz*U2*09OvK3|uL=5^%-f9N_Z7 zWr0frX9Z^gX9i~iX9TB!lbY0Z81B{#JFJuSH$g zze!!=YgE^ComAJHKclYc0M`!A4Xy#43!D>N1-No>W#G1hD*;yw&H*kTToyPRI4d{{ zI5W6Na7J(nIH^^2jJK$cF}La%ZB`v4O{!zKQFZuFst)fN)zNiYb#$Im9Ub7>!MVXT zfOCO!f~x?x8(bNtKEgEf9 zi$+eWMMG!QBL8W%$ahLD>S|SsI$PAD4sh+@+~69(xxiI}s{mIHt_)l$xDs&1;2hxc z!DWF<17`(i0cQqh0%ruLfRmck)#Huo>ammR>f2}3)g!0X)x)RM)&5p>wXa28-Q`wS zcQ&i5JHWMrbAvku&IQg1t^!;+xH531;7Y(1gL8n(1D6HP2F?o30?rK11kMOf0VkbQ zSKT|Kt{OY7t{Oe1t{Q1oR}HtQtNdTFb3b%1LF=LXjR&IQg1t^!;+ zxH531;7Y)41m^&k4=xLw4V)F61)Le237ip}3{E<&t{gw5t{iJsSB|!*D@WYw%Hd{p zrN2pC>1$M1cAZpLo;Tse&JC^soC};2Tm`staAn}OgDU}749)>AA6ynV8#pUC z3pg{lNN`4Q3OK1%T`}IGt{8KxD@L2u6(ddRis43eh5w|w!h1$t(REr~(RoT;(E+X< 
zoEuyNI2Sl4xC(H)!Ign41y=&D7@PxKKDaD!HgHyO3E<4&OyG>*6mXJTEgWxF3&)z& z!qG;xaO9*~ICMrW^q*D>eW%pIu2!|MvqdfJ0M`!A4Xy#43tTn03UKA%%D|O^D*;yw z&H*kTTo$-Aa8_^@aAt5Oa7J(nIH^f37;jVy#!jjQx6h~rBd67Z;ZtgXzf~>pwWtMM zZndDZSuN-Q*AC7N?ie^1I48IYaOL32z?Fh40apyp0WJ?*7C0L?D>w@{GdL4CBRB<| zbW+W~cSg-0JFVuAo>KEiTGjmF7B%1RR`Y$$YJOLfn%~)|=68T=1Lp?U0L}%@39bTM zIk+-#rQk}yZ3O24mk%xroDG~6oCTa2oC%x}oD5Dnt>%rNQuD@I)x6ObHE+bN<_$Ni zdHyCf&)2Btb)8i6&Yw~9I>5DqbAxLD=K|*hR{^dZTp76S;7Y(1gL8n(2bTrT2F?o3 z0?rIB5}Xm70#0gGbH`iM+%dPBJKC(~jx?#c!;NaL|D>AhJ)`D!omO)@PpP>b;M&2t z!8L$$fpdbZ0Jj@l8Msn#CE$v|Il$$E%K~QuX9br4&J4~3&InEcC%M(-wu_kr- zXrsD(`LCZ&h=AEox4e zTg~ZgR&zSQwS#kmI|j}L&IzspTsgQhaHZf%z!igYfXf4y1BTZ`daHE>-KdEMWj5r`7DvQ)+exxOQ-E za1G#G;GEzp!0iTC2Cfub3Akc#4siM4vcTEES-~ZMGlMgMGlEmVNp3Z3yjjf}Yf`gD z8`Z3llWNw`88yp)TFvsEQnR{R)vV4IHLC+$J2*GE25>HL)!-_?m4hn-R|>8KTroHY zxO{L~;L^ZZ!CAnW!I{7r!71RRCN*=sQOz7Xsb=0jqh^krRx^iBshR#(HPhFkW_G#N z%+6*tvjbc^I5)Us;9TIG;3~kCgDV493a$iPF*pafJaAdyY~ZZmEa1%GOyG>*6mVEK z@sxgjSEYXaZoJ&f=H0bV35j4+W*pbAD?hAXSN4K_UFl2ubz5=ViQ^8W+aYVlaWRe^ zNasVA>3wUq{FAfya=iQ#;VzXP5H6K|ww?XfBmZRd`2H#XZ_u-2hWwMl;FRcpW4-=( z`F}%|WQ&RWjM;*%!CM;m{K%H(f!a~|f0p4HI{AO3N&cUsQT|_E3%TZVlWSeghf^m1kVv&7)htrQNacx?12R9p$XPnCxiFmVV2oJm%|IM4sL%(!c9@|2|SSSVwl;LzVE> ze2hG8?Y$O2o=)>j3?pTho*VxrWm{cZSYEvr7IlPd-At!69UxmDqElK2>Xwm*Zda16 za79vVbS7_g)napsE?fQ&lV5VzfYdss<0_9 zd*6GG?7oLor?36bLWVkjS1e>(FTS87yN#EA*+6z9 z=z3{0UW0huLx^uSWgy42l}-5&0l?Xmk!6XU_9z@|O^-r^Mt<%!EzG*r3Ne{4T4twYmY}yvT<9NU~b-9L_BjIHtmJwX&~R?eVq2L(9_4mvVSgmH0>G>)c(2iG6KDSUcp;FZ8VTg`;Pa2+E4bx zKe6cwwNDS@p{xI1p=Z<0yj5Q`@YYVB%iH#u=))IhMcOBF+juWN>*T%o@+QjIuGyas z_~&k+Eyq6Ln0>lK&=rE-Ez;#8y#w@Skt*TSA=4if_3jY*N`<~=5IaTfzvtGNUcW5p zCpP^x-n;34Mp$_^-G^ZGZ2AoZiD%RQj`L-o{v86pv*|zi*I5H@`uQ20q33p(5y5RZ zBU;pF6Rn8KoAe-?aUZXHMmm>#Kg@H#7w6&&EJm74~&b%2AAMn}D z%TFW++|kpSiOa7>vzZI7u6ixto3mngJGjwiW^qL`SBk=0_@y!PQ9cA_mWk@KL{q#X zZ4v47dN%Vnys~tm8Yjoycv5l21_zj)O*eyyH9H%o8uD6>cN7I=D zef>|7obBCYHL*g&l^T-LO=n)q{c{bQ3731jh;@L@{0Fwa5^Fl0dAEuzZ1a5fIayFh zXa0*axH6cumPO&YV9hFpc^IXg7`eY+&D~B*)n;og~&*Y1EgV zPY)AoD~-D2{Zleo;J~U!Vm(2lzP=JLo6O%sS=6`LhU3hv@aV=VJE_^k5<5vsG&N6$ zXI;D5Oqh9o4OuXYnwJv0*^a%$Bsr0q*E7jOl5Rp&>?8{wqvl7Lw2matr{<@Hf+^Je zE6U8zhz_SybB*uy56Ju$YCf=H=Q?ISv^>4XZvNqbw!^en+u^Ozc4Xy*w%KQefY)~5 z|KKCq4(!OERm2dgWtZYf>e6-@c|&JC5Xfddj1BeLE=%@=X>8Vxa5n21RH^OC$BH>^ zSFy*mRolfasqHEaW3y^lN07Fwf@#OKT`rha+l9p-1=_C8o44h$Hr>^$tQm6hSFXk|{E60NLRknQZ@Uu$J&z4Kouxnq2+f7ivoHoo&-CgTC$ z)%n6hNp8D`oFut!?}R3jd>0XnNN&Tj>0bM+FMO~4mE+Am=S?J3% zov=qw;&m(+bwo$s^trb*2r__oOMEy_dSADOhbXw=551 z23%I6MA|RXks>`R(n%scBGL&wfab?#xf2Ii^m0V#W6^7Rf}*6vW8G4ol=v=6J zj6M1nfh_u0c#I__h+szv&3Kq4C01}r;%@O?CM4T==z7ZtmsrePl5o=(5+o&#^X0b* zLn4>&*p?Dt6y)v}q3%nB2DqYtPU1EGBw6x%cv$*skyb={yVnSvHW7-xj;9mGMS4f8 zHr$L?3Bt_Z#^(jR2Ju3$R)ksFDncuLN28y^PdO?1DvjO;pJVpuL*l(ryuTve-wtBY z-8{^a-y*^+Sw)y7M65uVrMxSlv!pF$m!@V(Tee5U?{RFJ#* zf5Yg@{mlQP&Gd;VOs>A4y)6BV3`M`Hy=M}^kH)-M zd_H&N$|FalnOiUYu2)*rA;K(Kul!<}wAvwf#rxK0($DUCHpfW|P2BU{!eF7n7{a3e zA+&S}EpO83dqPnGjs72hh?AkGF%x^I{ZX1wim-yx19wIIrC}a_=?}Ms3Q0tKGIJ&K zj9ke)NoZI@V**8Bs4fJCvd07qB_%@1%}e(^E!}6j9Q~GbUod~-6EjVOYr1)T-fPm# zcWKPL-bGWS;so!{&PYG|zxT}R$yA!zN@G%b14pFdG*Q+f^hAmFyG8q5qUvJNFu7;e z4QYM?U&9r%=+bWwkQ>$FeGjq6tn<9JgbbUmY&Daiv7Wy*k)hkZt?^`7^1Zo_4Bhj% zL&@-uG-hjW_Y>qJf>81ZKvZPHl4rf6!=-nW2e6o*W4VLA=&a|jV|qttAd7jDefXKa zsJ&KSm4S&J*Khy_ZRez zvRb{P6m=AVDS;;ry+dZT?RtkJz#h}4u$XhmH?vSh?~vg3Q(rWuvY5Z1T>~_xpRKm* zi^{R{PhXS=6{B2X(GZ$6m<>-WF*hSv%pKmi6?Z+Kf3Gie*Xmb~x%8`jXvDV>_Lv_O z78^K;#aj3Z$k^0M{pvSqY?jJmbHZ8d>S-)?vweqy#Xid3yHCF|ld{;IMiyHgz+w+a z*kc>(^(!fjeHG$&p!Z!G>p_m!rC-raW3NIw#A{w%-t)dqzXINMp3xVU2C~?jp0}px 
zR}2IF4*LxCD-xjcN6O*?>h-G;_vMa$z-Qy9`n)lAutuMIma=#$fW-$*WbqN~r9OSZ7|JxrEItm+VDUB<@Pa=Wq8wPeB&fkXOe~MD8%p&8-911O6J%$;t0|;Fvxq6SX6{di!Qff~Kqby;%!V=!_VaN7F1ez~)+ z=Ux4>x}F<>*g0@qpHrQU&3Fla@r=BqUv>dCc~QgARF?2XUw4JxZW%b})Z3$EmVi)U z%eK*k|2L6YCh*g3iR|n7P;bB2P4?^Uw%&n4{W8UKf{uk?0no!-7% z&n&YTzsxfkC;M#JFd^ZYh;#(0-0q8%luNGW#V5MneDxo zOZA!6qSj|4n5D-1&N+Q%61(-dK5G~?A7uw!`mFWLTO#C**Rtub*Y&XBEEgwx~mM_>BTlLvCg;~ae*lZfkW^4RM;p`bK<1Kww z$u#@y+4cJD6Lj{%o&(GESqOI3u165Bk%Rio^OVhA%+Es>eh+2nvm97=XrH~N_u{BN z+w6V!6@3=TxuUY!KlA?OA$_LIol$>+DTFTaiGxoSd=ho#(L-c)#pNe=k=4~la1U{S ztd^Ul%yc^Y7j7wYGo4-2B4wiL)>bKNDxLk-X=$11l$1S%&c1;B$LMSi@?WL1KSq8c zo&62+Ey$luXa9hV0XqAC$S9skVVbxf5*H+g zXyP$wHbQd>bNZgRL(8P-gHkVxKs*)?K z56FAqBXiGI9J_IhWoysLN92loo`#ViQ_5Z<1K#(jgYPV0ZIAHs_b`r>~F-GWo(7Lm5^~ zOL`A?xaFN?)5 zgJSWEpD%tXz95#y6pLjtnPMT#WIE?fz7S>(Y=(s}(R5CaSPOHz*KkTI^z)TGg`LNx z;=5uMOv4BFJs=gBunLCF8Q`m9HWi7rFm0ml55-!TB)%YL&ga?OT>r$*R%u-%yLCri zRL0gc$cswZr*FuM%KI*O3X&ID z*)`A>_StXbMP~M8vb+cZ1Rs$X8QDmxyhvevjq)PWDBBvEWShHLwza!uTgOS+R(DcP zGd0R-<|aAK(k!Q0-Ex|(RZhz~EvMz5k<*HhUyS?`H zGq`qe9pE~_b%FDN@q-x#GXmx|WMeoUKP}q`>Q>MeBidp@Tg)x8&C)8{tjM#Sk!_if z7eihGc`4*&r(|0>@+#1We3Wy5sh-Z}N;BBp*qLmuEsD+kxtYyqu9KKZh5}7MV=39&Cfa|&(A+C z&qu4~Zv%#}SWn3*w$pM-))_e^4;*IcQ;NZrfP>XiDv<9)z6<#cC|8DZ zyTQ4^p}#5UZwmUGg8rtUzbWW%3i_LZvz2li+!(lfJ%2kdFOYh!hsg^Nh;ORAz}Pdi zQC?u``QWO&z})k}SMmZ&&xgO37g&3)?~)fF*kGf)AggEiU3o#CDC-bqi$&QIQMOc+ zEfZxCN)cr%L|LaOTZi%(rwcGn7hs$&=xC7_bhgS1x=zard}rhZgD2$$!(c|hjDi_M z{&=gr5Myc~TtF{`l@`KE3t^>&u+l1o)UFmYJRlAhPGq&M{}shhH-%ih0+ z=r&pWYkp2bExj%8l1cXj^AWbL={B99R(W~Eer7dz|5Tu}Jw~lFaJ!*P-#xHDm4u~J zYfJ>Q&gKQIDS^zo&B&}z`(Di?Ve_f=*=sqWBrNMCoh`qQHS27}1IG?Z+s+I4c?1P! z){k7eMdjSLKt|`l0j67`)RG`L7%@f)=61I9E=x`hX30wt!wh$Z$%R~=yy+#PouJ88 zMwVQQ+YMd1(|1lG#tYrCZW4N!CfCE+Aqm_}lTUO@pOMh1G}$f6y}WAYLYDmBqq_7> zbiP8^{Ghzx``COej{JyBh7Uz(dWFtk&e{1Z0@(b++>M0(M>@Yn^vL0Bdz6F~3(n!S2Uws;!}zMV7iB1;M2k`#D&f%;>FrKBPT771NT zQ|B5?w!!3D+8`D zT{`}A+jQyqx!wW$f+;+20e;H|lF4Ypf_0*)ZuD)d$ic0QjVKpH`e1H!3R}Psx{FLU z@gJ~3;#4iTezEpNa=VkVg}TE18`(l*pRZGge|i(EbQ!`h3#ak&3*&sRj}fD{I$(&> zh3N+ZhK#iVL-KyZy$Yw{p798|?LVkn3MbFohCFRk4EN0Vkw9*5h1^$1Ztw7%ttH=& zao-x>ckw-2K_}=!`y{q7zX$uSzIXLlzb2DL`{n--eFyi%VRZ50AvDWRKv|iKL!kfIZNw3m{-*8ov z7+v^pUe~1mtE(Yf^0I|L^1WYyWxW><5^XI_4dBj0f_J!x3a>fpo=Ge zm+nQ$Ib7wxKc}h7cn$xCyV%qMu1r2mQ`d=FvP3QUT(|7zy5%mR)haBLgr^cZn-LSa zI$Kl?nS}YD)J=k&OjCc33LY1FjnMm>9CDJSR*S+$p;{8E6=5Kgs8tdbh;wEf#Ob!D z*1x~xah+|D8$GoZ`p)q(f!Ap2rORnw>N2u$26Rg-VfNH(z3=YRWt6a8l|$eQ&L86WvBO#VbY1=b2 zEr-n-AgY;Rf<4IQ37SlTZt}5~R@ir@RJUlb|M*RvZLHVas>>+GKq>3JmZ)3W#;4}L z?chE5)*Z&u%0=yF?_ZAV7S*A3)drS!9BumpKLuKsFuqe5)g&xxzWn4wV%P}sJnzSt zKO4*5)A|PvzNK3v#)9JGlhbL#(0yy7J#9=-uy5Lbxt6gC^jWuvQk$k@wlJa6$eW;I z%j11y%mJx+60-@@DBLuB+|L_!w~UV*8=jHaekpPsy;oq;jROa^5(A!@*=l&B zWh>VpJ8%i>7Q4g{F$)?yzIGDh(7=JEI-3|scdbHS6F2AL4#6lwqmh##k0m1 zDx|h6VRqYfeks~Ud|ST@7|!=(z8Ek}yelqFcoHE|5+*z)GsDYdV%F9C^<-jR5VL(3 z#un*%%D>eW=J$N`cU@st?}gv$3avfEC8WBmr|t_3vJ zx=_km{Xu(XE{8 zGwq8;VU96W{NE~4Yn;H+70(%&?E99cM^uv9sWg2moi!|Pdi{2}|Htq~r7$o}!% z@~`aaEz1`Dm8G9!wp6nJDoy_$+>bpMenIh)0E7vn3HHUYcoo6*4<;b#;$3k4 zLk{Ta;@!Rd26Au{UHn^Q?}7Ipvi~c(xCY*T$o{(!9q4`MC33KmEQjzCH-tEeOA8oY(KW5bj{Fq-A z@T0>O@MG~*maz<*0Rw*A4J!rww*)2%`0sB1&~EcMe}K37F5fD(dDJCu_VNfRn=5#P zl+8Ohu}Kh{1hEOkCNIz1RK~eY)dTm>&>rD?*tAFLuoX;uBp+MBv_~=#?M8dVhV5h8 zBT3jarafX2OcZx1|L|?-)gE@EX6@mPLUL0`8ib?>-lDXJDa|M%_KXeuVB3#0;~^Yu zhnFdB+jlf$S2uZ3+x800cn02uv~6F}jAy&ad~I6`&3GOjsy#31AsM zzP+j`n`R7|SjH!TEaOW|qDm)%`Tn@$sO($4J>wqV1b6&9x@02X6L#bTghl zYJ+1Ud{Ho-UrQ-A{%>wc0iWbrvVnVtUh zX|BodcvO3@Ig~AV(!iEHBg*alF#hk-eZ}u*Y?r1bc(x=<){%k3=~_cnzxxkbLlS=s 
zH@BD>9+NiJi6?y{{7GL+=B0uS(!8vz1rF^!|K(jR+IvGc<#;LeUhncH(%j*`cL$_R z%}y->ZnRspj-7PLp`qwP?ZaTP1#xu1wOc!TbKr2Jz5qGJP?P&F|cN{a>BS(C zS4Gn$7rAq?RllH1F!McMUNx02`GC7FTlElK@(~7!ylOICa+5nYTeXQUfw#+5db;Ee zzaUnvp-aBTXpvWo$KR_C(g?(xuN8mf1f8L-=*)ehlAHEmJ zKR%~Ruhf$}j4u8AizH$)T{?D%+AVBBP(v%o}xN)PInS`VNrM zuKi@Ra~~On!_m?98Zz2SGe32b(S|BA>fB34E1oB#J89;vN;0~QW`6a8ZmFwUx3uA~ zZmIj2wC%Q?E?}A8EDQGAvjVP^I7ztcvi2kiKjuk(Pq(?)r|l$RV?O!`36IPTPGVUz z!|hq~yes$X(uVt&e4$Gl8PpVA+UW4~r*vs!J@aG8|EKBO?|Ns=?7i2lS?}|VX%saRM9pDQ6W`Uar@wgm zS3^iAo+V`b5N6A^}Iw6OD{ewV1<8|}ME59_!(*JKE(w9nl`#suk}e`lX|*}BWvr}?d#g-F|pVXJv?phcY`<`cK9he_rXX{~w zSHh4QITQF&!jXR*{}GCNy-fSm=W``L6(1b$X|@9K_OjzWd)V== z-R!u-%#Jsev*Yz;?6_qYJ6^k!9j_^6$16+Naq|v#++R2^vC6mDG4nQd%v8vZ<@5eNEAjsR zq>f*KPweQ?uX&H2&*1%dKDmUQ4-irc@BQ=fykE~7Im3E=KkwJ`p}b$uhwy$q9|UWk z1MIv9HktSBdHmq5VD%3<$$I@D@7wx5?x((!U)B0FpW9D>O--G@DS>QL`rF*{X7wNrc9so$vf$w5{( zjcN_$?9?r)T`Xg#u2D_j#ZI+Tt#v0mrBm%v2|IO$YS)U`sXbKdd7ITeOSOC3*{Kq$ zJutCTN2xZnjh)&|wO(Nq)M6U(Jc3hcL^y(@G$IOg z;=j>|*q!X;Pc$MPgyMH;#DXGrQl}9wzs*jqq7mua*~tbPu?8gL*Jwn}Hg@s@8nLO6 zojgh-@~haX)Jk?Lp@N+>=j(;h3k&kNGMLw!!7x}1`UF4%$c9D;2 z*+sv%nWd+SS-K7~i=&cRnkty3{s6OB_A^WEK4z)e%Pf_9n8m!CSxjbTDJW-_yfS9V z+QlpxJDDZ5lv(giuq5nY7Ndz-!it$?MiH|FZ)cXkx0uC$8?*QpG7EkmEDG9@aDj%= znM3Hz0bI{MT+AL^$*#Su&asEpHSK0~^=4LQDQ9)HWvs4d7ptq>$?D9dtPb~jUBM1k zmuF&iS;ed_qlnd|ZfAALZ?U?BZLH2%$m;mNw|ozzKKDQ(wF=KztK>5BT1^#strqeF z8nL~cthz-bb~Z~#$*R7cWEIv;fF)W1OElrqj$v>>4_X&~#>x_+x^`+2W?jl)Wk&m_ zo7gT~;7_9_73mS5yXXtw~}m>&7(QF`Pvo_b_> z=gW_>9f7U(ISh|u9~Lo_(sFGQE3U9ge_^{SJ4#G!$2r)}Hh%Px9+|`~ihrb$%WV35 zw(}N^e9hK)i0!mkd!J;b1#>n{)FanKO!CztH=)k<$QPIvS#E6)V>=PYi7U%Du;QSI znhSa)sB0pzLOaqLamJ!YUKR~|(a4)ESAJl{Z_>zKsz=^;(W9g&lB`F$89&^nMNPVr zUd?t^MjqR&MNQSDPCe==$HxcR&LGX{Z9OV7%K1wzDy{A41-5fWM}dXya9rAWlI>`U zF8oo8T01XomL9b^+-a>IbtLN3>sr)FRgXIFqD8e>shyQ#Jv43gdTM@uG~qB z`kMdQM-5^^PK%a7U7ZZ-YD{!@f{wb-keH4w8N)|IB7T~oOK{erM+s>LN-#u(&L5JG zkUSyDT$(pjXqeY8B+PHk>p?BNEom~$%j3T0g@}?=l;Ecp)(MbeM(EK29(we2jAae; zJPS#Ir<)!fuISO=l8auzhns~CKAbJ=(Z`%7g#(;rv~Wl#SI9Pwyhpal2c)uUvs+1U zky}Y%i5rN@rJ~FO(sp07R8|4m_kdIsXqJjXAOl~w2-qNH`6W`B37^1EEvtphb--;c zYeF61ww3`iwX6@i0qBOHdsyUVQcB!RWM0Y(dUOh?y|w5qd&#oJGq}@iwWT1OGjpK z@TDVu9CqnQActT&;sLH*z&cfjn8;<1X7V+;gcPj~fDDC^OGZdRLJEV#Z%@+E1nC^M zO8(T+!EZCtfhUg+PCP|AaI?Wf0*+%uy%~`3yboW&kl-KINCxYu#s+FDc<>XtJRD~U z520YA1z*CYL2TU|CV6TN$4H)vNzIt2u|>7Yd4{k8Oh6jKOpcWyhOo*Opp}PJ+qvG{ zcD&dS2GHVO_ZNmh_t`I`K1ki50=-6KUWM2Qv4O@ch4_bM4WtSv_t2OX5c5T)EJ%2O zs(1*N(Gw_)$L0qjQ6P6D2ZK3u8jX1kpmZu#rIApA@L5htiL zclUxy6V#ad{qR{xW7ZuY`%R$L1hmcm4`|E=GudC)75}+3&yO!fjx+Izi|1oCn0~2Nsm@vD*nCYly<|dTaqpS{P zwJ57WS>;h?HbZZMJ|Fr#=(3>8K%LYYW==-Ec<7C=4@21u=z^gOL>+&$7YZjBlqg6)t+rA*#pJVeCl86^ztlE^>m+al@R5bG+FGQqJ*}wp(#ze2w+1 zDl*>Ut;c*}D_Ks)ciH|~NyhgykBTAV`l5MCMSp{CsaD?#t`1%1T<)jaMO+PytxUYxe21V z38J|PLnC$L%kAcAWTNM2-MEf7JTZiGkB;jROr!mxH}XVZiT?Rt`kcRgtQ(7MH|~>( zUDl$1lS$;#8%?s>B$z@6IW4mB-Eb#GH=d7JI6*UZbiG(CyEettzpoj;jQM=2Zv0p5 z;cvBA39pJ|*IP8!qvLO>vYK-Jxu0d%fiM1+B&)eCtXG;ZaWIA=e1GU&t0AKNNC3moo^T`rO>Zr#?6L+uf*~7vQSLj`!7L@l#}# z^E@y>l=Bn}5am1)HRV@5cDnw_^Kx$2XgzjGC#WlPo0`c=Ik$%Du^F73GB=A;SLSAL z#>(7OYwtI5ZZapo%uQ&P-jH*r(b#NT>=`*XjI&+l&al0GM9%f+OqjVIw#W=QaR`qK za$-M%$m8HviS$ z^{kwjY^^Gn69qkCqN#ZdH03yDdF2pJSzh@N+?UVN*qSPNMHkpG1Hgfqze8SW+$XOH zs*+cRf-Ext{Fdnd73Z*Jf}#@U$E)OJFkco>B`=4`a+oZK$ugKM%iJd~3*I3wj{%Wn z{!Tfi7aWd(`{dLiFgW&s7Sey0oGL-z1s2C9usBwl_AO;fXxahhLwGCT~4%ChyaUCj_yhMR$sj z$2M|8$8mwSFMpQD1%ae7tVAA{zh53#yhHZM0v+Q+Y~7shaBB@`Fx*(SZ7InUInr+FlU2kJsG6bn&IE_nb z-?xBmNfAtlJ)8+KZUtWUv0S86fmboc{#mSHb`mtwpZj23Dg9KHw)afuF1bpWu1&k+Ff6 ziFWNa&|a{9!Dd7*XqsTJ2B9v9-~7Rl$>hRu)#)6MT{As49mqQd_B?w%CLUWM|pxi3Ng4r 
z?{)%yAbVQC>IC{Ir!H__!OjVG6m}G4G#}JcU7(=q#d#~Br}Duk>Uz|J`N<9*sUgV2 zs4v4$CiseQ$E*X3Q76cSyl5OaUrzdha!3K?5NMb`Y^QOUP7NTf7$SYx1*WE^BJ$xm zFh6lFC_6ZxxQqqFUsOOYE+i*1ar@%RAIz5awach5q5PA>- zMmnbySW9q(_Uhqi0FHWH^|;RwX%`~yLGX8J+;xO9gis!WF3Jzl6=O@n761k)I6ei( zm_msIG1RF(wAT&C=&Dl=s2gY;h@nndpgw`aBn01s;JeVKPUCvfWN!`(O#F-b`m7Dk9a0`RH$S_K_lTw6=sC zZJ@J{naRB7C?-AdJ_T}%Bv0c|^fy>>4TaN7&`-eoE zv%p4T$Pb;6mSQT(5IqA^QHJOsj5G|J%zgt={ z!&RTNiPQhAsMqg3CatjBZoVz881T^N?9{*hSz75M>eYyPL!w@#s27ZS?}>UAQLjqEVBqMip|jxon7n0GS8zESM+#U78S;P|N!d$5=6sXZX=85Bo#VlU7}CkOKvt&;=L z1JY!F{swlk562~)?1>(bCd=q4-dq0?tTlK@oBN3F<0J9hCOg@dxX@VLJJXv@c&re(c_?mbNBo zUrfj5=cK*$%|0d4K2KY#UE1d}TA%x;mdop`i#RRU|QHtZ|VAxjP@qd%Z?SLudB%IF&FD_51#c-^*E8Ev+P)GDJ* zoZ(?~L7_4__r|mv%KxV|(Y+EBZycY0scxQ2v1@AOEH%lM;v*Mks z(;&q=VT3++W1-?5(oE+n-hq&?x^S&Nw-DX56GLSsFpCpAjJM#-4)8M@HNIb9JL=Pf$0a7z(hLt^Nwd;RsxOg`rI#C9a9u! z^ZwIK2@J+8z7iOy=yUt*^|4Bzr#|{G%6JoJfEam=&i&3qpZg;^@4r{lFDYI+ou|aU z`>r<6y=%d@idTyMN|)jl#!u7hDLOCC8tblj1@e>f^5Z-k^Aa$yQ#@-qpN3~mq2gJ= zc{JvwSwX+x*&j*g>hm_}Nxvu_#VxD56ps(+yp|T#Rq?34wB-@S!{Ipng5uFA9Ce8O zuSb-rL)FUEfy2twzIT+V-R~+>oA7z=m@>75#!on;OwF%Rre?t;{XJ!BDvFaK#~)Ru z8c{VAx?p?;qKY4?_~O&!a@O<8l%aQ&DgB3(DP8!qA5o^%L$2eo#!m(`v*Mn_BX-|R z<6n0iGb-+{(fI7c%J`6-ikiD!QH%E~u0uNbEnEjciUJaqN5)ZPkvjdRJOWCQM?jwP z2-&Ns56hJCKHHV?;DG_TN(#S)$5R@=wK?Q9MGb|)G#X#P8FEzrU5e^ct&AH!thjbN z-hZgLcD5h5t*G`W@bttN3Hp>l-l>YzcJxg}e#q~PDXEw8(iORn-y3p|KK-I1x6$}Y zZ$19-rRet+xt!As$$9O&M=G+o1LX|Pts$qj9sO95ldNBTqf7}Kt;c`hp~u&|>+uez z$M?AD@%LTz_+J%0!No;S811Yl1dh-XLa3e)c|dVVqzS2KSp%-8wxSCzBWlA`TS~=tX&F8x4S)|NOx(% zu(y`*`(^wajmpCPr@d%6tc(iXri=;#pNTKXOgtdVy(6Y7t}$)zjUi<8}Xk;CZ{{=FOK2sE~j{K zvWds*`I6YFCxpj%;W1cv^n^zuJa!6;8ev^0yqn?OBD`bRGTC2v4-?)~;C-_2F1NNm zAg|95BogU@P{JtWTtQ#aBj_tYW(WETMbKCHdh0JfiQ3N!`ifdXUvaK2%}&<@Wd)0QbT8A6jPG)b)^MzSuW z&G$*x6+*NZtFRr%b_UyBto&h((`cg8RWB23IZ6|q`MT&t*J!VOTB0}65+BEwzMJW* zXkvcLXD>7TeVVupx?*g#?Ox@~W_Em3#jLea?6RI{wSTsh>5gkr3t6*8`1av!6p5Xj zjUw?&9@Ui^9@Ui!Ar}Zam(vVf#)Ed^zc{x;;(tV$i2h29kduX7sE`wc93sdWjK;{R z+I;6IzkB-paWS9H)aFkW@>4>Nv^PA;E<1$x20A~1lNekUBnFpjc|-FT@}hPN*SFVl za)b72q3IGDhtM>NhRmFJqrJG@9LCzyMHprgMzN5ygq%M2_3irnG*0y}e|;3YuFV%0 z=1QlOP(PMk5nO|}bAkb~j7SgznWIaW6m7-1~QTb5rx*z1+RP^l7YW=SpPbCa5x&r@o-;<>;go(oQh zBvV|r{*J%INQ*K=s%eHuH4TaOdR?FY9S4(-#af)tq&S)HV+VxhME9eM-5Znna)bH5 z@O)J^+?gCMxz$F<=k=EW@SZNkg!N1sc5j>^mSbbwX>qr9^Cgf|d-y&t zOuBEMA;{b;>4xBhW|CwGPJ-2NxgjV3mihXO|2~#NM?CB`BZ=DRBO|Gl11$G;Uvq{>@}OL3-i`rYOsF-Ji(;B zBn{KtX|n#&&m_0Y3Y5Rx9zA6~$?X-I0ihYS`b(|+)Yaqqg8Hvq0$uL< zh*kwUTt8m5FR-BneL=S3w$fH{LB!^;KID;byF4Vf>2VL%&d7KO9A$o1$;&5_HU$~NL3;%#=OBK*- z{g<_kE)Tl?G38sA2R&C-E_Zog7k=x7-wL$9RkUn^RK+3O79JNRHNvX#Rk!o{!ZUN% zAl8epXzzHl#^u|LE6c~b{2=;l;TIx;J`vydK-tq4{vYqlB!%mfMhjdz2QN+X7r1ow z0+-HmHTH9tUelFj8(qE$5}0(|yw$q`->^4d$RdK?Tp@$nE-6^_rZ0~+={X!P7Sc-I zn@M@RHa3%$T-ykE|H`8x-{O38uT=3g_=hL-$S;W3q; z$=9Cn7$!W<5!z&-Ra7nc56PeRd*2{O9!y>?$|RuyfeEl6yF`RRLUsr_K*)cbksp_P zd$|5yijalPy(I3Zf0+B}_u=w@@Y(Mt{0DRW9iQ26ChN&rQL>^XZ{>CGcnbe1;%rQ! 
zm(qn^5qjbCyFlJc-v!~E_K9<<;TC!aUQWI!>ZkBKBe|X58Oi?;`n&v& zN&dcdHdS6EfjZKIr2uJ?Q81-8>#~Z@iGRgd8K}RKzhXdKFiEk%52T zzZ>ND$0ARDe=Hiuz4iujuV429y2a&N&oSDfsrGX(x_q10`ufu@-;yuJUv#-+AXXORIFz5|qS=sM71C>vwg~Bu=>56xdv(0l191mE5O-Feuh5?m-@+`> z7fD=myN5?~+fKAa)!5Er>%evg8>ZS8DetK@&fC z>MC9Q4;aqt zmAyQyUo&{wubI$(BCNVZuy&%q(yrg1O#Zrqz5-gLS8ikbAAlKG(N~!+t0dT!zKZx> z^}!bEt-t!>X#Lft9F=f=B}XV+Z|2y7>kBx};QBm{VYohnV-T)S<=BSnlQ}ZsdLu^^ zTt6e+X}tbw4o5;+-nuI?zPMy05Eu)?cmQNQdjXk&^13BjFL= zJtudUdk%0MbL_aL?l}VeFegNm1n+Rq>EvjLIbou#)>V7;U5Eb zp>Vff^jANl+N&-2B9XuGpDglB<5#%6&(wc~L4Iov{(O|;B{AD!meo*(lqlm?SeF|IMV5$ zw#3hl0q>u(IK*&9i2fOn1_j#PF<{C5yx&qZ+a7Xi&&zA@>T| zB;;-(7YMnNXVY}~i)@biyQH_I;XdJ_b2LHrTOJ5bxugDYFFn< zt@avqPI{RdXWpaE?JHF0-l9vc+dp$sXXhficDke+`nqy8UIJR&w{*#uW$K(ny5tT& zwb_7UThfmcFqWyaiwo7bG`eIkoSf2@{OqEokTH6SYcrXr&i3PQa&tZQsB!4qdHI0D ztD-3*If&WZAr7-P$8WznyP!yotMk-Ty!WW{nh;O-ZZ*z`wlQPYtj>OjimALCX7}?m zi3=-I=LEsB51%?snYde>oy4!qTpxbT<^Vx(cE}zzUPfljqcAx_Q>K@z^8!)BU;kvB zI!|dPchouMHd3k116sqM)Oj8U)w#pstQu&_GdRmZT#+p9b9S|&r_6NIQjlnHTaA@m z^^`SQ*$ypblZ&46CI=IX!?R4YYOKd`DL>bkMxfbcAOT!8=JD%k^yKByo%-!kHFgGv z981~HFH4MPi5h*7rc|S=j_IBMP-FA zCJ3cB_p32}{H%=*Amf>f)EFOJCjZ@POh_{a-|K_638(ch{`H8h#`XC{zeiMKfc2AZ zHBQpUZc$_7_o%T0&E#D*1~_f8T~__CsxdR1e6FYbZ;YPmx?hcoX?uH<8iiXvDpNq$ zh4GGv>bBkghxYsn2o$k?D}4c?YG=(+fXRBa^N6 z3u;6^V#3!8H7fPuD{3TA4kL$bpZKbg_|BxJ@m`LsY3a;UBRciyST)iYpmsobOMM-s zc_nJp1Da|YqowY_A<&IeYk7A>`?uOU)kx1WHKG_nO8T3*YD67E`-C?Xk=6EgqpDT& zJ2N8L+Vrd%e$ICDH#I_PZJMKM>5dOGFmkr;|EH?;YFp-Mscjs3F2djX)~BjA)OPTO zs-;pb^@jb^J8FdI9#ymNQ6n;dZ>)y*^WT|>VCz>Os2a!-Us1J8>zUoEX6h(3tC~;C zmESPm-g516Rg1CR^iZ`hM{SxK-nd^42YWzRpY`h{YFK{9jwjS`N!3&D;Jp6DzaHTU zEnSsrSg{^sRl~AcEnlgjDSOoL0z9#+VIlj~uuA*6|EQr@n3wvn)nQb_25dJ~H8dAC z7Tr5p4G#^+n!cr;Z8iOB=peMUEuVg@hWZt%;U<2QhNqXP;hX8w3H-|u9-}ADSHlCf z`v!gKWUIVN4NbSb{e~LuYgNo@D7x*e8ulAq`lRj4&(+X!$LSU|tj6(Xjv5xzst2i| z!@_rnh&7k$OP>;DVYZvg)zATJWtbY;J!i`ted%*?6+dcAjV)JHHB8}GD6|)#bco_5 zAjmb)rE35O_bgosl(&c(fPj0MXON(xn#x73YJR59m@IcB|;pFJPAeyVvN_UT8j`OTUJu7Ml0zQo!p)2BPMl z=+d81GXMtdbm^}!aKNCAra8gD2!mLf<_v=v9GY;rgQmISuo$3pGFtnIrZH5_rfGht znuV(CXxcPr&d{_cp|L=7nx@S}?O@cNPSc)8?I0LjqiNwV=z_sjnl=Xp2{3q(rX|22 z9*1Te?xtxk;jkR7d7#c+nwE?@eW-JXrY(a}DvXxWv~(Dy;IIaVM`+q=99E+aVv1~n zs+p!`Le&UWD7Ji#OSjFFwisk|FTCXGo|N|O1skiR*cvu zwIajX9td!0ce@{-=yLDl6IAXFK3V0KDdx)Z#Jm|s-D#z~EV+i;CD(duX@|t*mn0^? z^7-Uf+DBsYt3XVC75~#?yX0mUQ(&HA3M@%Xfn|s(Fb_V3<~9JY^+M`{)a9h7-EAf@ z^32dfa*%Xb4pOS+AkP{($oGgGOU1VaZLbiqL{j0a#mgtFmx7GDi^92GMPIh`Q(AfNN z49u+QsVI*rl&5EGlc#4wHUpil0>=Y59(qfj{;*Jf%6FUml>b}uQvv$Da+ye0v_Tx! z-bpeUvflYkmMX1xqh+bq+UF&cK4bQ_D+=vmfZg& zON#Z*T3PDT?|mT?ED&8KlX`342eRaAz1=QL`PSZRGBH|j{~(h->)n+y8L-~HD@&f% zJMan1BAFyud!Lo11nV75mQ2<=V`Qn^dZ%3`Ko-9uOAf2;m@IW!dk@P_itWo_*(uO^ z2eq)OVwy~nt$mASDc9OpB@@)0Buf_C^#<8#hV9E6vQvVsdxz|ltYYUZnI;W>|$@hxsIr`15a^8b<5Se@(4$(Em=C_^LL>5AtfU4pa~TJ((n=qnOhFHpnt_SUvX z4U44~`~Mkcr2z7WJsdFTE7AqZ%nX4tlPpkXDmlu`EaCW(F#J>y&)(Rhc5OxBmDIar z%lPFKFkv4dO$f|ZLvuv%HnU+%i*arh?p#dPICf%24V zCL@p1mDNRL4yFFd}+NYPo*>N84 zzaP-_ivrcE$5~Ie@fhVQntnx`f|&yYCD(`(5ok#+e;{Yo^6+Jc2w#8-jhtb7^pKR+ zDDbm<1%6h*4&ss@ftj52F9g0*H~LMd>9>Jx=4)@*!5%7aNsmmX>Gy#6L`L4F=?|`? 
zt|DH;0`{rSek|QEvwwu1K4ec_#~zvtdIk~DOg$XgBEv_Vav+Cp%9wg3eVHM-o(Jr; zie}6Z*iJ^??F!ylW)oKrj)UiwO*3Aw^|%<41^ik@WPAA{_K*myrD5UCQ*>Y!su*TD zc;7x!F7VZcc{lwkG;T*}#yn>|<0alBzgD&#x=(&~jMg*$xX6DXt=01`^sL|*YZgow#_fjTIXZ^J)-1X1CJgA=)0qs=_r&3x4@3EgP0;ww5puaX9wh^Ma zVBRsx44k)N)`_-B$;2x}*w(^g0W6n_9?246T6oWnD&TFX$Xa+UhkNi0fMve0$^ht_ z_F5^9t9VcSG$6F8+-g*E+u>L#&15ChrL<~1s2OJU2zappJY5=QJ>)UuNonK z*?f7`N9_D+3@O6MmsfD-T|>Mi*m>};qCCulP!v%IZ^{A56#zJ{0l;zf0Gex9KyrET zMh9o`>$vJs;Y$)h;$1a>v+8-RM(e~~^!=jAH&9U05zaSOs0bMl<*;oK9 zTNTcYN5=3VMpL?Kz5v#$7rc_csk+1Xn1)6nJTPa0+pUP*tG{8YgGtDd5kzi6dX*e{0-@f^JR_>ke*AOHeL zY5DxF`*Dc>;H}OQ_oqPma|Ni}>aF7HnZ?bNY8`b6bA%6@!` zltzGUO%(oPD5c>EAP1`|FRd{U7K-##;yIwt1b$&UKieU*I9pHAM={r*KVg{Z$HP!8 z+^u^kmxsc5-zolF<1xs4YA9F)!xvylX(7B+{v#OCQ|>`Lyg@UsJ1BZw&J-6om0!Za z_)8mWi2EM8`Xe5gTp%u32sd_qmaaY{yoK?$WQMp@AeCvW{|!tsL!5&{lok86T(%hE8e6&+8sbj4>94zR zEI3n^xBj{pM`<$^aFjL^pvGRG+5FWvq1Pf0kAiuK$O%s|5+nDeNf{aAiVkGvkoOQ>FXCDHe|2!%Pl0Oadge!Y(2+GX>#);8YxYqYo3KghJMfC z{COS9b-LyS)Q&{$Yjh3Xx|R0u5K8+>oGK z+OCZ7M>K8ii^5fx2nxV(Yg1vFY0%eh;g)3zES)a^x^8VTwzJsmaDBafR}U#m$5F3{ zyO!53vxxJq=QmGTBg($R1(iyga6zR~KeQs1birCG=@IyJeXx^C1~_hA$sotUD~aLu zrn@xLnO89l<6e?VQgC-krXk#EQVGAuq!KKLhdv+YE$x_0GanJ2&E9&ZhwxQrFYl8| zgPgR?@ko}FN}r{f0VPst46dbAx|nAE83!oqwv<{$QTkmmd9nJKP%W;*6FrNc3N`O)EVG&2YDj+>gX zF6rB_(1eP84Bo;?Rq-O-Jy^VD$`I-9J>FX8aZU(zHOu-{q5IW5P6%~1 zUsxE$d#H+5r}@&;!S?6xOY>4LKet7Cvi_3O$I_G4LbeF`#O2f=>4m_{UQ47WJT9%B zCcWSvNv3L=R)?@?1Q-|q0_s9|p7Zy^QISP@mKPp(^sGq7NBQL28X#}W&m{x4T7E8#W+ecu zTYfH+KNoyIgmuy6d%P^$MZO#2SxMg(^XH_jg<_u;t@`L$Yuh3xknix8z?D2ufc~vz zodBQ@`Mw@q@UdtHkK9=ujuU&yw@6rVCEu5~>Kn-S$T9%<*Z{|Z{SH0yU4eVu{VU*{?6%%to50KYBI%I84d>!tw6Tb^a&C$jFD$iR>Fb>SSS zdWS(Y=0YAB+v5U2;}SL3K%XAj-I{fE?u`kRDK!mv~}qR$Ur}e4KFmd z>Oc<51&Ua@^-tA?5XpAyk|6}IiS8Og{F}#A8bX4EUJ-gc4?-U(^dTLSpCaBWU6*Nd z1teO3ujC>>XTS8!cH*7N2WQ?H9T>EFXLS_15$`-c^jWtdf*AF6ZweSO{K2g=cg)#I z#uS8;KkMt>>5_jj%nG&nJWfV;wLP(ojL8%bVSxf7>;&*;4I%Y5>SPFM0_vlfP)1H%{8I12u%$3h7hCBWD4La0UdT$fLNW+;viP%{kXnf z0EpGcw*t_rK3%{ARpJMQT`b2!M|RPKUmA8X9}6AXMSMR150uG~nJySPGSdZ+u@ZK{ z$IQ-5rt5C+W@o0+b$8!lr**pSrvvOvB3(DUpPh-L>we$I&S*4SEoP^0(QF`coq3jK zk2%QBtfJWy%GsH5G~2(7o$;dCkMCk@=b}c#xg0qS@8u>~tK> zez%OBM*JssvD44e><@Rc(@)Xt(+F-d%|3_VD9yg`Haq>I#3 zdO)+k1l-YGnti8=ovx{5r_B}Yv}q4J4Wy*gfe6kQ-6Fed*;ogAo_#h0-$M3TU5yj6uiiQl{b^~yn8UlnQ5X~Cm zc@zx-(d_)UG>7m&8t{s9GHU>wSp(qA8USb3AmGdz1f1D<0cUnzz?q#FaAxNPoY{E+ zXLeq|nVlDKX6FT*+4(wvGW|$%oB_(zDeF0|E_#mli!n8N&ZCageXPv4?bs$(=6NO6 z#>$kI_8Dx~V5|KbwyS#^+hsORe@DytOSsdMdd^He^>UcAd6_bd7c0Ffq z`+iSW9MDma!FCNq)cje?S=8EUXS>>I&I;Sj*=$#J+k0QLUA>D(yX!ftlSg}MIh%== zvm;8{tL5yUT{58O9J!pcmlZdvdd|stDX(Zb=k@zfvR!$trzfz|a@1-8!VfD>iD_uo zbFLaKE47@jIBwBSGsi93Sse4}RW0XR`25A@)Xa8N+NA%o9p$!HpJ1l0&Q~90rWhA} zy;C@usjYvc<+>6tbcumnUZZs&NEX&Ys4&8EFWg3;6uz5V4eIIwqwBhXE$c*Zmand z+gTtqLqgN*_`r+p3=^SuiO@ljjLbu5g`B5Wv0wStn?OLKl>Ql>H8kr z>G>|(sT^fHr6a6#xQ3MuRkP9oKxFkU4ES1KKfgWa04uJu{o{3Js<+;WW2QRCi9A-C zdL{KoRvZ$M`cPk=axEf=75j+5hDCD&k<*rF>sO0f4pFO-lPM7bNX*>-f4v^5ETR??OjD^;bu3_v8s z2;ib&YcD{Ja)sO{HJLmZP9Nk;$cvq~jnn9SGQkf&S7ZWG!X}eRxHw&8QYUYCVjSHtjyF8vKXijXZ*0RZ1C&AJ;#&FUAuH8 zOjbQP*oqn|H0^Z5C75;r(nj0R$AN7&d@qcg=!V}N&;2gD4kBY)n(xOEh4cJ5z;G_0 zZb!;_zGgXhP~gGbq8kmip0DKGGZZwF90z7I-ROzrKRFjK)PU~dYkTki;)l;bdIb+*4l2pM_M_8LJ|-Yu+=>T&_W0xApkvaCK-qhadhL~&E!nj0dgh;5Dgq? 
z0r&|gaf4Z!Pz~4MP2e!p0*9dn@CQ;jg=$Hk~}4)7EnXjKN{0pJae{YE!# zH4(h8i- zkD{Og$@9{RVes8bD`lba7p9%UG!>@VbmK3gpj?;^2~z?Lb$7cWDE&w{iPSg#$6L=G zA?vvwqxIbJKxmbAH*xs)-AKWS)N=z+Ivu6YqcoP6?yKab`>I9h0+gnpbTvwIMd^Sj z9o#SNQ*4QQq0dMaK#VfC+y-4xO z?C~3^c!ls3)zq{#qDhDI*@aK8r9-bWIz#YFx z%N6&*ww+HX<739?d803FUZ4cje1W{^fXZg_of2T?=mr6WG;b1*DIib(0xf02^l@)J z?zswXx^RcGd@$?^Bu>= zDDIiNl<@&X%loD4rAdm4>8KACwH`OMqFS1Pn4q4x^4b_h?&P2Zo18c(fqcSM-!uw~ zLKT-Vs%@H7q)eGiH%;YN)5VEydV*sgOhHPv^RVN%MR6W%P7GG2cor(o-_lJ_jnOyF z`FHwoBobsctAIuLVEmNy6J3Z(vOOvhkpY$eYRb3 zSwuHoz7|@oIM?$ubLYDD(tj(4e57qEhV-`5UlfC1ney0gbklX)m(M7V4RSb($KdAX zUgfcF?)ouHt9738*a^Ews`6NF^ebQJn|isP0!e~Sqno}fRH&c*nRJDEG>>>raT+dB zoDj_~g^JTny6L|->axyDSDf%c)LL!J168mA=iln@-UG!o+%Q@6NH>^^Sj3`*R*~*lU)uFa;1>{glrb_ zA6dY73ymz`6p{aH7IKo1O+t=$B#dR(g2Y~=e|L%W?=BCX=G`Td!@B~kY&o<0+t_8+ z`T^bSB=W8)-R#U?uWxqc3Ei8YL_)W|`Q=+aw^(z0RM#u|=50J-dvhhP+kBU9{^0t| z4_R}MtZhC;w9Uv{-Q3gSwU5~<|LHk~+0waB+c>)U9)D7?)`=!2)6M^C8PU#KrqRtm zTFE=i_AK4}kk_&%(#`+l{sgm9~;)v%uFx9Ap;TdWVs+LqBgxGnxK&LaAjptji= z%;vCvwg`|8MB5UMZS7ag&RUIM^0f3bbW4HoRwUBP!*1}ih!GCoq_hqsjSmS$o^K6$vQoSMz&I9o)1`$%x0H+qx|maTTb)L z@&Cu(x5q_YrT?GtGc(LM`T@K`mf@C#TicIOQBd12I1YlEp|%COEsi+K7&o~XR2t@u zn{x36UcpXd`DBV*0gXZs}s7jI$<8)+MQj=Sn`j*q3-QWBA`WBd<03AY?bJ%yX3UT5?-q8l&DapK zAN$gWV~aV~heM1U+sff=3HVtsg2m(@;NDJ` z3&^tR3x`c@J#6KaMH<-6nHI6J*B5LL*Wzzb7CnlcO4FiYWDV6^!)e$euVo10{)?>S zE(MX%R?8lOXo2b0qKW)&=l38=(HtR^Mb907c8b#{9q@&-ua%&_$f5X81(&0f=}n>1 zFNz^bFX_#&rCjtf$6n#sBrd%_mGvvwATCmcEK-DN(UBuBPILMy z`Iy%!r+*YtE=^N7mjL~}O#i`fr*#6>r`#K(#9SDiQqBj->?5TBkR=*9x}v+Y`A z9{bA;%B$|o8BgZSd`{y*T)tS${3#{t;){E^6zVH@E=-IZ6Z0k9!` zb^ciKyz=To)0%6_t1HGz)09_Nk30XE^6FNi@`4?C?`8{4w&lCbmZF!NiQZ2vl=tU( z+{TM3*UP0$bskaqWjS%_0h-~;s|tDw)2qh_=CUv2^kYg?oHm$oo?$L9=PtW2zhF*(O?aE~8Ltc1jz9S?bg76ce6ZnLaexAyta?;OJxOCDG%YPqzpsolN5R_hoG(%aA*)h|G=U1IW&bsE#yC2 zjJWv}cQ%jTYw*61-`SotB@OR|oc0REUl#%08kCeb5z5BzZ5-MzI7KKi_iXS-kK09n z2g6lzeilG;g17VUdE$#vL8@MMB zI*MT1jfAw_j|#Ckfc$4#5I2?L&NKk>Pj#sp5uoCjAjCv-j1OX7;xy^b%HmB3-_1Ff zApBhpuSS@IOjjn`12N~|TJnVXO6{%>gn19OhtuU$NvBH~KhfIB_>&07YnrP5N5zQM z(ra!8<+b}M;(87NUgr+vWA2tdZeUPPDt<&bcEE8?c|#QOdJ6=I2a?wjypZ z6TFyU+p>k}wUy*MU5uE`6myzu@O%(8&+~pqMW@rp&7|=5T5-MnitzR$;;-!`;;$8v z|Gbg>=RH33P84PyHrlz%ZchP59(=AbtFQS{UA|U)f?$VrTx#E&VdLFhH6WREZ{?MA$86NmGnh4~)7&5U%oEOnLyGGy5q^qgX$}m$e`A&(n1PjjS7y#<~$XtQ(QXvB|6( zVITLvd&<(}%CZFKxAOWcOP9ggv3`T$X(O9J*29={qqKB2BJyEDXIfgg&hT_9-gY~g zmcGB*Fi8dLOkAdIRF>Al=fxJKixCb+!f-Wtg<+BoR;b&hr5}-b-y|3nJ_U>0iLj1+ z!!W6l^RUC-g@`&T(ifc;G8pb6NfjG7lcAf!#u#-$a1f$ykk6c5*alFB(*bF74K8A51 zCZM#s+GGgufv>Rgbd14lr#f6Vh>Z3EK+3Y;kzrhbYh}Y%hH-ny^3uE)>Ak4%0T#S@ zD62rm73^sk0z$~7Frd7;YQDjoS9N@V!Bng)7-|?dh0GUCC!}TG%4e`t^shR)-!RUn zVpED?Y-e@tB*WN7%EbRl?amZ~9|-o}TV3vB@bkPO-@oVggUZ%E7D#cL7Lcp|6b;7_DwrxDzU zpqX$WdVCq1&Z*dFE{UIzEW>mzg?iX!AM;vsv03_qX}ONpnxSW;UrH6IzIPPWqBW7WnSS`W(lo3m*2-B$2lasqUoS+6pV$cb)!Nn8ei6pno`m9m~K>H zs?KY1md+~`zxntrfGeqlDU(`N4O1yKu2#0{)UFRH%O8GNS?)DhSw7|wWqIIlOv|6C zu6jVHo^OA*mrk8rT~nr0FRm_sNT*J#DITCxXIGw+bn4=&QCx4djNL|Z%u%0L9Q^%({ygrX;r&)THR)y*46&*MV;2|@JgZ64yt)$ zs7~uyUFE0K8mp_f=(N6Dbz1-G^55#TL4`W)ld9&aXc7h)$af zY>N>CBRy?CuofPlny=H&MCxLsE-lb$_f{PX(`m~AR|B}?J9OGpfQ7M@wiy(3ZqYe` zCr;pr6L{hTt3;*#HNQI3aH*aK$MN zu-Smg2TUQtiV;_e@Vy9!^~*51;|l2+8w}F&Iiz0Py@gf0Ye*Nldk5`kd_6!~9!Z-T zU+Y-A+>@1+r$BRDxI)H}VOk!yMl*eev>ao9@9xs_rS^9Z37j1C$|Iur%6iev~y2eEdu4HSr8MYY@C&`d9`ofNK_UQiMBR{{C^pdF%5J#^3HjwaYI z$Q^suh~9^#<(t-tKESbUqv#VaEiXzFm-f0U%m17qnkP!jE0%~}OQhwsholcQ)6>#L z@5iL&A7qK%FH6hM<%vF)e9_0YQuLu)-=XL=QS`8_5OVX$v0us^fn!x`l;xj7rQ5Xp zAFTGCM)VronK?t4C#Q&5*Ah}F8~s~p`AxnZehA+V@6WfxLzAaW^fcgJaJEWdYXPV2 
z(h4U+B4@iQD|(V7)#)$N3U{&xSn(h~A7f*L4mL&z#el9EWjnI)hAOO4)*rFzn>4mMqiXcYzlCO{P_2p8H32MWxAjmTrq>S)$^&yt!Z2{$y`_+HGozd)qZP8P4Vx<3fo86xq1uPw5YN6^@rt-8PbYe(mK3c&Xy~oFvbymyts7Jq!oEYr0Ya! z#U`e+fy&<60CsF+cChKiia+DVl=o^UKB$@Q0}V;J!v~@~jW4&>6*r;zFM5YdsZL}F z(4(I12wDiMzKKxkuW-=|*qY#dDYYl8(oCtnF%8#*){wbM?@+QV=q_sBIII~Hi%O9@ zJYmBn+FG=x)CbuXW)Im0q&~(JbsQbxtC@bBIMyo(b}=n%7t=tTz3NV!y{a1vyO`9G zS|v4r?P6v~sngglW}=h|GpN+(`zWcA(7@D8&*`hACb4xJTgf;bXZsj7&NJk*g~gr^ zM-0_WPhv}<0aEHxBu{5+Id`DX2clgJ#}9~(sYvER;qwxDTCBd+%0#${+FX7ntj zE~J-WoHl#EzM?JU@Y+9Xri14n)J?KzrkAiSN+(-qbWoB@m!#ANwiYQuxjvSa)Niy( zn#|3_mkpd26LXwvsQL8H=W?Y{DY_40a}-ZD_sD02?o3-Sn|pAH^n0oHoddahaNba9U8T||9BXe$gdM4uMsM=|3_sr2^{y+SD6^+HP5hKV}5 zhL@$Z`>1QU?&Yeac~HT%pGawsA5CZ$U)Kv$8aHz787XZHp^@{bdq^A0&zZPSR!Lh# zZ82>nbqub-9G*`H`Cpw+g}iE^s&aKe?yP;#Nx1k?*K#Ems7q@%rij)?#eEF zs+mIeqYqF0T%LCLxoYQfTWx{PxjZPS=&X{iX?XZ;)r?bhOZCp>`nuO1a4z?3 zl)g~?QO7nhHnxeG$u=WXU^t+eh{KRS)=cz-HH=8J!5T)Sm607vS~A(8q}kbuWVHmd z6d0kT)mK!#B+^RBU?go1o6h7@!qiqaDltgu{q1i)D$+a|gNiW(auQ!o;#d9tC6S&( zrV#0QoTQ2DP|^nwO!|C+NiQG^jI>S|ABgnS71{&Cr1VGEibYB>c;8u}L|0UY}AU5Dow>jGV1u!m&m(nM|LS>1RKAEjg5IGeV zCb+;OvoJ@yzYN%p!{~)APBL(6Q)clB?fx@T`m;b(0|4CqkpX3tOcd=;OT@~dQu?ez z`fTlfsFP#{Bfh9&mri6hBd5bs`W%oeN$IgU+5@iZwFle?@_>#FRi;Vl32YPM&K4eQ z5|nkSx++OzH4#&?TG&2>jdrrw;3um9WqZl~{zdJ9JupDh9*FI!q%ZHMr024ANKh{& zeG8klOwlOmZy>eyU^cOFr3Gx%9+*j0u*yhPuqu#kQl@bJp=1PeAQ?ny4}dFwL*jka z<;%1O3aBDiBFGN&D6y)Tn!+mBout=M6IcnxeN0VYWijkU#Hu}1dn@y~_CBFZRu*!G zaZ>tMHQ9{<%BAUTRqt-n9%!KCt3H*|Z*am6N(ePnR68yYL4{|jK(+Ru3$y94l+lBW zH$ckhMMhCsgP1Qv$dc!vJC}Ee#+2d4Rx$Zhii5|A{DTctu7mkUk}qlxa#6A@RD!G| z4&Ph*`V-oNWj&RQhx;lSUS!^rmBZMInR6v#NDkC37Qf2G`epRL`pjEJF zQf2z(s4|1uj%6|1vG}k*nf+eupU-|z_S@L+k)ztGcQR!Jk_F0ES5r^fl<^$fra+@I zOtp0oS)go<<&Y$Pra%^?+L}ri6mA?umlbZDC1uPe^}~z^RzLJ)W04#-7D+uC*~7VX zvHiU_oNLJRM)!0cKl%Qz$!|*;37|zcnQlPSP2MeKyu`1q{JLGrNa0tyD@`|fqm;3n zUmNqpm$;Dp;z=nZGhKWMJCBPGNg257=i+>_{k~|)*USjQ{pm=*X`}ARZc@fJ8iKwg z%YWUINthAno{XSb%a<)Q5PjK5#)Y#b73B3KK9<$|%F|sF{9+OwXscR21E(8`D5^!lc=w zR5zxK4E8T}!hT;jCKw2HW5T73Dwt{N#)QxWF{6$Oez_A?7xT#=qQ3{lJ?RfycioeL z%KauAuxxmT>9scxkJW9Hg3ReVyi`e zA2Pk@Ur0O08Q0kK!JlIz*l%Hfs6x;VVl7{_$s9iiH@2>~+p7rwn61XDLu-{)$0S){8d8A>#Kbth{_b+BVv7}GG zKZ!K#_eYS1{eE1_f)HHefLww>g-lfJ_hl-e=f4kX#Gr@u{P(k-|9;l<-}j!Bc^?x3 z;R;BAlK(y^`R{|0|2|go-^WV+`&h|;A2gU^8`EwaZx^tiSHd`s-ffm1#=miYGy5$^J;8B%71Jz{{oDwv<6rOap!_oGyq$6@^Q)UXQ zkGqmScegs27lT%x2 z6Y}I#`{bwtO6G;yalUe@3*#HaTIFsP;aBCAMYw8t$Yyz^2U9$VDR!Aq__31NTG?%z zoa)Ew_4%w`pUt|~HLPpx(BCd+zn^k`q&Qdu9Y1lf?4~>D7C8h9F=pLeNU~bXBmB;ncvWJes)trt<<(1BHI@HPdFKTlOARf~Z&l`GoBfV^z+ z;3;LrHSwShUyEp&k|A1PNOnxuT|d63#+0S0-g`#Z-Nq`;*xks2MR?Z3ReR6sx(6Mb zdr;TC2=7mjVrO?~NL1(ygSwltMv->3A;=`EO<4h~GJR9Zn#zGgrK}LX6jgW9zn3nl zk)M0`nL--KxDyZ>$VsVn%g*VN@|6*P(sYV4rxgbEU2#4$_uZIYkWb;n$4FNmehxQA}<_N|2r3Xq3-bNOY$qpd>>&TbbEuL zox2ArvO4I;dO{C35K6e1c5N3>z4d^4E&Ro0(6}vt-vfHK^p3b9H>82i?Ca2#-R1*r zTD%u_KsOe;#XF0j6>En&Y%_FW8=welr#rZ_=2wUh#eFI3RC9{p3Xb%AQM6hk+XqFmNO*j4D=u@`~sp=Q>+6Qv&lGm)G6 zzG9Vr$e`u=UL~aQHpGpdy*`z)(i9amJsWEL-q80dvi~(*@7kdF{8jIHMS(_}OUj86 zVR}zUWUPA6Q-@bRu6J=eq#dunKh$8#%0esIxL)sKTcLNEsYpS3mnnqBB``zp;y+fk z*_8D<>6LnxF4ub&k+!O5D!+%~eacq7XW$CGr$2-TdQYF)UEk;*Ee#s+M-HcKyuOWEB? 
ztxyg*ymF9Uo`USgO4%*|fD-Cvy)5dM6zh9-X6t)GOSfkWWt-iHZXhyDu|G3JZwNdw zey9GCP`Xt|S4hgAIvnwy{m2Tv({H8hC)VhlaQl&y8z&oH0}WRvU6%flZc?^iJ=7Ej zch;Py^=1SSYYxG(W`7CJ)YeQ-`&CzQY7wWZf zQuZI~pU>56gYA^qFEH{WC41IVz1C~27;nmsuJDP_4=$~FBTTPV;hv%n`x`U$T9>W* z0VkyFm+EJX)eo=}%QeTT#b|13cTCW0%Its5)jQ3so$`Txu&8_fBmKk8bO+Xe{EEgf zy)-jRFZmvZ@}uNMcRY1#WktwFVaiT{+tEkKZXj*GDhny!WnZB2sA`y$eS-$IszJwQ zJt3DhlGb5mIjaZmC1t?M?Gi2mB+bFaM@MazE7`QEay})g3}G-y%2D@Ia=KTzU6m`! z38-Q(p{~dwpo%ytryr$PhG17fR(eZ0_tj7DhEt(gsZT|z4t0`h-=St<%6aI-BSYmw z;Zn}f!)|BgL%)%7Mi52Spkfqh1Xc}@a()j@K~v5+Y_iH#`HI*sS7o1=6eU;OkaB{7 z;@P_1S#nkCIoBe&awgvApY;1qt{6mFRCJSaz>1vZf2UlEKcp-|J|wg!rJP*Ks4BR! zU#(o(NysZ9DcderHnU11=w8Qn>agZw&IZ=#{f?%Pw%<|KvOJ0* z1G<2GXUiz_$^PcL^iKI`Xx(dR^3mX`$Dfdo zG;wS_`%kgo&bjzyH8D_DazpxiI-=zMg>w3boyu^jlylkx z?IqtM3p|}J2G_o}-RYu#9oYQ`n!(e&)+3r9{Q&4HW+6DJtDpe3Qua8Vb2yPmtGH zS?PKigW=9 zQz-OoAwitYW`8;RalKz}W%a9MLccm~qWp@odMzz;M+Y-5ZW-4)Cddnw)rCjYUkI}c zg|~5dn@YjBIs8cnqkIwP+Alr^q&kk)8M zb9O!x!uzjl9xy0ten-`It%(?TjkSQzyK@z|P{?^b`vqmxK7>-X`!H5LVf5~xkTXpb zd#0Z7oYe!PFQq)=i1X(-FNB&Y^mHddozW2k*08?Mu%^=kCW~RsRfnet<3wrAjM~|+ zIJK2gvbJJE-?oR73;~KTM(l-$zc|DB)ujlnu*6gP9<;+v4DU3`$Hab}huL)9jDr?@Q zjE8TR)+kiN!}AHp=$VxG9JljxE?l%43ODONGCnIt6(h%wK1Q-psa&dLE>$W~cDe8Gp}e;@Lk7=E1SAAKc2|6j^lXDY?0rg2bQU)z_VxwP$h)CI2{K=s@nM5MF_)AQ^Q z3OVb+x?raa%36nt-W4MI8h2-BgBZe(p}EbRb)o#va3RicgFj=ToX!MS4LB~$IA`Po zF(OHr)&|%QjuoTJYq!RU(RTWn%yfX8Y3)>kzOE-M*WKv(aWRE_%vxh-!K}4^;@leQ z!Ps#@DCa!>(3+@LylY4)f9(R`PhmVm7>^IrRL*pia9J~fr-)1HN~LM{K#F(ZDr$CL z5+fvO?P;#|4n`8hNJ3cAi`B9c%lLAL7QPaqqp|yV=W`MD>c2amn_1tx#rd2U5Pb~y zB}wtte#25cFI*kq{8kNL8?cAecqaR18zvQRFii4O`YbU#WzRB9hI-IbC8P~ADQJUX zVn?oF^5Vn!|7DnzPueGwyy%4br*2B?bfk1oU&DC!x{SSsajhG$w!pFD4ORVn8%zxqmSlr}XvN3X2EX}LM@$C4kcta0 z8~j}>E^IXT*`Z1T+Im5iq!X$nH>7nBKqDj>8X+%9>jpt1Bnhv3@%o;$ZZKZ=KrzGz zL8lOOT3R;*K@A8Bfy+ZMG+Rs;rFFkUa5I7;;ELevh=Qhy>8iBOh^STs#ln@q)x$ON zlwmPX87wqSSm#YshIRftSy1t0VGvIx{2H-u{YTyoUiVa^_6yZSA6Z!!dcMaY)kLq# zGY52PoP@hrr}nDdoT^jfWb=1)>cGmgFY44d@qDgM9a4E#ty5d7PDbg}5mj|>>(uU5 zCk#3@j*RossXcjn$cLYSB$K?%#~SvkuiaLnnLZOI#%MxmXI(VsSAMWrG-vY`PasWT z)>(0wjocAYThvGH@LD07w@d4mlYT(6e|2SzX8L{7x=j1rL7LInrf7C0Bkf++17o^ zyKEu6vzAZ0VORMA?W@TgVk7B>YXldC&QsKccF>0LwNT#I3gmsaB*e-cH4wzg9obxz ze^5cE2U9_N-H_JZ@StLXi;w0^LxtbdHg&GjQm=zBScw?cD(&_oD(xA5ktFK>hv^Y(8(pTp;%o)5w8 z)qH%)LvlwR@3`4XRvcQ6(N`6&!)B>yYvS!$-qE{WOdVER5^rcOk=Fl#H#B`*mGv`d zL(}OKY5iQ@&}={fUf^B(Gt&AOc|)_{i0VFZxu#~z7s8Ol+v4-Ft*d#`PF1jeAp`f8 z)^B8B7{{zHWR$;=)|`QZ#Me#9)A&^Fc?H94ED~U69#+maf#~>V{ce9hHq_I|97g^>o}!qx@mp8 zz@08N8_$TZ%PF_73rP9k>k{f`ue;Lb>2(z!tb2v(=UNjn{8~N5UMr{AYo#2^$J1Wp z<9o01cIP$T7zIso z3-94h;XPb`-oxcH7~4GvzFkF-u0y3fBfVZdDdml%kgH{60` z5e_sItCH~49>7_1wc3NcJ-O-%pG((KUG}o}ppX4M?8kXqRN=RZy{vWx0zdOe~!!Bd5~YefK&zhCa*-6&3zwJp)gZHr&VM2e1bLe_KkxodAA>xwDvoOL{3A9%n*#OB;SqgnsMJ zgl?BMjAPaqqzw~@hzA^mI*O6Q#m$g51T$-vNE@DI7*y_tKQcPRMH9MP`EJUF1m;m1 zCj!1D3_Fwe6+(Gmfp?C(KPGKR>!)m3$w5i*ZDL<4?G!_9ialjRDxHpE7;?Dx4V@u` zL+HFmogvoVQ_>kCcw@nbw+nF4?}ptKwm0O3wbF+DAN>A;_y-QjTPf~?Nc(qs_k6Oo zch6=?c605fJiSXY%cEoMZ?x*~pF(%GyXXq^F2ke^%2vILXWa;^{?TG(M6~|Vf|}if z^|H?zz3geX*6W>pm*|}#+x(l}**#6~>_Ueb$Ty`8HIw63DI1Q}?&_zP8||E%6etECNR3iR0e(#t6%;@!}Mz(D1Jhx9$0l>6HBJx`HbwP!sPI_o>-kuXpky zGyO-5hgMeUAMqqv>mxdt&tIuq+*hwnWnpd}YdvdrkJM{gq35h@=nQypuCh@RXs|0A2aqVR`nb7wjIz;-%(bgaNFKX!Y#(Lg zY!bPa#Y-Dwt3v);E}J23OsEgpCYNoLHYSmzv}~BPaZy#Bzg!kv+dD_DvK<+rzPISQo?9<+39o z^TjN!DWL07H$`wrD2H%f=WXm~VMU`W(RQJp=xpTm_JuN93xB~Pv9AZnFTRI5E%by!VhFH*{g17T`&KGf#58RSl0wViXQ) z`aq0osUQ5Z7}XK_=}!n9i2^d3p0Ovg`5o+eEB6aUmr*;IzBT9r+8hhGa7 zMl`^W$(C%>Cumw<5fiQ_Q=zV>K#Hbpx&eAko3$kA+YH&#=7(9N5JC*Pc8VBu%})Na 
zD)OIcCI1=rpKgWyU}w#nr-ivpm^OQH%pitxVY!_6k zm`2*fBO;J;CW$3B#}ZBFSpIn~)-3f=Hor=mprab7+{5=$xkrc4vT}F{+bovmiBW%% zHs_w{*G-Hn%Kc{012M|xy)5in%wn!-(q@O4i^a$7Erh+jgBaMxZMDsd@QxlrRW+K6f0pw; zThG9$^s_RkZe11aU%D^$$09V8kXHwD?UsBE~NWK^3!40(8?Y4aJPznxoU zyBoF2b2fvr`7>ru2)_s6o$;LWr3+I>xPXbbj;R}*&pjk<(Ko8AozMC4>toUu=kq<9 zoX-XFoHUK+q|NIL6Il$yA_Yh+OgE%0eIZilfEXbOZVy}o97!Kq`J*4iAH5fM2FJNG zs3(7D6WgaWa|bZxygouT(S;Yf#rA|u(fg#d#hn(IGyF+&srM-?QB~n~n(1zN5VKp& zbPrNA@0-M{DtBI0>3QAM$*ZasUR5>ls*2UIukGd7Vvfz{*i_>EH5=7~>r2v>$9Pqh zMgT6f=GDwFl6F>KPv}00J|3)?oKMR_=OkKNZQ;dHtDTUv*ANoj9$KJod777toz)2| zMQ_%m-7OI9N8&I{Tt^LXhvgqDt5QiW;D8eTfq5ZCvmO?wEW+4fOFEZ-fhPb91GTV+$0iBTaMHY)@i2KoR~C2Go4q1 zSAB`%tDelh#mv6>g!#IK%71+d@#4B4Jq?a^W&zWagOZ!JoMo9u3NzP8X|D}p=DIR- zSzWxnm0+A%1NbU-2DY@(qAQI~VHatGXswtAAx)+yFS;ze2rFTKKKx=8Eta$gYM|Pv zJy74xv{h7Gcuuvo701%4HsV;Jtph9cn^haJV8L}0)LwK?4wbh4mi1}eS)WG7em(iq zItt`f^S8>YY+K}2GimLx)r(gaLA1+CIooVYcmHKXS z#u;gA#4+n!IU}NG_x*CFpkqB(xg8n1Mts*q^W2+brLECam0J_|nav^WpcJm5Nm~^tEx?@9RJeoYqLs&KE?S29W?>23XXE7~roy(m^ylTWX8Y{- z@U& zZJWRo;bxw4cJlNS2D{KAs9`Mv94&-NZMd{8n5VOXX64(S<^BB>;F`sA$s`sf=~~{TX!q zh4abhJPU}5(`iJ->6CiS5HXHN31b6cF*Z@M(e6wIQ)Dir5bv2ZNAR9c)0fd5Y!1&m zJ8k)dt&Ka-aTbCc-ccXAO_-0<%*!0a+g*MJW!nLUn?j>>+e``_u~FJqOS7+SM{7fi zgt;b93|}p6JN1k$OCuF(^nA&*-GwG8=PWdZI2X)*fA)J3VdsVrJI;CV zPEE+M?t#wdf-%N-u%VeB#&ykfSCVi~=h4SCl}86(9vz18=-|$ygPz8e_6{EDOIZS) zNBLhpBW=H*cyraxLsA+KNy$7U*?33_nD1JzER=Dcb{d%J!*!mF<6`GY_|(qB9S-Hj?(*#`&bZwlN6|(LL$IqemK#9x3MC z4=UT|!2n#&@I0K|CTE5QL{*u#Cv1~5ath^)G{~J6L++Fexl=3;4m#@Uwx{$`wzJ^p z<_psHoh;*+E^XgU{r?Rcz1|36zlHt5G&*2!wKx> zW73XM1a@n;F3?_vEM~|&4k_i3 zLJrBMkZ&30TbA>Dn?q^u=d_(1-pOe@Ic+DW?c}tb**qNiQ1NyI5S~sDKbMeqg7_%H z-wER5iTH0)DZGPGb>y+1k#sPUTMiB+^QgrnbpX`{1_P=BLe&9OyBL*@rAQVQ6hRgF zw61%seQuSmdkG&6+e}m39sgaIW|8fx+T3k&b!*MsWVsyEkrQ(D3~9%n{-zxVCPW3B zcG!80;WON-(aZ1npl+GJTz=}vqIGh4y++ybIrrLw`Y1cvsY@>VTH2}p`=i_Ba$lB; ztd@4_2t(NtX{US=2f>w#ny6`V*>-7XA4O%C%VyHhP(FmZ`Erly`zqx_tXXwPQ1bE` zlK7UFRpH?Mip6wlTm_3(4yEz10!hTq`>Kw_%ZL2>Dmxz`EvD*ZQskjIwio%_GjJvZL|PUvQ<6QF{(|+ayLOlcwzGY?|s`ZRTF2lzWkK z?nRDsFVe^pVI3R6u}}0E){?9g1-kJetPoLjX$$t(k>^a>>+7aPo@k>dh@rDLSWzQ;b##) z`Md2v5_hHzNgia{kYq3?;atAqT)yF4zHwtM>Qk&mU3NHXid^2>63~_9402m1JtWOyhbEQg;2p-g~oL_J*`;wt`tr1@ewKN^>$S&UQtf zcMVLz*Fwec*d&4=5uJGWz&RGs5_$+;A+`*>;B60UncuJxJx z4CZGN*EImrxa!KdmK*t5&(997TL5G+0Au)uF?_=qzDeRr^WrB@-fy{Zos{sifS=p} ze#0=|Fw8eDLGs^~U27}%Jug@Lki@atojWS-Y^qh%sZ@2+l(?#uSp;1Ru;_%es}THs z9qw?|i{R@`Y1iL1%C2+Wm0ka!^yMyGs#qqym7lSsWLiC?N^X>^{kSsnSg5N=yKd5~ z^K}(>bcdzaT@L5JDVOc#`FaUaP+=jQ6(J;sEpI(G>!e)XQsuEwE)S}tDYai^$eVI` zXyxp8RwAFXYZ}p7eS@&JJ_q*Y9KElDV&p72p4rh{5o?h_dI@_{|ot z?|x(G_mTe#kN5W9Fh2jT->3Q4>nE6Q!$o$%E&3O51zm7&{R_D2F1XV_1Gi_#!%Sb! 
zAqSnm^>X~aKV}ez-=^#87mo06`S<7Pf1ud`H-dkE9xk~HZsos#d!q}k{Ab|aXHI&h z@Xs9&N4WStTz5y@uijrd@GA#?<-o5T_>}{{a^P1E{K|n}Iq)k7e&xWg9Qc(3zjEML z4*bf2UpeqA2Y%(iuN?T51NY*92Lji?EtQ1WhhG)k?LWaKm*%T?-ej7iwL1q22&D${^GgaL@J- z>PEPkJplu!lZAQ#+y%JbItz6I+&ge>a9%FJ0e1rKK3Ach0QVBypWzzdM)VTuUcH4n z04@&h5Zn#8p?!op7p@F0sjpC%z}58?x4wd_=qGNu^%u7y;a-8ugxdgD)L*Dh^cU){ z;N0AVYAoC{aFKA?aDRiVgZl)o6;8Mdl??X?oDbY&xIe;0!o38S3b)B!NPot!9j*cH z0$dwhxBG;u7n}#&6L6#8CcuTjMZhJ&rNZUG{ROTH?ljy*xEpXT1B7ZI+~aVQ;r;{{ z4Yvqx6Epn&EE1^&2Rp2?IeF+&s7>xYU6{y$;UhL7`5E zYlHifhftq|n>t9SkHSs*jkuKnH|imAYX=b#+#ZYu0ZnG3kE*qGVo;000t6gJs> z57>#*5{89^8I4}nxVV_Ou<)4K){#-x_;|})t9b$Ed>dluEK9sCJS;vT&K5m4D17oX zqzaF)#DyiqS!@aMLE&aI!hD&03ra)*=HYW}(X-9r4!XK9jX?Rp9^VK{eApaII2bf_ z@-(lop|;uPJ2SiumKbmSXJ9_InD{V|W*srtngGmQRCg6?XjoXZH7Ox1HqI7hH9N?- z9reE{kGC}rS)p2((tiij2up0N6}$OVnOe7tRL^na#|w&-|kTmrQ* zqcJS(#W+iBm@UB?7X${H&Hn*fyYNT-M=B)F8gF|E?d0F5Ya~!v;}-lU8n16y7#thGQ6b;m7oP-vu%v zG5TKvfm@Ctu&bZH8_C`6!aF`WKEWCl7T(2`AE(Cn6m5;e2zu{me4OzZO|7%TUbI?X zz(AZBonRau78YlXv|`BN&>thbSBkq>;~k!m9Bai${QcnSD2n&b@W~N=FXlL6?ke$! z@QApW=osVB+XW|{8jW+}VxqzlFyMwIm?Q6KYWD>9O^CS-J9Q$>1B_mrG&aT-oe(t5 zh&hy3n9uCQI7@;pCORky{l7Q+gC<29&2V1)hq(s*-I*JR!8F6s8F<~<3v|sa?kdbk zn)`$=NC*lKrvWBvR(t~L5k~eu-r=aVIP<(a;(WWl(-=&Z zco4g5@?^fN?uapt$<~kNEwPdJP7aiJ&AoW2{PC>qnXqTP!k#gPnK2|!8xe-?(U%!B zEy3s=Z%x2l>BX=JOZ4nWYg}-^f(6XZX$d2vV-ir&FH&KqCHP{{jj~09H(c^(o^{M- zhlWQEi-}DLn*4KK9)N|7y|_*#6-rtXpM7t z6ESau*D@<64g^@j!!cXOZ=@v-BwOObhsU7;W5^#LPG6GX@fnl!aPq!njf)ZJf@qO} z--y{Z;0lk7iAN;=I09m#=b-J0u-VqgF!Zfd<#>s-%$|)vN73hWrJ55SodCk;%!$NY z0KYMq*5U_)(QN(!;yLkF>kG&hI4z)gPJ$KLAChp-c0vNy6Xf!JP#$x?J zGbqQ)i(D%d{9>Zb0m#3$cuPV;vIyfZ-BH#^$_gQloT4n@5n%~ncUZhNat@lhV|9gJ zYZUq}$8S`u<2M$V5FQn?0NKKW590Q3SS&KK#zh-PAmjyRQB;(L(j`XoucOfPfnQ4;=+(Bua72H8``hLH!D!e4dI%*BMSIp?GLMhOq9{6H4uZK> z=s5>13H8pbpex5iS=Sf0iu0Cj>5 zbr_w9rsSxsL`MRQ#kOd$qpLZDO@C&AS6KAW;pqAui_>2=Me%YBkN>;GcMbLbqa6Qd zWZsq2FP!s?NQ#e5OyH%|okObe7bf>koP#dp$Mc6@m=rlvt(k+v|2G-?#z(}w7-fl0 zb}Y=fXN*mZvxY6O#U&)-)U@x#A-4yRd-l~vU(jGqLNE3-x2VZz^6?}$2>X!~B`6BR zBaMX9SV}R~I;U&s`0*HZ?<~CV(R2xMx)?Q)qE zkvJ%NPE6NG|4Wh%2SXgWK4+Vq5b?_?9y$^!9h4@;LiS{x9S~`m`{RQDl5#ud2~UTk z&p{UyZZ*&QG2?Hq+cC<#vhEZVT{-|5#=Nkw4Ze3$R3vh6El+q3*aR@!cx3)1 zX>E?g5RE%_5HJO%NuT+jAs9Mrwqv`*u_}VN<94ba=X)<1hY=eV7{X|6f(1hyiPf+u zg`6YIh9MNWCPXd>n-vq2fEg?1=-7x16FoGd#lC5fk5!q+u;?FBZFzyJtfhaXr0YrUDFt2oaM!^ z7?2E!nOPiD`I=&)@s=2#z)+JQ-;2i}|Dj4P>Q`W+QMxyq-b}Iys**#R>k6k zrQOtG-*?y_74IwscsIs{ou$NBaYls=n20bm0L)o_j6nPq$tupEAlULs?`9P1)kDRH z`e9;a&*7rZ)m!Z7GeR8fH&Ptz?;~Ct;4796REyMIfbSL?X-jZSzV5<#@3|Tl`erj8 z;;q!z7*^~m2$jZpkenr1=$ z4v)w4E;>XGDyjKzMC&whsoNb~7rV7;aXG!O5Lz!1x+icAgXg=|M)9U*Xq*)&c5VHO@bJ_EQisVIm#nkaIbLC(;g? 
z*onLwKJ7zP-G&Fs1>$uL#6;0^yK-g0h@;_|XU!64HEOXV4ZG2zErXto7qLSvvQtom zZs9Sp$q*cbLHdfkX$vMwe5K(&?YBizQAY#3mZlPI}>UiOWurqCpoWKE{9+0mqo zVwYjsQ{!xwxzXrRY~ko3xD>wAViM!RtxwI3qsHnz?WsxFFa7bSZ?jE?%+-g|a>K+d z9{+&x@sEqMBx6N&Tf#Ry)?$kbn#Yo~$N?-!S$r-S2%brd zjkNk;_aT@hdUxd~KI?(H&hL%_8F@uxjSjFyTFs6@#rqll=9AIZ3k*XMk5W2{NNB|N z9yACO{L^vMo*5T3*-^}{R7iBPhj$l%e>{36>ROo*T@d0FGQY42nRhdK-^oB)IL;EA zd)%>(1XpH>BY-c-fclBBXVJ5M2R-){LJ>ujop%-*(RfGSb*aaQ?_2kZo@%kClIo3n zC|c}|g;-x)?cwMxpR&Z#;#I7dBgJlK$2Wy>!a5f%n7hs&j}NDu)#B_s)QK6VTB#p@ zhV%>Y>3r1Fds0ly3yHB~6T@EsZ}4gTxPnEFj9%^yB*oZqwx{n@79&lxCY!}x84YFL zJnmErL!$Pa0+% z8suQqw1i~vK%8_=zE^OX3qIK0ly=8ws0Ya_h&PlcbC*mV>u`s60ksfpLktgyiJT2p z2lU6hG#6Pe-lUV^U@U)l=6OE{TjuH;fbNViJ{{-CC^!ZqJx{Yn{D7GNFw9L2{@Gls zT-74437Su0#~0MA{E(LTZWL=>Xb77#`}W*Hyxt3KdNp;eUR})y?aI-2UvV|Y-W8TC z3ITt1E%kIp94d~D3&?`O>bhP(G~N@vOro;)IPq<7^mYJ6kDO1>r7J0$A2$*-)}Id0 z02zioqM>u*eV>MmlmreKBN}?yX3e4syt8c^jZZtiRL8h@5Ls4aQ#MAk__)_l^S@4V z6-nM%MfBAZ8+hF4=uP8BZXX&q^7?>&#NZEY?Ao=f2OU9Eq(4*+7UH< zyL4Wxw+CD7the;FC0L?uiBaRMkbccYyTk|>93C7MY!mOQOc-leCb z0KVcgf}dv=`F($)$MY%T%f6tNUBrD;$JJ2yGpnh&n9#tUiQzs69l=j)Mzr_siY7?v zE4&@RVsAgNhH+SVPIa@dP256Qb2{PW(Z?ZwqG{=oemt}MA>cY7?O#zJf=x1UvY&$) zn6o>$Vf5wgov_)q_;9G^&~_4fDzFHGAn|s8wb*op%24t(6cV+V31jA!@P}std=g?H z?fQwRX=-rz)ZmE`VsHOvu+8xTeY6l}W5#R?H;$ZYoeQ5e?)xB+yV^Y*gF%AT(UJW; zcR-NHcc1N$#M5rz?b#HmT5WZ=#ZR)%wL}tOU6_JF21W|8)1AJ`u8%FAN1h)(r`_)O zq}Na<*?N>d?EmIkyZkv+97WM){;&ubLJZ+H18V}X1_wuA^PWB!ZwE_gbnW_< zO5KMI{Q-L3zkuFyA2)!c4AstnnBNnVS(}4X`vYyUSACfoshs>R?b| zs3|m_+g+*gCmm?P{Z`t_z+PQ~CDO5U#mEP2WvA(*Ky#TTKDqzSDaI_Z_yN>2qSfMq zGxQB(+n5xY!fa8oVY6&=(dZY56%UMvk3iNhgkkRv`*5^(ygj#27&@A&=2iuiN_tEaaWOV7xw_l^uee6 zVR9S!NsFlPxW|y%hE#d@kcHkT-tRV4G~ov8y3xaN(}K5Hy+$?SnK&D!#6nrC8Yx=W zseFP!7fB_=N9$F-;_7;=@rzo~yfr57rD*!`bYcSbe|T!VK)gNZIqaO$jPK4-OC0`9 zIAjKiQ7GS?-vmt`?_fQB6q_GH75m5kAsRb^|K#gQ7TX`9Pf@(}5I&eZYQ099P*1kL zh>W`?;!v`pQNuA)7nQ#mO7nO?{2`i>{v6zTV0>pSHO~ttxhHI(r#+iK>hF8%AL^+1 zq33J~5xnyKA@B@vT!!*C*6U*LAfsq_c&PYd@G#2u&c)FW36w{NgF{_oi8%HscC=1B zfeQXBl?>kymctK^6MG)U!1peKc;@hzhiT?e_b|ntKujxQK=j`buMUp0V*eTBLPkXU z>pUG6J0He`9N&*|1a&)4$IZ6JJ!Oe|fm-Mfizw(UierO`)Tz8(Kh0{L{R5#N4rZB@ zEgl=pkqIa{+QDp=2b>v2uCKeZTH| zzSDp8?XOSuqxw75hs~Sn?&|V^35gF%H0iE>P;3MRd04y>tGiF3qN4GEii(PgN)#0p z4Js-|jHpC0q7p@eiV+nRBfg2E-rsM`wfA0o?}eI5oxb;eU)2ikwdR;(jydL-V~*GK zQD7IKls@75ZMev1H?$6z3@~zTQhH=Q$>K*>uLc@))_zSVlltt>!b1D+LY@0_utA}W zDwc<{eopqU+uP|s#~J#&gSqiTP+X`~auO*YP8?yvUnd9;8#rjl`J3stxNLxT% z{tL_$d;FeDPoMY}VGU3Ze;BS>|3{=+`lBf9I;J&1%%3>anq-Sk&CinPgt(yv<1IEP)4QXPK1n0p&;}aPBj~d!7mn3Oa9Uh`lNuh#@dUQ7`z%A z>yAZuky{QS(Hi-rj5w$>G9QUy1*y5N5O$Ryfqk*6`7hs4P5OjCFe4)&-#s`RaUfY^ zahN&&KQw^BP!kyv>59xT6;IDE5Fm4YggLr`^cGbkb2M=5SaUbijmZWGeKP;yKN7X@ zM?$-|QZgnEHzV|6;^+X#2b+*3qbk4Tb2b4RZ|Yb>4M--$H}~B?8l63H7>t>g0mQ2P ze+jWI&5|-K7$3%nYdB&c{uL4LnT(7fC?waB{qXf)O30UL42@J3(Kx7G_&(l+yag3~ zOco-aKnkx=laaZ?{fVT<>k9Ev66(}nV(m>x$-*A}=7=#Sfrn|+uXP{O++v;lmrbjf{+_YDui^WFW3Mjsy97;q5{g4=BT?u$OccJWmJ6!DrpUU_`zQe=S7X zBEAbE7b6A?=-fdRs&?b4T8M~qV9q$`e=wpKsLlxQGPYlPYI!8I<90L~;6luqx)~8F z;V2X^SXKetj0103R2aT{NyG2F9@D~fmKF9ZMjQEFO+BVBP~}}M`}l+ zpP|-&ZR&+(@!Z6!xzRrPRvI0X3+*-sMu5OUsAZ5+eO>Ie3~~}O?F^}=1%cYAk1aWI zsxtJjF6Mb%j7cSXlb?BVL{;`x~UKeZ?g-lz0tq5&G#Y@t!769u-`t7cE_s1 zAHzu0_+yxJi~kCCu45eEYYZYHag6y38+B$=`s#OOV&9I@j>ovE!TdYc=a{OdTokMI)e; ztVBzg?C1P-Xb4aooCNgJ5XZ}I7!4QLrt~M;#3qOabhPXz;QyJ#r)LJA-JAa#2*Z)T zm7SY6IDU)(EKn6ciBO;aBvP&a+bFg0r?9!eTB4kT#UYuH2jjT-GpVu!SazF!BC3kz zWE8w+)70|6#ndk6`DOM}05+fvX`|0uP^l1tPHir}AeQ#D)qm^l74zO-x08R0A%M6H ziPmRq@WKorUMXgt+HFY2Rqc~G2&8HH^+hA*oS!mV5$(t2B*A24Lsihxchmr0P)|Dm 
zzP1<90^ftz?I)qCc3_yxRJccGr6i2c&Wlf@#ev2w&$w`s90~Usl0jFDiBf0(G5Wov zJcMFrk5kRR0JM2Pa6kw!q*W^?2Z6k1Ln66lApKxw52KTE@DJ}$9}bLGlLvv@zlnqV zTFcQsSi{i<%*MzpIfH4W0vN}wo(_bz9)$Z)Q%~olATi+VAh1mt=dQW|gZ~{@a#9uo zewpc-;wQS*g+Z|N3_)J&gcJm==VoUFej%(8q@VMEJUx~?DV(M$fbq|cjZkH=k?Q`N zjAI;*hz(zUEe*&B$bL{pr3oCL8%8u;Tk`K63gNYZ=+0h5bY)mpj7FvPORIoR($#^X z@j_gSv8dn1Zng7GFz{@43NsR{km|_Leh3c-k}EL+DG`|%`fW5TU(aURzOU-vget(W zc!a2Wrq_ajTKN`)19I_kAiKh6*D*9UT%lvLvtYx5U75bK0=4n281>{W_=G^b(8MTk zS0=qBnUgRC8=uNbFjf(=`(abgz`{2bvIlQTvVaM)4Bxb%Hz*26g~Z?sn7%U7U`9eO z;K1QJY252~2M114m2q&_j2sI8cz;})q3#28^EQBkNAJp@P?uW#cBGp3PPF=PM2spP ziBW#czJivIV5XN)$=l>1-&(Qbq<|^zpO2=#gRSEN2idz19S_hsD}G!_Sgw$P$&}n8 z-PO!@#-`*UB_>pzeJ3YH%^dDw?}Nj!nYMBOQNHyfCSkac;9$y?pee3m7+^w&AV$G- z{$NR-d?(tbjoLU2QujDP(h}Bxdyw2>NOh}9981AyGQYO{JpW$B2#?V4e-_~K5mI3LN04T})b0=Zhhw6NNDksF`;o|2T;`9#rmOqp-pWj<)U*%!@715y5_#mK z#gZ}DulJI4)n_^JUOa+UkQIfK37*;4yyoA9^~+78Az3~n{Kj2`FT%i|hTBF547`76 z-vC<2VM1$p%4bDsjTI*>?lE5zOb?#Oho#BYT0{!zegn^-VFa!7XYsB zJU|#`HI4r1Rq-BT3`7K|`oGJ06@khzbNsma4xOJ`{bm|F8Pc%Y%dLWj0SJPVGb-gt#U|w@vvujOo50 z*Q%dmPWaV;5LNthm-^!8Vfyn?T9!b7y!d&9`tTpHMjrcs>1fzw7@|-7912A|@;1d? zT!5RPpxWFa*OUP)hAXY)dump>Lzyp1vGfmbrkGAJGh(wq?LaRzL}0&gY3GRpvM1hQZJhX^$XA3nIt5BfOvZhFgj7J2|8vsA(I)h%Arfb^3v4pe}s(J{_s3R7y#Iwe~L)PStv70(zhfq!G9uE1Aw)xK;0@;y;4A` z`~MiK&i_-Gs!N1|w+6oraB=yL&Zaj_66Rs@N=ud^DHcUVwf<8i)M#Qj!vFIj{8%0N z=P3KvaR|u$AXa_;&(W&=pWjeblS>^8aXhhx|Eb}>L_fCyEG-jb)ffL9fFObsb5n-% zxJxnwjJO)(YTLmPm+$6?L$VME0fkP!mjDj$Ksk_r=9y}HqU$9C5x!J=e$mfhhw&_( zn@7O<%Z6XTWVDJWwA09V-)?g&5 zro4oL3JDU1N&SHl3GXDjVi49~{X*hG>7%?l7V6wR&WkxM{xjmm6>_!OPRj5@4VwRI(5h!S zsnj%Io;@6g3p>Q-)KEMR;CtkyHjne2F*B{2Efa>ldNXz8WjRhb*;j zoQKFRumR6xJZ=tAr^iFeRe))nF@A?Bo7t@xH&}f!4t>h!C}ksYx>g76t2yMvIK;yL z2JMaj&CV61Pv=b!6^@T56)xjV2Y$m=zMSEvu-MT~cYQnzmHeYPZBkNZhU(77I3;Sf zstF-0Z-gXZE|PM{Xg*{2+%y*g!6#y=@OQN*Jyb295N7^_5EIr}8-WmAM3hG5!MBeO z$+O8B`C}2wi0Efmg!wP8?A_-;-YfH;|flYtdAbZ^;aiDh)6I+kI{MSs-q!g$6+#xm9hZuN${*Cr{8x*J);`)SP=6tTa`a<%5-G3+pg2sst3E-S#qA z>8jZRE8P~>0?cr!K#>(-vb9>CX~y0OE9}EZbpb56E8C9+&*PJ(J!HY+szG>G^K%@h zO4CBswuutW?_=NgUt(o|byV{Qk!r(qM-=!z3-y@Py;+WEwPPOQ?mFio?yhg+An;C@ zUKR9VBGi!9j@Mdz*tlgW%ECgz3DXe@VqLBjADnsTgrz2z@bzWOE3X{H3h z%A1!1)+)plBD1gSmoD`@Csciv8>VLEhvU1U=*ai&*ZC3ZNq(fdQ4pmje-Q24&WB_t zd%VmIAM}Ia;SZoa5vu{3mH}w>=C~2!O9MVsLE7!z>L_ZIjZN?bU>2wr)}7#BBom-O zoyo=Y%pngp&)~Cl5ufFc(Z?I2AowtE$1#BM-O_1pEKHN0^bXaX$AXZS5!}n8Q37__ z@glZ}ihGnB85ycKFIPB}O0|}s*5xjni0fzh*;MEmRT`_F=V24L#aXF(z-Q-EK5G^d z&n8U^b9e#ND9V=sSO(vY$lI~xKa&#+)K~eiyyaOCEl^$gnE7b}eHNc>m{{~Dzw{Pk zk`1jmS;0IYVD)hUB6I1%^mZUqaG?MqV?G!7AekL+=bH=28W#M{;>!o<0t24OXC*pS zPU+5{iLMlQ$s3!YO{EA(L>9*T*bikM>pu+Y5NgTF_XB$T3=PUTC?|~@s`7o`?qxqg z>M2^(m!gG%(a=fh!S_p}#WY72wqSRV4P>(Mf^l8nhAXX`x8tL;l@J&URw)1u*r+UqNDsqcK*^{ufXb>lKG)-&(Y4Yijzx`2BMi8q5Z^ zAUkFd;_)gwaZwQtKH9_=z$q!sU7^;z$5@8);s5I3zl8pVE%$@G6(VTMIZO!&KJ>5D z`!4+}LxQj3eVGuW3>acHtR|C$V2$~t(XD@VsRREOs*3+tm|FMiaCP_B5o%qbBT_A! zi~#VuDUN6#-7SKtd@A;%k^_Ke1j)+1|K=|%AN`xZtnB+Y$ja>`T#IM`4EI zF)+m-xvnkjSn+G1XOU1K`LBJ15_~{^fbH8)Aaxu0Y}f)&S`Jx9c8jq51=F%Zkhgd< z8@257_V3qUMWF-D)K`TLukbJ>qDafRlL4XdAUg~Ym+gg)0O?iCZ#(MPTN`>qpPRgh z`r}nP+2JL+-y%Ln5WPFtxDcht13HDsJUKbA*XL|fwu>K&&^$ZY@k*(=eG1w%Hlq#1 zX3-Si&@FQF%PEeD>N-Dc71|f=WEbA;$%8fo$K@#w&+h7(57WyiR{)FkBX^yB=NH}tUzhbN&PIFLA(L%>ho;;oA_^pcH$!c^6=s7jWS zuceL$UY>qPa}sA&eetGSHs-f)4Q$Zjxq zmP^we;s`-)fMSQ~_`rTu%<19m1m$&e&Mb-WZeVX?YiKTZh>7nMhgoq2Hv_0X^bQ$z zQU1QfXUQ?@-aRu&1Z~@*gP}lOEH>#E3jtQ3ie>^v>oj~dAPiD7OLZ1I!sU+xGaV7? 
z&P+!X|1~fpEh%>-%0x&(1UFJ1(5zz#fy;p+mN6m#w|Ayq=AxTINM9lNinT5%?P39|DuVq!V`Q_#$Ks@tfx zFv~GIdjwQx0g4~45mwJecH1z8&*o>OM#&C9hXHWbOh>4CINK2xqJ5-*1KVgCY(6Pi zxPuYll{OX{IPr;9VP<^$?G3#QXH9$7TtT;;w>iEAk98`R60IbI@@8kN#{Qg@Vkn`{LTVZ zt(xud$TNJ6Ks@HJy44+iUAur^yVUIu9WvisuW&|W!1jrgq$a3aEhEYur7~^bkaBHZ zd^X@bC$SmkNUP^)d{6N6_GN@j!B?O@o8zD+S)i^6pfdzidy^o#SMcxR!_Zd-xKgCZ zo1U~Z^$?_%_(vHF4_5$iYOX^EHF|lNv(h%#q2pmk4}8=60>59JOM={I7Zr_Ed2shS zM2AZ~1?=1jX>>I|o$DB^X7D^+B}=CqPik*idoZ?Y*%GTJ&vTf@IPXte*{1UVbv+Mx zQJ}#wB0nR;|5)3`4=X_{sQ5xLSv~&WT%707V}+$GKz^yR60~o-!uB04h^+jcD|dWG z0w)St_1K|SmVkYy%y)#VujV@R4cv)%I@liaF>KhNI4sLkHb z`K;W;Pm=7=YoG7POCHVyvWXciz5$cMr1*CxO`4RR4mKxpt@Q-)U>BdwI|WeISlgj~ zw7~H}KUBCvNaec;qv;@EAnK^H#1T0*t$=3?bjX#L9zThS`1Xa4sC>MEOrQ}jByrLx zJVs4YbqgJts&OIIisuUfuJ#)VzoKuP)JFEHQz<2~#syhoDNDoi>0P7HMx=%6>P__p-@sC0k z5INZ7W04P-zcK9h+TZ8>pVRxg^lX`A}p+m#rV1LIDuy|#WsWax&COwIO0IR zBNJ@=WU7?yd!FKk9MvFDurMudZnbPN*QNEB!)(!e#mjk)(9+M?TB8*4f zF4HI$FU70kyJU-+eU$LLJH=Ah$={A0Y5K0=C7ip|5j`+JFM9}Toh9daqj1%^rKA~M zv5Y(}UFrx&k!t8t#M93t?#~$T_bhirfbiI^Uj6O*-5+XUpW_|Xw%fo0 zdGrI&QzD`1kaNM)&gI1DW;pdR{LnC1Z?key1}lf|cpWs&Pop3MwB_#NatC##VJRQL zB)q2_D7BpWdx|pG#3Y2Yn2Ho>5TUnHT~Ib47mKUOc$P#0QGL}gqg zM>3wvV>`x&u5iusbo%Lfd<%e?%iO{zU3zXaWE3@TBCNYR86%8M_omqO*R95(@ai8$2XZ#Uh_=lbd_o5;d5E2NW;q8wcIE3jOBHal&+!gL6 zrv6eLSO=K1X9Fhmx*8Cklj8?n!p4JeVuZ{3^$v!(XhPV`da0QC zN1#E+ZBnARos=*Y6IT>L=)f9J(ys2%1(zG*wR!goCVy~N9x9w5v?4q>cN+}%@p_M5 z^IHOVfmpY;5#I-xKjMqy7#1G>^9|q4<|qab`Qb@f2#w28N7gxpVc$1L0UJeu+P%@C zmz>DFNbR}O`>ns!v#w75?Fx_M*)}aT4Z#%H&Sd#oB#tJLZ!N|{)uxuR5lXM+%pCq@dQqTsVZEwD8-ahvwm-($`l{nNBZq#9XVnwN~Bur|#zy4+wOBg?;om=w-0FzR$W9vT?N(D#E!pab$&m7W{%`Du35)MmOj1MXU`$NK!NFiaghYE= z21~d8z;tegS(rrwb=%;E>i8CP)cY}g<8}iAiH>Y>h?ox^Es_fa&7E}8ci!f+v5}B$ zB*Ofy4$v#gUkFjGa}o%peLvfme1XBjWS#-KZ7@uj?v$TQDm z225Iz$a-C_z+iU(m6|<3;`<@2?IJ5&cGUj*qDmtlz%Ar5w^;q{sBB1Oc=T%=t{|2W z(Hh9}TuXK&!q1Vyp+jQh)79D?j!gO z4b+YujtI_?w|<5M?dz6Qq4(}^;BKiTkjFjN6YA)lbx(qbm$&t3V$}gMQ_E2<;-g-mBRWVju5Awu4Fc0SnH}fnx6EWU@QX|J5 zTG%Qgk=~q_^Z~3hGFJin2=-4o{<}haXC-~!aJekISFCz=0jO7XjsRMNan%kq>_A2O z;!cO~pX_Z`6{)w>r81~xZNWZ+v&JI;uBF$QMV zc(+=&7qEL{mF;sN zcGt67fuJ88a^|;Ix7fyIg@{8t0$cWB#H~9ir2_lb$1PZs$x;idgN;+XN| zny8StSNEg4nq6d{`lWof+@LUmEuK~nXx$!JVn-EqYrm(@R4tVyKM0?ih{2xBs?Sdv zK(Pttk@G6KtUXwBU6XL zNGG;CApMwoQui$|?|>m`<4_7vcH)4URrd~(gVXHiUEOJP#HAt<-?pvh3b%UL=oka% zqU9qvz8cYaeF;a@`$VRGG3YkHTzb6+DK-b<{<|E_!O-tePSlSks}DPLE}QU%4}pFq z5D#$Ra^II~{b4Q@-2?PsuL00@4QxIPj5{kx{m#=AM3tLBIz)#~J_IY$;v<-d>Z6V@ zJ%ZtB=qWclH7^mFoI(4<#yA2x!l#)#9%R;*1hcZ8b)#>E5NP(cU=E_b))p#EU_IVnJ?_(JARKY7 z#1f`^DRAqwbf#_{cR*kzPcWBy?!W?+Gmv~htz7%5(l+`wSpX+t&m|7Iu~~?ZgVMxN zXqd6;?r}_R&t*Wny9(B_+)H93dTl4?A}h2^>k2kX`ak2 zPs=UD%cc%Z%1q0cWD8!l1kfWaFM9&&48|iu1BQpOB5c&o79aBrM(i;TGvag+{;}W~ zpoQ^Gu;r-ZqyyH@tZ`5gI4SU11MA{-f-lxGlTyvyHi5e>!gNkL{B?-}ReB0-T2^qR zjkl;SAq6%WH`tDWEiofNhbU0XPC4G>6$rvJRQR%(AvqCOxYE&QhotG|v6$7>{L>y5 zSaBMi7o8)2mTr@Y(~v1;e6+O(>s8FulG6_1q4<0^DyI0_S;(oLvZwMr?5RCy4WP6M zu9VK;X*M5*SrBS+tHUGveDFR%C?-NHSSZ0>?6sWn46OZ(c{x1-BHVI;!VDROpb}CIZ4b6@OS#nk4z@V2 zwUMX?N0E2F=JeT*)(&OM+yE_T*7)c*6US%HIEJf^v%sWqKXIV`1HXI$wk_Y(A%^X& z!=ta5$KQB5bFCW^(wP9IcRTmLo|P1xG~|ghd6jzPD+Y&?lOus~P#F#2nvN+(O+kMD?DBviZ2(5urOF!BO$c zi`jeQA*sfglF~Zkc9W;BUT{RwWTD+MNf}zUST}kYJqKlv_1657CFWMQE(Cdw@ACNZ z;A#PlM3O8s}MCV=~>SVUm%A(P=ZBcmKb{-b(Lq8?4+T z4DyIf-2E9RzTqmjg&s*z@A;ViDe*T=y{IEEmS05MKK_n_l1Iyk8YSp7BvL&h(z*3qQ_2opvV z`f@Em)n}K)L(I1_49~uW9)ppsB%aMx|D7Z|^HV<8(I4-M7BfOZGJI6Yef|9nA|WCH$+H7!}X`v9H!qzo5oqUKk*9T zn(jv5t(<}S9h~jTGh`>^;maJ5izI@;b}%nxQGhM$beLh});Iutmi52yN(d7knCBnv z$@72Nbjn%@qO`ht6+04ub?i5R8LCvwUScRd-!ZR%V)<0K6?(K 
zJD6$~U&i8`-{sAQusN}ikz6j)*z|(K-PL;oUjL$-=%D8c@eZCx&4XMQ9*XGTB~G)r z2Ov4rit99WyxubZunWBv)e`Mcwf4G>yfit`pS^*uKTEjPwHuxlB6|aL_}AgNbk%*s z5v9lGy$K_ruipaKsCzdsw93yoFMXH!eDFD+75n(?=1fUw!t)yvnovrzSM2AzQn>a_ z-N3tby6EV$?Eg3d+;lKNAU-W6Ic3o3oRk#RCo{M6r<^(Swwkjy9q{HecsgA@;_K#5 z@OnUmx^UAGsqehe_oXGqc!A-5&s*diwfC0ZlTO}3!;U%%;nEsH$H@v;+U&UHC{V}v zu5>HkHKAuPWgVhY^wwe?xZq; z5t!=2EpNcaL|6l$8mY}a#O?k&q=&wPMeh%&1JSL6(4j#MNl9ig$%VJ^?d*BOL7Y%^ zyW0^qkuX%_Z8HjA^?hwd-m4pT`00aTIHQ4U5M_>XJq7BM+uWZgnBZ=HZDJFLLZ5nb zbGhWUpLe0JBDlgYfMNtxZ&y&#DSmq$>kHR$pm`-Wc365S;w@Cq*TA*y)No8p%Zy=o zRz4@*SSB#;nYcds+M#tC`8U8t4IeIqijk*m_jaAAvv=`Y0c zN;Pw|wU-IFbDBIK{W1P-JoOfrA$6gCB0yvV@Q-BzL1`?p%)7dH57Ozo0_nLA9IJ03 z5YnJ3Y1$z5Djlpk?>T&`u4zzHpFHsMp^I@#?SBk7b*KK2(bZwlc7=y^nXm#?_W%rX z^`RqTR0@J8v77CE1fT1XN0M`IyS zwWf*BcEO&3a4R0m_Vh8oXgJDu4XdG08?I=~Nq_;$dZbgkA6xPemh?IXgO4aif>_RT z>QE~fY}#GH;M*Q~)?XwIeN^xeS}xsP+efZG>y zZ>`+U?TKoSj$9<^M1SV~8mvQOk^J^TOBw11fRO9f@<{4YFSGh^vh-UQs`4q2E}TMD zAR;XnIdy4c#|-iKV}du-;1kqGPaJY7od%A;t8qHmfNf1*Di(HdyOr#?@*(60#O|gi zo-{yNLj4$iXY~12nf)g=(nihcLr2XJRFIDk&>hOe++K}EJY|ZLVo1#_a%!u#YMF~_ zMV-r?QCK0)Xtnc&Ge&JJM4Cb61qc;qxR~@4yv16{W`SEGhf04D4lOYMd%x9(*+YHs zkCi_IIz^oX_$F z13Vl+h@YC`ghJ0tEuT)ohwYnxin(ul`~y6@BvA)FiLxh=L0l(R9bm>(RHz=9g(zB` zvl96J1Uqj6G*m!TbEZ0Ji7|D5i%Y^E=F}pzsgO1(YdX~_7Zk?xIs^DQvvc}wx0mgD zfip&NU3q%MN7ds3x|O+l5S->J5tF3VZd^bgNaY+?{f8d5N_J z%0J9PA5{%Q0T+~1(`Px!)#Bfx=1r8W67#4qAt*jlF00!-r z92U5%LOY@f&u(`w`;bVOE2N(pMI}(@@RX$7e6uXVu#~KExa`BOr`KgMHERwR5!r|_ z(a6GOLN|{vqtdN2T*4(;_LzLww2IG$8a_+cU{(wuj+OT<=Vq~Vtn>7Q9s6Ioaz*{? ztXI`I7qVAGc|aK0(Yc(rjl{WeI%XWz9gY-1{w^qShN%OKoZ)KGVoRmS%Zr`n?vBJp z+{v*RnYtIs0HR_Y7d$8?*g}&=yw6Lf;q@_}G0&L+nF}vlc1}F65FU>b*I*-y)WW2w z*(E@ta6U%xAFvLd<=Eh0u1%uLI~$`rOqfc9+HAf zGI$v9MFr@Iuym}pE^(f}#0ycayeSj8fecX5Mc54ss4rK2$-j$5#D3e1*1e0^v$YR3 zb1~pI^l^?0mrxH4c?koH_f%Y}(D41h%vlg& z=E`EH$1-Ubbb$PSK&qZk7**2=BS`q$%m!yNG7Tk%{-^~MLOnCs z!E0(A*t5zc;mO~U;8tfvo@rdk@M_x(7RlDr{oxOjFMdaJ)d3t&w*QFf_&OCjoe`FrI7`Q`P_TQpX<-MHC)ncYc- znBhj9IDA)eyU5jwFW7CxDD)v$sm zvmhowd%oSU65Y*yfQ|;>o`tlWY`L~d9bVzYSrkOn<)ytBL&YuAPvj@BEK0^cn3$*QTbnm!1V(b6Nx@D>Nio+l0qty^o935d$Mf6E0)kJ?j6fI|_3ormhdL#L@?{aS!3E&2-p={U;Ct@@aRmrDbsSV0CDMYML zu~NcBeY{qPt-dCJ&K2MY=irGG|2NPcDqR%hwi){+kt&L`?#p|8nP^pHe;ED_R z4c;W#CjF#Fz=@Z>36CX3S4k;fkF`>@9ymOhi{7~|cwW%G!1S{YYd&eL*N}iA`1oFb zq)OPjt1AQ}sJq5zPYa(dUjd#0cu%k5xn{j{qPDo_j>H{*h$o|EKM3m6_0$+; zc!{I6I*NeKDiW)m;H!?1ST1#*1X{QOqGErwGdy&>#HS5Tfb^LNgO?jNr%asWcPFZdi|}atq`dL0$)Z#5P=9IzT6ffF zT)wE{WC0fs=IYap&ItTN7psBL)reuq#Uf@J7(Cbr0aduk`GzX1MXe-ox?t{g(dlGO zT!VOW15r>@Hln-Y9Yj`0gA-RlT6TV}$pHnbZ-Z0td^hL#>a0eOpR2j*;1{Q4<|&{sGd+8VwEi5GzQj2wn-od1D9U{xAk5EEWg05q_XpG;KdD- z+a#zhi$W5l2D#Pqt)|8-YKzZ-<=Kf{E{pKTB@Nr0aD(Cs0jz$U<5A0tEY-G+?xSr^ z&sBT=M|a=EYXJwv3qHeV+nq5`)e&isoexb2%&FgcHew4!;P;*SF-L|m;o0yLvfiDa zfNVNxOjoXyuuSklKN=U3nu9g2(4br!BBw5HbLPUTg6b+hs*u6_5{H9`sUB|!k|pT5%bZ00~@F1F5&C781C5mork+&7yPo4K=8rG@DRUT)Dw=OlP6DVfz@2Fxgy2 zAYG^f0!{To8Q#GfiwpB;X7rY4>!Qv2whF9O-K%Nc_rt;zY^b*SnMf^@~j!>OXBGgASTaCczCy6i zd>)t8Klj?r9}e*8X{w4SKe)wS+vXDS`XzjpPvf)cE`Kk*Mr<^Ng9bD#B{lEu{9Gx9 zGd6al#8^g>8$36-3Hj7vFr5%M2*4;_0Pz|SPLi=|4->7jFNs#+DWc^Yzo*jAO!8Nr znz5S$!+`~xuqPYgHD%c3-A+2m#7m@UN<#o-NSNs%^XdAg&BGe@1-&GW<}@57r|kg_ zeKSLWZSA%HNg`@)848hVP8Q+Dke#4_AwpY}tk4ilH*KQAd|I?X&vdH~_c$Xp1j%Ln zO+b|G1xg*4iHu9l-{TC8pq{65HMJ0+bD3;S>xNa_>)zBk0_f&w-r$N%GTrPXNp7B6 zxz~vZ#w64})xb2x)wO+`6X-#Rz9l=G6Cu>|^GN#ywcYL#k2ABnz0Wz3L0q6wYT7s? 
z6G@RhEeeCp0)ASn?$&KyT7Uzv|E&eQ^Bxxo-QVZTeLoGM=W_EMj^Lz7z)8#`#y9(* z6n30MF-XjFCpW#~=X|zI`x88?xBnPq*10EcY&gILeTKxPCgf%2$tl>4IvM_|{)>Ii zVd~%~pmEEWe}&db?vxD7{Ju||xDGPIc1i+a1a}i3&$Hgb{earE960?)9Wd0cY5CNt%N48kPqEqFJcuK$L{NU-=*0ETxn}JX zubjp<+TZdB<|~9;y@T>6YU3f80CpY%rd?$me>_v)(fPgxRGMT_?LdOvXu!7(PcbSZ zUL9DhXiA! z|Kw%kj!*ou@(}K^cMGNTB&bTVR@)lF?tQ^$>mH%+Sw1_ia4glh3NtByOP z^t6doE^Wa{Utw+@w6Mux7Mm@I(1yQcWwGoQEeKjWk*5bbw?o&?;UkUCHvGg z$c2eWJGWMzZ$Ae7Yxa>r%64#zz6M;Q(PZ z)esg>{4x;?595MC78UO09heXYVBJS8lvP#4reg;Mo?dw{zj%rl6sR@FB#ka5CvTFT zB3MHT)ETxZDPeyl%VhODLa$~Fwy(R`+(>g#N)S34es#>8Ep0k^lLvsbWUejC$O_bq z(ok}K^<7t5DL~BsaUk5jSxCN;2sc0Fv-4hz zhXH}y(&L~TU9<&i7r*bi6N7K6YBLf>>jYc zwhMoT{z^W@xUdMSPGb=iwK=14&&wbl*lh-N`8>vhBW!=~ub=-7_DZg!;OHNj%BY|- zt0=WLhDcNkCI^+=z+enF(Wh>clTUM{IlY=Y{0S3CQ>dOt&X}2gGRT*kx`OAV_pH-z z!ac5spf6#}I}3z6`#1-^6=eJnb^HvDn#WNh_k>R=m-U z%ZasFJhs$Rhuckae4vW`ts#AN0V8f)#u0lLl{F4iJ^GT8%x6@gCl(~McUG!Jm&Bp! zQ=)$Ru1&T45>RfNLT~Tv?O}Mf6$5=7B_UZT6ZFb5Sv2T;pqbPru_+P>-rr&p)apzA znvO{#C?XA)ovh;uU!Z6|U>7Jpk|eJ8a*M49o48Kf@yoRFQ_;;8qb`5tEP&#}D;B&O z(8wGHD%yOmu73q06|SK^<}$5qL(@=0&rje6sx8n=5GMeZ>(Al=wT96kYR_el36K2T z-`AvOptUhJ)}7{N#K@OKu)HT4^Vd9^b6NC+Hm9b;pN#-2{DpAKPq8RVnVD=d}2;rfGt9Q`lfuUZhr-1ybe;(veXqW zlr?1ljv3!pZxhMxiqu>ru(V%Z&AbXpvgj(Qb(A*PVmv**LN5PT1zMPhSeR}Ej)=bF zFGlL|QCS0iKom9aszuEFZ|vv#HH@ydmgDlWc7Sq!|0}1|W3l=(5)dbY<+kZMTKAT~ z;qvb`Cm&t6DpRkbj5^W<4BLPqHt5;3<-5Xj$=+DXB5S|BmJe?6&RRfNNxzJAyzAeM zK6N)S;I3I>N}kPW{r6fdpWOUb6Vq{vczzfPm7`7Dzk9E}WSjn0`@`*9mc?UP)(C$6 zHrvcpsw6Fkh?RGT)2AEJZW35k3Q;2J{w?O<=)F((CL4!&>I-s`>Rm-$CiJpmYF@?n zD0y9`RHNV(T_srk*UGw`Y3(IkY3GHZZqtvGIo8g}92?|h4wlmD`fYeswPY4+!T&D@ zvHC7kY2&j|#|a9JHkH~@P_4g<`KxQ=zEFKh=1<)Q0eIbn>>>2gc}1#_q9204=6~ zHM$?rEP40pEmMlV!)zR8>VnW$U}Q&(7ciFle$v0;q>N*+t*D#s7R^=Abv!>w>DOGea>9CL#sF$qWYTAs)>VUAJNUuWHlPBcQqDFA} z!abrz4uYHGAlG{ZkOL9#5O}1za2p zi{rXpl%gSO0ez|Ky#Ua0Uz*}|V5GWDzn49relu#EBE}uPl7Abks(Z0V1yNDs+{Z7A zx@Dp)Auj41=xOZnRJ=eQXx}*y+is1-BErTs-AiT*%FpntXK;e@5$2-3!iNBoeM$|%|#;Dhe_PL+!utb4}{3aswK~zDZjKu zopZM0xy6#~7re!{YoB8X-Dd{c%{iwU62%`swX9EgAm{}*&1kbg9ttks?*rhrRss$| zR940$n5X?luPXYSp=O9OJgq6?a21(nmsh)

^+yL1^uDjCnrpN3^MJ&wPr?tG=Dj zVYe>feIg}C2IBc(3ZFeMxCDyZg{@3%#lTOVe+{PI@LY1pnS*Cm%0oj>(4(6E0-aX0 z32?RbIhHf*wmDK!P=|m?En<2xYxrf^MSkg_sG9x4iLf}7tw_y|VMK@-(fB1ltv&e* z;N7qW4GbzYsDP4}L_5#r7t|xw+(JX{@vdYEhx%0lSCVCl3k~>X`D+n=1sV9xkmge1 z>iP@qG+|XZb+XVHt`-#G16J!ICR4?=LKe)ExT|Ex^JG1 z?oqz&PN5MRm6lb2f|g_HtCR3p+-L0tA?RB!6|=!fot$cTt}P4vT#F{Zjn_=wgB`&R z*4e3s_p(g8_ZOxD`(j`Z57P`jD`%0Az&BSdonnwbFz){}V!K%Qco6bi1y(1;7DSdZXm^Ux)H9n%`o80EjDoVs`Nwi6H9*~63VZ|l9@&nwnb0PDI1ja zY>44XYGh&vV(N(){XQKvv=FSAY2X6JnMRDNBMBO(ybYQbULcWDbOBo%xFkfR`McUS z-7pcg`SLZ0=2jogK+iqLIh*ZWtf6MY11rNo5IF2jGYre9SrgSt78!ETjc_&tUsjAx zJ6qmCr@gm1Tu!^+<}m!>tSV??Z9r`_GBSD@q3g-jkmw4 zu3;9?ZP`ggTeonOIJr(>IWlMl`F`twv!F(PLmwaOH}hLgH+MO zgI4b0v4F{OtD9`sIp=K=Yg>wt0$DZJ2$#j|Pf{V#dnLH$q)7u zslJ|r-r6hJn_y%u+-Bt%)iuZP?`X|jv}|QdR8=mVV}zQEZ(sncTcNyqRjWrOM%Y-h zoUx{hXgpF&-3kgvj$j2NXesd{|B{@luf&jB|2)7gbE!i{t!uYm$!t$fsFqXa8}J{E zg-T(DAK5H0&(09I+Eg*;zV9j*cGEX}Ry^Xf=o;5i=RAto2Oa#S3 z7(^H>gKMPhrEGtGcX_L2^HI)e$3igolZ8f@@8E`DU)+%j`IRDtkS%9Cb-L6D&q;be zF%>ti=jMSEGu4NS44A1i=)O&WvmKdraYICphSG~l1V)tBMiCy1_?6^ANx3PZP@l&k zv=SHBz%(Rn)1J_~hC#MvKdfw)B8jay?YGV@q?QNc23dQ& z6yV#R|3`pt+VY>-N!fozCn30U5BZ51*@&wzKy3&EH;^YH5EU5{D8`4MrU^;pTbH_o z_x%oFE5Gbm@?QbFwc~g2>?vk%tsNYUM0X>p7;^@vTn6XG6~&Uo8!bjp<>kO1k$M&L!BMS?p@3XiYlKfq%FIw*>!F0~ z*=$6PM&MRPifY(~xh>v~M!ma;Oc+$nyu3-^gX9UL8QF-CfN{7P#lyhCbNl&XBRNBr zEd_|OJ#RqSX0ERG<-V6T7PVL?RYDRY^-r;C{t`n2(?3wN3-}~cO)oQ|WyR=kMr5N@ z8cTTiLCQoHKRPYl`vyr_^?hI;$_q$<_98%o`n{mc2+N+B7pD#{Gnm17D=`#r74zsq}ey;y(kmNJ3T`}!jZdz| zAd2RFAM;hal+Wgsd=}0A0}7E;DDa87VxD%! znTE9IfsfRZwLAbrd&o@*etDMd3lFk=?xb8bb*)j5nyc=7gkPJl{VqA0#~NLC)fLJz z0t4MROY=%x+1YK&(A-{Yplq*J8GOyvM}%7cLZD{IHQCgdWI4c4BL``BraD+@kocog zk}~2kM8DVlce#qNo0k5cz}Tfet2AOnI&gQl(T;>WECY*)rznxs{B(7LkV?umr2C+q zs#!<8I|Xkhc)&RT4j^?=W9jKfhAYjy(+0eOiQCA}Tjg^qclnsF$?FV#W05Ncfs!85 zh`xx;*cC#$>Tl8hCfj%2{9R0W)#BgBfI>s^5&p`w%OTmgrU@}Gxcg%)O*59Io7q2Y zde#|C=`?qA>JzrJ_SJgy`(QqLe8Uxz0Mm0)4lz`Z))}F3BV8c?0Ru`NV)uaLXH`bH zy#1oeh=|L=&48|u?EF0ZSCXrs7Hu#hN6HTYFIH_pH?yz&K89C*_4hbm)CG{uE>#0Y zCyfL26IBKsBDypMg_4IqFi4oPb>t4}4**i%-QNSG=Iex4T1JJn=fD6!Wdt#zW)ct= z4O;2y_aALAFn>uItS&bg8j1PymxNQ_fL16`tu}3d3Rk-k=A+t8urWU0WJKp7#upr7 zD%y6$WA^cu%|<4;O^l+on*p-u2oZ@$&d$%mrJ6}RC9uG2uD5$+vTlUIkME)RrO_!A z_`&0i094#G0ORWG1~_AET7lWevyBGlB3)H&g54<7{B_GFH0YHE?ccBg7WF10U$ty9 zex*OZ$1loG6E`2QqgEK*{G$H8blS*g5lL#8BsDwg-2~8Es|}<~Wh7>sNYOaQp*POL z<~sn#KI@we)x_TVOLw)AneW&7UNxXTuLg+r`P`3dO8M+w%DvpbpI&tBwgOwjj!Wk9 z`}PVxOF!bXX*+S3D!}UU215!opb8Vx0RvVL57WUbgXb|?Ui#UGas5|t3V1}2cOJ(a zLnIuGKfto?G!G+nr4T7^xWcp|gD7pLcl9H#;Ns#-vVSz!ovyj_`4%HIA}^b@7e=S# zWTs`oI$~C@)r-j3cLRcvFPNpsEf~TVu2(79(2rZMkXL8F&OS6 z4R>UM*-sy#RoxT_SNSYI#3Qu`Cg#WXb1-AGVZcA#zZW1R7fkT-c?&ducoc@$;| z!}a|-m`t9TWiqW|4&V4$`lAwtiw6)&`v~|b7?Ovd5X~ExCwlyL19i}_g?ai=*FQF* zxx%#>m&Iqs=5Z&sE|AgWBz7u}?EuiRDmp?XwJN?cbMF-Nttv1Lw%KsB4 ztK=RJ*e)a;8ZjtlW?C;qF@x29M1H25A)_e5c!6zty7{%fWNKWF+OfkBp^t^6rqtp6 zgF`Sey%9u`&V`vk@8Bd++{?`U#W~TO+*Q1eyOhQGGa^# zn#fyRvT|V$!zFkCq55_Ke)%52cc#EhMwR{q{Od3_~CV&vEVP;&94;tv|IwKTAgD=AN^;N>CuVZ&5`}wRR zO$=<;m?$XPbp~uf5Fqw~4P(NgcF%4jAJx0~{i)pmSJPyUDb1~FcN@w0PcwfnX=VqV z^?WP79JOk(5rGmBh|K6Yix0XlF(yRUx$M{M35S%ry&J6EPvkG(@7)7v4FUHec;Z`q zHZwT9cN?SC9e&;W@CWES300;LTMrG9!jk%_w(nrsgoZ(5$jYVhPLe;opGL)*nyk99 z$MD_(s}Ai2)bbwO0)ZSWU7c1(*2Xgn8?@TL*9gN!YWQa^)DYOD%<5*@xULWd1Gc59OfbkuIIsr-wQC>P;ov7&0m~0VXF7P=h|mq74C^a^Txwb?owa8Tn~N+$p*+g% zj&WH&N(`n(MBCa;&O_bwKLXZ~*0YB`7`%+;59aYaDaO2TzTdE-H()R$ef&AH$82}iJl#U0yr4hCp#WlzARd&%Qn z+{0o&z+wl<*{7K10*&A-7GPK>M19c!phb&uf8ziim5^+!?~paO5PfawOQ?gA-m~`` z2&;?B!X|;lze(&I^e$>JBC}FFo)vE;lUWV2+p5Lvx3wh!gQ+~kn?4&)@_I%y;fFAE 
zpmu>OX@CP+=9P8Lu>0_GcCE(?cIpH{_1ug{a~SXTgNLs3m#P{J>U$w%oG}x9rI)nH zlDmbe`d@A^Tq(G81x8!*l1Ubv1r_~zL5-h-MrcC*IFuCP^2xT`Yw7o4kefu!JZMC* z1EB$&bXWp%g175N%{ypB^W#zI23fF7>BZW_k>2*}MF)kN4879#V|o|ZTi^tqAx&RO z!1&ejgP>6Nv%dr3+oz7iv*b4au7Aj9(KMQK@ScaOvBgJL+g zrv9X>&5fdQ`PQON_qX+F%7ofp5%F;&FfG}^NRHZ_f{|K}nIN(Mq11@=wR%n5n4sls^6fJ93b5o{+%<@(}|BU(xj^S(gO?ww_ zEUI6`K1h(Ge48{jf%?8gju6_{N1<9mLa_1_yVnce8X@t3u-c9gR>>tk`v6z(OL0R) za-hQ}N4YJq4#QY|oCv0wTh$!{q>kA?0F2sV;;(aoP36}z5F+-;F_Rp_fHTY{>FP0? z_jMD6vLZIepb%-Mua@=9K=4$dHp(&I7)e)jYb6`fo=$NpE7lUTx;}PL)Qd4frMPhf z+riUjD%B_PN!JGS`+b_9;<5%Y{Sw>c=rQI6s=n!`G45fr*bdc+&*gaO*%(0`ug%4d zHPqG>S+d`Ys?JRCZ2Yz?AOfuE6gq5qK?;UL-Ow9Sau5cm zb&`Of?axmMPsYcaWn07_G50i}G*q&^OSPV&5H(-h!q~^pistk-2C?X~>v6O{*ao@v ztR08A1@{oV@Zb?XYuG#l-MYBZYgNU=Ks2bNs}I|Za3Y}pwXY2=+PAQUE=4-O%@9rA zr7rSSXCYrfqO$zUK+i=~WE^Imx+3_q$=}-W*#?Zzv*mYL79X7hu;yEVqr5EAl@<8a zrgPZr?tdu@-4cEUVY8zhW+j++bbJYiPb{rO+KO7mnH{H7U-jnv}*&Z#D99u zbuY`Y)HamI?Vw}t9m)$@vJ&#$nGzKlyqwhSFCl*Mfktp-DkhvG+DZuZXL#;yy~Jnz zMLzrR3-&#~0ilS_fp)ONMA7m!9E|)fA#`a7J!oZKOyE-6&Kc&-1paprPyy9y-AFjD zSU!ad%;w}Hzx!RIZMzctE*MZY;3-fiyMRj5a*Dv&bAE^c1Xk~V6WXo_k${@`Q!W~j zct`U=K5V~c+5x3az=szvqK9tQ;YYX@Dl@oL@kPBt)C<0Ol~cSNx!dtKA+c&>*#fXv8rk-rIZx7-IiO|{*2?l?#j?c!Yyb=M@`LYojlB+g# zLOvb7YD6dL!XC1=f{CLVuK`Y59ojnSES9ufD*v*wUnX|!NR8?Gs1q^S$=)=)K`D9f zr@&hI`cnf@t@d06QboNSR%I7a3-P8^X;!E8jTR=dJ^4n2cHqRThO2VK6|RfVhO;FF z#Q-60bu}$-j=l#uzY0F;6B1^2c@%JH-oqOnYUh!cY1Y+;BN#lfl8;_95b3Eo*jjBR zn3FY*HkF6ehU@66VJX48R0{y2j|8!rqt*ngrt8E7@q^<0&#&u&3=q3Y;lWu;@~Z(W zmlSK|UegQ5!VA|8T8dM|5jGe)22-!pMy}F^TBh}fizT7}6gQ$!c~?ANY3dL^XvPOP z@;yWyHemSd5`@>eZv0+b1DprA^bPm2%28hMF^*wbgxN+$wy6f3Q60sbZ_f2NHYQABlq~bhpQY?^M)7i43mgFj|llEhi zB&C(NdvPXXUxMH4n=}cS2aC8Yig1wf96{D>A;{WlfP~U@r3*A!+|95Z;_@<>N4j4z z!NqEgLlN;5h4c@>zy5KvGe{Dp0U+Fa@;>N53pi1tdyN`X8`(J+n%;KE%T?39m>#niqrR(l_4_X%A;22Bq@LAO@BUS6} zyo`cRfZ(gwP$3RAFIghf@1gGoP!_lg233MV>wccQ%q*rTnZXb8=+M#m2Uyv&?-`y{ z@7jleQTh$DCL<)3Rg_apuInfFEMZ27zh|F4Pnk*SvZTDK#(RceE-j*TjG)9~ufRnB z1^aC{ffQOpj%t2o5b0stc^{(Y-hC(_ywV1n7wuG4Y}HtdRs*<0C5%r);xUSdjZHxv z2Fc>U=(_J4VbUlrRVorupy^yDpDJlV9r4Kzg|`MLp>!nTA^lT%3sm_-IkH%q#ATT| zt&;9wZu(Yo#HuyKw0$uXZ>#6=*;T}6>smfbW-)cxr5=E2o@`#vUS)ds)OMa_TUHIi zhcN2U;;l^sHYc644`n%;CP6sPvc&-rqla%6{~Vgat5rGa+EJnTOT9GcF{KgQ`<)WM5pA^x_Dk5 zgUWlBq*k@#G1NO4-bcXamkV1m-fG8{wn&@aHmrLbSn%5G5{Cby7x2seAJXmzzRhuu z8-MOQciOb;wrE(&+~dciuT>l_qy(X*Zn7HawqrJTWNp3>Dga%?-(^b zs($Zy;9#}l=gaeQxyLE8vE_{`xmU*hm%dImt<2y>Isdw!9Zt#WN!#4@)PFn+V!BZ+ zU-DmXLA@pOueYFPewQr!sG5h|yVm%S@;^3|>5<>e^4HwlTSHNEaPBEM{zc~B|6H=s zWk~L?LjUok!2JLAq`>_Dcv9fMJ=9#Vpz(}d1-Lygn>9@Bj96+`o5ea{OVQ&*jA^|L0#G<&9qQuKe6{D(Xmv zD<*U<&mQFlR_>3I<8Rb5eUX)CgMQ4aIs8G*q59kIzyATloi)pE#>+8VHFz=iN%CQnFN3MSRNyZm<@BK^vs-Do!C3N-h!Z1tz!jpt}ID^EmVu}n|=DV=3+ zQE%dt_a^@9(-zSc8vgs-KKiG;+fr^_DED`k*VoElzVK|vrn9;61aJOJCgQmDP<4~W zwN6c@g)0=iYSrZV>)+pMwZQbkUuLHBK%UYrG{*RM4G>zi@p^P{dZ z{_DGO7MQ+P^A&nptvMt8RBi?Op2>ZS4XT-yJC^1ipPDjP8C~kp*Leq82t1jK z)dTWP-h3i=_PF~0eKlvVmWhPa9Od5parf1RJhf@^It=xc%1VbJ@2xOBre?&hW<<6K zzPYdr!Dw#o=KYtq1S~MEaTvStrSbFRXmqeVs?NP=-;`GUxl~gbl=0l8|LsNT3rxQ{ zjCpcs>XHM2dOfT><#~;YE&{PQ5ty_YE{% zdyR3lyf9qd6&t_!R<#JDMJhLO*`xlFoe1&sOAUG1lDyX4bkj0}uX0~tdU}~rqh5)N znvm>V@}$5?=Yy}PovTlEWibUmRr6r3S|E2ry0`)P%S(-Um}mat>Sf0K@tNYUu2p>` zRAbrXxdVwjgjl!KsFC%+l1G*q^0uzrv{Zpi@}t~0ZzA{2hZ#t5T@2@b8GKQ`jlTx& zp})sp1D8>~q`$dYZS;BaV$y$|{dqUWAC?)r^2b;D4BlDO^xAS`{2q$){^QDYf$2J@ z^cqS^uQG2RtIo~ybB`|e!;Q-1wlC|YvwuFa<0C=&=$++8jcLVlX&rbm*Lvgw)%w3Y zU$MaCTyFg5vl9zUFRIqQS924cXWGyw?+q8@>+@OjQ@GZ-UX)buQUGlEqfQ3e!0&0|9iMAlfO=8(X$q-CZ1b0m`rgx0&R9D>YN(F3qhC zY8h=rzPE(5n9Z*_v^M+hVY78mD=Ic@Mp4n2*_u&5w;A0rJ)+gQjFOPDbwsPP+HAId 
z%aGx)g!5gx%ZQnW3;Xr9whqIo*|M5Pcj-ggs1dNVx{M;9=FwVu-TCRsO-=Hlg0aArrX?aj#+!US_4`}uQZGbn_B)xC;nv+o%Iwgr zPR*%x7RwAuU%54x)ZO}j{x+eF=mX~e{C)YTSq6+uu(jQ7_L>7mRaR@Ud1Tol`BsPC z+vqpKmVP5@u^JhfG5eG@w`{Nx!>`$*W*Kw?vi6Id`LYP=Cz<~W+n9`@+ET;l>1j8z z=4P*6_iIHZ&Gk;hE{*&JcFUmN+?dfbn$Z}M zEuAsaT7zM0$XA(?cbF7H{D|!B}1_FDL_P#2B*<>24Xoy)ucS;_gz}rPLJU zkHK!)!hWqKF5}qdF+26vp3d%4zYJ#CteRb`w#%+t?T~pMkuW0@^60hJaGpQE&my~w z;WdxuXN-Pxy12Qlp{lgemXY3M?T*TDkeyxa?lMGWMO4^YJ?84Fs#3e*oZv9xc~0GD zPMTdtT!unLzaEyZoqE_3oimg-kT*8fIVEULYO?a;Ga|Cb59l$?S&+<&vk!?Aq zj%)gGpC><#xxo0GWyjFSO@ez%~#N`$p+Ad%wvXi zSPtJV-Ke(~)msg#SF7j_>7!=ZfLd{LbxobsT2ohN?e?104p`zb#|@9{B|#YrLz>at zomIP9SnM|!S+iP&jNY{7*F4s^nrux;PjhQ=`G{8JQ2*xEfK9Eqtky5yt0p)SAp-TJ|QxHlQ^-_HPp%q2BROW5Kt0_Lhnezi}IGCny1jp-%6g0NPcG~9+QYw=lP zc}W?|5}2+1x>K(j*8IAhU-Ut}Y$PwD*95eZ(ttK#IL#w+`pD?L*0^RHlpn?{4)d@a z082;AS{>rq{~?JYw2A>`EsCd4(d+Jn5@SkJ!6)wG$8xms0^lv z9D&l-oR} z)12mnR%SPXd1)C|32j&#(|SGTunZT!oO0U|=ByDmTGdgbCn-ZyR=jykb6X?%0o`pi z!)o=Fc~=!&+QXQQT)Zjn+PkLoc!#t;OvFmS$^Ei&!1nXi;1* zifQ$6t;`{Ni(l&uYgw5yhvBpMwl?aEhO}IONCm^eOjeG&t*;+ZcD?c%wPvqJ`lO)f_ofUHvi=Gn4%VWBCcWPLw@C_Y@B4VSTV*L>5FYdClHJza^vh=MNikYtYE# zW%GPSTytC8mZZg}&hy%cA!kpSg^4A@dR$hHEiMXavZ2&rL`Isr%uX47x?D->K1)!K z=E<4M@J@+sHZZj}uw!EL^sMx>u7DQMy|X*oy459h^MEc_o^noh2nOXMsj<1I$7zn3 zgIaV~mzJm=)f)ol#1Zfjni;wp~61B$1V%6PDpM&Rh%sgSlzmj z)nvbpn>&44INv$JWo|WwWNrtG<&wh~G}pQ1gqyVZr;ll}uS!LcQ+^SahY5}enb|=- zJ6R6tdEWd)UOHb+7&6H|BPj>s^u(BDXrfOKPYTLW*C*@MzFEd>*Zgwcb88ukCx2+d zm^rICCV1pJ*d>=oLAf+{n`7pFOTU&eM`hR!%keQPyN6L}6v{T-IkwkI91e%fpnEIWD_JLJq6}y)07{n%3OxDN33>6a4aM!Y4Cr9m)5|!9Gs9%j?e{F=R#MF6P5p zq98CaYYk6G7KDqkRyhh41d3{E8XS6;Bd>i}hLSpg4_YeHaXmQA7nGGGDysTxQ&@m-rj#xcg zq;=uRmXUm?B`xQqs^JO4MRwgk z!Kcf0^W@RIlpdNCHzJc_y0gSkf;Yw9TGrV_INJ+9>lJ6_P9D`liYe*)J1v zFdNQG6bGiq3PO5XlZ!TA{=m$!9qe0M<6HMS@_llJ+cuUv3_7*y^7@F;mYkS0mwPo= zeurIa8PWXKwqT)0Pv?gUM~sAC+al*GIR@1F<*X+o!YY^E<~})ShbN}>?72y^b)?Xm zvGsZ<#kHQEsFpS-W&Fv3L=G|$EvEP71&ooYqjHrHE%q9jnJ#N~W`AK;wsY3-6o&GB z1p|g7KUo&qENJn|Sa;^TW#c+!bZ5<8=_M=shtnL?J$lp}HPmlXau`$p_3Hz&cKZuF zvQYA64a>nsrXts$vO+YUWlZ+^tUUOS$)Vh1wa@C1Q-`}KW{yvf8lEXWeXP)9MDs!g z0gD>yj{I0rN~SA&T7J@ytC0zre5V|70v0*$Oo*EORq-vtvfqTu<#Z)6CC*!brtg1s$Xk}nCnKhx(fTGVRgJU0`fS(nI9`i=}y@-ac{g zgk10>&FXj~58gw$>o23JB_dbVQ91Rd^%mb2DY+`j*Ao*4G`n1L z`SXSgLvn@N)6>(`*eLhKnxh^{3`^JQK}ATu21^EH#=SC6L9H)eJwOY{ruE5}95yEm z=T787ks;4)7=}lBmj@z7piu7nC0CSALk{9{s_&QId*x@3{O6E|6#dd7BgYvz-Ag(s zE$n%Qku5Ofs314Tl4?(ZJa{sO^^mma(^}<6IY1b4>XdtJsb?AFQ__q?o;*!pxTK(b3V+&>)K;H9nQ9X$%Rh);_tkH;gfH zf05s+)|y&xer+UgRIU;RbDJw6>7dM`RW4-xvIbQ3sQf1n!)l~6Ikw4+q@+7_SXL`i zExK2;T8r)S%YZyQGbg1YpqlvdhSfK(;gxTrawarP{O6<=%`{81n<9ssO!6G2$w{{z z0k5HL^6pfXZ?y>ZMMuKd(Q*1oy6Y(9>%^0!VVEvH4o)maUL#i1cb)(rLl>XO{#oBe zKgB$K33Batv=8>e2ht(>9J>255G&>D%bw z6`1~(Cm;{e%jlpTx#>iG^G`uuz7$?U*U}HsS^5v!x(xM)x1)UteLr1Fr|EL~%u`W6M&C_W_o2U!=~{Z` z64W=*i|Kay91qbP|yWwBx=6m4EGthpJK7%gxBEN?&qocH+E;tkIL-gVF(7kAXEgiiN{(w&2 z5AWQG_TB+_30<@jzMmd?2u{!?55uKrp?!rPK7g*KZ>8h(pLF-5sNec*w2#mSi`DU? 
z;W6YXI=&k2Jcsp9!;jFV&%j@ZRr^l*;x5#Co<;7X`)Tc5>D<5?R6XH>o)OHr^@Z7@%c4f54Ujs52iyS7+*yQZ$UEo*Zh?F05xR8)`s=bFn!s5$RqTW`{5D#7CK7*Bv#|g^aG}U+5jB*5&nQ~8-p);0D1Ul_%%90Z}}kd z5&9z9`U~nmrOW8uSE9a>?xd@JMg5mzwSHPNaJ~=q_F~N>zc#hdRW|qrx?>A?fF3;n zevWo5f@5OUf3aK#ss2hIfv|3a6`aY^OpKZ5yf zljA_Hy;x1JWDtItdFPYxcg!Png&*zPgUB6pHVUs1tNxnag{vNg2k6#ur;slbtL9^@B`WsrF=E%+eUXP)D4eh4e=~c8mzmcndjqap>qIdj{dL7+Ce@FMwSz>6)!kUowpP0s2CEkbZ}b&^y0``Vo4Nj?!Dd zj66>J=>)y|E69`dtF(0+%#Zv97sp@vPr8)8<5lEk^meadJeAYuE1o9HP>Jzw>euNIu zb3R4hej@r?O?T6aKSSO_7bM|cdWasN?VlqL(lft+D^EiIuhO+A!++5|boH00PoIMP z5ZdT~&!&gzrF6~d$nU28^lP;54CFSs&Qj}dkZz`<^ci%FcGHb#qWyDp3vKxd{k78v z&<^?pdW621F6~5rFVhut!PnT{4Rkf#NS{cz(KpbYbcjx!gZ@9EOd^E*o4&%ED_{=cTZ^t|uUet>q;BlIV9jNVqR8z;&9S#Lpqr_(j`3v@d@O|C0d zeJ6b?Jw!iDk9pAFm{`q!YM2}eFL4MU!^m2-p^>?=0p3v=pOoRx|dGUKDzN2 zwD;44bby}yEAkk9zgVp=?;xfZqMO&ipR#^{UXVfiIQ<}9`6TKKenWo^Z^Qf3zIWjJ z>Darl@jL2E-h)r2+uny?7OU;;`vA850hfLVyXe-h;9u$FH}El8)Ehs*kJ7%M;Yok8 zzu(|1Xn9F!ZhTDm3wfoqN9FbGWxgEJ~~DF>4j$0N9lLz6kTuVx$zU4g8r7$ zWAv-EZ94KeJz4?hSBDFreZN@sS9S#2zso#AFD^j)1U;(|ZaxzAKhkwa!B=;P#ZrP@E@^gHw@-8>1o=~%RXjc%vUp3Lddg~f0u{UF^%A3O!Q zhyGKn)`y?oekyY7LQKz1dt2ZT9i_jajYY^SrlEZ`y@YO}H@6{gIv(|F=y)5v({$vC zli^$G+9hy}JdRfDtKl^GVLIIbA0&^fRUSP9{+dp9!p;)pb!Wq0iPiEqp9Al(IowNk zidBD}F60j|A3Yy_l@49R{%G5E@Wd_9U(*e672SOkd^nxD1HN!PzaPF`tfpW20Gwf7 z{2;uQJP)AezxDr&#qr_yhbL^OB$8pXvUoM(%h$bt|+_PJ@pYtM*y? z61vrf{9Zape?U8CAm6GC?aMcZ&!vZF!Xdh7YgpeJ^`qOsi^n}1zJpGc!+(j@^2g`E zv$sL}#CGrjbV~(%3Oz#i(d|1Re~fP35&m*Krzgnce6>D2^uBbu67?S1v=jUTT}B^Q zj`o#wn69F~qa(CUo+nV#>)09n?Jri#YpsHvbYLF*6+KKJI0yA9`U$#x7u0XN9r6nL zXxdbbd<9*#EBmJtyTer#Xy3ONd_Fx^3*SQ<`@pZ$wte9V+oQdM-j}Z55BUtSJ?Ys6z{bORazO41|*K{*IXGi2-`ZT(70qUQoee`eiFnwSprdM(x>faEn=~W#B zYda&aJp`_xcj{RJJOt7=dmqkVLo z-fj=%3Hm`gNmuTPyr>=1f04GH4Nut%>!+WdPnUKfKS`|i|C-C;o0wPM0{fUdqOe|z z`b-?2L)Ux`??>0t?X-{X%h_bg&^zs|`umqH`>=oJ579MWp#R1DBJcegKA;XB_y&HP z_Km`o`yp@n7Ct#=lc_NcU%Nl@aj!lA&Ws^{m#*H)k~^Pe=^A?Ldeqm_2hnx(S#&*p zBi%r+rd#Rv=s2w{K>w{fWBRk{mbvg4-CYG=aUkmBb@0=4TRj}7YYv7pwEu9p{2;Us z(2aDEK93%z@1P^}5M6u(`um7(rl&NZ{{ebmIz*pL8%=0`E8RnfXgmFfSndBM$D>|9 z7`C4c7trZT;AwQ(GHebgU0Ju!H3fgPrxm7R}em# zF8K$1I-PtT?xJHa!k5sdS78V3TMu7Hm%IgkPDj3l=N^Lf-TO1VP^{LMw=gfaywA{$ zW$>?b|7>{Pp=e(*A3ja2+K=oHKgPWDK)9e0^&|AjVpZRB5b{gu+6H(9ojwW<($=Hl zb+o-1HXnxm<8%X^rq_to^uouY{%hvl3*ko&NBdL@e1$yEu8x1hi{OXp3>~AZjzexV zp}vEjOZR#6bL+EPtfn8i51x7?9DfaNru)~!*NfHkjS+a^QOfeV1A2MR_J8^dvFhLT zChGO0(Y`baAJ&Za#T($xV~~&gzGKmT=pE#1#A^CYAHlycZ~Fv3Wg&9Um+&gO@hiBr z1$o0aaNZ*HU-K7yuvqn9YqjRq&y94l06z6Nkm*}A)`0C@4J0`)0w6cCO9HAY> zux~N)=oGl^1h{`1y!DB2gAKlm?w<}H+=hI32E0nF)_1KFo^}$PxE0=Ctd_6mPWV~o zelMJVGIHB$*h#ydfG3@Tyln_xPWL_sAKQ+6=pXR=bYlo!aVn?(0=#$$99#>3L^lk> z?$eNWzXb2xf#qp<1>WIwxRIW72HZq%eT_H7&BH|VlByv4c52Y-VPqpjwG-1xhVZk`NB>7gy*vh&d1 zm<{hwJImqx^O48tdU|LM@-Q8&fN$(ZeRzAg-~zaQNBAYWxe~6u5V?D2*mM!xF&B=} z-g&UM2YK;)xcOqZsTy7@R^zw30p9KsxS|a{L9E8_(8aKm4$;rh!}JeygkF3p+KC5OMI!Kq%-_sRz>E-BefIgTm zyA;#wrOWA6bPfF}UDu2H<}0v%W0%1f(ODlH7pwJM^Dw-(9k#83FQh$B!eM%p{*~^0 z3i;+&qJ5WMm>b{U(nV(2e-(53FR>aAMN^PJd^OxY4L;TZ2W_x^4d-_TybJB5Po@*} zi?po-^)r{E{lMn%0=i>Y_)M{ypOO}M?lL%Z1-w|S)|br=`U6|h%6l~`uO^r=`aU&#je4L6*oSKWg8?7OVL z6|RvFP*v^!xDD?A9@cJ$T|d!xz}@oMxT?PJPV`^;JG@@3)c=J zQ@|f(sr(9-$ARGxXKtZb!ZKVbu36f#=hy z4)}Pw_e|JL57V|s(7x&{pk3#|ztY9saMz=#AG!b@rA-&X4?TuF zMcY=v=}VAbMhAQ0U+A*S;nM@C@4Ev2nC`g}K6*9s@~hz&Y1=h$!{f+%moleQ%isl1 zAa7m{&lrSV*TS#SWlngfHONgjz$@q~H$3Y}fUUx6NlE_@cb>wfrMIynH>46*+QVLx5s zgXcVl-0=|Xp~DZulm3Cc$Pf3>O^?C{Jdb?vG5A3`8i3{AM!DmG?FraU*ABve)9q{E z?iWz+dlLSJE)K%Su0>w)Ec^l8Gz1^@BJ$vK@YD3*Kj7Vlk;k8hAEsk#;muz{9v+74 z=*Cyz6?Da`@QyE|zLtKGZllk91$h@;5{A3!4PteC9in%C6;8c|{&#;39v%1VaGKsH 
z0@uBc`eWC@U3BYuxSKxv4Y-Hy8i9u+Xy5fF93J;uaD=|-ZP>I9^*2Ug8-2?LxP)H$ z4qQq<^e!BwKcp+xqrWLJC-=k9rT$Axb|&K?{3;fzey+Q(>_6cD2n5zBoMwJIDPq?4mdP0uR&Q{tCzFpE7Wqw)_q!==?um&xe@aj#)VQ z5&b8e8uwrDD80+yaM{PGKSW+9qxRQox=CIqle<2nPuAgDx?Ns3qw+M}PuC~VpL$)5 z%02Ws2HgD#@_Y*%NWvTF^yl#JdC2R(g#XBg8|bVRZlsMu*gEi(oIk`$Twv z-g6S%mcsN-oDBQFg4>GW0DaOFc#uAQDx9LPng(a-6$Ej;O591{)D^eG5QYL`xo+0=qx>L3$!o& z8+kW9WEADj{|ja!AJ_@LnT}S$V{~&3JgpS=l#W2eD&vyewm zhdnvV^Tp@E?=c^D^;W80UO#X?@{4k||I<<0eF5^D%Fuu6LfE=B9Jm;MJZI_e68Jmj z<34a3Y_B>y@-yj{+u)zX>hXm0cDQ^toS=^ptMSpZ8u|0|z!UJx<*2V-1Mjdc+ta;t z<&((YqzC9N=Ai%jr;xu!w*}$3+o8Xf=iuwbYJU9mKJq#+HUC3vkv~I+UV?w5n_q@^ z*dFyMdJ!Fb1$m!X_1_nU=j{NyUxT;W5l+(+E8)i1k?T9bEfLtdGu*xo&eFbd&qbc0 zN2}o28>oMO9$Y;Fhj)Qp^mFs!{x^}oSPlE$f}h$I9v$~?aP8a3SMHAWQMLizx(4o# z!TZt`AHZkPmGqr-9bLHx+Sh!B`lspSm+;&@k;lG)U!sHG!F%q7yy*w{7TW$JoL7t7 z{S$mK?a#ok)2ZL!;=NH{^#{C=PW}xC=t0xO-0@@1KB)KV@D+5R0N!O^hv~jz z_@p}I)zjc_Xul0!xgYX|68O;lVdoa`_jGC&eAfZU%eID3s)s%0@JiZN0cYsi9pKag z)R*iCZ*~yu-w8gX0j{fpuR0hmpAWA+1dde0yB`Y2cY{x*t$V_5I=eso4c)&0E^b77 zFTE>0K)2ID`c2w>Algrm*HNndy}SXgqLcLTIm_Qc4?(V8cd6>f{T@BaeD2|#pF>f< zkgjipucYJjr*y;NtUm(n?MJ|O(5WNg6rDZ_zP}0e#m(?#N5bXD!0YJ9LU_Ks&Qs05 zu?W6EoIC!V2tPqLcEI1VzTq->ro0YRwI6&MK7=lR20oXL()ZB=>yUpTR@3WT5APe^m3z3h~XVP6!ZIK9XXjS5_f3N@qSJJc_D)Cs8*Q2m@3R>Fh3FvNye;zCCmN|IU*VA3}d2PrY^qiB>Uq9^>tK}=+5$&I6-av1CGVF5dY zUvyhLyySGWPoD~h=*A_mNnV$%)^8KtOglS}-$1+Q*XaSe@=Uakb)o(py6HUl&Q7e) zwr+T>SWVA%A$;apaCSL-Kizf<{4pJW8lHYO>O+$!=k|}&=(-v317fxPMz@0}orC(S z?cpA=%I!PAYneCh4F66y(fi2jtkwLw>811_{W=|{i_b-UoNlBu^o_J>E~fXEIJZC0 zKQeEwLSAtm`s<+2prdqvPSQWo_4CkvxAW27Pj}Hn^lCan|45s5LHqh{%%62Wd^H`W zAEs+}LvFqR^(8fMGaaLErR(-Y{v7Su3!Zl&+Sl&QoQ~}S?|BjO`Z{qO*t)6FNr zC-)*BT>|fT8TvOp313Us48g1DuIFIu<*4`l1AdeqdlBCD3gn)b;0Ni(Fg(kSyz5oi zMc1r@CtQiVcs;zDu6Yw)d=>I&6t-Rsm%j_2EmrHJCIDvYoI4)9LN^z{=iGoiFa=&i58B|*=#uI1HaDW)T>>9Xmv0GQHJ;CcU#8<* z!N1T$v*A5moWBbAX0bZnl2tox6niMm2}(r z$X}pi^q+K`KHiP~QuHdZnx3^A?a#giPFx00yA`gz0=|cKTm@I(hW=_D@SU`6DZJRj zd>M1vbR9hJcI37d@OyN}J#fJtn10}O_;R}TP547`?)>r=Y`PO3NWhE4YWW;Vc!)0j z4$kXGeIyM(MJIlMx4jE_{3kd|r^n#6cOx(T74EqQPGsPEFP#1jzJad&13u_pN6Q<4g3b(SkCqX=&!p1?xL;R z!!OcJRq%`lQ19OjK1{5p-?10mO?wZ5U!WTghrg#gTj8xAMEkK7a2Fkqz}L`a>)=&% z7rlY*r)RB1f1~SB{{@|S1D@kU-ts2Al#Zw2L9rSyP3Eb&^*ie!c(?>!CRWQ^u@Ah4 zdDFh|goly0?FY}Mqx-|nbo>bT0kN81>LhrlM_|(_@Jg|oUg8`$OqX`SpVO^N;n{w) zFLl9PV%0wSKJ!Q6u211_=*G|By&pq9OrK8ICXtVc)%+HJ4lh`R`s6q8<@DGn{DxT7 zyCzM`tPL2k@1P^Q!6S5WCtSH2?T5~SPZq1?sXZHhnGT-|pY=HM_<8V~tEe8aQI>#u_k9D*yahtu@%4Y2PyNSPP7J|S^jH)=pKjd%d+FgZ_*1bOzlp7Fx$!&m6}V$N z_*}6Xk8L}`Pt&8cC5*g!F7iFas=r7p+(X+=fS;i|PlUf_`-BrNc@^!gH^3K))%0yQ z!dtxt*Sg^2=n;C1PTqw4(AQC)Tmjo6@TePZrbljppQqEe!h`EjUwk_}aXnml2mB%3 zcPG5$4dg9exO{}m^8mb5td?hV5bkH*um=8vj%|RqeG~1w-+`BlRr}@-;jfry=(TSl z@B4`T(d{3@m%fd><`dXO57H0Om7gMCPy6U!#kuX9M1FJ>{f&JApH5f(35&hz2p-$T8-2fj|Ma_hzLo6PGjg(tj^JVOtN)$$BpgZv9Rz7*c}1LQ^@ zd@$Weub>0Vktb-slkMYZpS}TZrYmlQucX`QU+EtDoDVsF^cVCPz4jyI(VNiU4Ijg; zE8x8naF-i?K&;kJ*RAl(Pv8LEM*D6*!&}$pqJ94*CWsW3AytIcr{&jBRqEudHa3v5MA~Vy#LRbe&i9@C05gS z`r&0^IK-oF~T=?}Q!arhd#We}d5 zMQ*HtKc;>3@;{L`KZ$()U$E&Z_(R%HFZ&yLOAz_iCL?#es(J=Ki>{#`r|an7=?1z^ zL;FU0IbHEA`jZd6$n{@I|4G--JLt%3>4kJ1{V*LELVusogLH}6!2U$bhlb=_{}0qJ zGSGj5zMJlP9{JmJFP)|B^tKkXchG0iz7X2qPW$PX#A-YR=*4+((OT3e>Cso<75T_r zVR)t$E{?!=({=0MZ3~chj=&#?)%hl{0j@4Y-t`W=L9FtUcj4V8z%JTFTVu$-rR{V> z5$Z?iyXn&RQ2z_vL03&geJ6c}SgnsPIz>B1(SGG5@8t`Mew0=-wxHEbt3#L9i&%ofxKxF^5f*b9;!Y= z|3P~vBfqm0d2KOVz9n2e1O7s+#zThQc@`Ymoc+@y^ir{!UX)(273!T^p#FNgdM5ld zT|<9P*U~$ep?#7*o=(wy^e7#pjV;l?x{ryPe=A)_7twumm>#4fbb=nC%eO&)rrGGf zkv8aV+DZ@6iE`AZ=p=2Mjs8;fB6@5N>aU_R^doeZ{*@ly9`)OmqrWh{kdDv;bZQ6G 
zZ=gr%ylv4wO&>}Z?}Ykp+D6|+m(VG?YA)(Gn}hzU=>x@TeALh%(?eCLzf|rMqw>~y zaGBf}CO3ZR7ijM;$dB6|xn~bJOIPm+uigREkJEqBzP*v}v?Fpqy_gQr_tEY9p#F8b zgC3(h=>?VOuX8`tUqW}$tLSc8-Iqp<&#nVezboBMpF#J~&(gIGs87>%^tN(e8r5Gt z-90$)xVdOQdKl_g($z=6F}j8>s6zW%`dP7>zP$a>DSA0QO20^_=`E^JKXNSE zpHEjVgdd};=&$H%x^`EzAEs}jqjZ>#(SOr%x^XwOuU>@dT}Ic?kI}WXF8BRW>!-gJ z^@q^z#qfo+hrVyTp8kgRpNRUB8uS;S7l_sP7^E}Io7zx+z#gctISD?GuBGp#>*z1& zwo_3*c~7)&r)%gA`dT`)1odm_VS0w#=SVGoR|oRL>2CT~diV^sr^EDjbcEhZ?vteY zt2ztyopd#Q7hOYtIi8<``ssUfeb5Kcwe;O|)48bskZz_+_rdyZp~GS|KV$T?ed%tr z-=EIV=h9hvE$z4v^}o`6bY&g-bJ7>lu8UCrAnm3%&>ni`erVs`gZc$@2YrrMjh{|B zLyugH`b+mmK6D9eJpldHT?QXL?#tmD=qCC(x|tpotLe4SSJk7x?3HN$3hlTGPSbt# z<_ow!=(Fh7t5N>|-9~>%x6?Zui1sxO)E`gR(oVXL{*cbnv*f;2YI_%5gZ4+$#dM!o zEuW3HHK0Da6!oXlt$px=bcX(b&eFvPqrGW4>QATZuY(__1N8UwAYFL~+6U=w+IBtK zd+8E7LYLB89g6np8&H2NJw{(IR?C~As~b^2cq8gBAGZtk(?R-udWbGRjPrXF>QA7< zw3Cj|&(I^ZemL5X+>G{n(g}JAouqFTtNBmS)knat6=?siSZ(iWH@syNeG7aLT|=Ks z*V3=h1N4j|(Z1+bwC|w1=^#Bsn~y@haU1HJ=tlYqx`}>_Zl*J|^>(zMb2O(`#7!cgQnK6ryup#(*1Oh_R>?1MZJ$+ zLI>zUI(QfQGZv!0;y!pcx{~gotLVq*f%{P(qkXjAg7M*}m($^gP(Ni6^07zZC+Pmi z;O66y*RO(qp-1UEjz``XK)z2aT)i58k#-Eihb~5*UITwed!B?>o`AgZDR|L|aE2~w z!}=KrB0rWko`D^-m42KqqK%W#zHSKhd(!oE7u`U=L^shhPDc9{x{Z$0e!A}k^#4BX zq^+l*y^B7YHeN*iRkW2}O&8IocC_yqM*V!cmtI2K=_hGF{Wl$>4?PwA884wf7hOxg zOE=P{C8%$rx2K!wE_(Q7^!E(ydIe6=ZhFRP=+8r+K-*tM{k61%ep0NCe|_}g4!Ha^ z)a$3Czc76SoqQd+oleoK=~4POx_%w%w><;>HPAZz136Y}A*%4L8%}^m4j_{*Z2o zqJH{0Xx~a7Mz_)T(oXtodhlJe-?aY}ie`Um`com-R27etgo`@K$sOz2CUML4E?A z{sF#~9?37sJ)idi-D8DK-RQqzGQ1!C$yE3hv6`Q%Y48=y_oJVs?bDG*=NEM%6KV|=i{xjbJIVFE-8Qmw4I)InY1sE`w!B`(AGlKFB7Z&TIeC#NB=;F>HRK8 zeaQs0zkv48_tRCrdRUb@1LJVn1qS4~9w&*&!lkSno#9{P1ULFZqE z{!Ek5evw$Mj|TdD+O;L}Tj?M@Oeg5VtJ!`Q>JOwl=vV0gJ;Qx`zIYw$qi@ zpgu*PNn6X%{s+2=-h3&RFG#PT%eF@SKFiQw%QkQa?WG^2WArFpJR9|M`_R9KUPPP9 zkzY)g(FwYPE?kcGBlNj+$+l=8r|tAb*P=d5-%h9KXT)mzS?8erXSA;Zwq1wZy92x@ zZQT)GL^si^=|1`$ac=*jcX7fgdNEy6iT|CBZ{+vjss^D|!#B#XPh4vNK!k5sl`{6I>G=0oXs80_dzlyGS0Dg{cqc6J|^IP>W za@z{D@1Uo-;Q&4L7C1vMpsOB1`}$jv_s|#80s4a5*q-k8z;%AKKmT^Pk3R1Xc!Yk4 zwmypbhwen)H12-bLw`nx9z*@eUC6Dg;OFkf@;A|6(;>R#9^}OV)ITfEjaT}2x{IFg z<@D)u=n(xNU9uYeeL(x@Q|?9kjwd)hx@HYrcpv7+L(ipy^igz-zLTza67651?ew4H z_4Ky)b38nS`X;)MUM^P4pP)?xs1H4h`W@+#A^2Fji@u5u(`)Fm=TM)Zd+7WJ(EkX% z3tjOK)E`IN>E(2cewwa&9`&Elee~o9(SL&8hpq{sehKZO@1Rrkdb;if)axtJpNFoZ z({vl%uom@q(q8&aIzt!w&|lMwsIRAe^d+=u82Q6=3%!93(Aq=j&-xPT=hAKTLOMv_ zKo`G^`lsm*`YSp_&v+R9m%M`dX1a^MiVo9H(q&=PC+Qw~;v?v9gx-s;cop@h(02L` zI!3=nSG|V%zvw=C7eD$>&}Y*%ucQ8U+C{%Xr|2wQ7eW2@kD@;h-At$H%jkx6sDFg^ z(lI(iPkaphHLXYe{-|+)KC6uhIef7uq_4`W;uH|2DdX4$@2M;x|zrq&w&Y z9ik@&&|k@0sIQ^B=ngteub|7`M*R@oL#OExdb`!=zaonIqiH*RB^{%m6i+IY8^xKn z&7E&QqOJ6IbRVrfj(Qh8M?9-gp3kIPX)o<&eR^fZM*Ua$;OCi-(Bsoj(_g84g1leh zA=DdBz&?6sKbkMi^I?aV#&c>4f-?|2?U{p+-s z{*VsQ}|^iG4AzXtj+y6x4C=eLJm$h_|Ljq?`z66V$j^4sV#dVKyW z=r5SpY?3!^;vRa8^{#d3|99F;kN5ARw_Ahd3(^PDp-t*zn>Z8Oc=;;cgO{*<13f;y zrcK;I-z@dBCKa2~v@Rom@+A3t4qdD)&zBYNf&0VP&{cbybL00kdgK83e!8v^9-)W! 
zg@323_lIrLx9Wd*CwLFqIt{)|S?1?6&X0$A`c!;=-dl9XOw`NA>*f0ET!7C5*hUse z^&guDH_!w8yogKbG1klNPjmHcyP{rhf0uJHKX1Y?!;O0*m)XwcfnDG;={Dwf)7|?Z z578m!AJe@DBLADN*dN|Xo-0%HADj>GPe-^si|H^ePsily9rOx%ko^tQDfXA416)4& zSdLu#YUT~Jl|F@b(wEb1^zC%RLD)Wj(d`GqTjZgCpA&AP+n2%D(}8Q?m+3I`Kk25M zkfI-!erLLM3)CM=cTR_Iq=!r37w7=} z7u{h)UMknRYI|j_MF0EJb@aJ(8<)>XCvHQ1gdV&FE|PPR>TlpYcp>fSg6|y9&xik{ zll=UX3OQG(_VGh7evYC0=!@t`Bl0_F2gl?0biE7&wLayxX&4^|!DrGVoZfOeLa(G7 z*j{eCm22P4<@+^fyQ$`J%>VXs?wPE~=T*>h+nbz2a$i|B{VQ`enS5M-_s~5TL*M`-cUxxZ)=(=;@Ub>U})4g;r=WnwT^w(8~`s?X7)|)p+?&{ET>!+4>)AuUNJ{IQj z`+4Sxb5WnB1FYY13-mX(0r_F{fP8MX8eg~4PS%H%rT+l?|CD(f`?t(Qe`!0~d+6cs z;3S>B5I(RJ^{qFMhtFraW`sK!x}Ye(5S%;8YO5}vl{SwPrc`M z-+TMso*sU7{hsgR$qjkCs!mm%sycP*)Tt^h|92FxUB~bfinn})xc7Y0+oti~qWBKQ zKU2I%>kqTN$^TXjzg+RY*O1;D72l@eA6C3*7sG$7_;wBdv*H8Vzb~Fg`iq{=_^TAR z%p`t`;`DQgw=3QarkvE$((+TqvlO5964Ki`gW-!6AALIUh~gvP=lr@^@l2K1_b9%7 z1?Bf|6yMZCeCCCuKTqRdsrat_q?c2CK*Rq|abzLW-*3m)_pJV?c+34vf9^|3zj8I> zzeaIR^S@c~g1HRep}1=n@gs`2zJU1L`Jgug{YUp(FBc5P-TEAc->7)T%ZafJHsO0; zNIdHz;!S4|XBFS1?|7Yoe%i%K|M|pMDxUWu;@2zQZV}(6`0z`Jf2erIg~X@4 zOv|I;mnojBc(dYDwfx^uyk|DkKYIbwA3lq?Q}O0YiQlLA_&LOT6sIpHe&Hodf0vf0 zsCfT;hJRV{VXfcK6d!#t!=H62(=U7m@e;*{=MfhapP})$DxP%-!%x`pwSH&6ob*m8 zUZZ%U-9Hr1{0sZj*X{5Y(tA|#7Jc8Xqm}fwX!siy-}eC1e^&7wzb1ZA@u5S+=UGgD z-;arxD_;8x;u{t3eU$jCif8|X_*aUzA0=KKBE8#B5N}dE@q6N**mx%Eb3vH#w`>00 zigQ0>{G#GDetpOWGQe`NT*iq|MUp}0ls)7r-L^EJOA z#WS=%A69%!%ePPQsVX0iE1su#PCMxxx{UQ*s(4!m@rdHt8vpMUAJF(a6t`;l&lDeg z1=ByTgY=I@iDQb>iYFA$X=nItb~w)Ui~RkD;=PI=R=nUl41Wm>8fN~&Iedwq6nqNS z7q-1l@j9!ync3`+vmri#kc~)&sV;>T74dg6VfF z4k=FK+^m!@uXr`i!3uu2;uDV%zw|Pu-~3GKo5PBiKb!dTif=fX_^{%wn*I|Dnf?r% za~1kudL{AE=P>^76z`r%yy#U7UygIL5`ReXR-CgHe3Rns8vc33J&N}$j?W^!d6$!3 zkK(r~K6ILvS8*EWe1-mLix_{c;)vo+D>xoviudaJ_8P_W)1>!$#Sz6HReW$i!~b1z ztA_tZ@x*^J{P~NS|KWYamnok0GUh+2_^ykIcPsAF@TE(bey-yC6tC6zk10NU5!1hF zDdTTnKs=%NHpM#>&sF?8#b+oEbuoQZ>$6(%@&{P{>lM%3!uy&%ZOyd2DD;nO?P5PS@zfAFMN^d~%UggIo#XGe;pH)0Z z(|=F#Lgm+Ut|a}r8oxvF85)03@jk8p&5C=T&GLRp@qCT{km3OiKl3Wm&uREF#q&>) z--C*`EB!YszE9idvo_Z7|5Ut9%RjS+^iS3N!isw|e2d~OzhizsQ@l<4=cOx{{#K=b zh2pJ>*V1PQ`oAA%0Ntrt^qT ziyQt{RoG5on#lYZBQ#8)cbr}Y_EJVWDu zOz|1`By4lsJN(j|8!JZnZ&yk?^C=_ zar%V}Kdku3?X+K>{aTi1E#487`dErLDUK<==^pB*B|H9g#2;0B-=~Pbq4?;%#J^X( zOT(Z4I_7u0!tk)*)9@~)+AF&?e5>wfr8WGphX1{W&)dZE-l=%On~A@pc)o`JQq%uT z(?6!+xgCsuL4xJktT?84`|H@Bt`~d?&eQ37=`$K$d=0}NRJ>_5@p)@W@6a0J*C{^! 
zH^jFpUaj=+vcq-1>@SMfUdH!guS}9&Jj?x?cPrkXAwF}E;a%4dcPrj;9q~sM52T6D zNilv!_k%Ku@2W8TCdIiF@s|{D93Xyvn(2?MB917Y_%84B+@N^NANBol#jSrJKCF24 zG2*cd=}jCZ{zT@dL**}LE|J0L-ONzJ6Aii7iLCxSgUgCSQjKAoc#E&T6_A}ze zqYOX(0peQ}FZvMi&lDf}0`Yn47{BY|#7V^m^#0y|P<+d;82%f@tq%~d%rSlZLE^g< z@7D0M#>y+z=kG2u_D!$`5?=$;b#S`-xzC-cmdr9xliql^pzPvMFrhgja*ZG43Eexi8KV+?;mh4D9yF@3M%mfth{or<@=h~@p7 z;+dkh zXRv(x6z@Kbc*X{%-+eOay-M-&PUe?aeB{4L@6(DG&Sd&SiYFdr_=OXs*Y#|MU#mDh zi}c^4_|Qe9e?;+ZdS7VkB-6()Vf?o$KJgRM|5wGw&Sd((DBgS~^Xq**)6dfW^FGBR zdf(%2#g$V?|4(-O5b3Rc1Jf^>ApVTvqpgg;U?ao#Kg9CfZQ~O2Kk1DcuJ@B}R6O$} zroTt=U7CN(n;8G-!%RP{c=K7rpHiGVi|HR!Tr9GD%ihfNd$3U~^ZUmY&+KRT$(tD7 zqxmgVy!SNHD=EJ3Zf#%1H!L8%XI#(pTV@l#TJfA06MsnY0j2+4#kYQp;d9@@^j#gq zcPZXPhL-lF(~;&~6SJgpOch;~!P~=`A+?A?L4uQhY-39>vA)bG|zN?M%OOKXF2F&pzT$C|>k3%Ilxl zxS!=Y?Hx>i913~^GC#c70>%M@uGJ!{hZ$t|3dM`UlCt@6T=rgK)g}$ zZcYF6n;Cvk)2~&0^yiHKk2ZdU_%DiQXnB^ti|KDW#PD}0K5>-zhl(rzMSRWQG5)rn z5PwAR5zTL>9sW~>|48u}st=re3)2^0M1EeRxaVBr6^hr+A--Pm8TuZ=ZHl*?$?$(w zyjUeXGrv9WB+e<`@;}s1wkp2q3iiMI6rcJE;zty(&Jv&V9@5+UbmFfo zo_9I%F~yM;#65q{_}ga@zd`XW9mIDiKD31R0Xu#-@o8I_e(P(AS1G>z9O8E&r>F;VKPAER~G~!zor@u`8gx<&aT^Er*?@@fme^Y<`o#N=HIlj(&KjR<# z6meeh)_)@Yq~ho&h`*)y_z#Fr-pcfwKTh1Oxb-&T|5SWH=^sQrx5bdcWfN z8vc+S|F`7dRkt$zjNcL8r1;Ro#J^Cy=ONCS& zT$b-{#cLK5|3dMMS;SXnQke3#<++lUV-KBGuH`!>>B^A6%(#l>e3e?swA zeV^h7ig%yR@Xk*%{i4~#1;uAvP5cELzkvAcPceQM9QRIYX=z!ac<%Fw|3UGl&BPBV zo)aZL;~$uQ>xYR~C~o;1;u{qodm8a}#hY&+{)yrPTAuFPNiT8^!*5Vr=_UT6;;uQw ze^z|z+lXKGX{MhWAs$w|=o`czQ+)d+#NSgqQ`_T(pJDn%+CMrJZ~7MFze(|-lZoH2 zc+E+~cPc)i;XhS;$4rKw`B~DR|8(LtijOM(isHG7pM3}8uT}c16(8A7`SNzfi#|{M zImJ8gB7W9CGJWe8h%Z%q+cx5Y;sdu5Z&w`ML;Q&1MKd}6-|$bQx1)pa5B^Z`sjnb@ z-sc#;D0YhJPu+@dwTMR*AL}9Bqc}2$_&M8|zE~uFz2Zaq{^xGR(S^+K_lg&F5zoDo z>2KHao*e7EAQW5n~m%J7y?6R%QyOvm$w6rZ?(?eRy&k>B$@^)+8(`h!P_ zzos~+;j{l0;iurdg`QWtT<}RPEgM5@&kre{c{%Z^_c46$63U}16fgJy-*^1D9sV2Q zA1K~^Gx4&oGyTXO;*E-T-AlYn@y=PS?;jNJ*Y_b8{2SBHKf?HfijTA~{s$D_@N0(e zRlM)-iD&*h(;q&E@fR!JzMJ8biZ`9a@H-Xndzj%5DW3ad;`4WtUdua(3yRl%m3W8Z z8NVQ&@eRg5`V!KAmEyy1WBA(?@4~?aG(*eR6i3fu{6`fZzks;&o20k;e#ReHeEcuO zcPbuuG2{P6@je`U5c;qE7SkWRlkq>K`1b9@KUCcFE8;WvF#dfPGW{yWYqUI{RNV6@ zl}u+ew*<(?qzsI@vgZH-=TQp7;4M&m0!yn{IW zea7GaTjDK>XZ?)$=ZbGRM!ft7jGumt_zuMrw-7(7c){0*d-pT`*1L(fDW3Qb;$w;j zzDsJKU3->P_Z7vn$cCk$`7n*HHM z#dD&>zg67xR?-{zDdW$0CGmR{FaHGTzxE)*=X{d*VZ}2Qzxihjk9?5fr~jOIVmbTo z#}uzt{71zDCo{kI|AO&Hox^DCB3gGK71qb3m-%{_`6^E8xsuvPLx^y9g6o3 z62I)144-`i@pX!4eu?yk{)^$!JBZgDCcfiy#9vUn|0~2@4>7#utHeK5+_jnIU+^oX zS0cVw@vcq8OMcDpc|TzKisII7v|s!}@$6S{{XTMp@fURv|5))t#b^A6;RibzUQ)bv zA@PHXXS5Ta^IOJWgM(-?{#Oe|e{9wMc)f<_)-n8k#j8g({liRu{9VK)#rxh#{BMeH zNf9qS%J`MV#6`tNFDL%6;+dBcKd5-_0^&0sVfxuGCtfUg2JXwA&->WcYWO03kM(-R zJ5Oi)k0`!D@fQ^zQ@mI4?Y}1fA5wgu;-~$N`EQTWJ`_@Xe7V}w6!$D6{*L0AieK`3 zra!Ti;crnqPwD-$;$2rW{HKbyuOVJ>jOn|sCw`;iO?l#b6dxTTKK;KLf6X!CC5qFB ziEmYWd_D1Z6}OBNKl=|%zf;4PC{F*8;TsgU=7_(oc=k7l&-^3P$A3)xcEtnN67N#H z<00a+jx+uteP42w;+Fqn_(v75*-QMG;sM1g{)g#Llo=e~jXX2mmp zPkcb}p|=q)_%q{Iio_cg@4bxv8+R&B>v_?iD()Jhf5mAhn0~KC|FO#z=l-7I>lCl~ z7vhgAUVbL&f7g!R!}KQ-JhMsqN_Q{MNc^S_~bSOS{Ht`0<_su5$tm1=uzUBvtH|cqhXFQYidhkw{ z$p7VvcjDp#!EaT(`&9bx98f$<{r96dHz)O5sOK5hDjraLqv8?u=l_c0`QIYFv!2ED z$95C1R(#VV#FL6|{XFqD#V2+U|3L8`_3!<&;xkU=c$j+%=^vd-elAnIP;pN24Hq!{ zgNkR)BmRcsjqSt_DxT2rM-?Af$ndkC&HT1rMBJu$CoaMeeqOD3~x4F+D zz34aDA99NK?G>+-FIU|11>#Y~b3adfv*HdiCe>R8<{Ue(GBE`3=Kiz=hJzu6glyjXDf5Bej%V#P6wuUR7^(Erx z;9RK0FMdJeD?WBU^B-0`=f%YDR=n#x;(HX&Rs2iEcU-{m=RJ?~CloJKTv7U2#fubg zQGB1?m-02mx6dKHM-(qqd_K;-O8IX&m*I;QN431KQ`|a_;U82y>q6pviqq#PeZ@y~ zzwK4-O%`rkdKxJPjmex{N>s{U@96wf+|_U>;e-tj@=S@7$W 
z_^W?R{qAbTGoH=;!cQn3I81+oy^3$s_evjAe4mED9Dbuhuh_=;>lDwQO?v;J_{gh> zf2sJuSGnKX4nI{%e@i>VU$1!QD~SI^@pAR=IUVCp;&1;s_072ATYg0RM#YgI6aQH8 zA;k;NX8JP@GW;`&=l&P*?-Z|nANex}ez~T8ipz>eE@t>=6z@5m?R8l3#^00QbK%D; z>6haYK>V*Isd)P=;`b|FbByu-Rq@VK7=A+We)XTY_{F4m%fpPHQM^&pzen-flbQaz ziX$_KPlF$_lxO1~n0}$+*-vBmCdJ#HOZ;WU(Z?A7F~u`Z5MOaF=^Z|U@!zI+?Q@8~ zt$2pwGa&cwVS z`5irr;d#aJhZz1@#e4rkd=ln0iGRzxiC?LB(R+y3Dc*cD@h26}*ZrmY6)*fW`^Sx# z_k`ZseOb?03mN~!cNo4z@vawgzb&VD&6&iT6{lN> zzpnU*;$JJC`57($OG*Fu7m0tUc;RP>Pnys0eV-$~Q1L>=V~RIw{P!vzQGBoBfgMag z1M|G_WA}d$k1M`unXdO0AJP4^r(ewYx8WilBx~tZ+@-jr_^{%yDc-k~?fv|hG5vj? z<9=FE@xV6XZ!50+J@F+AH2xcjZ&AE@4e_58&)7+vyoB-RRfz9VeCoG}&%Bi3J+CGH z8^w2B!ScOJ@va`?V~Tfbc>Bwle$Ve%|DxhWXOmwaQQV{WJBoMQLwi-EmFXj^N&f?i zkE*?Fzv8*CX7~jbYl(L#j$A|hC&jZg{8b^Q-}qXFU$1zN(*K6y=xZ4M zf-vLH*7B@TJnwZ3|Cr*n%8zG882_NQ?-Ip_6~9IC5yf9uyj<;jr?fHsPQ`tS_o)5r zUd0;~KdqhdH&tOs{n!ZQz2~GbI#kXtvhZOJoD9876qfEa= z?N3)IKKe?^kFw%j!^EFa+|@<=OT}xhB5v;_y*sWTt|%T^LcCk?zN?AxyG8N5)r`ORm5d)(`_D~^Z<}EF9>pU!5MTT%#*hAvcvSHn?;`%J;uGH_ zepvC0ZxJuL9O<8h`?U1_^H(e0Z(U&IRYh?;MEq{Wdli3P@$xXk_u2RY;^T^oiqBcZ z{C1tq@T(OcRJ`3_+^;`};V)V&@i(>XQXEr!^hFFW7~Ik_|HZ^NDqgMhZdKf>@xP&X zm*Vr6uskh_7b;$>_!`BxC@w0F>V4_&RXj`aJ&KoK%JS@2e7oX5D&DO4%%#kKx8e@P zBZ^sp;<>HFVa4+`zm;3m{R=h*|&-)Y~y?dVNKVMY5=<~$iQoQ+d#1AXp{xRYgVjV2~aqmA8U#|G}+lWUL z$3ITIMe%G+|5ZEupBVnI;^LjeFJ4CaMWxrHc(%shsQ9ST|0l(pHT*$4T+aJ} znuVFY#nksagYmxV0PPVkS6ulB@v9Us{1|bM;yrtbU#)oHe&P|uoBK&W4-7p*R0;Fn zTReCxF)ZZ#?;a1HvD~DuhA;Bq>pggf2mjH7&*^s4i+OO$gWu%AyFK`fE8X;x9(;!f zANSzZSGm)F(u0qB@MS&j_&0m-k39H-74G+eR0XO{@d+?>A^`4&Ux?#4}O~mzsrN) z=fNNL;7@z-=RNpd55CWX_jvGkJ@`i+e8_`;L;Q4Ge*ei6{A|x+_)HHz z*Ml$g;7dF>?7^3L@DdNc(u4av_|+af=)naKp7h|q_274U@ZWpzM?CnG9(;!f-|N9& z_u&8V;O}|x10MWK5B{wOw_NLP&u4n@b3FJ39{eH?p69_~4}OIQU**Al9{e{Robccg z55CTWH+%3c9=w&f+TTCv!P`CYzwE)gJ@`L8c)tf9^x(rDeAI)Fd+RqzK7*^e2?J!9lqb=JBCm0 zQ~m?KKjJ%%?|<<93E!Xboxt}fzQ^$W1z!vNLQcZ>G<;9TcQU?b;yV@JbMT#pPkcva z;d?H==iz%kzBBN>0N-;40c-OVq?cP_rU_|C(3KE4a^&BJ#gzKif(jPGUm z7T~)C-=+9oj;|G82wxaq1YbM84t$;X%|wx2Un_=K79T72Jo%IcMZO4@yUJDe}nJU_+EqW zwfJ6#FM)3@z9haud?|csd>MR0_=fR~;LGA0#kUS$4&NBQJiY?HBEIYJmGG7ERq&1D zgKJ;Q27D9v;AYtJdVFud2N%GWH{yE}zBl9BgztKMZ^8Gs_-?>=BfhudgL`Sqf1!>4 zn)}ZSQAWEDorQmU`_Vc0EvPTO1Yy#j@ZYBY|NGzP!QCQ1!22)g|G%jJ|KDzl>By5d z^q0RznX(4u`hSyLk%ph!lISk?^jx!|tCU@zDYYaLuMUS(Bbn5?flO&En@{Fq`SsaS zAwQPMSC$n@OAEO|sk~m|t&HSy>&L9tf$UhObVaE!UbL)IW~{J2V}6PCt?W;<@t*ElN^f$r0rZ<$Royhs$x?1WXqLuETCLVLot-4 zv%iAuhWj!@v3@JzPqr)MrZJK%k0jP7bK@Bg1xIqLznIHbNZb=kSYWmFk0o=tYci=y zq0~KA%ysv-CSo!C<)UHA7AutsrBHX5v(lDQmY}6ptnVtr)(zRb6sZzUBrK~nBY}xj zA)WC_*`GwiM#}ixQ#xn<4vt%`rA#?fF)i)lhO=2Lt3Oj&GCnkvDP__PVx>ls`QZ#{ ztwf#M*@0q%gF(56N*R>5tZAGr>dHU7p_Hv;5}9%;Swsok?Hfouk}4D@6O}?@&=fru zzbZ8rRFKF}ZXB(jFXS_0#mZ!GCEKg4Se#F-ykHlKDijDy=Di`i7N znRKEulF27b#Tu2Yt=SkO0aa^H=2IhuQechRQCNWIGn z4ss7aXM^V;8>H)7ZID7^$x0=YUO7HCm?;G@wwfS1WD+C_gQFOOiNR#qJxUynQBBjy zH0zs8lndYj=mm8z1Wg2|aS2Cr}qV&nZWNMvZ2giR1b;_m^5QmU& z-KmujZphXYf!sWg^&tlNJB=e)#jusLfJWJr%3D;<-rLf$W_YOTk0R^>R#y* zo(&4yrpXqUA$w&Ga@Nw(XrcZUUD@Pt9#d5|g-OOl>+CO#mr|J(!zHO^TmOolLdqka zvRP@3sH8Qbp}#GGKZqS&Tq-3e!HjMd?U7-;u!YiH`mi%d!*Q6;A_R z1EHBMv(k!WQKmG@>dVMf zIglJgr*Mi5PrSCCLSfx_amjdU9a;t=2=#F^rn7vm3_AOaVsTYw@^PeyBNHoHr5DSC zJ5p(9|96)YZ8_?yMGn(#0`pBt22V#ay&e)Y;}Tv)fl*d# zcfOn{RaTn0O=h+T|G8dzJmxt^eCbinqBw#KsWxbe8`%Cvq{T+3Por}gCMYREZ;1tq z7wp-gy;gFG09C3l-b^WF5*SL`N~hJT3`F1d&cqQy+J|$Ze@y0sYRY)Ocd} zjWtgc^ey+OLHDTF0}&-$o-Buk%AH19=aR!^SG%~Y0ixN#LFrV^er8#{lz?6h##^M& z6G`pS6+E;(>Vh}atuc5fm4I>R7%EQfTsmm?Xv_+W!d^&0K1pYdjg9p=XK|kt(P6|( z6btECeEnF%xRbHqxH>y$Y~|EVDO*YAv*Tl3nK5Y8817JAli3+iH)t1$mi7Ce#Gl3SfcnVJ&m 
zY&liR${G_gRjSh;axRHFu)xDYpt5RUS+r8PHjA>2dis4l)fd0YspNJHl%N@j;Ho4m zSP^DZR!3iE82`$Ysz14gUq=cPd?mxlA(&+-or4`6>5R9nJni0*E%#)GlQ}88(vG93 z?saE}3_+D1Wus{Ljs9qj4_14nAQWSuRROJde55}tdV|%*NTL8N^Nw@}fpDlsVD#}2 zKue$iR=boyY7>uFrdI+I2#0C}Mw=l}ufj)03S(|b8im#djV(Kz&!lx?^a#q)KBvkp z@v($hGFi!#G1LuXot-Hv-CD>cb7jAd9hK%oyP>-^GjBvonPQ<-Nem?`$y~w=RO$3i z<<&2WQ}uMl?KD<}c%_6nT2wGIxkj>Po=#-PiiyGOFz7p{8s8-CAhY=rWF{}c0 zY`NuvGcO+kp;kknFH_7VQyH;qbyqTDo~|+#jrOt_eR_;an8UK)L9GEL^f)6CGS`h~ zpg{I#UoUdpBNjT1IGrq-V84#n=@c$X%7={Up6iX-shR%S=@-9|auaG@YGkKqe7)n9 z;<#CAIi+i;^;#HdWKG~)>shkATAA5UTMmvnJ9&-s{>ekK07ddudErwNL{gZq#>c=- zr(h73+OhC(Nrq4>a#&nml`p%JcvCp^yilu=751d#(`bxwZMHIE3}AjStavF?sZ3&? zg7FpeSjH?=FftrtIOHG6tw9Ava@k#iB6+gIs?G>}k&?Zobf&Z-Sz0GOytY(@DZ$ZZ zmT;mE(xfofpUI?sD{2-#*>YlRJXZln(2LS;BVs6|lb)y~@dDOC1DyCq`{Is?I24NI zrFYda&dITmw!gPI>w18*Jww?%l8O3}l@+bAy^40^A}lq%qLeI-NH5Qn{Hh9LrV;vs z$#lY~48DRt6c$oWGmjrF*e-2_$Z97)YU0n;9MuIuona{CXBY}KFbstpxrRf3ZMniO z@?_RE1w#foe~p;@VQj(LHau7;eFRAKl|KjStNYK7Goz^LgNrjuR(JOEZJjWQ9q-N`8 zc_vp}ktvsx!x@)FUrN&f#YAmVCNAt4x)_NLjptLcjObprUPhs7$qB2LQ=EWM^omSA zQ_7|sEfdfN(Sbs-2fGIj9(id&vKw;ATo6+_V-rO%w=YvJV&ce%`Ow~$;&3zjDY5vn z^=|%P^RS*kG(bniw=I>?kXe#70mU8$psNynd)`wMtg~lUS=bH@(2h zQ0sE6epcm6nH-w7DpE~bc^jQAXtmaQiJJo1)Wf|b=X}U!an`p{ z-CSKG6gHMe2W{539LvV90x#TLGqyRW$sqgvuCgsCBg_Y8fK{*ZI142wU#rG?vx+S86=f^Dsyee)>|63?^J5MR1$U5 zyMFFp>QKwU=%=zUofvqriApKS&ag5$W{;}6@#5|v9%*38IOyf`U=v*SPc$`?+I*Pg zPwPpo&dF4&M}siOs5x@hM}KhztKH5oKdR6+F$6o1dyFB zRK&uzB2yIPm`6}JO>5}+c9Jkk(_p-KC6k@3)8rJ(kB>obm`;XmGy^!7KknX7fhH@n zLfPvMQ7gCqbf~54gzy1}>v`a2no2BRm8Z=FHc}4y+Y@~*^PLo~5y^leSCc#?`jcIv z!B`VDYD@IF=b>8a#)ThklDfl!;^5Y3fbM7KKG`I7=W3?b7L$I|!&rWlVU)@x3geYT zVJI{Id1&^={`V#;g_x9i+MPw4;V-lN1&Q1Enmsq@BXu5bfw2&%3gBZ7MLz z5EMR887?JZruE@Im|R=MLMMb^inafgBO$b9wI$JV!!bw*qPWe}jy{g0F66vrHa)UV+w(1xj1SZK*<{|ngOOmhRm+@!>cG{mG3X{l4&-H|x~EmR zzeq@;0?R7e3NA7ogT`&f{0jM38FwzI?FzRQ)Yk&DF7; zJ5MlU!WeA0=`3$wHjrJOEFIH`U1k^QxakiLufagV-YyF+E2V%Ag8gDE(N=_`s2}Ir zp?DwtQhF;av=Aut7o#T0AJ&hb>TYV)IqYfk+69eMdB-r0*OV-Z4Q>>yZ zv5UFhbdLU977IAg633}=V^gpC3SkI2X8QmF*yHV827fI>!R#G5JW@fy;lidAjG^7# zAg1R2jS{kCpwtTNH(WeDxB0=+o=nM1;u?EG%xYb10?eFL*Q46gnMyL73w6{)h3&$F z&1mS;a2^&(38P%73vO1rN$exmY)VldN@89PCgoNG>PJ{3%CJGhpIR11%@vZ)*Dfac zWC&A$S2*@Z9BW){{dSdzE}>+2ZaD>}PX?M$x|;YQI%C&0Q52JpxFqIWrS_S4qP7@z zM>O)+>NFcskOEy<812egu`N=>8=D&Yi21?c3sWyQJB@b^MX^U&65R^}GN-evLyiRw zC=X}CmTDMvN`S3>d1Mw`|0*Mv1qS)fro!Aduj7}rT`irl7eaoxST08oL|vbrBA>9$oX1g9*%B#N zs_uZrqIbGs6GTkGsuIJaA5KxeHmRAjEu72$NmTB5>2LAl+jD2E7Fp!#wW(@vrYKzM%1O z@l0*i{KBNwM|tHHqt$3_CNoj)%gf|UPH@C4GU|JvXF~!q4uxfw@513B*e2k8xIBqX zB{|p@^UV8p5khV51l?s(HFC*G_$>q`=}!)2a+6EN<=34$C`mhtkK=X?MN7vx%u!fL zh-P5S5oV#}ErT5i(%U-F)N;iovArO#Vhkuf8995bKN)v66CJ~s(Q!gA@C}&nAE2$CT zVKdbWx8~tUhZTlNWxNZkT#_1lh&UXE%@ERN30nJigcxgrJdNgcgb zYM3qzW6A1KjjKU!S8VU5wIwz`9`He*lOFFf*!4H#4PNMd6-`hD}B}%kA zPuH<5y z65F)lvML6Zs-d08(NF>DLt-siYBp*s*A`0R!;B@Vk5O)FQhqD9sZ-%l3vGHXhE{DA z=#wy0C5n|&H@N9J*0K_6mla8KhZ{@lT&9xg#$1mbOX=!v-Y*63mo(tL35!jK_kXoT zH=MGaU0|ph>Rz@IJ7wS?Tm)#GkTX2&;pRk*>3aP=%DJl&Rwq0ioiWVu8nCsA28EYs zK+y5O|$NU!G(9oCBLF0Wi9W)>F1mc z&e@@^`{j(3wVbol&v`UBXNPL%morkj;VT?rF23@rJ17mA#Tc=;+JFCpt6rTdm6x|nMd<< z6;gHXYt9P54=por;-Cmmd#u#i)nUa=R9I2e(UP-LIr^XvXSji zBy2ORnRW;p#@H!>SZr*b=)XE50!kmo_DjW4*??#fwCOT*-6>i_4V_cw+6Hn+Npkmh=->zd&4WqTy?o`94DLKdSh?9NxP!o>Wg$QJH zNc?MJaAv{2JVf9W-6CYw>gOaMP{+tR5y<|mQom3Cz;?`G14*x?$-n8MTvAONw1xCq7;$df&2OVI_*6_k2I5Mz~Iy;#scSSYmRv0-ryz z(342ay>N1jP4_y~8Q?l&X9C)32**uc!85EKXR3L#8V|GryL2^DqnXo=)OhNw#zKDf zV6N}@)mZQ;YB<&L>JeWe7BkZSU+*EU>o$r*BW;G(};hJNDN;u`TS*2K=Ds&$9nK$jYQ%{`LnAN_}K%m=^ 
zo0&S@aU0H^wW!maJgW(-ED4-Ft5Itm#;$1H)KZIUPjq(2aT@JiA|+ZQTa6pg(#_AG zT8?HX&}#HrSsKou8sC2k)xhSaNsZhw1bsk;q1%GtnH@B8Q}dq2eF`T&&4DIGkG&_p z-fKe8*gmt79pX0KiP5bcdLXVhv!FI%|E{5eVEATVY*xFu%*#L@f@X5Yon<%^j#Hve zO=4O!&HAIR-!eZHc>$q8s)JaP9GA5XVzt-E_{%}X41K2dcHTtk>0n;1tpkEFkJGGH z7&8{!)ntc9TpzqecgJIy1MVCD++7(-Q972MQ~(5;p}6u6l>i zT%5}$%e|$=ya6+)eQ+x$jvhG7M6%5+j+mpPu)x+W#_YntyU1<^R&cpoZGa2hQhlS% zC(^t|r%GarbHv996}dzyusWu@Nvp%nAt!yV0Okr?gHnWB>CT5(*qqH`gCGO(xfBOQ zvS}Pb4(<&Rxyry_z4izTn`yEQEUg#VaHicvcn9?s6G7GA5ZQmOd&+K_7TbgL;S-pcV@u94NS(~Px-F~}(xcRg~m)>YaT$8mtivdSa5 ze(AqS+@I$#Vry^+mrs@&g-frjhI?)#vT3;DOgct}9U<2^lSr~$#(9D%vWZkuE9Bmj zVot8;gmHH=e%0jU!j^Qtv5*@wUS9LE(HX{?*U-gY1B@&COT{Cgd3xA$us z?t{xrP$zKg&~Z!Egn2JetEO;YPQ{t+JSc*=0>OW@l0Qb{^fEq`pI;EFBWO)n+P_Thpsrxmsqit+`Ax*Mp| z6j&~ccNe;*2X^+CQgmWMuDx)wua(Cyl81ctf+utTvg77+xBl*T3wmcQZFKQ?rO*W@ znpCBFLeNo+YQ#vsAm;+n6WqJO?yljGE44iAljn~oE&R*J(xG4-P179-vM*7q{+MG zLt$+E<;%G3z9jyZwWuAKoN5~}RZ^lc_hcrZ=v&1k9K2P9mQuD+ecE4e8B7s<|}g0g0C>tojI)K6yxv>Sx8yLB^+8pDZ;!P$L^H4R>16h*?NmliuUoM=!>8XpTHH#RH=YO$_>Aq)dMTg4$sw`c+tu^Uo+jC_B4etmVmlC1(QNC-2S_! z*WtEpRLsz7&;sK8i{&9NOJF&q%UY3Y#YiG3cXQimo^*kL^|$7w~u1FH6R7>?79%b0|;)&t<2U)|DC`dK@p%C;h!x?5}{*`X{v3gs5~ znqV=I?5xdramWM@%la~AM(@L6Pb;zlHwnnUUc$c(Yn$PU;EUE)yk&@ z{5yc37?j)2RTB;2H5(ybLx?rEOBBR#2FVM~WT z)J&&~2Q*EygFmo4ha&9Cxc6}gx8-Cu;0%d2 zW8Kj6*1dsPg2%1!UTY}-cJu(3SPCv#o3wrPW=?a?hx9zl26tPwODfbRZEJPao@bvf zuMQN}+@)T*XoVwU%*P;?i>S@iCg+npT=Fudo@-4q!aWL{>j|rMY0?}bYoPeMrdYUU zN>?@$v2&$>lGF(*wv9~Lp1HS9srO_-?wN7oN4Jg-%}<*i@gdVFN{QPxd}qKe)765y z7iPWXdM~IwDqXA%nJXY{U7@bfQ>=(=A7Nk8VP4Q6>rALO-gdCKbms1+YY@1~r$iF* zQgV1K=_Jx14f+Yku+gU-b!gDX!dMHyNf-UVn+UCobu$81!Gs=fKq6v%bDkg#({nL? z$IO5;x}Ysa{JL4A;$UsPK2syrC@K!I*>VCEb>%7pWZEjKq_Lb7VN7X|e;aFU_Q~xL zWlvln)s(h5P42X@cTyOdQ}iI=i7z&2bF3@x+%d9~i+~ zuEQe%N?>rdopkAwE>+_})n@N2T02dzsLhIKCwsfpKR%fp%at>=EF5ytu(?4AnOux^ zwdAg)ue(!pGCpPY@^qqA z#A%ABp;q2uV)}Efw1Hw>3<#J5YB3wI5Gp;(1I4gv1osof!89!Otcc6?aHvy19+)-v z)-++CZ$MT11@6Fc_E|^kHfS7+iH(qFKa?Gb=9sN%S(h3Q{dnRi%#;=3P;%45Qv#wB ziIoj~B83t*AVZr!RujEL=t<^PFSrpm=x}(FI#n3MIj0Ox*PA4cLqfh|(c+c`JA~;& zHA_mJ0)fiojfggH_Zx3ZV4}gDBf3U4jwuc<`gZ9C6qmtqp*(`R(Nd-_Iyl=I z!~;%%hN0ioy%Za+dl00d7!QTm(X=}5*7a^JIqE4BoLrgd%d+2v#d_*}^ppJc-Dw9qN`v4TML;qsq#rvl%|7 z4sHBrvn?20)rfW-u^jg=UD{vkJWVx{CdU&&WjxcJo~DY3$8E46r^-j23GdL;+0l%_ zR)^Fi@0=!JTg!Iauw5EN$kwxS*`a00v21SAw|-`t1uw&u!a6GqKZndD>^Es6sDfDs z2NYfDEycksEc4A?tm*F#-bqCqn|%^eXFF(+2lhSk!%zyP`Kx+Rzvw{?WNw=?m0O)` zkg6S5Dy5R8^eXHjaeGt zT7DBnTvNo0|E4+X=z52%1m!=TU_ntqIw-`a2^d&y~eQV%T=Ua(LM_ID+~WUoyV zSMs2P;jSKE1KgG7{h}ZPT!X11>YDxy_i;U%?b_~Fg{HcBv|qefC(KFuG`v;P=3dui zj$ZD5WMz##h`)l{DQeDlKMs-RP1v+l(>0{wiap2pPg7JZ+DyzYQTTa}ZE$6jGz>AN znx;`>yo1TXjThFHXSMHpHGT(`z>ou1IW2mUvMwQ^|=&UXM57&%WIeP5~=9g7EAv)!PUtbKS z-XZfgx89RE$||R_ihZReg>>~MelwyQv>y~PXHf#PjfCK}2PaE0wq$4^w!0%$9xSLJ z@2JwZJD&HbBqwOrt6L_)U4q@);bo_4vrCh9 z7C&6;)ZJZWuakm1)a&{X78}oi=<8B8%feF^^2+~--)e!D@!ZY-|=})LbW5&rFCwW(OTtzZ(PE>!f6&E~LU#FwHXsCk( z{bVewk2mEG$mxmE(Z)&pQ}Am@jcC%Sv3I(DQ@46u6XWU=M|`>W$aE8N!x*hzU{sCE zlVP?KSGFwI#?!9cVC@#>1ZMkwc^j!ta~tIg^-Z^yo4j7_A*B*FqZ@}z38m4D&y|m^ zI7Q#cy7rAtgc>fz41_gtQNW^=WUc|mjuTkYqU#{bttN%1&iC0?? 
z7I|0iPlO?1^SpF-ml>9xy2y`UC!q4mK)jNIRR87}+Mn&n_Un zcO@@l(1Pp>F5csmPpa+egIZE}qS# zGD`+ZnG7U6COG%1(i;U1b8;xl#dA-bc0NoW8#P)$XEhI(sn?UnxMsq->f~m9ltzX3 za;dGRcpD*P^a3@uz^g>YircW~%-WmS6t6CYto1^zwR~ix`S_<)4aP+nDxtqy_AAw# zWL=f2k6V;9Q2VhD}^_aUAHv-O7y#GcvT2bxl)N`>5P3&)Lj!zAfYO zlU#`vY5mKp_^lZOv@qH$8Dp`Y(Gc7!U9DtW;%&z2OMerxC(}f&7@cbe6CD|MtXN^1 zrPKUvv_ik)G*Ju|u8Cssw5K#%cG$>0&wN?e&P)K&h-xk_H=RQtuI0l@y@>?48^ZLu z+G@&XPhGn-VgxQx5;8G7I8l=)fEuE(HvHjR!2~yf^^CuUD`VR`obrG z*p}w9P`J(3k7aKEfT5+5O+i4F zCq4A}TAF4H;?)T1YwOjW@B&2bM3}LTSW*1Xv*QqCf~imKJ{GIfc6~IQ7%>L#KV=F3DrHWy`p~A)8*+WxHO~>vMJEa!RVvrG?@o_toJsJWLNzA*{}F zkap81s#jzf8Br6tHJWaX#;>x~@#LS}N6X*|^DkodyZfteZ=pPOGrQDQ z{2j28#_es@xL!k9Kn`tJ$WC1a=kH6rX+w2JUIE}#W^dS=?0kM)b5FyVz@4T`JDpFJJn2H!C9GK{+o#6F z6KFk7dmHCJSwy>5T%mweC};dCr>bErCrjn#>jB)2vJCgER3EJNY8H;JW84SiK&x4& z824{;?zXmUsf(N~FV5o}6ik{fC|W_T4h6L znlxQL`i`>u6GdH{Evz5m5LoEz4i37GZllGB$le(C!*COnoOpMtKT-}KZb(8|E{LX9 zNxIKbAPG1gyb#b#jl&Dxu@w(vv)>X|rP6OZyS~hA{WwM->2!PFQ)s^gsAO;g-&$(N zmBCl-y$!o=JNOdlx7Q?Ngof$OGP{=WuDG*mK5fx1t!K8;{aN8xvktpv)E$}~nNeEL z_1p`}iNylix=yoDD|chfMf-v1->Sj{^rrf@jIPKe%j0^NT~8)Ij7QS_%F*l)^2q^G zMLM{g3&j0Aa%+;TRr^L8Hh(AT0$KD#dq5{L=N%h0Z(rZ~(Ew{rts&G`a7v$cHjg>T zZ+at^8gPMb!0g9gm{4mD;tC!ig%Yk4Ol3y!sv&xoLvVxjbyWz-JW;dFqVqsA8}~}D z`eY%a?&@o7{!(w$H$M@oi}S;|OuA76`(y}{9M)O#Dr;I@5@GpqD-+7?a5o;9z&&EU zL#U21^2%9W@WQu1VNCcH(?yI`vAUJw+gF+$3vmqI`Z|OmFl2Z_Ues7_iw$$zO3-u; zw@inFo6Cb0m4YHxFVaGh`q8(ta4oJH;uvIAf?N%hmA12{%rDI_Q<7E z#&5TYG8ZN#|^ZmnQc zB9;HW6x9T2<3pI6#7-C9Vv*Ye9sSOZ6~U2D-Ni7*nYy*;pid+5C#Nrx4`sXpQd+kv zpR9WRJ6WXJ`>injI*Qn|1_zynR-1s&QnN_{>a^tYhA>;Z-FX}_$)=a|uBaap&Uz|= z4pDJ?hlViYs72UeEzTw5V@2GAqsJ!oz7(GBUIBOXLaG`P>lur*cDC34 zXN98fjfD0gxz!MteB$xjY^ji!hK$IG$olfbEGU_{>|XZkP;ESf9m7_%i_<>T{n`zr z0}{e4fa3AZsfYvgXDVUX(PQrUuUZ>SNmr~~rO!V(8E+TJcul4oMp$uX7uRe8mQ|r)M-CXt< z|BP`frJSeg8+V`B^p6bdmk1N#ilg&Ze{(R~X-TWn8XkxHz1;})ymT9#>{O!bTu~@M zt$f_=3fZEfcav%;D{U3>+1~ zSuXY54jX}W4q3?447>PbWkhb}T$q(Ny~nNx2_Y0LUwJjPG6rb zjF%11n=QAbHs!vu-`tvPpE_->;IcS0J<7e#YBuv~@|FGM_VjX>cIJJqnApie4C)x% zNqwh>de(I~$0AJg*<1lve_{h^D4Pmc-~{&ov8<67rZFE+g{ma3>{kz&+JdyXDuu=5 z?kI&C`mafrvf@SGWW&^`$ldUmM3)xud_XxR>%Qy8VV7*GcZ73TZ8jA%f=lb<-UPNG z8e$MT8Dp@(8AE8XAj45jFHP0}ilOiF3wn&b5;9h+ntZ9v!BH&b3u)9f8qi8RN;ylSpj(c4VxO&s*XTEa_wu$;`Ohw_>lKYaofmP7B4NDS5lqaFuzQ# zqIT2_a`s7ruz74*mW@WWu3r-ROqD)lxU_^!Pw~M<(=lvq*0LPRieKCK5XAh3w_ngF zVJF7j;Qn3GCtW*lzTmqc=CB8PNWfwN*K5@;7D9E41*~*j%h@5W!2B{V!^n!OPiuUT z-s!sbs*b_U@fT9m5Un+!^!(b$q7EP>ZL+AHum!-Qt}vb&5uwHOwtDVR+g$ESwqUNW zI&K`nv#t3UP9^kTftq{iiaA|Dsj2FW8Pd-QRz%Xx6K>GDVFI!1RsxG{YWK8g8l zln=DHbz7fCZewX{mBpJE%d(kVy6NI}cq#BL4z6PS<}sgAM7=b`D;9^*4`~a36zv|;(sp23y|@hQtOKnCPV6aPfup3a^*O@{&2rDYY-f$;vKF z@TF{#C2eZthuckU>$2#RN0OhUJlZK^9qV(K0H~8;S-)-+h0(4#*(1&o!i=CnhT!TE z+s8lEfr{)bn>}|92(hC=Vusa?qh=&g+p^OeUv4y(y&9K)nLUo-=r%T%RF^IxQ?6KT z>^sXaL+kL9%sQoWVVQ=blxCq7YK5ebjWpBCI7wmu$&YV-nP>sbrBF{&>*TH86`Ys88j>p6GrnH@-gBAN9?!ydjG-qnUDzXCBiQTg<1d zoh}2NSt-I&Vk*D`{DYX*QAiY~wR@<)GFCA*$U$5$I+fv|0qLIPU?%6=^%^D0tVbfh zZs09UeR<VPCPs=?l9**T!mFiH_slX>c`5hSZjjLjzt59P*Xb!v<)<>ooULm)WNRWxVx zeJ2Osax!5&ZjRkeqw$%}KBhB`3L16}U<=;7-rh7n`F34(l4(9e9-LZ&GF+}c}d9=sKo@T4lZdw(+sCv6f(+%4$Hm^clCXgOn(veO!-?ThiCPsI8 z_S$s(rn6;Q9$TWxe!BSz!|~ourI5lafz!-)ResQPNDkI!xg1SH`QddJRSAJrWROPX zoXj~L;(W?gI8N1Uel@k-3?H$VPeUYistK%EGXwiC7<{{Cr|D6c-g`?%go)c>(N?QZ7*8!Vp|Y#Ci@^v}g1lUYjW%;xS<|h8r|RVG0o;lu=PyKr z;aU@{RTuYnH{UjOq7&9-`>N9BOH%6}@_JaNRGDO7!^T6?L^}N0N}sc4C$ccL@ES8> z<#rX#=J;^a#R;pe88_G#;x{_j37cj~8yz>Bt#;IOaab0c&Y&}2-8S~KZ~0Y$LRzNo=5yh! 
zfFm^1OcIZG$HiV#BWh*~4^5~mc-1^e!+|lv&t&AEr?~By>z3^tC3iU_ z-%x$>l*d0iG)SG9$!ddcZ3O&4fi@|#h2XGJdED-Xj5*ZbW@U~>tHqqeM&Z5d$1tBR z9z>Grnen+6R`}Xa>dV}G2J*}SB#6+)73AY%xc|`<*}($8PUy?b4x5?1c_qmm83(QA z3nsTGRgX5f#;H1xQCWWcrR|ja>`Xh!fx8VfD|uPjY~4exxF#^UPF5PFaoNOmmd9awl0Hr8y{T|$>V9Qrh9~&W`7D=+tM5^ zm^9~1Ts`24kc86`G^Zz0OdPW@LajR3Y;PEy7Tv?m!wz>GH5519HKM6x-t03y{-*03 z!=};`q%nQ9K&oBmC1JXXRlg->tgb%Z9lmU`w+Y|`J&3zc1JwY_)LCSJ^CX@HThS;O z;@A`S+sF+0rw-%pcXKr!cVRH3M9WXZ8M}JQ*UvdDfLa|_DgICP|MHC!1Js^+ulvQvINGinD(6d27}pY=US?&-8AG z#$tonL0{kg5MC)wrr|IaukEavTS4m>Y78&o^+RRXtt76+gbGi;>X-u=nD?)S6P$S< zx3?tc6@8NXqz+e8Ef#+iv288O!tpqScgW?sj*M*0o4s>G)t*R=gHZc=EH!XC!<@_r z4JOOkR04~0#OcPF>n5Fr#5udWzf*3DiuLs`N%Y5fK?|sQE&Pmng^4Mj zG-yRkzfz$E1Ict(4;18eew#jSTA?83I!4-5j{RCX$LL!L^IL7j2`<@QW6mQgVCXbB znKLcQc;pgY_Au4QuYi-QWwKi7gT3bIU){C1aDjm{Fn~DAZ zGquJoZt(&(d9WoiiR5pqu7-*_sv9Y6NF)>*E(O9lUzAEwR&V;i-uJ`O!*-WdpPKXLsqB&9zF6~ z$WFT)&O1*X({xo$lcTD#qL@x0a+FcvLdTV5dOqNNfeg%MxG2f(*w>+Nw1!(VSRu%( zfVvx==1Hz)uodFc8n}I#<&Hzc6fp%C5-tvLY`Yt?2VCj!dW`$*wab!jYoV^Q3sw{d zzhzAD#7LiPN(74=pQnd>%LY-Y%WUJo>=+}yg$P-$Lf|SJZqrFM>x8}hs&< zvcxo8SMt$OPy1AG{Tlk6i(xu{KXt|Otu%kEd-tiX3XC@`=3}a^BNNC8*BRX~48zqS z?>Oh+oaU?T-R_JaCyzsThr_3paO|R3%uQl1%#k2q2w;%83N#IR!l=6BqAW!9krmfF zq;@SrAwB&Jzq-tD0cYkgP2)n#no?&+2#~UV6}FRjX;!s1J3{j`!=dU~05hVos|Zo; zVyHg@Z1tyRIA~3%IFyt9okqUMZGPP_F^NIIr$U+Qd^?5z*s6(3*NG~3`L{3oTEb3=phSRO{EPbTzJQG1?E9bx1UuMr;cD)87g#&U%7c; zz{#9W+!I|%4&Y4%(NYbFxlFsY2&OZkb2>`m3Us;`)B-$r8`myZ0XbI+W8ji!Z@ejt z#)TI)%9b2Z$3OJ$F44j43lW`Vs>Sc%M1)&->~|=QKee4mhL!ie8NaqemkOYd^1#>y z7oaMAbA^M>fA6EW|-i9Xj}0s(G6R+Tl_#3_2A=$9?#CfY}h^Ek2Iy0^)}=eDcqyfTH5G z>+k+m3u|URiACH^QM-T=;W(P90bf_Zfsv^z0h>|9`S6@GiRp1xgj~f85;M<+p?F5e zYa7@gUIK38po5)l(s)259&(OMb94}Q75|ADI z?hMPY9cAvMYl>+d4yM_abFkW*bF&pRW{a&6~3SWj;)QouMeJn$B3 z(7$!18WJBGPh4J{tJ7coxi1G=VL)0ZFM8N_#ksm)a5C>8b|rhfBSEgl(I+#oD6`wh z(*gZQk?r{aXTfw~gZ=PEeM88ZpsGajtzM`VlWeAh>)6LKcqVzBHfa#!q2oE9@0q88q)UwH+6XCSJdC8_Qs;P01brQ$Qu)e?54S$K`eh|+;%SdhV=*~Mt z9M91?1ySH+;onj&O|mxXLUMQl3X}Tv`HVh$`tlA%-||PymKj<|M(r!Wv692&D(+*p4rIr$_Lt*XIH@Sk&Uh2tJcEFRbSb$Zf#){h zI5b9atBEWsUFNVHq*at_dPiiJ`R3J*yQ@nc}{yTmgsSsS*c73}wp+oC3l11bEq{VbrF0gPq8n z_>}d-RSyl*-DOrtwuY}|=-^TkXGUTBNG(a0Ga;;qhH(}pQ*s5x;>+Ue*W>T?P-t8| zqSGAr#kG9MNLVC9tg!HD$#`m=@qs|McGbL7SXAaNag|S2if&$sBF@J;qld!Kj4PE% zlrgn#K+e{{VQv_b*zUC=MqDlEXpT@SXLWE3+V~fj5?PedlIzU4v2txTjcb`j7`7(b z`IEDH0qL1Fql+%=A;2rH$m0MCQ8VO7VP)YOSmI~_tChk_1otf)clU%=1hvXcuOhw_ zrdO44p;*Bd{1dZY_YAfg_a+`K!>T?itNOaSxH>^9_f%$G#{>`apUS-39X)~lV9Y6} z(SM%8+(Y6q%UmaLnG`(4m*tYf>QCk#hFXq6Z|e;<+jjmbn_|6x(k40Qn|S&aAN5vz zwAqULr);X?{z;pxxNqWVRb1}nNS3D>nHp=V3faTmC@0}Tn7MmPc2A30&eUDmp&^e* z#Ws;iXHjIDF58VCV=>QAt>yB>gw&KJ!nEU10XTzL?&67D3n<>;l=RKsyqh3~S^&Ax zDam08h-HbZeCWUd$xz~`(q+0KN%adl#M&^M3R$eZ%e+2vw9v!f$=u5bG2n7QLMVF;9<_7w7# zxdFD4NGB`yX=P61Qa&8=s*G5;DIRCd5-`icK?m1ViQyR?(V=wI;=(2Y1-YDQ7~A9E zXNBaV)Ww4^W{l&$6QiI8UKp;06B2mlj<0&l@_|pNS*^K3VcmF96hJecU>r53o#Y`JFsa`ikguRWcq;NB~%6K6VXHX5xNx}DW3?ZMn!-6%4d9Lrg) z{n+(LmLztlbsXo8n%r z_Jn=E1sP-LN2JU~w%fND4kU-`YiOd$hAl7Y6+vfrBa{U@HS{HN^nXLWtvg>F$D5&r z@e2MgJL&bL+j~u5U?jfMX5pabnE0Ib!xI(!G|N;+c6Kaugk(@+dBIT%i1CJYaC&0Jl8lvd1R^xWLv_fr_ zg4~ljkSUF2^Vm<;t4hUO(u`!`{1Jv_$TgbzGgUaB@;L{qsS+CL8sjA1RHp3%I6t4u zSPUw)H(<2>&xXB8MWGU;*|{g2$;7u#GQAg zdDs(%G`=v!PF>R~tdN+<6FF$UiNa6}GQg?eIpT#guxg8$5%1xNYZ#sgan4E3n1x`QUpxWUm>Xc-CA_vj!)cG>ushq3P z%v=NU8ES=Ck-O_gnu{Jn6O<~=Cor2J!7bYcy{CG2jJJEV!Kw_A=`Y~X2Vz1u>+aEZ zy{1l^3Q9SS7c{E#1TWFBT5CplOApJ`ev$r-rBvW94(3PlzA0kOlcDFM7;=plimW$i z7lzmfg`xiC%d!<=4o@fXCIQCxP@$t+S0?Tv8Eu%4G<0Ok)fX3dG}xXW?DE$lhg;!= zj00g%T%3*|H9rWQSu~|3i#+uMv)Y{tMlRO4*H*8hcsb~zgWZjZ@nRp|mX~2XO+pUW 
zOREjKX|gB;P?w>syyF#ccFtR#5O=HkGf7OUmdu8+2?#52MJjAe?cO~%?Y7Domm7^S zH{fC=+zYQKz3tUXqXOwurC0esA$OclL7|L*_~6dd@ekl^K)! zigA3-)JmSb^YXVrql7~J|elI22U3SgwdzMz00~~Yhuj}Kz=SWZAjhqYFBd)li-`pFI57afg`+8 zrH#AYpl~ikHu`|m^BF?J@Azg=6Cxk#&VA1l)=(hcXA_XvNm_ z0?8kCwNez?vFUZh}*V9lXn{(JbC0HDm z_ZVNUX>;7$F#ti_zo3S>=EPNtC2-YS)M7Vxii%4!Bz9wIPa8m^wD19GXdIlk>1h}d z<_XnZ=MBhTidCy2rXZ_fW>vi$dKi~DRizq>_E_BM$}OV?OHw3A=`i&%La%B-hiFP$ z4Jj2RLZCDP1YnuhWL2Qb>ODZ&J%v@;LV#^jh^iV;Lek_&O)m~nn_c8twK3>+tizkr zkqCg|B4BB%7>L^;_vdF+qXjb4?bma(8M26d!~10TC&Z`dBoT}(2Sre-z@TBY^kpB3 z;)<$h$S1o_I?!U2T5OvF!~yvFatAf2bF0!p*jDp)A)R+Ef629SZ^ty?*Fegh5BaZFK^lJiyv!|&2%gL=vX z+uSY1iL zoXE`tW}}|mJdfOPASD*9Sx5d0E%U@7Wosy%)X7E)#u}0T@|$ldrFCUiCJ|~arO%Bv z3T||YHV9fb15fGiERfUl%&7I7yTOMLKR3Eit;S;k=HrV+8tNBV6~Rz&X;Y~anbwimG&o&rYVSac#nGLN1xC;;mqlzL z#KL(T7H({s>CtND2sfRKa1^-!rVsZaGqC>L{1VQ%oDMA=xjh2Yrc$ycS%R88*{0!& z-?7f?fdf(r*VC0g8K5RAC+~IWWA&loSdo(@3bRKH&v-0<{!}zBJ2BGs&W;ldKzE03Iofc_=GI_LY6;2PsByEkP1EXa> z&F9cPoOL*v)eROqjJS-kHuh8%YykptU^$y=lk6*+&J|({sD>xq1;x`UpPOd9jb~nW z(lC1}d-)#Ie?ppNcIN2UtoWvLoGxA@Q{Wm-CzU!%OtJPTau9oygp9B#p*%)h2UoAk z;U1zbx9`S%jo^X_b3n_B9h6y|CtPdiD&}7_0=e7xZlt&= znMYP+5xN4TomO8_Lt_6Vm65Yx&mopUi(L4;|ep`i&?!yFo)LPZj?={u_djeG*w#&n=I^P0j9$Q zp3RV_XoMaQV8l|0BCRB+mt)asjlp|EDJjX!ut@!`5xVKIRNKeG zt0?ynCfrP=_!6A%a(>MknZmf3B_N&5hOgkv0@X%VBLcGgQo_(y4UhTKF(eSH^?s2p)uK3p|;GYYC2 zX*)(#HDXOZqN|P_t7vp`m8;f}*0rE(9(kToH>ktKnOM_JtK(P#k@KxA5ZDrG(r0~R zhJ+1Bx_t=W>^VD=`!(@QJWf#P2-Qf(5$XtX(ztzFmb}}VMTjNp-*W}!{(P*;Eo3^U z0ib)i))IU=YUe|=UqRlBUB{{h0yr(qn6N*G^9N{Np3qP$$~-J zUQb$yh6^w#6JB92OWr{Y8<&%j*ZS;@ksvnDjIglu3yvfy*k)TU+ia^~nV-$e8{ad!xGwX5i3CzARifP#2wCxHN_Ln`b z&A@&IU7Epuz*W4#dcsH4nq99!0$KU}IPc$?z3zO{neLr`QgXHPpPH2~jJ0OD)MRbjfcNa-J!4!C4Cj?aL!+tw zMw!qZkRvDAe5WkcweXNm2=&1H`Y>a1zN$G=Aw$y|LcaCDm6l*Cl#b>2+J{4tUFc=s z50KGF6?{v+??}@C+^dp)TAYGYH;W&`x5}xR0qPpR{^cHBJ>yAsYxP4d4QS%35_gL?x)+C04O9c z%`sG&%sg`a@*WZH;j7eJ=Np?V>D#Yq1;H!mJM`S_F^s-PBjoyrk4Q_7;y=Ra^m!Jd zAOU$DvVS21Wnp|tFJ)ieLsD1&LR7$Xw0!*|Bm*oaP&GDgkep5-JX1V}S{i>+xFStU zvJ0hZ(82ifNhPly@noo5HIDK+nR=oi=DMWZypStNOgi66jnr0i!5~A`jXgV~pTuGV z8r8N{!&&w2guG^m8q!OdOF=i@n{=VM0sPiDx*x8mt1$nTa&#z|Pr@)ktSO3Mz}Ia)mfda;7CS7*!{vpFCunmAJ9fU38_^ur)D84V=}gY(~qG? 
zdDUaR$s!4<8Wah}7+G1S^*)6mbiP%%M9xa2>V4?1@=+vpwh-u~^!l0P;_Wh0m#S|d zD^@2p?rE(30XCCl%iZ9D&;Mb*pEZhSi{m@IFno!JORg&d%?UC7dw^AqKikWOK!v0T z`#s?B7(-D&L{Y1$ z_5Q_?%~$Kaq|2Z;n#s3k%3VL`vyVq*Ye!d;H3P?s?;TqYw&rvvor1l-jSUB-EGINh~ z*YnK}fgCFb$sfE9%21&nO^8Tr7MAR{Apl5;NcY!Hp`{%{(ws)P(^-_U2XwuWLrrHy zA`Xg~q|%EJ7lOW=c#_Xu$dPYyEAEaS8o#%t8`LhMujJNQS3|L$K-y#W6|;{U-SN&hpBfD+Jz+p3^h zS-tuuw-^-Q8Aqe%Rev<{aD&h8Py+yB|0<8WRHADmf?^e1n5Jd-J z^d*%3^f9W*J~KK8t7Ft{z(&6ia!G2|#IjV6^(ub7-OK7&j)#K# z@l<{zMv5Suzvy2q<15(16}}5c--jVK_4_3K7h+-KaX1{IPFWo3N903w^7MzxbsUAy zlX!(fR`eO`AoRyfo?sgGKB-^w0jlt0owYv}VIlYTQ^->IP0_gc zh0A=*zcA~45=GxIEyf%IX82n%J$`fc>K~+bjb89Den)fA41O;WfJ_po{Nsgpjy`@_ zPvk-VJxPWrEeyq(6f#-sl%~QbGSdvHbKwf;50hwm4@qjK7Cwt+U`_Zpev_6?zgg*y zX4Ds+KLEY!->BM)Mq=^Y>*AAdjH20$c44-j^KV%+{g=N)64&HACL2Fb5)6jmVM4|d zhXjA16!rs=#2?|msZaDDAqq;-Q}Ph~m&d>#vPtv@=70R3i_@Eb`d`A}|856g=y8D* zZNd;v{|8pM%C-vsSTAMQFdLF8!5Z<%5QT3j;onEml4eF#XcSBN18f|7IA4R5WV6=t0UpFD za$%+eR{nL*<@6yG9u+4-;p6rll{Nd~`Qe|JS7)b}Ff)Z-@1HMH9zL)a+(H^eirPzR zssmC|rMq->j$$^t^1vFtttojWEp?!+Zb9Nd<@!Jhy=9rbR94)g+ti|}GE(una5^-;^@_V>GNkG9>h`SuM zfL>XW0T+8%J)U)+z0T3mu_>qd9rKO*^_)2<-<$ezC-etkD>bO{TxAW|m^=V1&B^)z zK%}~Sj!X|QRlS3Ch>7c+%tOpv^JpJp>P@`i5OeOxTog!I^TWJGc1N=d0TsPeB$f%W~}V zK3^Z#kl8Pj$1le4wsljh`mR8Dd8;eej7-{3t z9UQXp7#mq%^AR`lwB|!@W>(F|tl&b;2iujsXcDVI`UP=u^E+IU36*{1#HB&g^(Up} z$;&#lwf?(IVUGG{4M#A!g@o7g09gPz_+=*`@4LZFJYPh8{ygb_{1+Z_y%Bt!iXXbg zPm+v$Mh?_~^dHzB@pG}%ypre5qEgyVuuzh}PtC65sEgbjmFKCc0t^JGfx@GBx%r6a zx3zOyT-1FyLgeBw?-J(T0WzN`jn3t4;U z%+GL!veJK|Z!WBAFyg_rb*w7?T3W5n2}BRI*6*J9%SjLseUE zQ9T8{#DH6KPR+~IoXTc=kE&MXKC;v%(CDR{hpFC7+kn@Zz`uvASqf`W4hCUJO~8d8 zbOj9Loq4sOMz>U0VbPn1TqLO@fAFk8c+#jiGu10Sfw^a_OegsNuk=-DF|kX(C3RfE zbw?3ZYfu$j!5Qh$q*GeM6>WX=u(+D6n*+8x&uTddvPI#Y!mWD3w<_O`Hh3q}4CVhm zUsl2cJ&qtiKv+xIFI8tah5G>Ux&zqQ*Ss9LZd5?JvfG}7T4F;;bf8X;hy!Twvx?W` z!A9Ee`Fw?rjeQlBYL=DJa$HUiqV7VauiJHGGpnv0^m>VYp|HoUP?0d}Q&hyg)X;1ye4uKE<)@2tu7Jac{(HG%B)qfKfv{!C??)R@n zJ+N%tx3odc_8dU!4QsaPCF`g0WZTUR#}$aUX3eKk_HEu6EILdf{E9Y13&{j$QE-mO zRM>lg3fx48Fk8`};T*9T^v?CO9n+z0WPJ2pO*v@B!W^YK3aMDBEzG(O%DImg4&`yr zQ6CTAQ1FL19LgW_I%jBWHdpl8*T6e0}`D*Est2(({*h9#}UuvQm9~t4>?5aN5e3 zAvneZ6o(zwaCjo+)e?0X=nC_2I;vS6$ zu?xu4^MzLbnv9Wljl~5Q%bQP|ZSi;YjpQ*Y0B7={Q!5xfW zK!C|aPq7bZE>5CzDtsM!9?fQTGRe__76jo5kxr<0CEVrBro(lIcH$xDhV=QuHttsM zP^%qH0~PX@Dp3`9%>oNDG9lfCtfo(uWRf0Q)9YTjs_p06W=$aQE%`74Win(8=wvWD z=r1}qXhk;oQ;y3A{Mr@|Zv`LW?t>}DSMHIDc-j_q(xdyJ*n4XTx4@sW24rYB)QMtT zY&l!4GldXgXE0d)`XG-pF8m*G7+5gzFJM)M^#ECbNc`J;eE*W;OQ-nlkMFJ!)5iVT z>_ETf&n5EmnWw$(nZ+|fZaG^mf=t5#cqRmvde(N=lW>~v`7X0peibrC_nu%uNN2ttLq? 
zk6j68*DHKC=$VwZykz$<^Ol$H9%kPDq&L16_X6!cFQJV9`7kJ`niz;eLc$L6IZ!z1 z+IG2lu9`N4*Q4xR7{~nZgv;pMEYM;#Aoo-DHCVNj=+IG2-H`447{#_Z8r$}c^R~Sf@_O;5Y0;~( zglvxNCa$LtHnGI*FiR6V-1gIKVTIenWFKAZt#jZw1=c#Qo*~+6ST!T4kfOJ`hSw-K~V6XgL{2mF!CQk)UGesM$Q6 zB3Br<6_wH=t<*Gbv zsb<@_!g-?L5BlHpjdCo$)$#Q@`75os`m*!;W-u16T^q*JURcg#N_LgsslBaTR{&Vo zl#V%Jtgx*J$g0>@XBNHN3GLvX7WONKYUek2`>_xhk#ml38cSImvYVq^7Pt3wQhIA- z8(K(zd)mLteM^%`<9lS=c^cmdTTZksf8TXFn^zN?EJ`=!#P2mG{#`YP0t?8L2k7&; z(_@qTska3iqj!EV8#^G%HZlGxjMe0ixI5@t&*FV5SelgSsNm2vQV){vFoBf7*Jfq( zkOtA%Kvhnh!jL{X;Hoph^Ytyl-BC1(_C9&0Py;uVh17D3WIPRJMKYX zBERmL_;o8p)$)1?jY}|WV4?^705Y9?J;nTvZO((WYWZ*6}CE^A~2aOqF{2_)z$aLM{E?1x6G-Ty-( zS*90xzNDHDUM8#2BSpp=$w|ob3_9n?pXVH5uO*LOaxNM0f-ryivE;_m9Q+aI02s;p zQaFx9q0Z?wavV|1_32frLmQuV=g|W)7j*!)@l7jvq|_4}JYJAp`_4nVL(T}G=B3=C zBR)ZGMhVbIHj);>bo(8dcjFiyiU2%dOQP7wImkw{QV|HMORseuFP$zr zo4%L}1tlU#G5vaGl}1!!w6X0t6?n!O)5{55J+1?Z{)2|$BtHW^SmGe6SxX~B2vJg7 z7Nt9(g9X@Y09LXWyw>O89JOV~l!-k-%8Pwm{kCy9ER^vX&}l$Ac?d(3B8pUBl~ttk zgZPU8TDf4_p2YM1il$4(R1}|Z5mx%Z{cKwm)l`p52_=h@0SAf^G$|^jUP%Rk95YKR zNaa5FcCBYrC8gq+6#A4k7EpB@#$BdfrfX0k8u%&|R*K(w)UR4Pd!f*zLUsNzmf&*H zndK9@pDu;L`FJg`p2}gsSEUm^1+_+hEgv0Um5%;eZ}d{GP&1689Jt*_E?wKsLsuf6 zl`W_BD8;rWOn!7%Xx#Q>_gqfnUExu&FxDukrV9 zdZ0xzV0ndLUv($R@%*AJ>7nWPSc$?)rc@Ve#?dL~9prCjr&5ItLs-kMHuWL;^dV}G zg)Bop^g5q8K>CzQiVs)Fl@2@)4OK#}aW#tiA2|pQml zZ8~Z~C>dfh_S+-PLmvui&MerLn%5Q6r9x8GGy00KpYeSqmqPs-8)Dz#S5P){$zn<$ zZ?-O9b^J}GaVbB58dcfJ$xG~k(p2Pxc*w?sLI{SV>h6U-{ek9dg#i@T(#2NzXzP8Z z+6HD|F}p}vXiq1is*ZhViBO)v@^tgq^y_Qlh4`9%7SONwFC=7B8n4&wbEQSt9u{Zo z(KmFThdH4ge#9xh#sh=TA*w?XZ&W4*zOT1gkr!qwYFjI!Iu4?Sq#(4meC z+tgIetTvUM5?G;F7)8g{S;hntvRBF%zMMf-t{}UNFoZ}X_HnRIF&Cep;WLpPx6job@!k#4Eh$UUA^M~h^X()kejdp4{eH%6ck*Q;J>=a zNE2W0p|)z^$Z2JDVxwgZ??K2MU7L_IK2?TsfSqJTtss& zM3_YvD|wJD+D1%qE!m|mJV*_A6L_4Im-8VXRF0}M*UmCjok`LQK$FhasHtgysRPtj zHkeuiZzVI;8hJZQOs%0C%1X7yd_y_O1<&DSB-gAzKt9?N7(2^G`+*~sLUxpk_6*IQ zWuiSnw;y?EKfoM77TOPBh8(1%?4FLGM7*Rkgf0l^rIth+_NX^FRP01O+SI)-O*aZ* zl9q!XZc%ajOYA0m)|l`MqMqObhxa<7+VVy`0DDpOb*jBl98XM+AH0o#)|ziy?&)+0 zubS3U+hx5Ny&F-9quo#h{M=@)77vf&wAvD4-D2#%tg;N8e@P6J`;jMm`>mNB| zZmDgYa{Y6UE(vtyPYSz5;hn)<(E6VcF{#f}yqUS!3Xs8l*&G;qInCih`S|B4165RR zfpArYp394uQhhslfwxz6VMZ4;)tinf?Gm{h$tB{qr(bVB^x#K+lr~H9CA`*)IUa0Q!qafA`tfAb!rGHHTO52Su=(Qr<)BGC);`jkY?eBLbIjzVApkYbzjRPwl?A z*9jg?seZ?X4zed3+BqoLcj@JzuBV@J32n3<8B41Q>;lz;`M~CTHGzf#E6*vYxKylhw{4SAeTshab36YdG06kPVH2- zk=VP;b3{9#s;ud%4G#NImZopmrNrlz^N&f-IG4fd?N(F66Q){IrEZ$H1FnwAXA`}o z7M+a`rb?w2R^M)+Wfq=IFngN}IB&RwjTWCbgr(d>^oBs}YEyc9GEC3P519H#p1zV( zRMbr%?btn!G&$jd-%^f5puQzK;Ks>3JMN0xMkk4M$KC_1=@!2OV7D;s{b*W9LALFG zA0f14`o9hjgA0sO9y8&FB5uUlrfNVrM4Nnsk8*xF3nzGG(Z3=eVYYgXx+0XpDV3@E z$Rd?5a;1tSc1c;g!B`%GR7`&mDc_JC>!dl9y<}RGj)QaVESMg45yiX~e@Y4k1%4jS z&=3K%i(6G=;NK%zOd)?k3>)Y zdlJ1+#uqXw(2%~MBO5H6D$+Yxwx^2FIaqc%w>>&r&z9&2eyWrE$OOvSdxfmO%US#A zq#LBaI0H&IC!N2{keqL?%-n}m)?GBS*ETOzj*Q*iefRk% zIRL&Z0DOvmG<%$*k`FTYA}~d~!LTTxv)kH64tVHdzWNeH{=%Z)pYH~k8tgiB-Znq+@1q92GpJ4w}gE3-T$!l_qdMG1Y zkv#J?F3JHMEkUxH%(5yk^(OX@{)s0mYL;%I+Neyo<1h_TTL#V^P+U5+6V>*z{UZ}Q zpOKo!Cr^3na;L6@8k>z#g11J4R3UAL^_3jmPrD51+5@;#UqG8txSl7IhdDUKD++ci zoLhq2Z8#GN8cB%J1Ih}mUeR_%@yZi>kZ?l$@~QbqGO>?RMA2&Xis~t&Z@2Mq6yYd7 z#KT#->wHMdA5$BPS~^_N%?(RT(;PykUdS$QdV%#VRr}66l78L@lW;b5rp?Ms%eKmef}i-G zL!T<2x`y4iBmThD?pr=s9XqXTv5h_(8#!vId^A1kn};p4t=4$6#4c0=BQ^5{oK)eJ zTQAGEfcaMNeH9NyBPRJvmBuQ>2avWVwF7RVdk_xG8|k9FLe28m{fDP**-0!V5iJ*1 zbeT4Bm08s4H6hJ{HxE2fYO`q=LAE9S(&=myk*-;O=^w?>5)njU0@)~BAV%VTx_Fs! 
From 05ce0faabc6494ed36b7eb9ba170c7a3adc76c74 Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Thu, 9 Nov 2023 10:00:27 +0000
Subject: [PATCH 100/113] Rename VERSION.in -> HPCOMBI_VERSION.in again

---
 VERSION.in => HPCOMBI_VERSION.in | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename VERSION.in => HPCOMBI_VERSION.in (100%)

diff --git a/VERSION.in b/HPCOMBI_VERSION.in
similarity index 100%
rename from VERSION.in
rename to HPCOMBI_VERSION.in

From 3508231a53fea62535e3d9ffd8e42b50701ea0e7 Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Thu, 9 Nov 2023 10:00:42 +0000
Subject: [PATCH 101/113] Make code-coverage optional

---
 CMakeLists.txt       | 17 ++++++++---------
 tests/CMakeLists.txt | 19 ++++++++++++++-----
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index dedeade5..9cba68a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,12 +57,11 @@ if (NOT HPCOMBI_HAVE_REQUIRED_COMPILER_FLAGS)
   message(FATAL_ERROR "HPCombi requires a C++ compiler accepting the flags: -funroll-loops
 + -flax-vector-conversions")
 endif()
 
-# Code coverage stuff
-include(CheckCCompilerFlag)
-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR})
-include(CodeCoverage)
-# Code coverage stuff ends
-
+if(CODE_COVERAGE)
+  include(CheckCCompilerFlag)
+  set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR})
+  include(CodeCoverage)
+endif(CODE_COVERAGE)
 
 add_compile_options(-funroll-loops -flax-vector-conversions)
 
@@ -115,8 +114,8 @@ endif(BUILD_TESTING)
 
 ###################
 # pkgconfig stuff
-configure_file(${CMAKE_SOURCE_DIR}/VERSION.in
-               ${CMAKE_BINARY_DIR}/VERSION)
+configure_file(${CMAKE_SOURCE_DIR}/HPCOMBI_VERSION.in
+               ${CMAKE_BINARY_DIR}/HPCOMBI_VERSION)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hpcombi.pc.in
                ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc @ONLY)
@@ -130,7 +129,7 @@ foreach(f ${install_misc})
 endforeach(f)
 
 install (
-  FILES ${CMAKE_CURRENT_BINARY_DIR}/VERSION
+  FILES ${CMAKE_CURRENT_BINARY_DIR}/HPCOMBI_VERSION
   DESTINATION share/${CMAKE_PROJECT_NAME})
 install (
   DIRECTORY ${CMAKE_SOURCE_DIR}/include/hpcombi
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 29f2777c..5199125f 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -42,11 +42,20 @@ foreach(f ${test_src})
 endforeach(f)
 
 add_executable(test_all ${test_src} test_main.cpp)
-append_coverage_compiler_flags_to_target(test_all)
-target_link_libraries(test_all PRIVATE Catch2::Catch2WithMain gcov)
-setup_target_for_coverage_lcov(
-  NAME coverage-test_all
-  EXECUTABLE test_all)
+
+target_link_libraries(test_all PRIVATE Catch2::Catch2WithMain)
+
+if(CODE_COVERAGE)
+  # FIXME the next line fails on JDM's M1 Mac with gcov not found (even though
+  # it's installed)
+  find_library(GCOV gcov REQUIRED)
+  # FIXME the next line fails on JDM's M1 Mac with "ld: library not found for -lgcov"
+  target_link_libraries(test_all PRIVATE gcov)
+  append_coverage_compiler_flags_to_target(test_all)
+  setup_target_for_coverage_lcov(
+    NAME coverage-test_all
+    EXECUTABLE test_all)
+endif(CODE_COVERAGE)
 
 add_test (TestEPU8 test_epu8)
 add_test (TestPerm16 test_perm16)

From a7b3bd21088d45e8191130469234c579a6c5f36e Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Thu, 9 Nov 2023 10:12:08 +0000
Subject: [PATCH 102/113] Fix install again

---
 CMakeLists.txt | 2 +-
 hpcombi.pc.in  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cba68a7..0bd69b5d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -141,7 +141,7 @@ install (
 
 install (
   FILES ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc
-  DESTINATION pkg-config)
+  DESTINATION lib/pkgconfig)
 
 #################
 # Packing stuff
diff --git a/hpcombi.pc.in b/hpcombi.pc.in
index 070c5903..9684fe82 100644
--- a/hpcombi.pc.in
+++ b/hpcombi.pc.in
@@ -3,5 +3,5 @@ Description: @DESCRIPTION@
 Version: @VERSION_MAJOR@.@VERSION_MINOR@.@VERSION_PATCH@
 Requires:
 Libs: -L@CMAKE_INSTALL_PREFIX@/lib
-Cflags: -I@CMAKE_INSTALL_PREIFX@/include
+Cflags: -I@CMAKE_INSTALL_PREFIX@/include

From 0c5ea50e323dd814156c24e1e6647e9ebf8775ee Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Thu, 9 Nov 2023 11:33:00 +0000
Subject: [PATCH 103/113] Try fix CI

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0bd69b5d..b2244135 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -63,7 +63,7 @@ if(CODE_COVERAGE)
   include(CodeCoverage)
 endif(CODE_COVERAGE)
 
-add_compile_options(-funroll-loops -flax-vector-conversions)
+add_compile_options(-funroll-loops -flax-vector-conversions -march=native)
 
 file(READ ${CMAKE_SOURCE_DIR}/list_builtin.txt hpcombi_compiler_builtins)
 string(REPLACE ";" "|" hpcombi_compiler_builtins "${hpcombi_compiler_builtins}")

From 84c7f292887a6bf98b37f0a2f235861de3534a35 Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Thu, 9 Nov 2023 11:57:32 +0000
Subject: [PATCH 104/113] Quick Matcher for Epu8

---
 tests/test_epu8.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tests/test_epu8.cpp b/tests/test_epu8.cpp
index f85870fd..b9469507 100644
--- a/tests/test_epu8.cpp
+++ b/tests/test_epu8.cpp
@@ -24,8 +24,13 @@ namespace HPCombi {
 
-auto IsSorted =
-    Catch::Matchers::Predicate(is_sorted, "is_sorted");
+#define Epu8Match(F) Catch::Matchers::Predicate(F, #F)
+
+#define Epu8MatchNot(F) \
+    Catch::Matchers::Predicate([](epu8 x) { return !F(x); }, #F)
+
+auto IsSorted = Epu8Match(is_sorted);
+
 
 struct Fix {
     Fix()
@@ -133,9 +138,9 @@ TEST_CASE_METHOD(Fix, "Epu8::last_diff_mask", "[Epu8][005]") {
 }
 
 TEST_CASE_METHOD(Fix, "Epu8::is_all_zero", "[Epu8][006]") {
-    CHECK(is_all_zero(zero));
+    CHECK_THAT(zero, Epu8Match(is_all_zero));
     for (size_t i = 1; i < v.size(); i++) {
-        CHECK(!is_all_zero(v[i]));
+        CHECK_THAT(v[i], Epu8MatchNot(is_all_zero));
     }
 }

From e85f7ccb2bf926976babd6273201f8d94ebcb127 Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Fri, 10 Nov 2023 10:31:29 +0000
Subject: [PATCH 105/113] Add -march=native unless disabled or not available

---
 CMakeLists.txt | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b2244135..819f2b2c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,14 +57,25 @@ if (NOT HPCOMBI_HAVE_REQUIRED_COMPILER_FLAGS)
   message(FATAL_ERROR "HPCombi requires a C++ compiler accepting the flags: -funroll-loops
 + -flax-vector-conversions")
 endif()
 
+add_compile_options(-funroll-loops -flax-vector-conversions)
+
+if (NOT HPCOMBI_DISABLE_MARCH_NATIVE)
+  check_cxx_compiler_flag('-march=native' HPCOMBI_HAVE_MARCH_NATIVE)
+
+  if (HPCOMBI_HAVE_MARCH_NATIVE)
+    add_compile_options(-march=native)
+  else()
+    message(STATUS "The C++ compiler does not accept the flag -march-native")
+  endif()
+endif()
+
+# Currently the code coverage fails to link on Mac OSX, not sure why
 if(CODE_COVERAGE)
   include(CheckCCompilerFlag)
   set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR})
   include(CodeCoverage)
 endif(CODE_COVERAGE)
 
-add_compile_options(-funroll-loops -flax-vector-conversions -march=native)
-
 file(READ ${CMAKE_SOURCE_DIR}/list_builtin.txt hpcombi_compiler_builtins)
 string(REPLACE ";" "|" hpcombi_compiler_builtins "${hpcombi_compiler_builtins}")
 string(REPLACE "\n" ";" hpcombi_compiler_builtins "${hpcombi_compiler_builtins}")

From 229f845e8d169c72ec46af3919877d707a57c2f7 Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Fri, 10 Nov 2023 10:52:24 +0000
Subject: [PATCH 106/113] Fix install dir

---
 CMakeLists.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 819f2b2c..8e921d34 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -126,30 +126,30 @@ endif(BUILD_TESTING)
 ###################
 # pkgconfig stuff
 configure_file(${CMAKE_SOURCE_DIR}/HPCOMBI_VERSION.in
-               ${CMAKE_BINARY_DIR}/HPCOMBI_VERSION)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/hpcombi.pc.in
+               ${CMAKE_CURRENT_BINARY_DIR}/HPCOMBI_VERSION)
+configure_file(${CMAKE_SOURCE_DIR}/hpcombi.pc.in
                ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc @ONLY)
 
 ####################
 # Installation
+string(TOLOWER CMAKE_PROJECT_NAME HPCOMBI_INSTALL_DIR)
 set(install_misc README.md LICENSE)
 foreach(f ${install_misc})
   install (FILES ${CMAKE_SOURCE_DIR}/${f}
-    DESTINATION share/${CMAKE_PROJECT_NAME})
+    DESTINATION share/${HPCOMBI_INSTALL_DIR})
 endforeach(f)
 
 install (
   FILES ${CMAKE_CURRENT_BINARY_DIR}/HPCOMBI_VERSION
-  DESTINATION share/${CMAKE_PROJECT_NAME})
+  DESTINATION share/${HPCOMBI_INSTALL_DIR})
 install (
   DIRECTORY ${CMAKE_SOURCE_DIR}/include/hpcombi
   DESTINATION include
   FILES_MATCHING PATTERN "*.hpp")
 install (
   DIRECTORY ${CMAKE_SOURCE_DIR}/third_party/simde
-  DESTINATION include/${CMAKE_PROJECT_NAME})
-
+  DESTINATION include/${HPCOMBI_INSTALL_DIR})
 install (
   FILES ${CMAKE_CURRENT_BINARY_DIR}/hpcombi.pc
   DESTINATION lib/pkgconfig)

From c97f740edd55208df0e60c46cc5eeae3bb0aeafc Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Fri, 10 Nov 2023 10:54:58 +0000
Subject: [PATCH 107/113] Fixup Fix install dir

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e921d34..fbe438b0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -133,7 +133,7 @@ configure_file(${CMAKE_SOURCE_DIR}/hpcombi.pc.in
 
 ####################
 # Installation
-string(TOLOWER CMAKE_PROJECT_NAME HPCOMBI_INSTALL_DIR)
+string(TOLOWER ${CMAKE_PROJECT_NAME} HPCOMBI_INSTALL_DIR)
 set(install_misc README.md LICENSE)
 foreach(f ${install_misc})
   install (FILES ${CMAKE_SOURCE_DIR}/${f}

From 25e2142108e621aea2aa1dca7e333931f7e6dea6 Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Wed, 15 Nov 2023 15:07:04 +0000
Subject: [PATCH 108/113] Update README + BUILDING

---
 .circleci/config.yml           |  2 +-
 .github/workflows/runtests.yml |  1 -
 BUILDING.md                    | 25 +++++++++++------------
 README.md                      | 36 +++++++++++++++++++++++-----------
 4 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index d0fa20f8..5a2f0cb4 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -13,7 +13,7 @@ jobs:
       - image: reiniscirpons/hpcombi-env-arm64v8:v1
     resource_class: arm.medium
     steps:
-      - run: 
+      - run:
           name: "Set up compiler"
           environment:
             COMPILER_NAME: << parameters.compiler >>
diff --git a/.github/workflows/runtests.yml b/.github/workflows/runtests.yml
index e3ae23b4..4e1d26f6 100644
--- a/.github/workflows/runtests.yml
+++ b/.github/workflows/runtests.yml
@@ -49,7 +49,6 @@ jobs:
       - name: "Install dependencies . . ."
        run: |
          sudo apt-get install --yes ccache
-         sudo apt-get install --yes libbenchmark-dev
      - name: "Configure the HPCombi build . . ."
        env:
          CC: ${{ env.CC }}
diff --git a/BUILDING.md b/BUILDING.md
index 08d10227..cd6bb1f4 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -1,23 +1,24 @@
 # Building HPCombi
 
-## Build Prerequisites:
+Note that HPCombi is a C++17 header-only library, and as such does not need to
+be built. The instructions below are only for those who wish to either run the
+tests, experiments, examples, or benchmarks.
 
-- CMake 2.8 or later
+## Build prerequisites:
 
-- A recent c++ compiler. I have tested the code on
-  * g++ 5.3.1, 6.2.1 and 7.1.1.
-  * clang 5.0.0
-  * g++ 4.8 and 4.9 are known to be broken (I can fix it if needed at the price
-    of some uglification of the code).
+- CMake 3.8 or later
 
-- [optional] : Google sparsehash/dense_hash_map, sparsehash/dense_hash_set.
-  if not the less efficient standard containers will be used.
+- A recent C++ compiler implementing the C++17 standard. We routinely test
+  HPCombi using:
+  * gcc 9 to 12; and
+  * clang 11 to 15
+  on both x86 and arm processors.
 
-- BOOST.test (shared library version) : needed for testing.
+- Your machine must support a small number of builtin functions such as `__builtin_popcnt`.
 
-- Your machine must support AVX instructions.
+- [optional] : Google `sparsehash/dense_hash_map` and/or `sparsehash/dense_hash_set`.
 
-- Doxygen for generating the API documentation (in progress).
+- [optional] Doxygen for generating the API documentation (in progress).
 
 ## Building
diff --git a/README.md b/README.md
index e431e19c..0fabb0b5 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,20 @@
 # HPCombi
 High Performance Combinatorics in C++ using vector instructions v0.0.8
 
-SSE and AVX instruction sets allows for very fast manipulation of
-combinatorial objects such as transformations, permutations, boolean matrices
-of small size. The goal of this project is to implement various new algorithms
-and benchmark them on various compiler and architecture.
+HPCombi is a C++17 header-only library using the SSE and AVX instruction sets,
+and some equivalents, for very fast manipulation of combinatorial
+objects such as transformations, permutations, and boolean matrices of small
+size. The goal of this project is to implement various new algorithms and
+benchmark them on various compiler and architectures.
+
+HPCombi was initially designed using the SSE and AVX instruction sets, and did
+not work on machines without these instructions (such as ARM). From v1.0.0
+HPCombi supports processors with other instruction sets also, via
+[simd-everywhere](https://github.com/simd-everywhere/simde). It might be the
+case that the greatest performance gains are achieved on processors supporting
+the SSE and AVX instruction sets, but the HPCombi benchmarks indicate that
+there are also still signficant gains on other processors too.
+
 
 ## Authors
@@ -13,12 +23,16 @@ and benchmark them on various compiler and architecture.
 
 ## Contributors
 
-- Finn Smith : discussions + BMat8 reference code
-- Viviane Pons : algorithms discussions
-- Reinis Cirpons : CI
+- Reinis Cirpons : CI + benchmark graphs
+- Viviane Pons : discussions about algorithms
+- Finn Smith : discussions + `BMat8` reference code
 
+## Thanks
-## Acknowledgments
-
-- This development is funded by the [OpenDreamKit](http://opendreamkit.org/)
-  Horizon 2020 European Research Infrastructure project (#676541).
+
+- The development of HPCombi was partly funded by the
+  [OpenDreamKit](http://opendreamkit.org/) Horizon 2020 European Research
+  Infrastructure project (#676541), which the authors acknowledge with thanks.
+- Thanks also to the
+  [simd-everywhere](https://github.com/simd-everywhere/simde) and
+  [catch2](https://github.com/catchorg/Catch2) authors and contributors for
+  their excellent libraries!

From 3d31e5caea34b3b370402dd2da67b8447df2593a Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Wed, 15 Nov 2023 16:22:44 +0000
Subject: [PATCH 109/113] Remove return type for void function

---
 include/hpcombi/epu8.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp
index b2eadca0..7eeffb02 100644
--- a/include/hpcombi/epu8.hpp
+++ b/include/hpcombi/epu8.hpp
@@ -128,7 +128,6 @@ inline epu8 sort8_perm(epu8 &a) noexcept;
  * @brief Merge two sorted epu8
  * @details
  * @param a, b: two #HPCombi::epu8
- * @returns void
  * after executing merge, \c a and \c are sorted \c a[15] <= \c b[0]
  */
 /** @copydoc common_merge

From 2e76ebc72ea00f60adbd17499a3b9bbefd12e10d Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Wed, 15 Nov 2023 16:44:32 +0000
Subject: [PATCH 110/113] Add script to deploy the doc

---
 .gitignore        |  1 +
 etc/deploy-doc.sh | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100755 etc/deploy-doc.sh

diff --git a/.gitignore b/.gitignore
index 359907e8..05cdd11f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,4 @@ TAGS
 
 build*
 
+gh-pages
diff --git a/etc/deploy-doc.sh b/etc/deploy-doc.sh
new file mode 100755
index 00000000..09496ef4
--- /dev/null
+++ b/etc/deploy-doc.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+set -e
+
+if ! [[ -d "gh-pages" ]] ; then
+  echo -e "Error, the \"gh-pages\" directory is not present:"
+  echo -e "git clone --branch=gh-pages git@github.com:libsemigroups/hpcombi.git gh-pages"
+  exit 1
+fi
+
+printf "\033[0;32mDeploying updates to GitHub...\033[0m\n"
+
+mkdir -p build
+cd build
+if [[ -f Makefile ]] ; then
+  make clean
+fi
+cmake ..
+make doc
+cd ..
+cp -r build/doc/html/* gh-pages
+cd gh-pages
+git add .
+msg="rebuilding site $(date)"
+if [ -n "$*" ]; then
+  msg="$*"
+fi
+git commit -m "$msg"
+# git push origin gh-pages

From cd71b47e680a7fd5e402bb194aaa413ceb5c1dc6 Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Mon, 20 Nov 2023 11:16:27 +0000
Subject: [PATCH 111/113] Optimize links in README + spelling

---
 .codespellrc |  2 +-
 README.md    | 34 ++++++++++++++++++----------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/.codespellrc b/.codespellrc
index 4e78859a..9235a942 100644
--- a/.codespellrc
+++ b/.codespellrc
@@ -1,3 +1,3 @@
 [codespell]
-skip = ./third_party/simde,./.git,./benchmark/python,./experiments
+skip = ./third_party/simde,./.git,./benchmark/python,./experiments,./gh-pages,./build
 ignore-words-list=shft
diff --git a/README.md b/README.md
index 0fabb0b5..a772e30c 100644
--- a/README.md
+++ b/README.md
@@ -2,18 +2,18 @@ High Performance Combinatorics in C++ using vector instructions v0.0.8
 
 HPCombi is a C++17 header-only library using the SSE and AVX instruction sets,
-and some equivalents, for very fast manipulation of combinatorial
-objects such as transformations, permutations, and boolean matrices of small
-size. The goal of this project is to implement various new algorithms and
-benchmark them on various compiler and architectures.
+and some equivalents, for very fast manipulation of combinatorial objects such
+as transformations, permutations, and boolean matrices of small size. The goal
+of this project is to implement various new algorithms and benchmark them on
+various compiler and architectures.
 
 HPCombi was initially designed using the SSE and AVX instruction sets, and did
 not work on machines without these instructions (such as ARM). From v1.0.0
-HPCombi supports processors with other instruction sets also, via
-[simd-everywhere](https://github.com/simd-everywhere/simde). It might be the
-case that the greatest performance gains are achieved on processors supporting
-the SSE and AVX instruction sets, but the HPCombi benchmarks indicate that
-there are also still signficant gains on other processors too.
+HPCombi supports processors with other instruction sets also, via [SIMD
+Everywhere][]. It might be the case that the greatest performance gains are
+achieved on processors supporting the SSE and AVX instruction sets, but the
+HPCombi benchmarks indicate that there are also still significant gains on
+other processors too.
 
 ## Authors
@@ -29,10 +29,12 @@ there are also still signficant gains on other processors too.
 
 ## Thanks
 
-- The development of HPCombi was partly funded by the
-  [OpenDreamKit](http://opendreamkit.org/) Horizon 2020 European Research
-  Infrastructure project (#676541), which the authors acknowledge with thanks.
-- Thanks also to the
-  [simd-everywhere](https://github.com/simd-everywhere/simde) and
-  [catch2](https://github.com/catchorg/Catch2) authors and contributors for
-  their excellent libraries!
+- The development of HPCombi was partly funded by the [OpenDreamKit][] Horizon
+  2020 European Research Infrastructure project (#676541), which the authors
+  acknowledge with thanks.
+- Thanks also to the [SIMD everywhere][] and [catch2][] authors and
+  contributors for their excellent libraries!
+
+[SIMD everywhere]: https://github.com/simd-everywhere/simde
+[OpenDreamKit]: https://opendreamkit.org/
+[catch2]: https://github.com/catchorg/Catch2

From 4baa2f6a0d4219df41dcc0ba752e9efcae492f22 Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Mon, 20 Nov 2023 11:21:04 +0000
Subject: [PATCH 112/113] Add -fopenmp-simd flags if available

---
 CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fbe438b0..f64c5834 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,6 +69,12 @@ if (NOT HPCOMBI_DISABLE_MARCH_NATIVE)
   endif()
 endif()
 
+check_cxx_compiler_flag('-fopenmp-simd', HPCOMBI_HAVE_OPENMP_SIMD_FLAG)
+
+if (HPCOMBI_HAVE_OPENMP_SIMD_FLAG)
+  add_compile_options(-fopenmp-simd -DSIMDE_ENABLE_OPENMP)
+endif()
+
 # Currently the code coverage fails to link on Mac OSX, not sure why
 if(CODE_COVERAGE)
   include(CheckCCompilerFlag)
   set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_SOURCE_DIR})
   include(CodeCoverage)
 endif(CODE_COVERAGE)
@@ -122,7 +128,6 @@ if (BUILD_TESTING)
   add_subdirectory(benchmark)
 endif(BUILD_TESTING)
 
-
 ###################
 # pkgconfig stuff
 configure_file(${CMAKE_SOURCE_DIR}/HPCOMBI_VERSION.in

From b3f3973ec178c51750d0855a1b7db6532ad02f57 Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Tue, 19 Dec 2023 17:35:31 +0100
Subject: [PATCH 113/113] Pointed README to the autogened doc

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index a772e30c..369164dd 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,10 @@ other processors too.
 - Viviane Pons : discussions about algorithms
 - Finn Smith : discussions + `BMat8` reference code
 
+## Documentation
+
+- The Doxygen auto generated [API](https://libsemigroups.github.io/HPCombi/)
+
 ## Thanks
 
 - The development of HPCombi was partly funded by the [OpenDreamKit][] Horizon

zhYtxZXT%bC+_Kz&Qw3#7J()f%K0X@QFE9CmlMhicMqQWfu~ei(?_p%bzAWNzD%V-t z!o6ZCHogwp60dwoRl_L4O0FuiSp zsq{tQ7-!6v1%lDdN!c}FS3SW?^IFOJ0Ro5azMv#`&o>t;gBf@+hoQeuWXR7$WNm($ z!y_ecYYAtPq?^U&x@+)}0{U+#F$^x~!3?F{Bq$UW@TYwc5|C#g%D+G|HM|N--7&nU zKS1+M4S!q81T3(LvXeA+`X);e5U?T45<*mrUi$Sli^-i3tOAqcH{?p#NJ|=pcfyZ& z>>3ZC%qzH}A$0O7+!nlzeas0wagH0Vc}37bj~0{cz4T5l7mIJ}B@y}g5OmFS0WA1O zg~DEhe%_PMTx~C{_!V5v+D9kd>=z$cNLU^Yjn+i^gcZd=GH!h==b6AuwFSCPM4>QF zBVn=Wt{W6eV2e#TxYgKvwCyNFk%Z3fg_k zRk*vj1N;vL?j0m`k{)QLAvt3QAtGphb?Jznte_)xQR^?K#_f~fb7)^Sk1U4JF^g0OMLOq1(6mgaV2<54u`FOWkn5WN*=RzMpP zmO9!kEyeZcFnmqMU+0q%6n;|rxmHPko(V1~Fb=`avD7D8e4(kd2y&Ec{4FBz*n*-* z7KA{`89~2b0R7Hv_f;;_8${zXKQ5WM)5T1Pauautvn)HXF*T#=1xWgw^T=m8YdLxcH<2L*kb|K zv<^B6qX4floH{7l1}0D`ym?54>eGApQf){ohSmCudc0CrM8ei4!3{5R%d&TYpJYQ?w~Y zM#jU&uv6nLR(?0dzJG!UBKN+A&tOCu5hJ0RX;|0Dxb(W$qN;s8{}LplA^b5oTcC3M z+&tZ^w5rnju%1p+0wEO*+O;u7BK5?1$)tCid|k1%V>J^UUB@RUEwD$>)`@o%$j4NNa!c zCU)$K)|k{$1}p_p5O-iJ?Bj-pP?{49bx4|_3?EU!6{QQCIx`(+FZ4TK=SZ(uOrIn5 zkWt%C_>xl{zPZb)&VE6jfRfL(jpWVYK$T%-OBaTw3=Caa__;@Tw;K|Oxj;RTb4Fz7 z^O0ICWAx`=XLm7#o*^&#)gceZYLFz zzbadUcMK6aicyFyGe{7P??VUWaz3P%z8{dazxK*942WiJMGne6Qf|+o^HYuGeCB55 z(YGX*N^LTkx4Izxuix)JjQc^~D|$54ozst=DKEYBdW9>n~? z;}CKAC>SB|7tF4Ez^E=8p*mJlbqLAE5|1)P5y<4iL6h_-+1xqUw8v#002ua*gpMhgrZTsepB#9IM=DjDR4=_sCLv~lQq>5or}BqIB6{C?IDf`>4a{E)MVRS6rt{= z>=&V=5qLe&H>7&ME<&{WtLGUpAq6+7gmm*(aX3)n*kvnra3GDc+|-5mbc5qtE-&e| zxYaqJ)-8fpFr~AQ+67kQHJ~sn2VURP0(+F%{vlq>uBPL&HOfG)Am3x@gTG9%w%`?1 zJh^NatUfM$P84?~$5US9adX=PGTOSl0vb4EzRSs7jQYHi47!5R z<_+jK-8{%ObI{Mtr>VdV2KrZ1=J;lCy9tDOem!OW`<58~UWMWB3d8if3d7&G$MBoM zT?@l?8L_fd73&c+7r|)XM3{gMPO^SMCfh-Rkg}9yPJvj)?8bdW(!>2B99%X%BzaAe z&+(fs$-TE&OTUiHpat=x%=(JDTL^TeEU%y_ITUTMmhp-UXZUx+WHJh2kA@egKjP2o zsK-i-DcCl^dAYCnxv`+S#Ok{jCV_r?DI_GyUe!FC#1x7lHRY6|e3u>*Wv9yF9Co$> z&THD<%5g4MIl+AGI7)Sz-yBTd&th$qT)eL$b~O5!-%lqa?ly1OXs`_+wT7X-b_9)@ z!RuwmX)lv8YptMpF-&QHBg|0+dZ=lls-OpIPr8YeA%|?BWXmyn5I~7!;bbwn#dgx= z4;B5f0z>G`2heH%o)wb9E=Kl@YX7_K4748hyAiqu*o78V+b5JbBcGD#!`Jh{4u|>` z9AnFwsa!RbKcnC%okXHs`G%7-^9+;cui%Q$J4-zuQ8-k;_wV6!9qAww1<=jY6WjEy z# zY=vQ*JaYdagKZVf`*T_#$_}%MhF6&SuqD8{YMiby4#H|?XsJyrs5h1cu26t$ex?MV z2hqoiSgtJI6Wq`R3xH`NAU~?IiX*HE5v^RRe{%myVaqzh$#r2_z8vN8y!6{I=qz^j z3Id`&NmuiK;>G;o92rRkm!yR-CneqGs3vu!SSay5)uNo^!cCuYO5w_b+=9SE@@J7e zGcCk6X_LgtVZn;v3*kQ)tHqu&ZRMw2My4`k(IRde10cy@1#`wz%Xg@Nc$!rdB1`>( z#A8c-8`%a@FQVEe&A}}!cnFTljxIV&xba;>f*-lu5Vb7+L`P2~j#Kd=XT)$zY1ztI=cs?k?!i z*%FUoOTN40#8i|y@k9BMkTkV!i)j3o1p4B%3hkub4xLP#K0L|p0v8wJgOm$B&T|_Y zCG(pBMqo(MIMhS_H$8%&1_Z%{ll%Vv4TlNxKa4KrXnFxYGtP2wZ!w6pDNkY#?3Vo5 zP*2vVqJ%;E7?hyx=$u{cuckDIlQ@FkIhv0mz@b{5)5Ayzdx?>%Oz-rs1qE&#N>8yr#i`D%FOH@{mZqG;^&afZ`KV zUib8RfVu&w#r2)%_1k*h^r{`K@2IWV&6g4oAkA4Y+;Q@gb5bk|!3cn56piUfsuy#0 zu>}F06cAZZ>HN9rZ|b8U$H~AI3Tl@tFu>$*8~{T>j?^p?XF>wR%n*U@aQ+%D?m6&= zm8Vpxg;Z6Xu&$nm>4ZrI!G5Xb&pfqosXK}1(b>5FpS&l5hkE;7678ELZITkDn(vIY zvKw3WoiGgsqcNkIv1e@wNs3$_=9?C{~|qr z;MRcsfbcQHIy5=age*0L&GW#_4C_Ry^CJ>iiTuFYBC2}UV28lRjs=-&2a<@9yje41 z1wM0cBM1kF*??#jqybDGI6@YS93kddHBLN`11LEa2uhleyg79$IXuj_QM=*!4(*o< zQ}!G{P$5@zsenU^XS?zq&Olv-4?`gTK@1L?jXB1nKrLRd#?2p|${MjVSQ!!$(asap zBv8jS$AR1m6bhi89tboJ@?_aU3@0+vj}unO3K9Y~Fl$uKExb@`#~Kd4VhKf758hS4 zVytO_?030{dIU-efSPQq)fdzO!f=`~NsuOk#IypbAn0UoAo7qO0AB#MV5`J>bUHB% z@F(kuHpH3^(hNuwWNrDRl)w>zI*t^yMo>dcd6@aR)G(H7OrV|07Ew zS9TCDjk#1)m_|sA4LCC{P=7jwL)SgTF}k~hhU_JvPH}@{uO2rJaJ;(@9K^z5P&FeO z#T$MR1V};lG%81?u{*+Z=;AmM;0S`J+LI+m=)e`&!bJhTx3XAOHDNFCAvJ(&AgIb~sArES z*qd5(C{$f;F#w|qz!n*mBOv(#FX4Kj9Wg(^ZG&V^BB;Tiz#rkB<~WF>JYJna4FtDypm@3$+eggWy!_3{0vog1VmieP=dB2zG=o%t8y6Nq90|pi 
zJ)Irlm^0QR9~C5<7_vjU$}g_H*@IC*y-IHqerXU9D46IYA|Ai4L;BnDO7I?xtIC!I z@bEyG`SoCMcq4Qjn`^oqAtBvF=XKe#Fw`zw4V!V?#|GG`aJ!BH;8P!d%L8}`eCId? zI)3J)fq>0=@E6Q(|3zS4-_K33rOh9c*Fh%%>%`u;ywoQPAUyAq4XlPTcOV9v;Sh*bw|fuLibhMiBIVD9E099aQwy%LX9iZh2WVE3{ibXWnr zICbwo|Cc9?!n5rI`2-K!2LcKnwhtr}JS@M666n^!27o9Cwrj^*;7ehS zZo+rs0?QLIACb!dB+`SU%<>+oh!ra2;vECrG6+Hi@-Lj${TLKmc1K zh~2_}yN)Cv2Cik;MBz5-c~$jbAdos`UssfXaRXX#T}27NvWuhX&f_Oh*BRgNzLNNSd|ft6>Ng%NOq!_gRmJOGzDAka;P z|08eM0Hg+V5;%AeoOY}v5@6W@kKk3oy(xn?OFFo7>bgtZQgTGAV?0r}v*?1L0Vt z1@$B0Yb72XqYH)qoyCP91O>~W08eno#|l~dPide63w;)YLLlRy7f3H>39{ zDg=CsVm7gDjdGUU!|!?md90U$z`J0A?ciSQro1TIFz;*l6gQy)DhNd097 zGoU7X_kfsDfC=G?6%J4G=VAV@_yLOnB~L*t85tJyoyc)N68Y~5r)I=1po!;*G&An~ z3m&oX(Gq-Noe>xtaH~Z2A(KFi4C{+32NsCIfPaMP4m6d%qBj?NbdgigUnQry!4#bM zf0IP*22;)d6-1%G>yYY(q@dEu-?aD;ORqElcT`qBJ`C_+qYqBiZpZ}!-~T3Zbz+Kz z445>!BNGnO=hc|FjwLv6!QUz`RsMJ6rN&>%1Hd)Qt1DywXCyl)zVkQjD@X%TWm#?~ zSf>Wb%#qkg*gEX|%2Kk~u|HiDE0*=E=|^PPn%Kd?Dj-cil?f+K7ia<_et_;_VF4^e z-RunzRRDyeMfC;_>^-#$1Bog_kpLh;R(-cq;^9_J#7AIG^64lu$>SdA77&Zh7zBbX z+1KwMgMh${V0RH>Pj!3l@ZjFzq7h;u^ET#qYlN+|5JBD(0tp#k2t+irg>~ahK-g&i zio1!e7qDEZe~G=@dH8C;7$H6YI53;fgCx{46vhD^(0a1ws6e2eb9KkSXsprMki!hZ z(BPJqJ(-RWrs4*Jh8#~@4t_%5{yRP)NF6yn9Hcnir{ld57wxE`|QE0fR_*OYi*5A}D%}^tymQY&kTs3L^dW~L zsPG(pU3E!I5QfgWChglZkqdoRCS z0_q^cKbzKkw~0l&x09o6<@)u z!*wV47vc{dO@Z2Bu;K>p&bmwh@X24v?0{Vu9LWOn2v39t55F6NyTreP*&)_#I80<~ zIvGR;hxNrd!!}+x{=U95URd`!`XCGmq{D(4uMg}7u2TzZ3hFhPgJg89O<+jn6Fou| zXJLBLzvA`6fs{nR(6I&yI6z>xJ&ko{@rfW9bHoM8QVbAR>3@nP7g=%Oil0Jbz+p=) z;i3m&_#FF|AXQ|i3e$IiJM5hS6c^TG-M(i^EEd_GB(T)haLS6%Kq3`6Xn54Azee%X zgll%vd=Q-3`0{Cfe+{Yv*z&KvZNzni?I71Ge`lN#*Oup#q}-=*SDE@(gz9v7uiYcs=?#M1a8NPiX^e`Z01r%WFckm}6=6YrQ1Rw+!Sn`SE-t1*t~i z174}Yx&6A2T@@n!1;Rr^S8%6)fV%e|M%}x!?tRba3p)J+)JcCBb<#gU9dtrPo%bl| z4Xf}%LS+)Z<{U(YEX=>7hhVYD{RJnM2yqo7mF`%VKf*RYr0PKBIX-$rHhx&BgzM6Q zO*&V0LZ|;Is!KTSKq4OpP%RW(gR1jT7D)n)CxE&-oII(*2#i&O1NZ$%pcE4i1o)`# z1+BXSi$?Zt`*N69=zJhp(cqa1%w|2A%5Q?~UGS7(G*L+-q#G(#z^x$l&l5%{)ZWoX zC|FeT2=9qXAyp2~aQ@vPqI$(U!ol{AM#ABu5~-W6s8oV@r~}{i{ej)Z4nps^K7>K` zjyi(TM5T<7ZfI12R1LsQH^>3bv2l9BF+xt?F0jy$?%LtE(FHGRV6_z{JBVlpZ%2Rs zQ5v@))tlLK*<+{^=WdHSXKpj9HwbdWLB)`#F$YB<3;*w^U0jAFjyLk>Pdrb8cwzR2 zuDp;?Da+TKLt9}-xCWf~yqBKx4j5?2bv9(9AMdQUD~z z%7`BUxud4zYxx zg~fJ8sBv~bZPD-p z9OS6yWD4gxv%$NB9&gc37(HPPArtKcg0{e1Eji08bTi?2{JeiCAq&I@wU=2~_`&r) z^$I`4-sW53gV&Q)?&*s3CbJ43T4;R3-yD^Bu$)U=ke8j&lSvCiSO?ObCW>Uj4rsK?qKP@V8+IVOcM_jjR!d<9YhfMTPI9Wrp8pGr?1et{B1v zw)s2(|0#~_EXl0moqe@q*@;yQC`1MWe#X*Qcy#Vd2)w5Tp>8094t%@!ck&djBhhA8 zB#}G*&IYVF?>=R;PTFEI?DU1aCj}3YZS~l~kk?7}pKguc#h4=gN#7%G2M#noeWVdU9{cte&ig zu>H~jq*o?w2Nbl$>}=TcC1mYkcttt&?0Bo7c3b9zS2gYlrG#-VkB*HtPu^kCHyM|5 z+#4LZz@TEt-IlW$LgxPO*zBFk1*B?C4kb~zG;khGsBg^!E;KR1F#MgXk}i|&&4Tk2 z4t3SLbl@z#FaZH^O}aOja)CiblDjPjzdmA3O{8*ESL@<;R=g9&A3Bb>0QT}c>V(vj zwH3xAoltspigbcO;}EWveas&ae=vUUSVbVXKg2ZxVfSX)1p@392MC0S#tecxariWJKgA{7Zl7Lp2RESJaRoOdG-C45 z1YPm!bP&Od|EFlk3$<5N6vH((gZ!73?1C2^ocH z2?G6XXao?928X9OnJsXBq)&}7QW3x6;|36!OgR5I8ILva5z^FL)60)YcOrW;{or;W zJ79n(nF@LtfX_rP;3@8QB);xyhO{7nDgsUb082Y2jzECS!CMnD$%_<1W{?P8G%5oL zxI`$#o{R<{zc);NyuW7$>0JTUL+Bvi5FwE04aa4gntS1}c)*D)27&0r5CSM5Lnnc% zWOi7BKVB8a0U?q(gaMpOqr+5XeZb-j$qb}Ka9DsRjY1A5A~n3I$O{j1EM#u(Xko|x z3C=GrS-=nx?Ug+(FbkqD2(d=c5rhh)!V8o&uqt@!PW4su zL70RO3#>t4peaZh801Z|1xfQl!Q25GPnc-(ZBT@h*!P0ZyBCxo+7oc%4 zXyCC6B>I8{`*VObjEGFIPPC9-W2j+Z2e>#8#uL2!;8ZLzw_6%Sq?39J9m1H|nRi9-+HOHz25W_r4uq36@z>`04T$|7WXug!NKtEfy+=A5E zKLe=_?s9#}R1)6b(vASqj{uxm#ds0S@pwqh4oE1pqb>)B|iFRgkY098(Sk zmUxC25pV)12M*rcfWK|6y=*->2!Rw36bfr#5bKNg2SL87AP-hoG;J3mzd+_-H9+xh 
za0W2|lDrBGYcsISDpfseaHX%oq=APJKqA7L9+;+G03ajFRfZ({1Mw36o}PF$L`y6HKnn~r?d_mlhEVZ@ zQRGtuG@LAHcyJ8R`H;j3LXtx%p)fkeWa&|m3X5Q5AC}SsldLPn*q!V-6aJk}2EW+} z18PnI?T3vP2P?2-{Ft+w_>d_i&mf;}5qIR(9I!YLPC@l$sShx-1EInI@uBoCOAc`@ zxhV)ofq5M89-DR;0>OtJ0I0_LqiF~G;%I?PQ%`eG3}8202Lr=l{|7Qn*m!-@0>dGs;)8cHdPVA70BKX07g2|vk zFtsyg&EDwuW{(j0pX0^9Iz2`WF&2P~f_V4?(ET_0!=E36KfiRS5`}AJ)L`nvoMQ+6 z)P+F}SWARF{$S6ugAQFqBdwB}5l#elqhdBK}|LICg1BMg=R z9JDx4c-e?gbn}Ai=-UI?&{O0(IiMTy)XnM(97ltTYEE=wU?3oM?`a_jYOhHm2q+8H ze}ytCj;xB890u$?Rrn$Hnj-uFdq)kv?V)A}&TDYw2Ej3eNYOw(X~H_wIEQSPBd3QN z9dayi55=<$c~!g~Y$gs;|MUz*G*K)cKArp}_~fd_$)#jvi2q5!Rp2VtNA3YY&qU^`^iY#^No zbt9+I5bEu=;-AlNZzR@f@sD*$eo-WN;|NeEk!1lUE8?ysu&Pv=nt6M7vb^9Hq3i~1 z1%u1AZVsuM05c+pViyK3G{7EUdt?zf+&*}7EY6rj1v&27ucR4SH(%>}W{LEq}EU-!#z~Ci$69SoZ0+JZk3v4pp&Jehh0Ylgs zlIf;E4D+j2u(j2(Z@TZvv$=!~-a!(q4@1qH#3Yg_5SBeQ>{}-BBLJ&)9SE>`2X}T= zH>I*92>?Oz0!Dm?(Cg9^q|jqE?MOGm@g3l}0P=X=Ax%yncnS2piGg4Q4v~cq05|5e z>V7FYU_ct&Be516cJLr;f@G29wWjP;1d;_E%x^3b7gbbkN#ObcK}O&rb;SnQMC|TwxOSj$%&W#GB=169c;ZvOE9R#Efi3wpC zBqsYKoUaEBKol934yPgO%8Ml&AuPuo0%+)C3TTXMW5mKY3bq7!^jVedz~c;4 zx60;NZ&Bye2`KMghoxpu$1vrA>aPHy28uMOX@rW}`7S6DgXY*_~jj>ob z{M>{@^d`}fX&DER!0upm<(n4oxge}3cpMIcFlE4i@h~01{$;U`1(8Yw6}8}xtZmfA zLyTmdfN*+LDh)m*yy5i%9Kv1=4Lxv&3p=BUl!c_Q5E|VZc%`vmje@A&2w$@9k$lL& z;9^g`d%0w2^m1Kp=zpsDPuN!QyCMzJ-4aBmxP#5N1@C zxP<-Bu*b`gTa*izetb07Kwe&f32OjGMN}Dt7(ga+W7(B|;W0ZmAH0XvnLs)NC%G#7 z$^q~1?+?6>fZ{j;7RTlp@H*5OM__t zlQLFi8Ue!makBmI$P9S?&>x?FH*AB2?AZaDu=POv$EP#4BhfNlj{<^UV7nhOqC-xQrE81?z#b2d;F0HpkC5rm%GkXvtDsErDUxRht7g+doj={({N! zGs$mHmWL;>*iPZWidjUWb_Cm5K&J*?KT2n84S&!9YrFAz&;nVh7EG=AY!VHG@9ao{ zaJ3=WE@CiY7aB0zJ6wMtJO#wLAQ6zZG}v6ls`GdTy;*)4f;UnF-4wQ?;gAx(ZNw90 z!ZUk5^H0r^WkIU=S4!;KRYI0(L3vSTXX1#eoVz4TO)uWmMQ(5(J|GE}#Wbyq$=2sudLh z1DtRjVQ5(3I|!uHynxXP{F=bvBS0#hmNM*Of@8aENx)SVv-E? 
z?C}J9QwtE+#iA345T*suy-0AzNF)Z@4k92iU%X@CffET-O5_;hf_-Pebw);auyr)y zgxh1_`b2;Ufb)uDz(WwrXN6Dz%t;-1e7IV{6Ai|LTDmNsT8Fo> z(Fc3d1-{Yr1XdMYrY!SDBG^1=rl?@fz_UWzYn-IMA@iaOIrg0jnap= z$S_)K{>r*2?Yk=pi`G?e$yBsnLMw@)brfD*6RoG<>XRtFx|DxJYtL0R5v?hA`9id| zyaoACT6a{)Lun_hgd9pY?rLsmP5H~Ip|$6!YKGDnrF0ldZ@!vXXsrbmqeAP$TM-JS zb!UMll!km2bx>M&6q7+|C#-%8O1CZrRM6TBDm;SPhpjBZr!RO(1QcL!SamNiyDcJt z(wDoQ14>gNMHEna@fHa{ZOp!0MIKS1HReiUkJ7X=9Xd)wzWmo{t$ET=qcj$ryBW0; zM{5Q7wL3C5qjciUT#VMZOX^&d4xO27Q5y25kwa-LIC~pPCx|Bj8CqlByk2OnyC&5_ z>Cut738fux<|DMmTuF$~n(}9^L2J*ILTlPl^eV0lZi=U@PGv6?^az`7aCL^9W@j#_Jb?$pXqqFi4ze9DomG zfXow+lTKnX!+?XxYYFfcdy(L%3gjo8s=XiZb9p26HM*k%@Z<;rGL1`U&A~zt3tvM7Z$QLu`r{;xJSah@)gMEr?7nKO_x< z0eDo2xMUFr4D2fH3|$JqtJu;)SbVSH09XR%BM|~Mh=P0rL-d@_%&{Or0C#EumRrq; zLS$_stf~P_m`-OhqazZMGo2nXDgmJ|a0tThP3Z(OpdEoA!usp`v!RznL;4k53U58OmAKb^v36sSY=t+5>YGX z0l${zE(>2(bw0@?QTbqRe{Xah5v@9{!1m4V$&BObF}h=pJW+eOt-bL}{lM3=T|dSsj(JrZL?9q$`7+bV8Q18ua= zC&PPH8rGEi*ze6&_0Fe%IO=bf%sV8fd5&(f?5%II;q_nxnQdFotSnF8T&21-{qWkc zkDSMBswvPZDnT|Le|nx3xncgMYR@64s{H8Jj{Lco8s>kZ)ZcvPw7Iz4pCw_v%AGiQ@EF?0z9 zjmwFPZ$~a0ty>c?(hi!syZVsz90@mPZEC|R{Pp4eH4=1w#tmCNqOQPZ)->_4Zfnx( zk9}Q#|K`_^S0s36NnBa>)?t%JuEba< zS7aOJ5P!C`S2>*9rv|;7duqlmWts7#j*Qyb9#ML=ddyAtteXC1tCnj=#@us-R;F+0 zXZdyG>&A%eU}*c*?emSoC(|r_9QEGC#NAn(@4cov6TfYU9hyZzjW{X?cn2Njgt$^@?x5iwI%mS1(x)8@g~&tokUqsc|>R}a`sdiP5`IqkV# zF>U|1BkhLq&oeysz8RWCa(&=xoUsvKamcmI-D>mQ>vl>d(;^OHiFPt;NoBUft{#;o z=>Igmu-$#G*W@!NEv`YP3U#spzPFNUb%U5g-CpNg+GpEXEp<9MgIQ2qaqj%sefM(r znKYz35gJa@!=!!pE4aPXuQq>}<}lau=(xAbUM#mYUpwzCCA56unbGuAXH~0XcTV;n z;^@5Vw%bj7x{^WC9-S6q>b8eR>>M`T^oyx4x;U}6oRYj_+x$(tZN6`TT0+w7PmBnh zGkX2H)?0TxpA1u&QhCgXQK%s1pqi{@vO3Wq=J5QAg0RxM{gdxqp5I0YoxXq29VxAu zBL=PQKd0XyTBRtoW%Q#nl6g|kMqH=}(0K4*ICa+HdH9%!Gg0?c^DuTh6VvWQEgP0GlU(JixaN$9g)JVGq2u-#iki1L z`{;TO7Ry&H8C-RH<~{KzDVJ{FzoDb0nj<@5O4!{CzHT1$3!Y+|lg<>YI$p|jks16U zzQ{KHX2Mci@f4*jN5?as5uYb1qVw}^?@tIlL^6HT=Je8v zPBHNdxiREkje+cKSJ&hXE{Hvm-4+>B#h7F!yY5i5t^44uPq7*@ZUg;3_!ZAc z@orC<61InMTmHODdt;5V-6yl#Vln+6J-G9F#pbmKvJG{I-B|o{%$*|>Cp*_Sx>4;N zup!G_GwLqVTFbvYwz?UCy;)iG*oq;2t-Yaby2SIRvUU5Xs#o6`DQbRoj+H}3;&hxz zRP-^c;)-{B^yqF9pTmIzi)BQ3Zu-14Cf~_%+MIW3i#MH{YN(zx zO>ETG)LE^Wr%udz^Uhl%{M!(+=-MAlbH|{#!$sGeyhz5OXIJ!_7C$O|^HH&%)%~s0 zuRQQl+PyGA$#KGU&tbnjnLn1M|M+_NIId{@^LJTkhV(6Eq3Ql^8=6a)5zX69ZLlxB zK>YAda!8%gI#SlxqGxRiqwN;&{&Y(UyJq+#TzRsO-;n%$!{~X<*>9ftqtF zzcjZnEbGq2`sLSASLW_~+?)^~FWF>YQt(5+yzuy{{>{y)GZT&mkM)bUnBeK^eEY+) zFO@g6htGjRww0%v+A!zp7~9VSZc-lDwxf_KvpHPRva(s*;9FBIF7?cN4RHnS6ixAw{-=!98YaHe)LhpuKiz#z zX}jdkev;z@*LP8JLaYFbL88%hBqQ!n*T5~EL&kOXSw;) zqxiIo<7s=N=bgHKY%`eiD7?>BATb)dgg#Hwv=aqV?i?`fN8 zs8!v+ce9C3Tppl%eV|F*l%X%?r>zzXe=^`q+=BknFYWf;j2VP^dJrOvbGok+`MP!H zLMii>(3GtKhei*>?NG@KAeJjDm6sodu2t{boE}y1Xy2mnT8rItv_1u!cR@W4a~mpoqz4&`ui{KtVTyZAUw8b z+8B=7Pw`o;^v+krq+;&?O|>Bp>+`1;#7y(M_CadG=XRz1A45D$1Cht#})?Kt?IRDI?^kw6Q$$BxPWus2lw5;;DetX$VyA`|R7|&H6e~d7H zxd&%9d#I(?m|YIvSB6iwJUn0)9a{PzB=?-csQgjCer}sERYG}#-R+kj4mO*~QId3g zr`_I38!-Be<^`j#o2C5sOT9>b>9a@0{Y|sI=bdan@k<|eOiJ9J5-Do-EM!$?z&pQ$ zZMW~9yH&G3Zn2H>Mm0-oroPU(pS!H|$%z++FL#U48E~-i&Yt_`S{IVa?nRIFfBhrt zW6;9&%c5wG<;lkrlq1_;eBL8% zMVo#@tLUDNea^ZSr8C^EeebQ>=C@HM0wXhNgeC4$-svHxA$6E5nPinWL9Z3Vw68BJ zp3qkP@#K6;hMnG(z4F5pv_u*!!Y>#m>{R-3WMXl~Ef3R~Vf{uc=C(;5kxd%pSRWge zJ|MI)>i*eA8S&%yO9p?ah~n-EV)*y#B6i zwo=Sx8?On!3U7I7Zr*Tp%YB;hhnBQW+vMu;Wa;N;Qy)FOxOLc0DgBC_1M@@ISJ-B~V!?{^>GI#6Gehl( z-`-r+Nii?YkKEo6t&ns^CZr+znRt1-(ZyAgU!^X(CTJhn!@QKcah0=~`P=+AZZ`&c zdiko<`dJ&Fo_=zH(uAMRWGb!yD8Svij;9bOh}RkV(np|$yx^sd#D?Iyd4F40nH zv{WX|`_5R=E?FHtZ~f<;zq02zh+JD_zEle{q%b7mN$u$?%fgZa((AsJVa@S7k2()< 
z674@mepvSCO_%IvW@@KcE7xTmZhfHbUOu{dY{0pX17{c2W}B+!j(WKxd7H+2-`Xcz zhTJ}q9qRr}%YshZl5ccr#IlO5=bJ)mECw{3zPW*>ofms+!6nhClnGidY##?yBnxah8_)YOenJXcno>Lz~v3;~4q2;aS7Ag>#sn zciw+neK;`Y%*tEKS_*c3vUgM3IMLByXi|PU@uzI#$SY&bGfVI(TT;CyA9nQBJr2!% z{47uD)aUSlnpM;)d+TEte;r@6VU+_d4mjm;y))~u#8NAXUi8`=^i?c&45bCXLCEnOVH zF)Qn^x%iu4Cz-8xKBzY(jojJlH2Bo5#23zM^Kp^;s=Wp$?~vS)Dp`0SNoQqg$?Sn@ zXQ{F8KO4PWHrfW)CSJcM$9|%BPD;eb5n28jrj<_y4UyUJ?Vj)<*lb)w$;t4gYDdbJMrzY^CRy?!+!2y?n9o(wbkmTW3$@&UM+l zKI-tA)9DVC6Xlurh8BI8yGM6jyWLR_g8`3!e6hu<#|&MPH$3y?kBUuKMjW$wcynfY z%92zIQ`2Y($4icxOm*$A+a{h1F{iC~AzQ5RV5|dmgqXX!NSJKf;L>3HA**AvS`2Ee zCwObUK2x}7-gjoon7C@+wS&`>-p|p9Kc}Xs9;H2^l%78|NnTY`$#uTTy#qs^f10sa zH=}*&gR`~z4Kp5B_g8va8B%-Vl>CR>G`A%irtE91-!4w@+gU|^)o-y+dIUtu zK0%uIdgzP#p9Al@fBI@~sDIRD-O471Op!sFO>d6hN}b>M{mj1Y`s(7t@8=aT%l)ND zq62?;osRrD|IHWiN#knYx@E7IZZI4`{hl-3IH&hlE<9MZ;l}aY4L`$bWuaF@%YddgwI$dpS4aC@ z(-d4%-!9#qxH5BG*0?saxAx&$`%jh?MVLwUQ&3o=`Jik|;euhU%UaK$OnjJIQn|x< z<35cy-uKT08XwNhOTF+$R$&D5SJL~zt5etIj*lydpCn<|M#UOqiko#-N_+e=F7r6K zVnbuyfac3pr*1^iBN-Mm)N1|b9{lhkw}g>5tdRL&?@M=yBPzcPmuR2*KF)CO)9;U0 z$Le^keJLS-@4Xk(YSmUr<%ct;9v*2}sK~TkRhEw#6TVn+$z<%XIUoB^mC+izHZL;fGs90E8I7?Waqz06{JFxjs=-fPe zL_;@W^=1__wcOYz(%0~nb#Fers&m{hu2A1~=iI0o!vP12Hsq|_^Y(OFib(GBixd&# zkxE;VRBjFZ^+(|INH7p*Mp_qm9wWwM^yMw=Zp!Lq9r$>)8&(+Y@-53~QZY!7aOZ$@k z%KTeL7Ob8?ln#@S!`R4^b*Oc}qWuTx8~HOHmE!Jfked*=NX_+--XKhWxAAMfOU`a6 zAiT~Nha{{dPA4wF6u$bhak%?P#dBAW)17NAFYc)o+57fJlZB?c-TG5c2VOWlBR@^? zns$5p@#kr?sV(Zp*WFU5ImlW($8NrzbkOzD!pR>)29=9xMa!?;^f7JcG{famia}3Q z9fJ-|4H+HUQum?w%3Rl7x&+OKGo~}1hwT4x@``2A^s!%Ku6{aUdhR3nYQj={f0IK8 zVuIyfW#oNca7OFFf|rx09?PAiE{gHr`?DmoPA$?bFY8V*-G0JDI<jOvDtA)IEe7f~8{dD9Ynd$Xz2Gh+$COtd7?C0VKmkFi* z+r+2Ti@%ZiRyWbZnn!SUkw znJf1b?xsK4JyyKT@qECJell^5`a@4_Dm}FAiTPYCW%CuEq`_)04Ia`&a{`a~O(oNZ9AXg3FKl&Ax@bZh!d9ln=QT;&b|KY`n5*&75z| zkn5RKFG`mLC2Jg-GRFUqeC>jdV-B9u8C>)5vew)Z!=+k&9wHA_+blA4w~3C}>{l=; z;x74Ws$}*JFP8$H!nj1W2XP7!H?GXRF?as6wF&{UI@c!7OKEvC>GIp1m4FkA<3}YP z`MhI~NcjMr2YEIx6H?Cm?v+?4e(-cljhXiFOzFi5_jR9s8}jLP!TW9sL-%O1V69`#Wob<&cnX{QGEGt>a^&tn}tiIFmnN#wS_d zEx82}T8^tjLp|Oe^{$oGD4cb(@WR^oh1xq@m@Yr+%3r5kVa#0iG@yB^g^zPlgOzT? 
zn;^5-yT+B5hT#Kc6qWVT^y8w(Puq{pnSA}&h0@$fO%_rM6JEB;r&Z8jytUSvzo#^* z-7INGs?q~H`CyCwKeVNSQcnK*QBX>|ZZ{s=6gK`ny;-3$^V=I-=H8fU{Wpc7ZU))y zdxxA_pZ6{!FnYaQyUpV-yR+Nt#m^o3aivOJ^(HnAsTa>#GC5uJlGfdGbyAR@}@P^ec2X?<-wKJ{xyTW+q$1Xl!2AtX8t++rY zV4kTo;o-x^V_wpxaP={OEYuYMNZ6|w+#Dpwl6-UF{555Gv@gS zGreQ?y_Xae%0@ZezLGfEPfZ!OOm{1xl{splirUxWp!DhSdu)=`W`+h`nz{DwfQ-0j z4b8@scaO$o?{;5m_C~QH+Q8NDC*~q$)4nS~7PCYrzYuR34#~FDgT87SVzPXniQ=y; zJ=(9{?~~b=YJx$QcP!nlc){*#ndS7QjcdLfTk(1CQuFbTE`4AmG&IkunYZG=H;cWm z3L^36E?eBcHhA|Wv)x|L6v~{J`CWP|wSTi+QqY#WtH0e%jBOKpyx6GU*Rk)nEw&t& zW-#sH>SC!_mEy^mjAaX;)>rmMetCo{n~m2j?>{@g5_8~9#ptyKTk12(;Cgud**IwR z$yqTcb}t{Vy(M6AqSN__6_>4#Ve^uzXrr3c4HoKDa$0dzkFd^#ORPJVSn3!Df({1)%De-8`3049V8lMQN$;%TwXqOx^Lx*#Fzl= zb0d@QcW`_B7K?cYSWaCqCopQf%@FZ(!E~$w~REu4!uUf= zpk>i@jFj)=pizM;!6L~nJL;^*-)-tAFY!HO>%z%H&NfAV82>n0O3X}MXI$A5zXaKU z36mlfeo3dBj4#=_R3!e!rsXRi=3pMX>Nsi6mRDKSc47OXT^xPJ7m zf=NEt3LIuk&~g}-9W!n7xl09EOWl&vszqto7Hx~Gnq^iEp${}(R5e6NI{S@xmEQg# z@1P=n`_#ruYvW}~8=g;D5^a#;^?Dn z$}|mlFfoZT;?r#N6FVzzSoq7B$+j)Ev4d`%6&cvnkVf#&2vtzLy6&pagQx07?TiEc zX|Y?6R}TsEk=DLDs6Te5vFG}Xi)}YUL_7QHVKSN7~SEg2xULv;G*(XNs* z618F`8T8wSEJ7RH&lPxzR#EfHFpJZoq9;_Z7JJxsHvCm~vqaCo*Rl=MJ|(;Pgj|V_`XQ2En(n~hB@e6K>lMCRc=XnijKn=bUiUPe-!Ckf7ygZM zt>At&Mw$}1`k3$4RAX2VbuyKQyP$krCOE-VMt5q~UQ zJmF;;>8WhVmK~oniyVKvFZ_D#=?~`(lG3(|0!h*gwxT!e(sX zrMNpkoZc@IuPJ@KiY&>bR2v#TsH!%2H&=R)*6#Zvw=cF&nrAd)rHFUs<{SR6CQip* zQ;3a-q#l^$`#tZ~0;O4Lp^t*Lm8Ue0JZvDnWqZ&H%0XG_7txFBmX7_d^wj+Lu|dgG z(?9JCpSP$o=jvv+11o(i{KbYi%~9Ioy5j7TFudF6XMrP5`*=%Crw38r8k%n~|2lS* z?vwM|5`))Tm4!xa$>`5GCbj-*Vnu;|bP`GG{U@z6qa!K@92+{{CUd%$%Hss<@{c-; z)Tiwl_H{|!`TO6~Dw3odSK$oqcu8wGmKnF~x!Jac(J%Z&S-Qub%+=SOTH@S3ISezs zzhQFbkiEW(E?`EQFqbvqB*LGVt^C+RS_G8zuI_u ztJaiR*FWC$yjvmXnKbmG?Use3Q|&cBUVGW9+}gP4&J@c}hQ{mDQX+oRRr(zmHhuYA?Pm`8o>9rnv<8XySCWzoHIK?SHs^X1j*|(J&KF<3 z`VmN1UYGQC#rZ)twPB~m-zLv653gm)j2U-$)86CHG>y-FjV25f50BMys`KnO#X=gQ z7h6)E1=FE5mv`&ExSV>b@#^HFmqXr9wqGEsALn~;u+Bym(%EIqDr1M&tBgLc3y-K> z_E<5JD&e+rhVpYc>1$rg7p&E?8gX~V@TY17pAn1d7Dv23xQlK%FPCs-v-~*mrT#6~ z?(e?+$z#i1E8P6pC&4uiYgG~s+f9rjZ~J_7zKn#asBFm70}G`CUU?+9U08I^yYf+p zsHByorpP3bsV{bmzH?4l9WvmqoZUl-QkQYfg{zjlYIgJ-`1k@t<#NqFaM8ZB!uQj% zGgpUv4>}@u=+eoLtvOo^Hy4khZV9fPFz>U$)32(r16R{d8w8Er;4}C7`kliX+7A<> z;>Kh@IlS6o&o6?dpu?bLEZX3*+z3&-rP5V^*u?wnGb7Ur;K!z4(QD9+V_4uQ+e$R4vk$8tvdG zF&2Zb9+{bfy-{KTj+?_0zxYz}ZcR_!_Q>B67y zNhb#mJMd_-N@U^15jt&8W;=h*y}HXnM#V%wdDbWL_MFI;n+aYUQ#WCvFRr+lma-=I z<@YOh2Rv0M3|>diyZUwBT$yuIpMBfPY>?0id~)tw-jNe0n@@~6YBWdT7zJ0Yt|NN$ z`@77~782w2-rZZb(|qf!AtgEUP4=qnkr$umxBu4y5u9$AB{fewC#!w;v3SG0Rm+l& zPfgx0JL2x+vn#geohZwcA`ikPhSeIcdHrKW>yDDwEkia*zRX&;%|(0Jh;fdMg{ySR zRzGNtS;55J_WbbEt?KOTCEMp5@+YPJ*n4@}2@?~|+5UQA#z&LStgr5WDeg@F;>r_W zn-)o2nd}_7@wV}l#Ti;8-7|$vUoyW3IQgzOKNTzE#W=NfUR=!}xkU}@=m*BXdvt8{ zsOCz!i&lG#7N&0+d3{!U_?>{1O_#EL-eH54F6wL?EUvdXJ%7)K?-+*jH)9!DkEVQ! zaJcxWkit@j%9XKF<;!35Yw)bbQeTg4VsmjkeMCung-M;^|9sO|M z#zsuJxLy9l+T2IP2!kD8bNrj^rj2~OK=i%6m#p)~S54moW`}ODXgmG>@$vdvP%J~z z`oPrZIRm7p>_55Vtb4;cpHkYQwXfIND1LaamZDYtIC%M6yL7*=?~;Bf)UC13wJ3T! 
z%k4nSl^18r)ol00FVd^kjd3q5AE*E8cvYN)`$g=g<5M9rmGCLUeP6-;>Mx2>^PR-X zMxU2hduR$Y@tju9fL1cquQ~0odQSAvsgj20efCsVlW22sL(|-5wd`wDxIOR1yH>+| zv#>ify(gnS^b4aZUsSl+62Er1F@Dm74We$fbYD!Ik6-Q1kJoG5xP!o;_g%H0<&L zl|`-7$VCoPd*%jJ&&+oD9@(UvJLbugqteQQYK=7Ch@H4Uam5U|<8KoNyPl~fj(7WX zWv=M(m@|eRwx1<)ZCbaA1jRnhD$oqG2_=r3GCxq$sC4YSW*4I&1NI*hEe!N3i?N+Q z;J^;k!YRKl#!#;44L+-xrhZ|4!j{O@CuMc?ee}bm+}%Z%nCKEFZ(jJ_a#gUE+{&Nf zJ4b)GvfA>(SoMSQ#O-cl9K$A@-03)G%9Rn9n+H^0E5r;|zV=I6?AJK^uMe;4Tn=4l zwn3xu+U*HdY4$E}r1rl(_{r(ry{)B1RP*>Z%yscf4MW_g^z*YhVeFAHUab@o1 z9W*oJ^wDNPXKM|HB&AE3R1g;&c@Uo4n5}EB4n4 z5-LmdPiStk$$f2`Wcp@|`mV9e@sp0v%B%D^eWXR{*=JjmFHA;j{_CaL#oO~n9XMq% zTe1~5WmMpa@%2S3A6q~iwlAN{jcS-(y zR~tRy;QF9kop~mQzMrZiN>yq!XkSuZq&VWsspX;8xdWm#p5L6*I5B$h7Pl|=bS^>H zp3NUnCUQ=xwqM9LQQ2(WgxfcsFY{U{-}E^vn{GN|?-~`Op@dubiK7Cof^N?ryz$(K zm8LrjS2Pcrp6U&KTzIGQ+PGbERpt9$%`%S4u#+*j)028r_j&xHmx{Yfb8sI5$JC4b zy8KATuITpi`tl0XV_Sy5*D)Pl6*C=UZRnDzseDR&(6qo|&vni!P#a!ue8ZejaWqZ( z+qE$2ov%ZdnBA^&u1dvZJoQmNB)UxEy86Zw1NLP?zar<}b+CH<;)JB3um97SCyVzD zf4ip*r@3JLfc+|VTSe_`J;sFYeG}tspa1G$SbWxGipkxLdrkC;WM5(0Zniv5c&|0J z_(<`K$1`Vbs@_-QG(Bad?ZC6PD+?0>T5R_&_PDTeu*YIkS5c2?1tUw^?XQz}f6Y&k z#81+rA1NbIU+$@=cv_b2C~eE)V?t zImJ)A{^1A>l2urO?Yhw-qPxX{ilT!yUUti|tql#@Xt{r~xKG=p2~q6@*X?d$rY$XjD)SYGT-Wk+empEARP5o1MMs$nmb<*XQi!j&M@d$m` zu=eRuuV!Z=6jSY%FjGSEyY+PZnH|o}y6vZCTi}Mw_RIDgEiqc5;$!NwmVXT@S)63gVQ zwrx-Tv3!1DcbmHRPU0$Pf3FbN@#_A^R*l9LXaWe+<^K$GLRb2AKg(~0o>)tz>mATFP@td#Z zwIe>pc;8;JxjxTNcX9E|L8&4W9hAS%8f3Gh%SX^8= zXeMpRg1#Zimmk_&LK6gOO1k_?|X$YzxMpj!qIaIGF&1S z=IF=Rr9DrcyvX^UzK!F7MForS&Pp%bm-$U;xmaW8?oyk2{!{(Y0i(TBcHGr=uwEv< z?^L?&1n)r6g4aVQ6~=!kiM$h5cXCxyj{T$=Gl&5#duO*7HCv+ZK+2n^0 zj7v|OPEg%~E0Ei(bnJQYm{;b$+2jMtcO5j84hDv<^3AVnsj-j?Y#iW*6 zPu|de_Cec?A;CV@H%=x;EFDpxwcysn)T)KkV%J0tT;DJ9Qq0;PDWeoLY^EEYaFAQ` zZKFYA;mPUA(=1&E?l8^zw%y5XoXyYKWo9l%&2q+Gv@5Fm^}CoBBHXAC++z;KXRRs5|>48}hKhVaICxG!M=GksR52Zsd_;8>=h7$v`fzYa)N9K+$PF zq{Hb4bB@Q0o6j_B`Q`aVm!?RrvduP`7{BfF{$Y&JvYePN5rgq@(pJ)OdrQjXCXc;t z-74uiWUPJ4&M%ee_x6WvnCW^;WXsR0F%J%08tR#FF0^S5^m+F3U*2BwYc;cC->(pr zSIDn5ayY&5c~;ejP5Ga{>hDXKc~H0EbmHB^?<`UlE{>J@p#Ay(N;#)(QM6qPPTRI^ z+gQ`KZQHhO+qP}nwr!pL2lwIr!)O^jNu|sWH#f0i%itgDKDe1LwZ4Re7yOP8kj{nD z@+=iYc7m|zSK*WHR7yO0)tARL|6x)DJKF>fsR z#pO1`bWN?wqaanOAZcZtbmIv*1$RBzKPw^-2|?5JH=iKAHZ*s&(1fccda*|7y@CTh zHWJAE1lidMBt1aTSNdn`D{Ibel!Fj4Da1)`HQP9Sv@7)zgV9Yv3qc6lsX864uS%s&V18!!R2bYDC6>tZZgJL9kFFkx=C{;gwF4U<;~8)`8Yw zQn0TyDn$bEp8#^LjKWNPx@xwxWj062_8k|+ix{J2b*m{>@!!T~vbYGB4@{|YEAJ|D zgp=421}4%j+%3cd@^ikJvQT}{LfHo0^85Tjj-H@D&=xZui`+lXEl}YIEu3z({fPg? 
zSS{i*ck0Zc9)6sPP7&&F$>9a|b}f3!dH9JgWnyu=t1(yl`x>@7(sDs8r+GKCgE`2L z9}>@yIWo)tqijm~3V*JNFGy>`lnbAuPm}1PAg*uvQ54m8d)m`NKf0Uj*;Vw`spnyZ zKr%qDsSGMG@S_t${tHIWLn%7dWqC;jbHng}$GJK>Pr15REc}7q4_*b^1T@WLJbH+6 zM?-i3KjndDgyh(Ln{!c(r%jOHA^)db6Td1t&MiEvyr>k$P>RemI~hW*{|?ss3;_W^ zWYeSI0IN|r)r9~`)MP*R5AqQ}TvPW+XNhWTZQSgMsu0tq&X6>tPJSXk_Uv-BiQcl# zX6MXz%Aa3p-rwIcuANy3zL>{m2^9djeIOn*az-wxW(tSJTC0=kfFuEw+afg*3lstj zx+_|~gAg~i{@tC_S65Fv`Y*`I*bn935Z!L3o^D=|L)?~P54aj|Nptp3-5GX6a`_y) zX@{r5jM!MP>2p|Zt#tg$Ma+!G6zj+m<1?)>QIqnNbV1SM>OV(CYP+w!svt7}q8ZI- z18+Sxl(^UC38BFKnVQr*x6NmCYti9=wN8M+%Y$!4e&6Eg+;Us3>WmuE%+dFF;S4{v ztR>Wa^0MMY{f4~4vzJ!$CF;(jvsElgZ2x7Wg^Gs2G~{e;Ba%!N6zzEr^J{GQJog|zgz`F)>K0pY5SSSlpkMxj5q?9uj=C-#5>Ris21_;8O)IFp8Q$>chJopK&Dp@*NZq zJcqb?O7l>oA29XnOamxQ$Vw}{t7)e zod$ov3O?!c6tyKmK2_F-cXFMzky>$3@>E3&a>&4>2apQJ;g>L57~Vw=R&y9Q1I_zQ zq#U)8Sqv8wjv`PX!XR?GIy2zpoLqgcycEBa{{3Ahl-Dr=mf)&WU#O8xuV28aJhv8p zycMg9)^(5BMH{kOJudrTI-*dgt#_sx|C&zOn{evD%&sA)+l^R-Q3t%bN7vEcEwQDry_HJIrbQ6>)Dpv~GFCDd zq$oQ<`r#CfKLOFf(7D{>SMt4dMx_wnoqhdA4rG^tBpOs(M-60Y-6_jWjL(BmpXK$h z{n<3#(2sd$)~1z5fnl#l-M!R=rwb#c^I_;8{K7@*4H#laS}I^D)baFWT*~M-OzDzg zS=(uY(pz3^PkN}EBLXZWDZiP=&WZIni^EC+>uyJRnzcDWJhf&86jD>@tu+^G83%=Ce(cGruE$Z0uVScdCT9iYx zZaAL+u4rVL-IF6Zww5A3<}(QRUvAxe@Iw1q-yv2_C5;1Q-a(rQ9+`M(E5kzg8oCGzCM7Nmc2K#^{W-N+gj!rZob`DkCF8 z=&<$Mmlvyh2*=7OI1Jc+byhwaAwQ2&)tjbGW|I}!AK;{nI{+V1y_=cs<0=g_G8EiL z$EoN^9><$HB9+dsZ=um>omHEq0yq&JSk+6?6GvHw0~^jl;+%pybP%w(-*zU7v$&+d zU2R8F3aUUuL~vx;`Z7Ef3DQKpm^lyr&je+_$3p4Hwr-sjmD z(4bCm@I$jzhnh8JnH4hFWUH_xj=smA^5s|YCzn?}sQg{u(dI4&Y5CIc+z}j2My=wu zZMO<8hL#y@I!%fwoN*NiNZw{}0;*peS2B3i4u|n*TH7}(iHoTae!n=sUXpLD$P(LT zu(p_iyCR6Uu8#!~ncJ`&SAqhh#{s@nf>9a zC>Q=`KY5LC?8t{%`ar$>Xry(^+$-FE)V#jRFC;;=fChZM5IhS4UE;%!5p6;!V}DND z&q1vBtT)oeVELy*op%edXU$a`#JPXB+OFdN5s}6^1-ShXk#WUTIHN95cq(8ImI|pA zU4t(`Xno~HT??9#?LA7M(*n5bLqx&+?zoB1Q+C1TNp^e3R}I;i2YPFtw7)ajNeqD; z$hK&Gzkhs^=pu38Cv7KpMxy3_G*Kti8^aIa59mbTWH0F`jbUw;@5_kYq%7vD?5W*$0V{fo^47l|%E4hZC`%>W)&CueK_W){sM*uE z7tYz{V7CVE__r81#fME;>pT<*7l{qWpBn(yS_;&Z)6I~nOU^*KRj8#rhO>H># z_0WPv4Xs&ICX!a3mCxs@SW6XeE2Iy4re!!!4pV1VB;#j!$Dy!~BiV0oiTJ&Q&{pZ_ z6Xyff){Rr+c0&(k|E_y0pYcs^!aSdDtT@_Ro~c@J-j=bh#~vIu^i_ZnoxMaFiXNS@ zes}T0BuCVzrR0%@p$bN+9JJhaJ|<+@=iT&E^(_))P#OD4`?hxq3jl3tZv7x-g>qUr zae&oE<7VP?8!RcYz%WyL8tV~NK001&`uGe2H_>zu_sjmMOKGK$6Ju+AL~WMmUvv#K znWoI-ANn@4v5s+@FEnnLusVQ0aFDskEajsd*po!=B92?-HAunemGsFdqhS<=Xv2HoAIQtyX;&vN`rulhJyh#=}XP1zbc8ZjewSqJ zotpA$VG7}2GXsE*j=u$-F7O<0?K7cO%$IZ7Yqe|Zi&B|K2_eP(2c4mzikl2Fs^-_q z;*Yw{OSRswcykW_(zAhSVF?hxa7K%T0pfhts1mW)GCg;7c>+9MbI&d2o}No^wl}F? 
zS+5*xenBSS=|1#E>5ND%U`sw}w@qwB8^$r{>!B^-s5Y1!H(l>=RbMt?16Wbr>w*kk z6Ib%?AtNy31Fm_N4ECaq;;k1FNHHkbgu75!;-!zJX%hXM#i%^eMohiR1Jz-a^PcG+ zbG(KTL&<>_Xt{lDg(&I#(AkWv;A0jYWQK_U1Xt2vWJNX%Hetg#^ClEkxha6h6t@67 zU5=0SV}psybBi@fUbU9tJ_!RjVJ%w2Q>$(WBnV*i4I{28#-$(k^kJNr6kDRXKL6U$ zUpYtfP+>{7UAT7Mqu)9yN{6>Vles*39w9(NU%b>?^2hRTg0_@`acl-0Gg(?ABxWpA zGx81FA$f9Lb}(9Jd7Uizp`*e?$hSfVClY=@1L55zns)N!K)9pbE2y`%i%E`X^AK|= zfL~HXzDe`sRC_*>)&ZX;6v!N>YFQeMz5KUzHsVSkT=cSY%P8BCO|YDY!1!IM;yfvy zGlsOiZw=dzd*5un2^J4SePp?Op!BY2&LuYS`=hCA3LvdefhDV)rbeK4itfP}z*03$ zdShKDdYD}j)34;6QxloZOh4he#Q84Y2@6i`G^(v!z*Y`)Mzp|OnK7fMP`LqoHpu0D z#lRvvvrL4SA($!PwsAB|TqPUzIr@~$o-2GVQ|3Ek%R{S#?0os%kN+dM!&B=U8;!5p z=a^^oY$7s|l(PCRBqxMc%bu5b5*?6i*8;v1c7PH335Z|kQzHfpOlbju!9d^kyeobH ziov4S6IjiA2|TlGKkZxu%7Q{MEK$Yw9#U|MvKWx(Ia85BoT8}plk3AocB6dgVmj{u zNgbQ@)ERn3_)&w+i>47)D>Zv-#; z!fJQ;-3-ffpl9uQW+te{NcM+z^IujJ3S#?I(TicLWVtGUSn8*e3%&xGxQOHb$)PI@ zy77PmswzM&p`%gy0DjLBQ^{mQeB8Lme}AiQ66zZKuF=qOW~Gb1#Obi^+&0HUiyq}t zUbQFX{!>BOKe5h}V%QS&^Gl8gtlfWe-sdXZ)iKm2QahCMVhndD6CGSgSuCsgSFF}y zRwDx(-G)JU=6;DT{4V>G20%0DO5s2CMUr9)3wh)9W{77LV@$>q^3TT&Z#o z+{?^*(HXEuJQ3?eHCtz%2yW!xfT|O>vFQx$Kd)5~;}i2Ms+J3cMPYiB@J9 zn+1KjjZ1+bHuN4ibt`F#R7{<7TF9DxAY0%v{9jn^g==YlmtUY?6)| z>rrAyMv$N0YBK~S?IvCz%Nar}hTnvv$Y{jI2NQUZ1eKi=Ytm6_#M%%{;>@wBQHvA* zp=F81@2~absKP;R8+|*lRnX5X$_IP2sl6uL9Mnzcw$174#@O8?eoFy&qKK`T7Zwi{ehUwJcYe($0vX?eYYNosf=W+Sc3ZbxGT?sklkSXnSV-kpnO*c>XMtKBJgxt^H3 zy`pZzulHp5j+@r44B@+*0*IP>^tQKz?Rik1t#!_fgo%M;_sfl%;HF?E!O>+2)MY3+ z^Cc1kd)>MsruxK#>0hH2zO0*gcKg%#@U}ExY%Go$0ZvvY)cwuEE&(DS@VNXbHj>4yup?DYR3;M@Qt+|8oh7&$8VBvNw|g% zY=GCHrd+Y6c~ku-onCAz=M z{(M40Lh)ZUPtV->FO~!W7x4(6dOmDOK8joGL?>Dr`^l+vOB|-Ew#X&AH{ot{+QicY z%szFOo6WKYWm;Th)%xd}D5pzD1PDfCsLDMki8cnx2WpLy?(w5_v zRKx$X!Br+PV$Onh8sD}`7UXZX?FC~z$A?pRoNFc*J(rgaqw{5E6|biUt380r!JU-F zpd+Sl2SbecK-#|o8Pl1nyT*J=JGC`}!nVv_dzj8Os71)v8d!%cn(yO^M_tdZYk(W; zxkfa6BGK>|vA&ckq2)UlujfPjW}Cwt|BdHuzajmEnw^?j z{tT0vp(z@VjaJbOie57gRO!m)qQ0hDgJ*TO!g#tSnB5FpOz?>2y8~)XsS{o^aVR^! z2C(<$8SO-Tpsm*Cp}{0TspW+CfkXfZf-0=Cc|A6#h`mAGI#jHa}>EGi4FWZq~j zZ@NHab~+QhMZux77r_x~TmY>iX+hSVYn80x1{(PzKE`R@<>!KX&w9&qyJ@dPOI${G zd3-%gV|QLRZUWZOOwx2-g4AVONraloK_J03O8{&r%YzdUuTnvhIW$I~KwQ+-0YKu* zYx{6&y}I$QWT_8Q3NE5C-2!2zF#7s%5VDD63TSfHoU`U73#jOLfvsRYNc|a3?*~@a zoDmB*5_zLLF>)gn;xmt)&y*K)EaILNK*r(B>~R~-cfm+uXsy6zH#+Zy9ur?gO{A*>L%_x+Q8)sTXbj(gGSYU~?&~Sr14zr9 zJ3#nW`(TuhztW^4oTS4U?H?_v<%h_Zol@4vyowdq3z*rIgYC^v<%#y>x`@y6R~lAJ zB&e_{HazgCU*x|YX4JcoC=Fl>T)Qa?lyIG~@N`ky2=$3!@_jowY&u5Fz?4Q{fQPf} zaRsAPIm7h#J|AubfvqKdp9tul6LoycAClP;A0l%07mYRo=H+>gwfA8Mt5?1MxbX|EN9#dmwv`h}c_lS=;PMy1Fwzfw zStdp3JgXqsC*P%v<_lrB*yr}Frbdcj-D~my*+?zHynYi6#lja;q+?!z-viOi?{{cG z8u_>8mZuZs3ku)HV@&tV2OBH0{>U8Pp>o0u2hAkb$koP&yim=6=SL>GZe6od3?Eey zSQ(QMwjj8F|MGE_m1${R9Cwb)O1_-Q;g2}h>M$v{fhx{{mh&YI687$MEMfxF1NHK~knGCJ|3OVIAd9UyRJL%wNLo_e+! 
zxkJq-bi^I{_0Tx0WDfK0ToEGlS0mI4wHYI&^-kGMtI#!H{hEL(J~Ld!(obJ0XRI|2 z|5v|8OFj^%becx;#-jrXO?JvA9(6%QAgc*`-HVIwZn$Cgnw=)$EDV203*Q%j15SaO zQ*O|vbPFAmL+FHH>wl89WY2|!J!I{RADA0|UE}o}c&^$xWaYQXb9wKSEU4x?kEEQN zKWNN2pcp6y$Fqu^mQ|pW8snTYxHU?oJZXPwU*q=W#qX`?brjU0 z+z`$?l-)O<_{}CVZml@&Dp_DL%B6C?;~De+ z^)F39gz}|R`ANKWqcK$Z38AI|;W84(w}jQ|6FS>vd0&J@jHM0r^e(+1lpe?Rkhq9& ze=_2lE_)f~7*Hd#4I(^&YrkG^&06o9Pzz5XBwD$tIt_$(hGKTe$Jqz+8--K`WE*@h zPqn1ZmjECJJIdA?Y3Q}Il$vV+u-erXd13|Oy6abVFyzWPlC+ur9F9!H!sYmruGB1; z-*Yp0m*P?FFJ)xNm8Zwn%92JfCO+U1u1-lwOoY-0MB3o=zf=T#piw0U=MHnh)3=$$Asvy+1qqtbub=p@6^^c z?g~#}No0RCa8f2r^18fFua=*A=_VuSjFyVcRLd*itm5Zu5w2q(avp!Z?>N2zm4sJ( z@9*C@(1r_29!`Q>Ac{Um3n)Oq*?SXXW?8*P72KUFi%e`q1fHp}n*eeZ_=pICd6`Vt zKjn+wdpQ21LX6dIwWryqLJ^{Qt`ykej2i{wJnsvVbKx1y%9gqlKzqaylgGX!V|wb$ zNVd3uyjEL-e9qqP_|iT23s&liKULS!6M2X7X7NNxj6`vU=HMp=#UL?~RS;D0aVT}I zV!*W;$`{Z+O3cY4#=&eFL1+QCN&Y8-_Nk`7gLAcZ6qr2W%pUC*=>`Uf?F_6*v)ZF= zD;~@9FK?FVHxfuVz?48^c%>JJ+9V|+wv1R$%N4I~DKfEJkX|$CVdI}dy&6>`$*U3f zd3)2sgOOW%pUv{y>d9|K8x%F6=so^xhTV>wj`-#VGqaN%}YY$u>f5_^@U()`5Y)WKuPq{Toqy31E`#0v@K1-LIEY9v z)teV9c7!T~_r=_?k#k`I zTJF{9ob`pAKG@eej(ytidgMo|Tm;x@m(%R!em8aHNe@L{-hBRo*(N#t)c&?qjX50r zn=)p?j?EzK5Ud+)q}Y$l7}4PpCVp8{ll580gg5bZaDmkDeR{-V7j5Y=Lg&a}=vVu8 zhc{vp+e-N-T`bn&34JPDUS9tk*^Y}7!s!hoP)SwwNC-MCYW-Lf+WZ-4sh=XanK7-G zSdc^I3eFN#u$}zDB>yAmZQNpJ3lfjAV(c)F0;BN-1kD25O<|3@KvgN+|KnSuxFQwg zml;MHMT|m%c26JV)zdAV6Q;1R;fo%f8OP>~iKYn$xr7t*#QK%<5_KUTd5Fl{V%*Fx z2EzM*#jiGZ|AL0}^mAho5JY!8Y3kw8`d=jfwoyR5dG%{hQwQUI{iT%+FnStqj<9XrN1hyv^#9%+c50>&#GmgJ@tSCu3 z4kg<;D6j(TLWGy5OW!DpP>#O*y!SHzf8`CP^RCvknNA5`=Kbu)RZNw!8V3W7@vX2_ zoa`ZRdP#4tYGs%~v2$Tum4Vg&umAaer)UD9G+;d&eT*!$l-yTF3*oU6EA6wpzz0sG z-DPc!$emUo1YakE%zSdou%L=PNTF}(un7ik6n~ViAD7;2+F$W5K6%()LJ!)oCn|-v zK=@uGg)BPr-dnsjHv_ROpbLhchEJWX7QMp&=y=WXUbUvm^{lXKG?w>hZ-)vla=~n7 z%Wkp3Z!sz-)t;wqq!SJ>XnS$IHR0|j=9Su5&Ov{sA}-bm%K{Nfl#C(ebu}QEoP$v9 zsOWMKR^O<_C%vu|^>&fyod;W>9>HJ3jrkYR0dt^~1|^0I5k9xh7Lwgp*k){~S6w~A z5KQhBS9oxWv^CaSbC<wMha!do?}t!7>0@6iOC$WegjBl!)pZn1^AaMH{|1+EU~ zH(v#v3IDSP*j=jibSPhU#Ci!)T;Dz*t=+miCFw7Q7NrMxuJE?}P6Qnqfq|;*L4YKP zjqxSowi2JlOCMn=BHodXw_~m$_{dw`3I`cHm=~&rT^Iwr@IKXnFis3S#pHCIf__c>^m5DF=<{ zOioGui!h_anYf%nRk}!|V@#Nqv2CDQkF`|hF{qMEs74r4WoveBOg8mIx~vo&b$y#q zB%iflC{;-IK*kBDnte#;)W+A`zxi1=fz3t7Vk%)^M`z8sGxQe6A~>pe@grj(KMvS; zZ5017JF7qm1G+=4@uB8qai~R9%iDldN}43WNYl1_4 z)Fu@}FWawykDPYHOx+@>XUpm2Ipq%hhiljeWH4nl?QV<7zR>rYXpJ-7>{4J2Uhnp1 zk*GLMrImmaJgDo|T;h}c0u zw2KBqWHx9vvAcillz^UpS=w1w^4i5uyrW^9LCH_P2AZHd_#d6v>!^JrBmPLG6%5g9 z^lNENQb8uS)&+mZK@Wdnz>m=lR;TzyJ&2oAk4=!8qLPCq%!$}!b$Vi>qL9jEf%RXw zViwNhf&5M!|17?yyo1gqREM1Kr$Vr_yq$!pkBG7z1WIJ6*D&H&U<>r{zq8Rg|Kh(?}=(Z#r{`Yukort99}fmF{b zQ~)DnNW8Er3;F3ySI~Y)15>Xgk1xNDHHIqim%-a~1%J;N4fd@3-vkfIquZPbznMm_ zb~GbznPo{3p83Ux_$bBh#P8Jv{g!hEuR7Z}yt64$)wU_d-U(|S^TKj2+aWWs>n>;E z6X+aI;oTO57O)vKWI|n&-ZRfQsx0qIy_t!H5%xp~Udg6GrHAJI?}+REok&Y_eOgOm z-AwL7a@VY9y^s^@A)SKNtbn?uc?Vwk!zL#FkP$h7Gn$KD9WGMai`0_nd&Ws4|q3te^>8;(@Z-(H2EGpLAM9HMhTH|+E%Ol(NTJ6g3Z?|ln$G&oX+*6JL zSJQWtGBqRTz_4rjKo+(9xU8r-O_0rQL15RPOUu4$+^$N?Ses3_Fw;mI?0q00A(P&I z&*SLY$wTKF9ln0V+HI+&^AHJlbRF^kZ-UgNL9_0o77Qz)A^At|gtf8^=JjTe|6hiF zobMXliBY=l?XxQ*a2b>5w^@Gp?1A!T0#eOt+wuDON3-f{VKcb||_?{+5nl zn2N&((eR!RjJ{=ajNJf(Kgm9vTe&MZ^VXoZ&YDj?qXrTq8N7J0;9#Tn-Tv|SL!b=*p zvSjlzv*79{187n`j%{dH+ee;GQA)bXYtaWf3^A2*MZS$GNYohP33}OT2KOBx)@i_Q z#Y1Mhu}uZcI0buX?G$gxYVz`d9+zu$C-!5km^?jK$&Rp8c*S^SLb0hD+AOXLqX{e` zjwEPws7N8Deq#|+>Y8hsO>`78KRIKHimCTTdpR%X zhIZFeg+?U!j&Sa#hm)`4=^mF%PiPLyQSEU5;v(>iPD){P;}T6-$OSF!+dd1Ocvu== zLm)C*yNG>-!p5#Rqlm$2+YnHx_g(j!5&@Jk9V$;Y2lojm$4}7NZU)h>$*F11`m!Ca 
literal 0
HcmV?d00001

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 4722a9ca..b15aaea6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -38,7 +38,8 @@ set(test_src
 foreach(f ${test_src})
   get_filename_component(testName ${f} NAME_WE)
   add_executable (${testName} ${f} test_main.cpp)
-  target_link_libraries(${testName} PRIVATE Catch2::Catch2WithMain)
+  append_coverage_compiler_flags_to_target(${testName})
+  target_link_libraries(${testName} PRIVATE Catch2::Catch2WithMain gcov)
 endforeach(f)
 
 add_executable(test_all ${test_src} test_main.cpp)

From 3f0342ba7d9a15b45a33d839256c5a9574183126 Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Mon, 6 Nov 2023 10:22:21 +0000
Subject: [PATCH 097/113] Fixed coverage CMakeLists

---
 tests/CMakeLists.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index b15aaea6..c8ed2865 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -40,6 +40,9 @@ foreach(f ${test_src})
   add_executable (${testName} ${f} test_main.cpp)
   append_coverage_compiler_flags_to_target(${testName})
   target_link_libraries(${testName} PRIVATE Catch2::Catch2WithMain gcov)
+  setup_target_for_coverage_lcov(
+    NAME coverage-${testName}
+    EXECUTABLE ${testName})
 endforeach(f)
 
 add_executable(test_all ${test_src} test_main.cpp)

From a3d06f7f542eaecac0c81986a27ed3cbc869d55e Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Mon, 6 Nov 2023 21:40:39 +0000
Subject: [PATCH 098/113] Using test_all for coverage

---
 tests/CMakeLists.txt | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index c8ed2865..29f2777c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -38,15 +38,15 @@ set(test_src
 foreach(f ${test_src})
   get_filename_component(testName ${f} NAME_WE)
   add_executable (${testName} ${f} test_main.cpp)
-  append_coverage_compiler_flags_to_target(${testName})
-  target_link_libraries(${testName} PRIVATE Catch2::Catch2WithMain gcov)
-  setup_target_for_coverage_lcov(
-    NAME coverage-${testName}
-    EXECUTABLE ${testName})
+  target_link_libraries(${testName} PRIVATE Catch2::Catch2WithMain)
 endforeach(f)
 
 add_executable(test_all ${test_src} test_main.cpp)
-target_link_libraries(test_all PRIVATE Catch2::Catch2WithMain)
+append_coverage_compiler_flags_to_target(test_all)
+target_link_libraries(test_all PRIVATE Catch2::Catch2WithMain gcov)
+setup_target_for_coverage_lcov(
+  NAME coverage-test_all
+  EXECUTABLE test_all)
 
 add_test (TestEPU8 test_epu8)
 add_test (TestPerm16 test_perm16)

From 706556e3cb16390daf5b6676671abdbcc9313f1f Mon Sep 17 00:00:00 2001
From: "James D. Mitchell"
Date: Thu, 9 Nov 2023 09:07:47 +0000
Subject: [PATCH 099/113] Remove clang-format executable accidentally
 committed

---
 clang-format | Bin 3144809 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100755 clang-format

diff --git a/clang-format b/clang-format
deleted file mode 100755
index 2955c447f585bf96b86c1da914a44250207608ab..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3144809
zbmmKpTlQ;(zO%?$3G%o`P3m69IH5k9%LH`nal=$^gehUmjGkm=@#yDIdK1Sh6>px^kBHQqEd&@?42ccIB|{s;|?Z zAD5TA#b0_cawvD&;MZlqA^l{4%PM}0-fpA4qJW+g9pIVZQ}#QB^LklG-8+JH4S3s- zABl=%@Xe^b%>rMtsxZHSGgEO(+m`Z6#;Tdh{3ogB+pEvB;q;-vE%u_t!dwIIMc~s) zyXWLxy?3B}T$iv4#9z?UOy;>N9j5%}+lC#2E)<2X#?qGdaa2MK#?4dq= zx5}9eCS>LLXSs`m$huoQA1nTKFEX@`^?h)x<pwXlsew_r) zZ$}0?p?iwv7p8dkzQB17vK~M^X-C&#Dc&XUgT!VeheO&fM{Z6+4uO+|QI&ST#QhGD zkABF^UR!uc){DB^RLOXFR_U zEngyUUFdT$S`zQubjbV_FMFO@BZ$?@Py7EDEw`$c%0=+sm!Y8vS_{p;3@s&xMf8k? zwnYvIk4~a)IXNHXQ1thp1MRdYF(i>abKtq9*b}SBUt1NSsZ{u_QhX|jO?imXO8=c&KYFgA%?^Z^+DU`DTkJP!E-!i|&H=hK2%Yg3+#?TFn zuaK)1K+a_{ZX;joLp4TQ2d+`Al}5iAw6Q_#C-V4K(f2C)uA!cuPa*5;QPH906DZF) zn8Ta_s^{>F9e4uy_Uog8XX}~A#1*`ny(xAFaEd)8Ic^Qeq(t*x@+>Q$-GwQ?Kd#cnDsN8ZTz-y)XyEyf~obAcm@_Fe*(V)77*k>$wX z-OxgvtUrdr+m3A@@^m)5+yXDvk!#r?4V&>?ogh)5;$srYdYV;O9qVV82`0A6MIUZTW9kip}A&16vLy%@JTT=c}e4qvtzDf zJ){R4ESx8w0)|kY+z!3Mc@ldg;mh)&0k7=m>%muudtUsP^I;wC*>L%$5@$|guIBLR z#g`e5Uv12*e}rG7;a7N=92EAzK^I*wCH40rhh;7o@~n$IL6K3D;K7o)lU(AzMPKD= zu7tN`-=4?}>j)*clbrZob5gg^z0Yj(3N7-PvsQATY-%HCm}z-FsWOg2&OvAj(zJ+v z?vK*<0Q#T-JTPp|Y$y3Z$?*CyZQntH_t!r&{F?;tVSb;D10@aR8} z*f&@Y?6ZNl%VdqVkq4yB4PdMU#xC-RVi|)6-=Le{8&p>+`AVs81JBdUlU>Y{dFm+F zuDUXR7i0f~vF{3w-J?E@+>NfRgC|x2dwEKX);{uOzI^5~r?-NmI_Zmfi=L^HwJqs? zCC^00%bc7`T{|=g%|n_8&o2wg`_McjIS)x>S{LwXGA%@&%?;-2hRZXtn~nHmOi-Rl z9+O3lL7qix@+=2gl|wnhC)!Dl)rY|xE7o741?B_r%l`!PF63!=?v%*Wi}p|3vaeEP zfY?;M<}1A9D~NBV_4^iagFfa+5j>e*VC(n6zrt4%4+*DtiHSX0WxpbB5Pn|t|K0f} zvDNVTCi@~~PC1czR_HBbGLXZ~;T5<0b9bTe>A0*B9Rwa?br{CCrHJh;t9%XM;G;>{BS7hGe6KXk+ZnS&A!Y9)Sq599L3DXYhEmbB3a=W25vnq%+{))Eu79I2G!Zb9O0bLG=$omy`{R`NDz6yhemknO`Kdozp1Z63 zuIa~#EzK>rO~-EDEqf$SV1t;Dn>o}e_c}ZY